{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Example: Reusing index structures\n\nThis example shows how to reuse index structures. If you first estimate hubness\nand then perform kNN classification, you can avoid recomputing the approximate\nnearest neighbor (ANN) index structure, which can be costly.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from sklearn.datasets import make_classification\nfrom sklearn.model_selection import train_test_split\n\nfrom skhubness.analysis import Hubness\nfrom skhubness.neighbors import KNeighborsClassifier\n\n# Synthetic classification data; seeded so the example is reproducible\nX, y = make_classification(n_samples=100_000,\n                           n_features=500,\n                           n_informative=400,\n                           random_state=543)\n\n# Hold out 1% of the samples as a stratified test set\nX_train, X_test, y_train, y_test = train_test_split(X, y,\n                                                    test_size=0.01,\n                                                    stratify=y,\n                                                    shuffle=True,\n                                                    random_state=2346)\n\n# Approximate hubness estimation: Creates LSH index and computes local scaling factors\nhub = Hubness(k=10,\n              return_value='robinhood',\n              algorithm='falconn_lsh',\n              hubness='ls',\n              random_state=2345,\n              shuffle_equal=False,\n              verbose=1)\nhub.fit(X_train)\n\nrobin_hood = hub.score(X_test)\n# The format spec must sit inside the braces to round the value to 4 decimals\nprint(f'Hubness (Robin Hood): {robin_hood:.4f}')\n# 0.9060\n\n# Approximate hubness reduction for classification: Reuse index & factors\n# n_neighbors matches the k=10 used for the hubness estimate above\nknn = KNeighborsClassifier(n_neighbors=10,\n                           algorithm='falconn_lsh',\n                           hubness='ls',\n                           n_jobs=1)\n\nknn.fit(hub.nn_index_, y_train)  # REUSE INDEX HERE\nacc = knn.score(X_test, y_test)\nprint(f'Test accuracy: {acc:.3f}')\n# 0.959\n# NOTE(review): the value above was recorded with the original keyword spelling\n# (n_neighbor); re-run to confirm it still holds with n_neighbors=10."
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}