{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Example: Approximate hubness reduction\n\nThis example shows how to combine approximate nearest neighbor search with hubness reduction\nin order to perform approximate hubness reduction on large data sets.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from sklearn.datasets import make_classification\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import train_test_split\n\nfrom skhubness.analysis import Hubness\nfrom skhubness.neighbors import KNeighborsClassifier\n\n# Neighborhood size shared by the hubness estimate and the classifier.\nN_NEIGHBORS = 10\n\n# High-dimensional artificial data.\n# NOTE: 1,000,000 x 500 values need roughly 4 GB if stored as 8-byte floats;\n# reduce n_samples for a quick trial run.\nX, y = make_classification(n_samples=1_000_000,\n                           n_features=500,\n                           n_informative=400,\n                           random_state=543)\n\n# Hold out 10,000 samples for testing; stratify=y keeps the class proportions.\nX_train, X_test, y_train, y_test = train_test_split(X, y,\n                                                    test_size=10_000,\n                                                    stratify=y,\n                                                    shuffle=True,\n                                                    random_state=2346)\n\n# Approximate hubness estimation: fit on the training set, score the test set.\nhub = Hubness(k=N_NEIGHBORS,\n              return_value='robinhood',\n              algorithm='hnsw',\n              random_state=2345,\n              shuffle_equal=False,\n              n_jobs=-1,\n              verbose=2)\nhub.fit(X_train)\nrobin_hood = hub.score(X_test)\nprint(f'Hubness (Robin Hood): {robin_hood:.3f}')\n# Example output: Hubness (Robin Hood): 0.944\n\n# Approximate hubness reduction for classification.\n# The keyword is n_neighbors (plural); the misspelling n_neighbor does not set k.\nknn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS,\n                           algorithm='hnsw',\n                           hubness='ls',\n                           n_jobs=-1,\n                           verbose=2)\n\nknn.fit(X_train, y_train)\ny_pred = knn.predict(X_test)\nacc = accuracy_score(y_test, y_pred)\nprint(f'Test accuracy: {acc:.3f}')\n# Example output: Test accuracy: 0.987"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}