{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\npynet: hyper parameters tuning\n==============================\n\nCredit: A Grigis\nBased on:\n- https://github.com/autonomio/talos/blob/master/docs/Examples_PyTorch.md\n\nIn this tutorial, you will learn how to tune the hyperparameters using the\ntalos and the kerasplotlib modules.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Imports\nimport os\nimport sys\nif \"CI_MODE\" in os.environ:\n    sys.exit()\n\nimport talos\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n# from torch_optimizer import torch_optimizer\n\nfrom sklearn.metrics import f1_score\n\nfrom pynet.interfaces import DeepLearningInterface\nfrom pynet.datasets import DataManager"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Data Preparation\n----------------\n\nFor this experiment, we're going to use the breast cancer dataset.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "x, y = talos.templates.datasets.breast_cancer()\nx = talos.utils.rescale_meanzero(x)\nx_train, y_train, x_val, y_val = talos.utils.val_split(x, y, .2)\nprint(\"Train: \", x_train.shape, y_train.shape)\nprint(\"Validation: \", x_val.shape, y_val.shape)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Model Preparation\n-----------------\n\nTalos works with any pynet model, without changing the structure of the\nmodel in anyway, or without introducing any new syntax. The below example\nshows clearly how this works.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "class BreastCancerNet(nn.Module, talos.utils.TorchHistory):\n    def __init__(self, n_feature, first_neuron, second_neuron, dropout):\n        super(BreastCancerNet, self).__init__()\n        self.hidden = torch.nn.Linear(n_feature, first_neuron)\n        torch.nn.init.normal_(self.hidden.weight)\n        self.hidden1 = torch.nn.Linear(first_neuron, second_neuron)\n        self.dropout = torch.nn.Dropout(dropout)\n        self.out = torch.nn.Linear(second_neuron, 2)\n\n    def forward(self, x):\n        x = F.relu(self.hidden(x))\n        x = self.dropout(x)\n        x = torch.sigmoid(self.hidden1(x))\n        x = self.out(x)\n        return x\n\n\ndef update_talos_history(signal):\n        \"\"\" Callback to update talos history.\n\n        Parameters\n        ----------\n        signal: SignalObject\n            an object with the trained model 'object', the emitted signal\n            'signal', the epoch number 'epoch' and the fold index 'fold'.\n        \"\"\"\n        net = signal.object.model\n        emitted_signal = signal.signal\n        epoch = signal.epoch\n        fold = signal.fold\n        for key in signal.keys:\n            if key in (\"epoch\", \"fold\"):\n                continue\n            value = getattr(signal, key)\n            if value is not None:\n                net.append_history(value, key)\n\n\ndef breast_cancer(x_train, y_train, x_val, y_val, params):\n    print(\"Iteration parameters: \", params)\n\n    def weights_init_uniform_rule(m):\n        classname = m.__class__.__name__\n        if classname.find('Linear') != -1:\n            n = m.in_features\n            y = 1.0 / np.sqrt(n)\n            m.weight.data.uniform_(-y, y)\n            m.bias.data.fill_(0)\n    manager = DataManager.from_numpy(\n        train_inputs=x_train, train_labels=y_train,\n        batch_size=params[\"batch_size\"], validation_inputs=x_val,\n        validation_labels=y_val)\n    net = BreastCancerNet(\n        n_feature=x_train.shape[1], first_neuron=params[\"first_neuron\"],\n        second_neuron=params[\"second_neuron\"], dropout=params[\"dropout\"])\n    net.apply(weights_init_uniform_rule)\n    net.init_history()\n    model = DeepLearningInterface(\n        model=net,\n        optimizer_name=params[\"optimizer_name\"],\n        learning_rate=params[\"learning_rate\"],\n        loss_name=params[\"loss_name\"],\n        metrics=[\"accuracy\"])\n    model.add_observer(\"after_epoch\", update_talos_history)\n    model.training(\n        manager=manager,\n        nb_epochs=params[\"epochs\"],\n        checkpointdir=None,\n        fold_index=0,\n        with_validation=True)\n    return net, net.parameters()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Setting the Parameter Space Boundaries\n--------------------------------------\n\nIn the last and final step, we're going to create the dictionary, which will\nthen be passed on to Talos together with the model above. Here we have\nthree different ways to input values:\n- as stepped ranges (min, max, steps)\n- as multiple values [in a list]\n- as a single value [in a list]\nFor values we don't want to use, it's ok to set it as None.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "params = {\n    \"first_neuron\": [200, 100],\n    \"second_neuron\": [30, 50],\n    \"dropout\": [0.2, 0.3],\n    \"optimizer_name\": [\"SGD\", \"Adam\"],\n    \"loss_name\": [\"CrossEntropyLoss\"],\n    \"learning_rate\": [1e-3, 1e-4],\n    \"batch_size\": [20, 50, 5],\n    \"epochs\": [10, 20]\n}"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Run the Hyperparameter scan\n---------------------------\n\nNow we are ready to run the model based on the parameters and the layer\nconfiguration above. The exact same process would apply with any other\nmodel, just make sure to pass the model function name in the Scan() command\nas in the below example. To get started quickly, we're going to invoke only\n10 rounds.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "os.chdir(\"/tmp\")\nscan_object = talos.Scan(x=x_train,\n                         y=y_train,\n                         params=params,\n                         model=breast_cancer,\n                         experiment_name=\"breast_cancer\",\n                         round_limit=10)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Access the results through the Scan object\n------------------------------------------\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(\"accessing the results data frame\")\nprint(scan_object.data.head())\n\nprint(\"accessing epoch entropy values for each round\")\nprint(scan_object.learning_entropy)\n\nprint(\"access the summary details\")\nprint(scan_object.details)\n\nprint(\"accessing the saved models\")\nprint(scan_object.saved_models)\n\nprint(\"accessing the saved weights for models\")\nprint(scan_object.saved_weights)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Analysing the Scan results with reporting\n-----------------------------------------\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(\"use Scan object as input\")\nanalyze_object = talos.Analyze(scan_object)\n\nprint(\"access the dataframe with the results\")\nprint(analyze_object.data)\n\nprint(\"get the number of rounds in the Scan\")\nprint(analyze_object.rounds())\n\nprint(\"et the highest result for any metric\")\nprint(analyze_object.high('val_accuracy'))\n\nprint(\"get the round with the best result\")\nprint(analyze_object.rounds2high('val_accuracy'))\n\nprint(\"get the best paramaters\")\nprint(analyze_object.best_params(\n    'val_accuracy', ['accuracy', 'loss', 'val_loss']))\n\nprint(\"get correlation for hyperparameters against a metric\")\nprint(analyze_object.correlate('val_loss', ['accuracy', 'loss', 'val_loss']))\n\nprint(\"a regression plot for two dimensions\")\nanalyze_object.plot_regs('val_accuracy', 'val_loss')\n\nprint(\"line plot\")\nanalyze_object.plot_line('val_accuracy')\n\nprint(\"up to two dimensional kernel density estimator\")\nanalyze_object.plot_kde('val_accuracy')\n\nprint(\"a simple histogram\")\nanalyze_object.plot_hist('val_accuracy', bins=50)\n\nprint(\"heatmap correlation\")\nanalyze_object.plot_corr('val_loss', ['accuracy', 'loss', 'val_loss'])\n\nprint(\"a four dimensional bar grid\")\nanalyze_object.plot_bars(\n    'batch_size', 'val_accuracy', 'first_neuron', 'learning_rate')\n\nif \"CI_MODE\" not in os.environ:\n    import matplotlib.pyplot as plt\n    plt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.12"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}