{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
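        "\nGenome Wide Association with DL for standing height\n===================================================\n\nCredit: V Frouin\n\nLoad the data\n-------------\n\nLoad the height biobank dataset and wrap it in a data manager.\nYou may need to change the 'datasetdir' parameter of the fetcher so that\nit points to the location of the data on your system.\n\n"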
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import os\nimport sys\nfrom pynet.datasets import DataManager, fetch_height_biobank\nfrom pynet.utils import setup_logging\n\n# Skip this example under continuous integration: the data sit on the\n# NeuroSpin internal filesystem.\nif \"CI_MODE\" in os.environ:\n    sys.exit(0)\n\nsetup_logging(level=\"info\")\n\n# Location of the height biobank data; adapt this path to your own setup.\ndatasetdir = \"/neurospin/tmp/height_bb\"\ndata = fetch_height_biobank(datasetdir=datasetdir)\n\n# Continuous \"Height\" label, 2 folds, test_size of 0.2, batches of 5.\nmanager = DataManager(\n    input_path=data.input_path,\n    metadata_path=data.metadata_path,\n    labels=[\"Height\"],\n    continuous_labels=True,\n    number_of_folds=2,\n    test_size=0.2,\n    batch_size=5)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
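        "Basic inspection: print the shapes of the train and test arrays, then the\nmin, max, mean and standard deviation of the height labels in each split.\n\n"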
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import numpy as np\nimport matplotlib.pyplot as plt\n\n# Inputs and labels of the first training fold.\ntrain_dataset = manager[\"train\"][0]\nX_train = train_dataset.inputs[train_dataset.indices]\ny_train = train_dataset.labels[train_dataset.indices]\n\n# Inputs and labels of the test split.\ntest_dataset = manager[\"test\"]\nX_test = test_dataset.inputs[test_dataset.indices]\ny_test = test_dataset.labels[test_dataset.indices]\n\nfor inputs, targets in ((X_train, y_train), (X_test, y_test)):\n    print(inputs.shape, targets.shape)\n\n# Summary statistics of the labels, one row per split.\nprint(\"       min max mean sd\")\nfor prefix, targets in ((\"Train:\", y_train), (\"Test:\", y_test)):\n    print(prefix, targets.min(), targets.max(), targets.mean(),\n          np.sqrt(targets.var()))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "SNP preselection according to a simple GWAS: select the N_best most\nassociated SNPs or select by min_P_value.\nThis step is optional: its result is not used afterwards.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from scipy import stats\n\n# Fit one simple linear regression of the labels on each column of X_train\n# and keep -log10 of the p-value of every fit.\npvals = np.array([\n    -np.log10(stats.linregress(X_train[:, snp], y_train).pvalue)\n    for snp in range(X_train.shape[1])])\n\n# One point per column, in column order.\nfig, ax = plt.subplots()\nax.set_ylabel(\"-log10 P-value\")\nax.set_xlabel(\"SNP\")\nax.plot(pvals, marker=\"o\")"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.12"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}