Helper Module for Deep Learning.
Source code for pynet.datasets.hcp
# -*- coding: utf-8 -*-
########################################################################
# NSAp - Copyright (C) CEA, 2020
# Distributed under the terms of the CeCILL-B license, as published by
# the CEA-CNRS-INRIA. Refer to the LICENSE file or to
# http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html
# for details.
########################################################################
"""
Module that provides functions to prepare datasets from the Human Connectome
Project (HCP):
1) the T1 images and their associated brain masks.
"""
# Imports
import os
import logging
from collections import namedtuple
import boto3
from botocore.exceptions import NoCredentialsError
import nibabel as nib
import numpy as np
import pandas as pd
from scipy import ndimage
from pynet.datasets import Fetchers
# Global parameters
Item = namedtuple("Item", ["input_path", "output_path", "metadata_path"])
logger = logging.getLogger("pynet")
@Fetchers.register
def fetch_hcp_brain(datasetdir, low=False, small=True):
    """ Fetch/prepare the HCP T1/brain mask dataset for pynet.

    Go to 'https://db.humanconnectome.org' and get an account and log in.
    Then, click on the Amazon S3 button that should give you a key pair.
    Then use 'aws configure' to add this to your machine.

    AWS Access Key ID: ****************
    AWS Secret Access Key: ****************
    Default region name: eu-west-3
    Default output format: json

    For each subject, two image/mask pairs are stored: one for the 'T1w'
    modality and one for the 'MNINonLinear' modality. The generated arrays
    get an extra singleton axis inserted at position 1.

    Parameters
    ----------
    datasetdir: str
        the dataset destination folder.
    low: bool, default False
        set images in low resolution.
    small: bool, default True
        fetch 45 brains if true, else 1200 brains.

    Returns
    -------
    item: namedtuple
        a named tuple containing 'input_path', 'output_path', and
        'metadata_path'.

    Raises
    ------
    ValueError
        if no AWS credentials are available to list the HCP bucket.
    """
    logger.info("Loading HCP brain dataset...")
    desc_path = os.path.join(datasetdir, "pynet_hcp_brain.tsv")
    input_path = os.path.join(datasetdir, "pynet_hcp_brain_inputs.npy")
    output_path = os.path.join(datasetdir, "pynet_hcp_brain_outputs.npy")

    # The description file is written last, so its presence is used as the
    # marker that the dataset has already been generated.
    if not os.path.isfile(desc_path):
        client = boto3.client("s3")
        paginator = client.get_paginator("list_objects")
        prefix = "HCP_1200/"
        result = paginator.paginate(
            Bucket="hcp-openaccess", Delimiter="/", Prefix=prefix)
        try:
            # The listing is lazy: the request (and thus the credentials
            # check) only happens when the pages are consumed here.
            subjects_prefix = list(result.search("CommonPrefixes"))
        except NoCredentialsError as error:
            msg = """
                Go to 'https://db.humanconnectome.org' and get an account and
                log in.
                Then, click on the Amazon S3 button that should give you a key
                pair.
                Then use 'aws configure' to add this to our machine.
                AWS Access Key ID: ****************
                AWS Secret Access Key: ****************
                Default region name: eu-west-3
                Default output format: json
                """
            raise ValueError(msg) from error
        if small:
            subjects_prefix = subjects_prefix[:45]

        images = []
        masks = []
        metadata = {"name": [], "modality": []}
        for subject in subjects_prefix:
            subject_prefix = subject["Prefix"]
            logger.info(" subject: {0}".format(subject_prefix))
            # Strip the listing prefix and the trailing character to keep
            # only the subject identifier. The offset is derived from
            # 'prefix' rather than hard-coded so that the full identifier is
            # kept whatever the prefix length.
            name = subject_prefix[len(prefix): -1]
            for modality in ("T1w", "MNINonLinear"):
                data = get_hcp_data(datasetdir, subject_prefix, modality, low)
                metadata["name"].append(name)
                metadata["modality"].append(modality)
                images.append(data["image"])
                masks.append(data["mask"].astype(int))

        # Stack the volumes and insert a singleton axis at position 1.
        images = np.expand_dims(np.asarray(images), axis=1)
        masks = np.expand_dims(np.asarray(masks), axis=1)
        np.save(input_path, images)
        np.save(output_path, masks)
        df = pd.DataFrame.from_dict(metadata)
        df.to_csv(desc_path, sep="\t", index=False)
        logger.info("Done.")

    return Item(input_path=input_path, output_path=output_path,
                metadata_path=desc_path)
def load_image(filename, low=False):
    """ Load an MRI image.

    The first and third axes are cropped by 2 voxels on each side and one
    zero slice is appended along the second axis. High resolution images are
    thus resampled to (256, 312, 256) and low resolution images are
    resampled to (32, 40, 32) which can be divided by 8. These sizes
    correspond to input volumes of shape (260, 311, 260).

    Parameters
    ----------
    filename: str
        file to be loaded.
    low: bool, default False
        set image in low resolution.

    Returns
    -------
    img_data: np.array
        loaded image.
    """
    img = nib.load(filename)
    # 'img.get_data()' is deprecated and removed from recent nibabel
    # releases: this is the documented drop-in replacement.
    img_data = np.asanyarray(img.dataobj)

    # Crop the first and last axes, then pad the second axis with one zero
    # slice whose size follows the cropped volume.
    img_data = img_data[2:-2, :, 2:-2]
    img_data = np.append(
        img_data,
        np.zeros((img_data.shape[0], 1, img_data.shape[2])),
        axis=1)

    if low:
        # Nearest neighbour downsampling by 8, then pad the second axis
        # again with one zero slice.
        img_data = ndimage.zoom(img_data, 1. / 8., order=0)
        img_data = np.append(
            img_data,
            np.zeros((img_data.shape[0], 1, img_data.shape[2])),
            axis=1)

    return img_data
def get_hcp_data(datasetdir, subject_prefix, modality, low):
    """ Get the requested data.

    The image and the brain mask of the requested modality are downloaded
    from the 'hcp-openaccess' S3 bucket, unless a file already exists at the
    expected location in 'datasetdir', and are then loaded with
    'load_image'.

    Parameters
    ----------
    datasetdir: str
        the dataset destination folder.
    subject_prefix: str
        subject path. The S3 key is built by plain concatenation, so this
        prefix must end with '/'.
    modality: str
        type of image to be extracted ('T1w' or 'MNINonLinear').
    low: bool
        set image in low resolution.

    Returns
    -------
    data: dict
        the loaded data, with the 'image' and 'mask' keys.

    Raises
    ------
    ValueError
        if the requested modality is not supported.
    """
    mapping = {
        "T1w": {
            "image": "T1w_acpc_dc_restore.nii.gz",
            "mask": "brainmask_fs.nii.gz"},
        "MNINonLinear": {
            "image": "T1w_restore.nii.gz",
            "mask": "brainmask_fs.nii.gz"}
    }
    # Validate the request first so that an invalid modality fails before
    # any S3 resource is created.
    if modality not in mapping:
        raise ValueError("Unexpected modality '{0}'. Valid modalities "
                         "are: {1}".format(modality, list(mapping)))

    s3 = boto3.resource("s3")
    bucket = s3.Bucket("hcp-openaccess")
    data = {}
    for key, basename in mapping[modality].items():
        url = subject_prefix + "/".join([modality, basename])
        # Mirror the S3 key hierarchy below the dataset folder.
        destfile = os.path.join(datasetdir, *url.split("/"))
        if not os.path.isfile(destfile):
            logger.info(" url: {0}".format(url))
            logger.info(" dest: {0}".format(destfile))
            os.makedirs(os.path.dirname(destfile), exist_ok=True)
            bucket.download_file(url, destfile)
        data[key] = load_image(destfile, low)
    return data
Follow us
© 2019, pynet developers.
Inspired by AZMIND template.
Inspired by AZMIND template.