"""
Voxel-Based Morphometry on Oasis dataset
========================================
This example uses Voxel-Based Morphometry (VBM) to study the relationship
between aging and gray matter density.
The data come from the `OASIS `_ project.
If you use it, you need to agree with the data usage agreement available
on the website.
It has been run through a standard VBM pipeline (using SPM8 and
NewSegment) to create VBM maps, which we study here.
Predictive modeling analysis: VBM bio-markers of aging?
--------------------------------------------------------
We run a standard SVM-ANOVA nilearn pipeline to predict age from the VBM
data. We use only 100 subjects from the OASIS dataset to limit the memory
usage.
Note that for an actual predictive modeling study of aging, the study
should be ran on the full set of subjects. Also, parameters such as the
smoothing applied to the data and the number of features selected by the
Anova step should be set by nested cross-validation, as they impact
significantly the prediction score.
Brain mapping with mass univariate
-----------------------------------
SVM weights are very noisy, partly because heavy smoothing is detrimental
for the prediction here. A standard analysis using mass-univariate GLM
(here permuted to have exact correction for multiple comparisons) gives a
much clearer view of the important regions.
____
"""
# Authors: Elvis Dhomatob, , Apr. 2014
# Virgile Fritsch, , Apr 2014
# Gael Varoquaux, Apr 2014
import numpy as np
import matplotlib.pyplot as plt
from nilearn import datasets
from nilearn.input_data import NiftiMasker
n_subjects = 100 # more subjects requires more memory
### Load Oasis dataset ########################################################
oasis_dataset = datasets.fetch_oasis_vbm(n_subjects=n_subjects)
gray_matter_map_filenames = oasis_dataset.gray_matter_maps
age = oasis_dataset.ext_vars['age'].astype(float)
# print basic information on the dataset
print('First gray-matter anatomy image (3D) is located at: %s' %
oasis_dataset.gray_matter_maps[0]) # 3D data
print('First white-matter anatomy image (3D) is located at: %s' %
oasis_dataset.white_matter_maps[0]) # 3D data
### Preprocess data ###########################################################
nifti_masker = NiftiMasker(
standardize=False,
smoothing_fwhm=2,
memory='nilearn_cache') # cache options
gm_maps_masked = nifti_masker.fit_transform(gray_matter_map_filenames)
n_samples, n_features = gm_maps_masked.shape
print("%d samples, %d features" % (n_subjects, n_features))
### Prediction with SVR #######################################################
print("ANOVA + SVR")
# Define the prediction function to be used.
# Here we use a Support Vector Classification, with a linear kernel
from sklearn.svm import SVR
svr = SVR(kernel='linear')
# Dimension reduction
from sklearn.feature_selection import VarianceThreshold, SelectKBest, \
f_regression
# Remove features with too low between-subject variance
variance_threshold = VarianceThreshold(threshold=.01)
# Here we use a classical univariate feature selection based on F-test,
# namely Anova.
feature_selection = SelectKBest(f_regression, k=2000)
# We have our predictor (SVR), our feature selection (SelectKBest), and now,
# we can plug them together in a *pipeline* that performs the two operations
# successively:
from sklearn.pipeline import Pipeline
anova_svr = Pipeline([
('variance_threshold', variance_threshold),
('anova', feature_selection),
('svr', svr)])
### Fit and predict
anova_svr.fit(gm_maps_masked, age)
age_pred = anova_svr.predict(gm_maps_masked)
# Visualization
# Look at the SVR's discriminating weights
coef = svr.coef_
# reverse feature selection
coef = feature_selection.inverse_transform(coef)
# reverse variance threshold
coef = variance_threshold.inverse_transform(coef)
# reverse masking
weight_img = nifti_masker.inverse_transform(coef)
# Create the figure
from nilearn.plotting import plot_stat_map, show
bg_filename = gray_matter_map_filenames[0]
z_slice = 0
fig = plt.figure(figsize=(5.5, 7.5), facecolor='k')
# Hard setting vmax to highlight weights more
display = plot_stat_map(weight_img, bg_img=bg_filename,
display_mode='z', cut_coords=[z_slice],
figure=fig, vmax=1)
display.title('SVM weights', y=1.2)
# Measure accuracy with cross validation
from sklearn.cross_validation import cross_val_score
cv_scores = cross_val_score(anova_svr, gm_maps_masked, age)
# Return the corresponding mean prediction accuracy
prediction_accuracy = np.mean(cv_scores)
print("=== ANOVA ===")
print("Prediction accuracy: %f" % prediction_accuracy)
print("")
### Inference with massively univariate model #################################
print("Massively univariate model")
# Statistical inference
from nilearn.mass_univariate import permuted_ols
data = variance_threshold.fit_transform(gm_maps_masked)
neg_log_pvals, t_scores_original_data, _ = permuted_ols(
age, data, # + intercept as a covariate by default
n_perm=2000, # 1,000 in the interest of time; 10000 would be better
n_jobs=1) # can be changed to use more CPUs
signed_neg_log_pvals = neg_log_pvals * np.sign(t_scores_original_data)
signed_neg_log_pvals_unmasked = nifti_masker.inverse_transform(
variance_threshold.inverse_transform(signed_neg_log_pvals))
# Show results
threshold = -np.log10(0.1) # 10% corrected
fig = plt.figure(figsize=(5.5, 7.5), facecolor='k')
display = plot_stat_map(signed_neg_log_pvals_unmasked, bg_img=bg_filename,
threshold=threshold, cmap=plt.cm.RdBu_r,
display_mode='z', cut_coords=[z_slice],
figure=fig)
title = ('Negative $\log_{10}$ p-values'
'\n(Non-parametric + max-type correction)')
display.title(title, y=1.2)
n_detections = (signed_neg_log_pvals_unmasked.get_data() > threshold).sum()
print('\n%d detections' % n_detections)
show()