Giving credit

Previous topic

8.3.12. Encoding models for visual stimuli from Miyawaki et al. 2008

Next topic

8.3.14. Reconstruction of visual stimuli from Miyawaki et al. 2008

8.3.13. Different classifiers in decoding the Haxby dataset

Here we compare different classifiers on a visual object recognition decoding task.

We start by loading the data and applying simple transformations to it

# Fetch data using nilearn dataset fetcher
from nilearn import datasets
# by default 2nd subject data will be fetched
haxby_dataset = datasets.fetch_haxby()

# print basic information on the dataset
print('First subject anatomical nifti image (3D) located is at: %s' %
      haxby_dataset.anat[0])
print('First subject functional nifti image (4D) is located at: %s' %
      haxby_dataset.func[0])

# load labels
import numpy as np
# NOTE(review): np.recfromcsv is deprecated (removed in NumPy 2.0); it reads
# the header row as field names and yields byte-string labels, which is why
# the comparisons below use b'rest'. Verify before upgrading NumPy.
labels = np.recfromcsv(haxby_dataset.session_target[0], delimiter=" ")
# 'labels' column: one stimulus-category tag per fMRI volume
stimuli = labels['labels']
# identify resting state labels in order to be able to remove them
resting_state = stimuli == b'rest'

# find names of remaining active labels
categories = np.unique(stimuli[np.logical_not(resting_state)])

# extract tags indicating to which acquisition run a tag belongs
# (used later to leave one run out during cross-validation)
session_labels = labels["chunks"][np.logical_not(resting_state)]

# Load the fMRI data
from nilearn.input_data import NiftiMasker

# For decoding, standardizing is often very important
# mask_vt restricts voxels to the ventral-temporal region of interest
mask_filename = haxby_dataset.mask_vt[0]
masker = NiftiMasker(mask_img=mask_filename, standardize=True)
func_filename = haxby_dataset.func[0]
# 2D array (time points x voxels), with resting-state volumes dropped
masked_timecourses = masker.fit_transform(
    func_filename)[np.logical_not(resting_state)]

Out:

First subject anatomical nifti image (3D) located is at: /home/parietal/gvaroqua/nilearn_data/haxby2001/subj2/anat.nii.gz
First subject functional nifti image (4D) is located at: /home/parietal/gvaroqua/nilearn_data/haxby2001/subj2/bold.nii.gz

Then we define the various classifiers that we use

# A support vector classifier with a linear kernel
from sklearn.svm import SVC
svm = SVC(C=1., kernel="linear")

# Logistic regressions with l1 / l2 penalties at different C values
from sklearn.linear_model import (LogisticRegression, RidgeClassifier,
                                  RidgeClassifierCV)
logistic = LogisticRegression(C=1., penalty="l1")
logistic_50 = LogisticRegression(C=50., penalty="l1")
logistic_l2 = LogisticRegression(C=1., penalty="l2")

# Cross-validated versions of these classifiers, all searching over
# the same grid of C values
from sklearn.grid_search import GridSearchCV
# GridSearchCV is slow, but note that it takes an 'n_jobs' parameter
# that can significantly speed up the fitting process on computers
# with multiple cores
C_GRID = {'C': [.1, .5, 1., 5., 10., 50., 100.]}

svm_cv = GridSearchCV(SVC(C=1., kernel="linear"),
                      param_grid=C_GRID, scoring='f1', n_jobs=1)
logistic_cv = GridSearchCV(LogisticRegression(C=1., penalty="l1"),
                           param_grid=C_GRID, scoring='f1')
logistic_l2_cv = GridSearchCV(LogisticRegression(C=1., penalty="l2"),
                              param_grid=C_GRID, scoring='f1')

# The ridge classifier has a specific 'CV' object that can set its
# parameters faster than using a GridSearchCV
ridge = RidgeClassifier()
ridge_cv = RidgeClassifierCV()

# Gather every estimator under a short display name
classifiers = {
    'SVC': svm,
    'SVC cv': svm_cv,
    'log l1': logistic,
    'log l1 50': logistic_50,
    'log l1 cv': logistic_cv,
    'log l2': logistic_l2,
    'log l2 cv': logistic_l2_cv,
    'ridge': ridge,
    'ridge cv': ridge_cv,
}

Here we compute prediction scores and run time for all these classifiers

# Make a data splitting object for cross validation: each fold leaves
# one acquisition run (session) out
from sklearn.cross_validation import LeaveOneLabelOut, cross_val_score
cv = LeaveOneLabelOut(session_labels)

import time

# classifier name -> {category -> array of per-fold f1 scores}
classifiers_scores = {}

# The task mask is independent of the classifier and of the category:
# compute it once instead of once per (classifier, category) pair.
task_mask = np.logical_not(resting_state)

for classifier_name, classifier in sorted(classifiers.items()):
    classifiers_scores[classifier_name] = {}
    print(70 * '_')

    for category in categories:
        # One-vs-all binary target for the current category
        classification_target = (stimuli[task_mask] == category)
        t0 = time.time()
        classifiers_scores[classifier_name][category] = cross_val_score(
            classifier,
            masked_timecourses,
            classification_target,
            cv=cv, scoring="f1")

        # Report mean +/- std of the fold scores, and wall-clock time
        print("%10s: %14s -- scores: %1.2f +- %1.2f, time %.2fs" % (
            classifier_name, category,
            classifiers_scores[classifier_name][category].mean(),
            classifiers_scores[classifier_name][category].std(),
            time.time() - t0))

Out:

______________________________________________________________________
       SVC:         bottle -- scores: 0.53 +- 0.21, time 1.58s
       SVC:            cat -- scores: 0.60 +- 0.25, time 1.06s
       SVC:          chair -- scores: 0.42 +- 0.25, time 1.12s
       SVC:           face -- scores: 0.58 +- 0.24, time 0.85s
       SVC:          house -- scores: 0.81 +- 0.28, time 0.65s
       SVC:       scissors -- scores: 0.41 +- 0.31, time 1.13s
       SVC:   scrambledpix -- scores: 0.79 +- 0.17, time 0.79s
       SVC:           shoe -- scores: 0.56 +- 0.23, time 1.12s
______________________________________________________________________
    SVC cv:         bottle -- scores: 0.53 +- 0.21, time 17.81s
    SVC cv:            cat -- scores: 0.60 +- 0.25, time 16.04s
    SVC cv:          chair -- scores: 0.42 +- 0.25, time 21.97s
    SVC cv:           face -- scores: 0.58 +- 0.24, time 13.53s
    SVC cv:          house -- scores: 0.81 +- 0.28, time 10.44s
    SVC cv:       scissors -- scores: 0.41 +- 0.31, time 17.14s
    SVC cv:   scrambledpix -- scores: 0.79 +- 0.17, time 13.22s
    SVC cv:           shoe -- scores: 0.56 +- 0.23, time 16.64s
______________________________________________________________________
    log l1:         bottle -- scores: 0.47 +- 0.15, time 1.18s
    log l1:            cat -- scores: 0.63 +- 0.23, time 0.95s
    log l1:          chair -- scores: 0.44 +- 0.14, time 1.01s
    log l1:           face -- scores: 0.74 +- 0.13, time 0.63s
    log l1:          house -- scores: 0.89 +- 0.13, time 0.50s
    log l1:       scissors -- scores: 0.43 +- 0.28, time 1.04s
    log l1:   scrambledpix -- scores: 0.70 +- 0.18, time 0.56s
    log l1:           shoe -- scores: 0.49 +- 0.19, time 0.98s
______________________________________________________________________
 log l1 50:         bottle -- scores: 0.49 +- 0.17, time 1.49s
 log l1 50:            cat -- scores: 0.63 +- 0.18, time 1.19s
 log l1 50:          chair -- scores: 0.45 +- 0.12, time 1.12s
 log l1 50:           face -- scores: 0.68 +- 0.14, time 0.72s
 log l1 50:          house -- scores: 0.86 +- 0.11, time 0.57s
 log l1 50:       scissors -- scores: 0.43 +- 0.25, time 1.38s
 log l1 50:   scrambledpix -- scores: 0.75 +- 0.19, time 0.52s
 log l1 50:           shoe -- scores: 0.48 +- 0.19, time 1.31s
______________________________________________________________________
 log l1 cv:         bottle -- scores: 0.37 +- 0.18, time 11.49s
 log l1 cv:            cat -- scores: 0.53 +- 0.30, time 9.90s
 log l1 cv:          chair -- scores: 0.40 +- 0.13, time 10.43s
 log l1 cv:           face -- scores: 0.69 +- 0.20, time 7.50s
 log l1 cv:          house -- scores: 0.85 +- 0.19, time 6.82s
 log l1 cv:       scissors -- scores: 0.37 +- 0.27, time 11.75s
 log l1 cv:   scrambledpix -- scores: 0.69 +- 0.18, time 7.41s
 log l1 cv:           shoe -- scores: 0.50 +- 0.19, time 10.94s
______________________________________________________________________
    log l2:         bottle -- scores: 0.43 +- 0.20, time 1.05s
    log l2:            cat -- scores: 0.54 +- 0.20, time 1.07s
    log l2:          chair -- scores: 0.54 +- 0.17, time 1.02s
    log l2:           face -- scores: 0.55 +- 0.19, time 0.96s
    log l2:          house -- scores: 0.63 +- 0.22, time 0.94s
    log l2:       scissors -- scores: 0.51 +- 0.15, time 1.10s
    log l2:   scrambledpix -- scores: 0.78 +- 0.18, time 0.92s
    log l2:           shoe -- scores: 0.51 +- 0.17, time 1.05s
______________________________________________________________________
 log l2 cv:         bottle -- scores: 0.44 +- 0.21, time 14.74s
 log l2 cv:            cat -- scores: 0.55 +- 0.20, time 13.66s
 log l2 cv:          chair -- scores: 0.53 +- 0.17, time 14.60s
 log l2 cv:           face -- scores: 0.56 +- 0.20, time 13.80s
 log l2 cv:          house -- scores: 0.64 +- 0.22, time 12.97s
 log l2 cv:       scissors -- scores: 0.49 +- 0.15, time 14.66s
 log l2 cv:   scrambledpix -- scores: 0.77 +- 0.18, time 12.65s
 log l2 cv:           shoe -- scores: 0.51 +- 0.17, time 13.25s
______________________________________________________________________
     ridge:         bottle -- scores: 0.41 +- 0.19, time 0.30s
     ridge:            cat -- scores: 0.43 +- 0.18, time 0.30s
     ridge:          chair -- scores: 0.50 +- 0.21, time 0.30s
     ridge:           face -- scores: 0.62 +- 0.18, time 0.30s
     ridge:          house -- scores: 0.90 +- 0.09, time 0.30s
     ridge:       scissors -- scores: 0.52 +- 0.22, time 0.30s
     ridge:   scrambledpix -- scores: 0.79 +- 0.17, time 0.30s
     ridge:           shoe -- scores: 0.48 +- 0.21, time 0.30s
______________________________________________________________________
  ridge cv:         bottle -- scores: 0.46 +- 0.24, time 2.81s
  ridge cv:            cat -- scores: 0.54 +- 0.22, time 2.82s
  ridge cv:          chair -- scores: 0.51 +- 0.21, time 2.81s
  ridge cv:           face -- scores: 0.70 +- 0.13, time 2.82s
  ridge cv:          house -- scores: 0.90 +- 0.13, time 2.82s
  ridge cv:       scissors -- scores: 0.56 +- 0.26, time 2.83s
  ridge cv:   scrambledpix -- scores: 0.85 +- 0.11, time 2.96s
  ridge cv:           shoe -- scores: 0.49 +- 0.18, time 3.40s

Then we make a rudimentary diagram

import matplotlib.pyplot as plt
plt.figure()

# One tick per stimulus category along the x axis
tick_position = np.arange(len(categories))
plt.xticks(tick_position, categories, rotation=45)

# One color per classifier; bars for successive classifiers are shifted
# slightly right so they sit side by side within each category slot
for color, classifier_name in zip(
        ['b', 'c', 'm', 'g', 'y', 'k', '.5', 'r', '#ffaaaa'],
        sorted(classifiers)):
    score_means = [classifiers_scores[classifier_name][category].mean()
                   for category in categories]
    plt.bar(tick_position, score_means, label=classifier_name,
            width=.11, color=color)
    tick_position = tick_position + .09

# Fix typo: 'accurancy' -> 'accuracy' in the axis label
plt.ylabel('Classification accuracy (f1 score)')
plt.xlabel('Visual stimuli category')
plt.ylim(ymin=0)
plt.legend(loc='lower center', ncol=3)
plt.title('Category-specific classification accuracy for different classifiers')
plt.tight_layout()
../../_images/sphx_glr_plot_haxby_different_estimators_001.png

Finally, we plot the face vs house map for the different classifiers

# Use the average EPI as a background for the weight maps
from nilearn import image
mean_epi_img = image.mean_img(func_filename)

# Restrict the decoding to face vs house
condition_mask = np.logical_or(stimuli == b'face', stimuli == b'house')
# Keep only the rows of the masked data corresponding to those conditions
masked_timecourses = masked_timecourses[
    condition_mask[np.logical_not(resting_state)]]
stimuli = stimuli[condition_mask]
# Transform the stimuli to binary values (1 = face, 0 = house).
# Use the builtin int: the np.int alias was removed in NumPy 1.24.
stimuli = (stimuli == b'face').astype(int)

from nilearn.plotting import plot_stat_map, show

for classifier_name, classifier in sorted(classifiers.items()):
    classifier.fit(masked_timecourses, stimuli)

    # Plain estimators expose coef_ directly; GridSearchCV objects
    # expose it through their best_estimator_. Skip anything else
    # (e.g. estimators without linear weights).
    if hasattr(classifier, 'coef_'):
        weights = classifier.coef_[0]
    elif hasattr(classifier, 'best_estimator_'):
        weights = classifier.best_estimator_.coef_[0]
    else:
        continue
    # Project the voxel weights back into brain space
    weight_img = masker.inverse_transform(weights)
    # get_fdata() replaces the deprecated get_data() (removed in nibabel 5)
    weight_map = weight_img.get_fdata()
    # Threshold at a small fraction of the peak weight to hide near-zero voxels
    threshold = np.max(np.abs(weight_map)) * 1e-3
    plot_stat_map(weight_img, bg_img=mean_epi_img,
                  display_mode='z', cut_coords=[-15],
                  threshold=threshold,
                  title='%s: face vs house' % classifier_name)

show()
  • ../../_images/sphx_glr_plot_haxby_different_estimators_002.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_003.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_004.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_005.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_006.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_007.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_008.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_009.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_010.png

Total running time of the script: ( 6 minutes 25.407 seconds)

Generated by Sphinx-Gallery