8.3.13. Different classifiers in decoding the Haxby dataset

Here we compare different classifiers on a visual object recognition decoding task.

We start by loading the data and applying simple transformations to it.

# Fetch data using nilearn dataset fetcher
from nilearn import datasets
# by default, the 2nd subject's data will be fetched
haxby_dataset = datasets.fetch_haxby()

# print basic information on the dataset
print('First subject anatomical nifti image (3D) is located at: %s' %
      haxby_dataset.anat[0])
print('First subject functional nifti image (4D) is located at: %s' %
      haxby_dataset.func[0])

# load labels
import numpy as np
labels = np.recfromcsv(haxby_dataset.session_target[0], delimiter=" ")
stimuli = labels['labels']
# identify resting-state samples so that they can be removed
resting_state = stimuli == b'rest'

# find names of remaining active labels
categories = np.unique(stimuli[np.logical_not(resting_state)])

# extract the session (run) labels indicating to which acquisition run each sample belongs
session_labels = labels["chunks"][np.logical_not(resting_state)]

# Load the fMRI data
from nilearn.input_data import NiftiMasker

# For decoding, standardizing is often very important
mask_filename = haxby_dataset.mask_vt[0]
masker = NiftiMasker(mask_img=mask_filename, standardize=True)
func_filename = haxby_dataset.func[0]
masked_timecourses = masker.fit_transform(
    func_filename)[np.logical_not(resting_state)]

Out:

First subject anatomical nifti image (3D) is located at: /home/kamalakar/nilearn_data/haxby2001/subj2/anat.nii.gz
First subject functional nifti image (4D) is located at: /home/kamalakar/nilearn_data/haxby2001/subj2/bold.nii.gz
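
As a quick sanity check, we can inspect what the masking step produced. This small sketch is not part of the original output; the exact numbers depend on the fetched subject and the ventral-temporal mask.

print(masked_timecourses.shape)   # (number of task samples, number of voxels)
print(categories)                 # the eight active stimulus categories
print(len(session_labels))        # one run label per retained task sample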

Then we define the various classifiers that we use.

# A support vector classifier
from sklearn.svm import SVC
svm = SVC(C=1., kernel="linear")

# Logistic regression classifiers with L1 and L2 penalties
from sklearn.linear_model import LogisticRegression, RidgeClassifier, \
    RidgeClassifierCV
logistic = LogisticRegression(C=1., penalty="l1")
logistic_50 = LogisticRegression(C=50., penalty="l1")
logistic_l2 = LogisticRegression(C=1., penalty="l2")

# Cross-validated versions of these classifiers
from sklearn.grid_search import GridSearchCV
# GridSearchCV is slow, but note that it takes an 'n_jobs' parameter that
# can significantly speed up the fitting process on computers with
# multiple cores
svm_cv = GridSearchCV(SVC(C=1., kernel="linear"),
                      param_grid={'C': [.1, .5, 1., 5., 10., 50., 100.]},
                      scoring='f1', n_jobs=1)

logistic_cv = GridSearchCV(LogisticRegression(C=1., penalty="l1"),
                           param_grid={'C': [.1, .5, 1., 5., 10., 50., 100.]},
                           scoring='f1')
logistic_l2_cv = GridSearchCV(LogisticRegression(C=1., penalty="l2"),
                              param_grid={
                                  'C': [.1, .5, 1., 5., 10., 50., 100.]},
                              scoring='f1')

# The ridge classifier has a specific 'CV' object that can set its
# parameters faster than using a GridSearchCV
ridge = RidgeClassifier()
ridge_cv = RidgeClassifierCV()

# A dictionary, to hold all our classifiers
classifiers = {'SVC': svm,
               'SVC cv': svm_cv,
               'log l1': logistic,
               'log l1 50': logistic_50,
               'log l1 cv': logistic_cv,
               'log l2': logistic_l2,
               'log l2 cv': logistic_l2_cv,
               'ridge': ridge,
               'ridge cv': ridge_cv}
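
As the comments above note, the grid searches accept an n_jobs parameter, and the cross-validated ridge can be given an explicit grid of regularization strengths. The lines below are an optional sketch and are not used in the timings reported later; the variable names and the alphas grid are illustrative choices.

# Optional: run the grid search on all available cores
svm_cv_parallel = GridSearchCV(SVC(C=1., kernel="linear"),
                               param_grid={'C': [.1, .5, 1., 5., 10., 50., 100.]},
                               scoring='f1', n_jobs=-1)

# Optional: pass an explicit grid of regularization strengths to the
# cross-validated ridge instead of relying on its default grid
ridge_cv_explicit = RidgeClassifierCV(alphas=[.1, 1., 10., 100.])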

Here we compute prediction scores and running time for all these classifiers.

# Make a data splitting object for cross validation
from sklearn.cross_validation import LeaveOneLabelOut, cross_val_score
cv = LeaveOneLabelOut(session_labels)
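# Each fold of LeaveOneLabelOut holds out one full acquisition run: all
# samples sharing a session label go to the test set together, so
# correlations within a run cannot leak into the training set.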

import time

classifiers_scores = {}

for classifier_name, classifier in sorted(classifiers.items()):
    classifiers_scores[classifier_name] = {}
    print(70 * '_')

    for category in categories:
        task_mask = np.logical_not(resting_state)
        classification_target = (stimuli[task_mask] == category)
        t0 = time.time()
        classifiers_scores[classifier_name][category] = cross_val_score(
            classifier,
            masked_timecourses,
            classification_target,
            cv=cv, scoring="f1")

        print("%10s: %14s -- scores: %1.2f +- %1.2f, time %.2fs" % (
            classifier_name, category,
            classifiers_scores[classifier_name][category].mean(),
            classifiers_scores[classifier_name][category].std(),
            time.time() - t0))

Out:

______________________________________________________________________
       SVC:         bottle -- scores: 0.53 +- 0.21, time 1.21s
       SVC:            cat -- scores: 0.60 +- 0.25, time 1.05s
       SVC:          chair -- scores: 0.42 +- 0.25, time 1.09s
       SVC:           face -- scores: 0.58 +- 0.24, time 0.83s
       SVC:          house -- scores: 0.81 +- 0.28, time 0.63s
       SVC:       scissors -- scores: 0.41 +- 0.31, time 1.10s
       SVC:   scrambledpix -- scores: 0.79 +- 0.17, time 0.78s
       SVC:           shoe -- scores: 0.56 +- 0.23, time 1.08s
______________________________________________________________________
    SVC cv:         bottle -- scores: 0.53 +- 0.21, time 17.19s
    SVC cv:            cat -- scores: 0.60 +- 0.25, time 15.71s
    SVC cv:          chair -- scores: 0.42 +- 0.25, time 16.28s
    SVC cv:           face -- scores: 0.58 +- 0.24, time 13.18s
    SVC cv:          house -- scores: 0.81 +- 0.28, time 10.32s
    SVC cv:       scissors -- scores: 0.41 +- 0.31, time 16.38s
    SVC cv:   scrambledpix -- scores: 0.79 +- 0.17, time 12.39s
    SVC cv:           shoe -- scores: 0.56 +- 0.23, time 16.39s
______________________________________________________________________
    log l1:         bottle -- scores: 0.47 +- 0.15, time 1.55s
    log l1:            cat -- scores: 0.63 +- 0.23, time 1.27s
    log l1:          chair -- scores: 0.44 +- 0.14, time 1.39s
    log l1:           face -- scores: 0.74 +- 0.13, time 0.87s
    log l1:          house -- scores: 0.89 +- 0.13, time 0.65s
    log l1:       scissors -- scores: 0.43 +- 0.28, time 1.44s
    log l1:   scrambledpix -- scores: 0.70 +- 0.18, time 0.82s
    log l1:           shoe -- scores: 0.49 +- 0.19, time 1.48s
______________________________________________________________________
 log l1 50:         bottle -- scores: 0.50 +- 0.17, time 2.25s
 log l1 50:            cat -- scores: 0.65 +- 0.19, time 1.76s
 log l1 50:          chair -- scores: 0.48 +- 0.13, time 1.71s
 log l1 50:           face -- scores: 0.69 +- 0.14, time 1.23s
 log l1 50:          house -- scores: 0.87 +- 0.10, time 0.97s
 log l1 50:       scissors -- scores: 0.43 +- 0.24, time 1.89s
 log l1 50:   scrambledpix -- scores: 0.76 +- 0.21, time 0.97s
 log l1 50:           shoe -- scores: 0.48 +- 0.19, time 2.02s
______________________________________________________________________
 log l1 cv:         bottle -- scores: 0.40 +- 0.17, time 18.94s
 log l1 cv:            cat -- scores: 0.57 +- 0.28, time 17.48s
 log l1 cv:          chair -- scores: 0.42 +- 0.13, time 18.65s
 log l1 cv:           face -- scores: 0.69 +- 0.19, time 12.87s
 log l1 cv:          house -- scores: 0.85 +- 0.19, time 10.38s
 log l1 cv:       scissors -- scores: 0.35 +- 0.30, time 18.21s
 log l1 cv:   scrambledpix -- scores: 0.68 +- 0.19, time 11.83s
 log l1 cv:           shoe -- scores: 0.49 +- 0.19, time 18.83s
______________________________________________________________________
    log l2:         bottle -- scores: 0.43 +- 0.20, time 1.90s
    log l2:            cat -- scores: 0.54 +- 0.20, time 1.94s
    log l2:          chair -- scores: 0.54 +- 0.17, time 1.95s
    log l2:           face -- scores: 0.55 +- 0.19, time 1.82s
    log l2:          house -- scores: 0.63 +- 0.22, time 1.79s
    log l2:       scissors -- scores: 0.51 +- 0.15, time 2.00s
    log l2:   scrambledpix -- scores: 0.78 +- 0.18, time 1.74s
    log l2:           shoe -- scores: 0.51 +- 0.17, time 1.95s
______________________________________________________________________
 log l2 cv:         bottle -- scores: 0.44 +- 0.21, time 27.87s
 log l2 cv:            cat -- scores: 0.55 +- 0.20, time 26.10s
 log l2 cv:          chair -- scores: 0.53 +- 0.17, time 26.76s
 log l2 cv:           face -- scores: 0.56 +- 0.20, time 25.48s
 log l2 cv:          house -- scores: 0.64 +- 0.22, time 24.87s
 log l2 cv:       scissors -- scores: 0.49 +- 0.15, time 26.97s
 log l2 cv:   scrambledpix -- scores: 0.77 +- 0.18, time 24.97s
 log l2 cv:           shoe -- scores: 0.51 +- 0.17, time 26.21s
______________________________________________________________________
     ridge:         bottle -- scores: 0.41 +- 0.19, time 0.20s
     ridge:            cat -- scores: 0.43 +- 0.18, time 0.21s
     ridge:          chair -- scores: 0.50 +- 0.21, time 0.22s
     ridge:           face -- scores: 0.62 +- 0.18, time 0.21s
     ridge:          house -- scores: 0.90 +- 0.09, time 0.21s
     ridge:       scissors -- scores: 0.52 +- 0.22, time 0.21s
     ridge:   scrambledpix -- scores: 0.79 +- 0.17, time 0.22s
     ridge:           shoe -- scores: 0.48 +- 0.21, time 0.22s
______________________________________________________________________
  ridge cv:         bottle -- scores: 0.46 +- 0.24, time 2.12s
  ridge cv:            cat -- scores: 0.54 +- 0.22, time 2.13s
  ridge cv:          chair -- scores: 0.51 +- 0.21, time 2.69s
  ridge cv:           face -- scores: 0.70 +- 0.13, time 2.20s
  ridge cv:          house -- scores: 0.90 +- 0.13, time 2.18s
  ridge cv:       scissors -- scores: 0.56 +- 0.26, time 2.06s
  ridge cv:   scrambledpix -- scores: 0.85 +- 0.11, time 2.08s
  ridge cv:           shoe -- scores: 0.49 +- 0.18, time 2.33s
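
For a quick overall comparison, the per-category scores stored above can be averaged for each classifier. This is a small sketch built on the classifiers_scores dictionary; it was not run in the original script, so no output is shown.

# Average the fold scores over all categories for each classifier
for classifier_name in sorted(classifiers_scores):
    all_scores = np.hstack([classifiers_scores[classifier_name][category]
                            for category in categories])
    print("%10s: mean f1 across categories = %.2f"
          % (classifier_name, all_scores.mean()))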

Then we make a rudimentary bar plot comparing these scores.

import matplotlib.pyplot as plt
plt.figure()

tick_position = np.arange(len(categories))
plt.xticks(tick_position, categories, rotation=45)

for color, classifier_name in zip(
        ['b', 'c', 'm', 'g', 'y', 'k', '.5', 'r', '#ffaaaa'],
        sorted(classifiers)):
    score_means = [classifiers_scores[classifier_name][category].mean()
                   for category in categories]
    plt.bar(tick_position, score_means, label=classifier_name,
            width=.11, color=color)
    tick_position = tick_position + .09

plt.ylabel('Classification accuracy (f1 score)')
plt.xlabel('Visual stimuli category')
plt.ylim(ymin=0)
plt.legend(loc='lower center', ncol=3)
plt.title('Category-specific classification accuracy for different classifiers')
plt.tight_layout()
../../_images/sphx_glr_plot_haxby_different_estimators_001.png
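
The figure can also be written to disk, for instance for inclusion in a report. This is a one-line sketch; the filename and resolution are arbitrary choices.

plt.savefig('haxby_classifier_comparison.png', dpi=150)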

Finally, we plot the face vs house weight maps for the different classifiers.

# Use the average EPI as a background
from nilearn import image
mean_epi_img = image.mean_img(func_filename)

# Restrict the decoding to face vs house
condition_mask = np.logical_or(stimuli == b'face', stimuli == b'house')
masked_timecourses = masked_timecourses[
    condition_mask[np.logical_not(resting_state)]]
stimuli = stimuli[condition_mask]
# Transform the stimuli to binary values
stimuli = (stimuli == b'face').astype(np.int)

from nilearn.plotting import plot_stat_map, show

for classifier_name, classifier in sorted(classifiers.items()):
    classifier.fit(masked_timecourses, stimuli)

    if hasattr(classifier, 'coef_'):
        weights = classifier.coef_[0]
    elif hasattr(classifier, 'best_estimator_'):
        weights = classifier.best_estimator_.coef_[0]
    else:
        continue
    weight_img = masker.inverse_transform(weights)
    weight_map = weight_img.get_data()
    threshold = np.max(np.abs(weight_map)) * 1e-3
    plot_stat_map(weight_img, bg_img=mean_epi_img,
                  display_mode='z', cut_coords=[-15],
                  threshold=threshold,
                  title='%s: face vs house' % classifier_name)

show()
  • ../../_images/sphx_glr_plot_haxby_different_estimators_002.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_003.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_004.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_005.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_006.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_007.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_008.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_009.png
  • ../../_images/sphx_glr_plot_haxby_different_estimators_010.png
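
The weight images produced in the loop above can also be written to NIfTI files for inspection in an external viewer. A minimal sketch, saving the weight image left over from the last iteration of the loop (the filename pattern is an arbitrary choice):

weight_img.to_filename('weights_%s_face_vs_house.nii.gz'
                       % classifier_name.replace(' ', '_'))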

Total running time of the script: ( 8 minutes 54.709 seconds)

Generated by Sphinx-Gallery