diff --git a/.gitignore b/.gitignore
index c80060ae1dd479b33753dcc9a42c0b8637779307..d06c9c187b4fcf04326cdacfcb6e4e88010609cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -128,9 +128,17 @@ dmypy.json
 # Pyre type checker
 .pyre/
 
-# Ignore some output files
+# Doc build
+public/*
+
+# Other files
 *slurm*
 *confusion_matrix*
 *graph*
 *.pickle
 *.pt
+*.mat
+*.csv
+*.xlsx
+*.ods
+*.pdf
diff --git a/README.md b/README.md
index 8256c0e5abce749cac830bb874c606875fe97ebc..9163d1c3d5245329402d63ab4c2e5e54e3a0582d 100644
--- a/README.md
+++ b/README.md
@@ -2,11 +2,11 @@
 BenchNIRS
 
-> Benchmarking framework for machine learning with fNIRS
+*Benchmarking framework for machine learning with fNIRS*
 
 **Quick links**
 → [*Journal article*](https://www.frontiersin.org/articles/10.3389/fnrgo.2023.994969)
-→ [*BenchNIRS repository*](https://gitlab.com/HanBnrd/benchnirs)
+→ [*BenchNIRS source code*](https://gitlab.com/HanBnrd/benchnirs)
 → [*Install BenchNIRS*](https://hanbnrd.gitlab.io/benchnirs/install.html)
 → [*Documentation*](https://hanbnrd.gitlab.io/benchnirs)
 → [*Issue tracker*](https://gitlab.com/HanBnrd/benchnirs/-/issues)
@@ -40,20 +40,6 @@ The documentation of the framework with examples can be found [here](https://han
 A checklist of recommendations towards good practice for machine learning with fNIRS (for brain-computer interface applications) can be found [here](./CHECKLIST.md). We welcome contributions from the community in order to improve it, please see below for more information on how to contribute.
 
-## Minimum tested requirements
-[**Python 3.8**](https://www.python.org/downloads/) with the following libraries:
-- [matplotlib 3.3](https://matplotlib.org/stable/)
-- [mne 0.23](https://mne.tools/stable/install/index.html)
-- [nirsimple 0.1](https://github.com/HanBnrd/NIRSimple#installation)
-- [numpy 1.19](https://numpy.org/install/)
-- [pandas 1.0](https://pandas.pydata.org/docs/getting_started/index.html#installation)
-- [scikit-learn 0.24](https://scikit-learn.org/stable/install.html)
-- [scipy 1.8](https://scipy.org/install/)
-- [seaborn 0.11](https://seaborn.pydata.org/installing.html)
-- [statsmodels 0.12.2](https://www.statsmodels.org/dev/install.html)
-- [torch 1.5](https://pytorch.org/get-started/locally/)
-
-
 ## Setting up *BenchNIRS*
 1. Download and install Python 3.8 or greater, for example with [Miniconda](https://docs.conda.io/projects/miniconda/en/latest/index.html).
@@ -61,20 +47,17 @@ A checklist of recommendations towards good practice for machine learning with f
 ```bash
 pip install benchnirs
 ```
-> Alternatively to install from source, download and unzip the [repository](https://gitlab.com/HanBnrd/benchnirs/-/archive/main/benchnirs-main.zip).
-> Then, in a terminal or command prompt (eg. Anaconda Prompt), navigate to the directory containing the `requirements.txt` file and run:
-> ```bash
-> python -m pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html
-> ```
 3. Download the datasets (see below).
+> Alternatively, to install from source in development mode, download and unzip the [repository](https://gitlab.com/HanBnrd/benchnirs/-/archive/main/benchnirs-main.zip) (or clone it with Git), and run `devinstall.py`.
+
 
 ## Downloading the datasets
-- *Herff et al. 2014* (n-back task): you can download the dataset by making a request [here](http://www.csl.uni-bremen.de/CorpusData/download.php?crps=fNIRS). In the examples, the unzipped folder has been renamed to *dataset_herff_2014* for convenience.
-- *Shin et al. 
2018* (n-back and word generation tasks): you can download the dataset [here](http://doc.ml.tu-berlin.de/simultaneous_EEG_NIRS/NIRS/NIRS_01-26_MATLAB.zip). In the examples, the unzipped folder has been renamed to *dataset_shin_2018* for convenience. -- *Shin et al. 2016* (mental arithmetic task): you can download the dataset by filling the form [here](http://doc.ml.tu-berlin.de/hBCI). Then click on *NIRS_01-29* to download the fNIRS data. In the examples, the unzipped folder has been renamed to *dataset_shin_2016* for convenience. -- *Bak et al. 2019* (motor execution task): you can download the dataset [here](https://figshare.com/ndownloader/files/18069143). In the examples, the unzipped folder has been renamed to *dataset_bak_2019* for convenience. +- *Herff et al. 2014* (n-back task): you can download the dataset by making a request [here](http://www.csl.uni-bremen.de/CorpusData/download.php?crps=fNIRS). +- *Shin et al. 2018* (n-back and word generation tasks): you can download the dataset [here](http://doc.ml.tu-berlin.de/simultaneous_EEG_NIRS/NIRS/NIRS_01-26_MATLAB.zip). +- *Shin et al. 2016* (mental arithmetic task): you can download the dataset by filling the form [here](http://doc.ml.tu-berlin.de/hBCI). Then click on *NIRS_01-29* to download the fNIRS data. +- *Bak et al. 2019* (motor execution task): you can download the dataset [here](https://figshare.com/ndownloader/files/18069143). ## Keeping *BenchNIRS* up to date @@ -84,8 +67,8 @@ pip install --upgrade benchnirs ``` -## Example -A full example script showing how to use the framework with a custom deep learning model can be found [here](https://hanbnrd.gitlab.io/benchnirs/example.html). +## Examples +A set of example scripts showing how to use the framework can be found [here](https://hanbnrd.gitlab.io/benchnirs/examples.html). 
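+
+For instance, with `extract_features` now part of the public API, a classical model can be trained on simple statistical features. The snippet below is a minimal sketch: `dataset_path` is an assumed placeholder for the folder containing the downloaded *Bak et al. 2019* dataset.
+
+```python
+import benchnirs as bn
+
+dataset_path = './dataset_bak_2019/'  # assumed path to the downloaded dataset
+
+epochs = bn.load_dataset('bak_2019_me', dataset_path)
+nirs, labels, groups = bn.process_epochs(epochs['right', 'left', 'foot'])
+features = bn.extract_features(nirs, ['mean', 'std', 'slope'])
+results = bn.machine_learn(features, labels, groups, 'lda')
+print(results)
+```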
## Simple use case @@ -93,34 +76,14 @@ A full example script showing how to use the framework with a custom deep learni ```python import benchnirs as bn -epochs = bn.load_dataset('shin_2018_nb') -data = bn.process_epochs(epochs['0-back', '2-back', '3-back']) -results = bn.deep_learn(*data, my_model) +epochs = bn.load_dataset('bak_2019_me', dataset_path) +data = bn.process_epochs(epochs['right', 'left', 'foot']) +results = bn.deep_learn(*data, 'lstm') print(results) ``` -## Running main scripts -- [`generalised.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/generalised.py) compares the 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a generalised approach (testing with unseen subjects) -- [`dataset_size.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/dataset_size.py) reproduces `generalised.py` but with a range of different dataset sizes (50% to 100% of dataset) to study the influence of this parameter on the classification accuracy -- [`window_size.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/window_size.py) reproduces `generalised.py` but with only the 4 models using feature extraction (LDA, SVC, kNN and ANN) and with a range of different window sizes (2 to 10 seconds) to study the influence of this parameter on the classification accuracy -- [`sliding_window.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/sliding_window.py) reproduces `generalised.py` but with only the 4 models using feature extraction (LDA, SVC, kNN and ANN) and with a 2-second sliding window on the 10-second epochs -- [`personalised.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/personalised.py) compares the 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a personalised approach (training and testing with each subject individually) -- [`visualisation.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/visualisation.py) enables to visualise the data from the datasets with various signal processing - - -## Extra scripts: n-back tailored -- `tailored_generalised.py` compares the 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 2 n-back datasets with a generalised approach (testing with unseen subjects) -- `tailored_window_size.py` reproduces `tailored_generalised.py` but with only 5 models (LDA, SVC, kNN, ANN and LSTM) and with a range of different window sizes (5 to 40 seconds) to study the influence of this parameter on the classification accuracy -- `tailored_shin_nb.py` optimises and evaluates a tailored CNN on the *Shin et al. 2018* n-back dataset with a generalised approach (testing with unseen subjects) - - -## Extra scripts: transfer learning -- `transfer.py` optimises and evaluates a transfer learning model (pretext self-supervised representation learning task with unlabelled and labelled data using a CED, downstream supervised n-back classification task with labelled data) on the *Shin et al. 2018* n-back dataset with a generalised approach (testing with unseen subjects) -- `transfer_no_unlab.py` reproduces `transfer.py` but with only labelled data for the pretext task. - - ## Contributing to the repository Contributions from the community to this repository are highly appreciated. We are mainly interested in contributions to: - improving the recommendation checklist @@ -150,9 +113,9 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389 } ``` -> If you are using the datasets of the framework, please also cite those related works. 
+> If you are using the datasets of the framework, please also cite those related works: > -> *Herff et al. 2014*: +> [*Herff et al. 2014*](https://doi.org/10.3389/fnhum.2013.00935) > ``` > @article{herff2014mental, > title={Mental workload during n-back task—quantified in the prefrontal cortex using fNIRS}, @@ -165,7 +128,7 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389 > } > ``` > -> *Shin et al. 2018*: +> [*Shin et al. 2018*](https://doi.org/10.1038/sdata.2018.3) > ``` > @article{shin2018simultaneous, > title={Simultaneous acquisition of EEG and NIRS during cognitive tasks for an open access dataset}, @@ -178,7 +141,7 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389 > } > ``` > -> *Shin et al. 2016*: +> [*Shin et al. 2016*](https://doi.org/10.1109/TNSRE.2016.2628057) > ``` > @article{shin2016open, > title={Open access dataset for EEG+NIRS single-trial classification}, @@ -192,7 +155,7 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389 > } > ``` > -> *Bak et al. 2019*: +> [*Bak et al. 2019*](https://doi.org/10.3390/electronics8121486) > ``` > @article{bak2019open, > title={Open-Access fNIRS Dataset for Classification of Unilateral Finger-and Foot-Tapping}, diff --git a/benchnirs/__init__.py b/benchnirs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cae2e75397458346b6667e22037c83c22d3fe3bc --- /dev/null +++ b/benchnirs/__init__.py @@ -0,0 +1,25 @@ +""" +BenchNIRS +========= +Benchmarking framework for machine learning with fNIRS +""" + +import lazy_loader as lazy + +from importlib.metadata import version + + +try: + __version__ = version("benchnirs") +except Exception: + __version__ = "dev" + +__getattr__, __dir__, __all__ = lazy.attach( + __name__, + submod_attrs={ + 'load': ['load_dataset'], + 'viz': ['epochs_viz'], + 'process': ['process_epochs', 'extract_features'], + 'learn': ['machine_learn', 'deep_learn', 'deep_transfer_learn'] + } +) diff --git a/src/benchnirs/learn.py b/benchnirs/learn.py similarity index 89% rename from src/benchnirs/learn.py rename to benchnirs/learn.py index 738335140eebdbe52f99dd9fd2d2e4e601248587..adbeb4fda62130b6d9046e3c745585864cf01de6 100644 --- a/src/benchnirs/learn.py +++ b/benchnirs/learn.py @@ -11,7 +11,6 @@ import torch.optim as optim from pandas import DataFrame from torch.utils.data import DataLoader, Dataset -from scipy.stats import linregress from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.metrics import (accuracy_score, precision_recall_fscore_support, confusion_matrix) @@ -34,49 +33,7 @@ N_NEIGHBORS_LIST = list(range(1, 10)) PATIENCE = 5 # for early stopping -def _extract_features(nirs, feature_list): - """ - Perform feature extraction on NIRS data. - - Parameters - ---------- - nirs : array of shape (n_epochs, n_channels, n_times) - Processed NIRS data. - - feature_list : list of strings - List of features to extract. The list can include ``'mean'`` for the - mean along the time axis, ``'std'`` for standard deviation along the - time axis and ``'slope'`` for the slope of the linear regression along - the time axis. - - Returns - ------- - nirs_features : array of shape (n_epochs, n_channels*n_features) - Features extracted from NIRS data. 
- """ - nirs_features = [] - for feature in feature_list: - if feature == 'mean': - feature = np.mean(nirs, axis=2) - elif feature == 'std': - feature = np.std(nirs, axis=2) - elif feature == 'slope': - x = range(nirs.shape[2]) - feature = [] - for epoch in nirs: - ep_slopes = [] - for channel in epoch: - ep_slopes.append(linregress(x, channel).slope) - feature.append(ep_slopes) - nirs_features.append(feature) - - nirs_features = np.stack(nirs_features, axis=2) - nirs_features = nirs_features.reshape(len(nirs), -1) # flatten data - - return nirs_features - - -def machine_learn(nirs, labels, groups, model, features, normalize=False, +def machine_learn(nirs, labels, groups, model, normalize=None, random_state=None, output_folder='./outputs'): """ Perform nested k-fold cross-validation for standard machine learning models @@ -104,16 +61,11 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False, discriminant analysis, ``'svc'`` for a linear support vector classifier or ``'knn'`` for a k-nearest neighbors classifier. - features : list of strings - List of features to extract. The list can include ``'mean'`` for the - mean along the time axis, ``'std'`` for standard deviation along the - time axis and ``'slope'`` for the slope of the linear regression along - the time axis. - - normalize : boolean - Whether to normalize data before feeding to the model with min-max - scaling based on the train set for each iteration of the outer - cross-validation. Defaults to ``False`` for no normalization. + normalize : tuple of integers | None + Axes on which to normalize data before feeding to the model with + min-max scaling based on the train set for each iteration of the outer + cross-validation. For example (0, 2) to normalize across epochs and + time. Defaults to ``None`` for no normalization. random_state : integer | None Controls the shuffling applied to data. 
Pass an integer for @@ -147,9 +99,6 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False, if not os.path.isdir(output_folder): os.makedirs(output_folder) - # Feature extraction - nirs = _extract_features(nirs, features) - # K-fold cross-validator if groups is None: out_kf = StratifiedKFold(n_splits=OUTER_K) @@ -182,11 +131,14 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False, # Min-max scaling if normalize: - maxs = nirs_train.max(axis=0)[np.newaxis, :] - mins = nirs_train.min(axis=0)[np.newaxis, :] + maxs = nirs_train.max(axis=normalize, keepdims=True) + mins = nirs_train.min(axis=normalize, keepdims=True) nirs_train = (nirs_train - mins) / (maxs - mins) nirs_test = (nirs_test - mins) / (maxs - mins) + nirs_train = nirs_train.reshape(len(nirs_train), -1) + nirs_test = nirs_test.reshape(len(nirs_test), -1) + in_split = in_kf.split(nirs_train, labels_train, groups_train) # LDA @@ -199,7 +151,7 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False, # SVC elif model == 'svc': parameters = {'C': C_LIST} - svc = LinearSVC(max_iter=MAX_ITER) + svc = LinearSVC(max_iter=MAX_ITER, dual='auto') clf = GridSearchCV(svc, parameters, scoring='accuracy', cv=in_split) clf.fit(nirs_train, labels_train) @@ -259,6 +211,8 @@ class _ANNClassifier(nn.Module): self.fc3 = nn.Linear(4, n_classes) def forward(self, x): + batch_size = x.size(0) + x = x.view(batch_size, -1) x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = self.fc3(x) @@ -466,8 +420,8 @@ def _test_dl(nirs_test, labels_test, clf): return results -def deep_learn(nirs, labels, groups, model_class, features=None, - normalize=False, batch_sizes=[4, 8, 16, 32, 64], +def deep_learn(nirs, labels, groups, model_class, normalize=None, + batch_sizes=[4, 8, 16, 32, 64], lrs=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1], max_epoch=100, random_state=None, output_folder='./outputs'): """ @@ -496,17 +450,11 @@ def deep_learn(nirs, labels, groups, model_class, features=None, ``__init__()`` method must accept the number of classes as a parameter, and this needs to be the number of output neurons. - features : list of strings | None - List of features to extract. The list can include ``'mean'`` for the - mean along the time axis, ``'std'`` for standard deviation along the - time axis and ``'slope'`` for the slope of the linear regression along - the time axis. Defaults to ``None`` for no feature extration and using - the raw data. - - normalize : boolean - Whether to normalize data before feeding to the model with min-max - scaling based on the train set for each iteration of the outer - cross-validation. Defaults to ``False`` for no normalization. + normalize : tuple of integers | None + Axes on which to normalize data before feeding to the model with + min-max scaling based on the train set for each iteration of the outer + cross-validation. For example (0, 2) to normalize across epochs and + time. Defaults to ``None`` for no normalization. batch_sizes : list of integers List of batch sizes to test for optimization. @@ -533,7 +481,7 @@ def deep_learn(nirs, labels, groups, model_class, features=None, outer cross-validation). all_hps : list of tuples - List of hyperparameters (one tuple for each iteration of the outer + List of best hyperparameters (one tuple for each iteration of the outer cross-validation). Each tuple will be `(batch size, learning rate)`. 
additional_metrics : list of tuples @@ -559,10 +507,6 @@ def deep_learn(nirs, labels, groups, model_class, features=None, print(f'Deep learning: {model_class.__name__}') - # Feature extraction - if features is not None: - nirs = _extract_features(nirs, features) - # Outer split if os.path.isfile(f'{output_folder}/split.pickle'): print('\tSaved k-fold split found, loading it...', end=' ') @@ -604,14 +548,8 @@ def deep_learn(nirs, labels, groups, model_class, features=None, # Min-max scaling if normalize: - if features is not None: - maxs = nirs_train.max(axis=0)[np.newaxis, :] - mins = nirs_train.min(axis=0)[np.newaxis, :] - else: - maxs = nirs_train.max(axis=(0, 2)) - maxs = maxs[np.newaxis, :, np.newaxis] - mins = nirs_train.min(axis=(0, 2)) - mins = mins[np.newaxis, :, np.newaxis] + maxs = nirs_train.max(axis=normalize, keepdims=True) + mins = nirs_train.min(axis=normalize, keepdims=True) nirs_train = (nirs_train - mins) / (maxs - mins) if os.path.isfile(f'{output_folder}/model_k{k}.pt'): @@ -687,14 +625,10 @@ def deep_learn(nirs, labels, groups, model_class, features=None, nirs_train, nirs_test = nirs[out_idx[0]], nirs[out_idx[1]] labels_test = labels[out_idx[1]] - # Min-max scaling + # Min-max scaling of test set using training set only to avoid leakage if normalize: - if features is not None: - maxs = nirs_train.max(axis=0)[np.newaxis, :] - mins = nirs_train.min(axis=0)[np.newaxis, :] - else: - maxs = nirs_train.max(axis=(0, 2))[np.newaxis, :, np.newaxis] - mins = nirs_train.min(axis=(0, 2))[np.newaxis, :, np.newaxis] + maxs = nirs_train.max(axis=normalize, keepdims=True) + mins = nirs_train.min(axis=normalize, keepdims=True) nirs_test = (nirs_test - mins) / (maxs - mins) # Load trained model, hyperparameters and training results @@ -966,7 +900,7 @@ def _proxy_optim(nirs_train, targets_train, groups_train, enc_class, dec_class, def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, - model_class, features=None, normalize=False, + model_class, normalize=None, batch_sizes=[4, 8, 16, 32, 64], lrs=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1], max_epoch=100, random_state=None, output_folder='./outputs'): @@ -1004,17 +938,11 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, parameters. The number of classes needs to be the number of output neurons. - features : list of strings | None - List of features to extract. The list can include ``'mean'`` for the - mean along the time axis, ``'std'`` for standard deviation along the - time axis and ``'slope'`` for the slope of the linear regression along - the time axis. Defaults to ``None`` for no feature extration and using - the raw data. - - normalize : boolean - Whether to normalize data before feeding to the model with min-max - scaling based on the train set for each iteration of the outer - cross-validation. Defaults to ``False`` for no normalization. + normalize : tuple of integers | None + Axes on which to normalize data before feeding to the model with + min-max scaling based on the train set for each iteration of the outer + cross-validation. For example (0, 2) to normalize across epochs and + time. Defaults to ``None`` for no normalization. batch_sizes : list of integers List of batch sizes to test for optimization. @@ -1041,8 +969,8 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, each iteration of the outer cross-validation). all_hps : list of tuples - List of hyperparameters for the overall classifier (one tuple for each - iteration of the outer cross-validation). 
Each tuple will be + List of best hyperparameters for the overall classifier (one tuple for + each iteration of the outer cross-validation). Each tuple will be `(batch size, learning rate)`. additional_metrics : list of tuples @@ -1065,10 +993,6 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, print(f'Deep transfer learning: {enc_class.__name__}/' f'{dec_class.__name__}-{model_class.__name__}') - # Feature extraction - if features is not None: - nirs = _extract_features(nirs, features) - # Get index to split channel types mid_idx = nirs.shape[1] / 2 if mid_idx.is_integer(): @@ -1115,14 +1039,8 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, # Min-max scaling if normalize: - if features is not None: - maxs = nirs_train.max(axis=0)[np.newaxis, :] - mins = nirs_train.min(axis=0)[np.newaxis, :] - else: - maxs = nirs_train.max(axis=(0, 2)) - maxs = maxs[np.newaxis, :, np.newaxis] - mins = nirs_train.min(axis=(0, 2)) - mins = mins[np.newaxis, :, np.newaxis] + maxs = nirs_train.max(axis=normalize, keepdims=True) + mins = nirs_train.min(axis=normalize, keepdims=True) nirs_train = (nirs_train - mins) / (maxs - mins) # Train and optimise self-supervised models @@ -1239,14 +1157,10 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, nirs_train, nirs_test = nirs[out_idx[0]], nirs[out_idx[1]] labels_test = labels[out_idx[1]] - # Min-max scaling + # Min-max scaling of test set using training set only to avoid leakage if normalize: - if features is not None: - maxs = nirs_train.max(axis=0)[np.newaxis, :] - mins = nirs_train.min(axis=0)[np.newaxis, :] - else: - maxs = nirs_train.max(axis=(0, 2))[np.newaxis, :, np.newaxis] - mins = nirs_train.min(axis=(0, 2))[np.newaxis, :, np.newaxis] + maxs = nirs_train.max(axis=normalize, keepdims=True) + mins = nirs_train.min(axis=normalize, keepdims=True) nirs_test = (nirs_test - mins) / (maxs - mins) # Load trained model, hyperparameters and training results diff --git a/src/benchnirs/load.py b/benchnirs/load.py similarity index 98% rename from src/benchnirs/load.py rename to benchnirs/load.py index 95a34adc50b3ac9e2a8412cff2a9128b00982fe5..a4df4db9147545dd44584566d6fc32cda13159ec 100644 --- a/src/benchnirs/load.py +++ b/benchnirs/load.py @@ -494,7 +494,7 @@ class _DatasetBak2019ME(): return data -def load_dataset(dataset, path=None, bandpass=None, order=4, tddr=False, +def load_dataset(dataset, path, bandpass=None, order=4, tddr=False, baseline=(None, 0), roi_sides=False): """ Load and filter one of the open access dataset. @@ -514,9 +514,8 @@ def load_dataset(dataset, path=None, bandpass=None, order=4, tddr=False, ``'bak_2019_me'`` for motor execution from Bak et al., 2019 (epoch interval: -2 to 10 seconds). - path : string | None - Path of the dataset selected with the ``dataset`` parameter. Defaults - to ``None`` to use the default path. + path : string + Path of the dataset selected with the ``dataset`` parameter. bandpass : list of floats | None Cutoff frequencies of the bandpass Butterworth filter. Defaults to @@ -566,10 +565,13 @@ def load_dataset(dataset, path=None, bandpass=None, order=4, tddr=False, loader = None for subj_id, subj in enumerate(loader.subject_list): - if path is None: - data = loader.load(subj) - else: + try: data = loader.load(subj, path) + except FileNotFoundError: + raise FileNotFoundError( + f"dataset not found, please make sure the dataset has been " + f"downloaded and the proper path has been provided (cf. 
" + f"https://hanbnrd.gitlab.io/benchnirs/install.html)") # Create MNE raw object from delta_c info = mne.create_info(ch_names=data['ch_names'], sfreq=data['sfreq'], diff --git a/src/benchnirs/process.py b/benchnirs/process.py similarity index 65% rename from src/benchnirs/process.py rename to benchnirs/process.py index 409df1ef47b984ba1a95f7717ec91b56c401beac..89676ac2e3d3ab48322e4f7d82a3f8c4f1613084 100644 --- a/src/benchnirs/process.py +++ b/benchnirs/process.py @@ -1,3 +1,8 @@ +import numpy as np + +from scipy.stats import linregress + + def process_epochs(mne_epochs, tmax=None, tslide=None, sort=False, reject_criteria=None): """ @@ -76,3 +81,45 @@ def process_epochs(mne_epochs, tmax=None, tslide=None, sort=False, print(f'Dataset shape: {nirs.shape}') return nirs, labels, groups + + +def extract_features(nirs, feature_list): + """ + Perform feature extraction on NIRS data. + + Parameters + ---------- + nirs : array of shape (n_epochs, n_channels, n_times) + Processed NIRS data. + + feature_list : list of strings + List of features to extract. The list can include ``'mean'`` for the + mean along the time axis, ``'std'`` for standard deviation along the + time axis and ``'slope'`` for the slope of the linear regression along + the time axis. + + Returns + ------- + nirs_features : array of shape (n_epochs, n_channels, n_features) + Features extracted from NIRS data. + """ + nirs_features = [] + for feature in feature_list: + if feature == 'mean': + nirs_feature = np.mean(nirs, axis=-1, keepdims=True) + elif feature == 'std': + nirs_feature = np.std(nirs, axis=-1, keepdims=True) + elif feature == 'slope': + x = range(nirs.shape[-1]) + nirs_feature = [] + for epoch in nirs: + ep_slopes = [] + for channel in epoch: + ep_slopes.append(linregress(x, channel).slope) + nirs_feature.append(ep_slopes) + nirs_feature = np.expand_dims(nirs_feature, -1) + nirs_features.append(nirs_feature) + + nirs_features = np.concatenate(nirs_features, axis=-1) + + return nirs_features diff --git a/src/benchnirs/viz.py b/benchnirs/viz.py similarity index 100% rename from src/benchnirs/viz.py rename to benchnirs/viz.py diff --git a/devinstall.py b/devinstall.py new file mode 100644 index 0000000000000000000000000000000000000000..a0cf3dee4301a1f8eece5f2ae831cf3ebd781f65 --- /dev/null +++ b/devinstall.py @@ -0,0 +1,2 @@ +import os +os.system('pip install -e .') diff --git a/docs/source/conf.py b/docs/source/conf.py index 7e8f1945d285d579e8c5c733c24a6cbe24fd97ab..59129b5245319cb5eabdb1bb032ebdaede64fedc 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,7 +13,7 @@ from datetime import datetime, timezone # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
-sys.path.insert(0, os.path.abspath('../../src/benchnirs')) +sys.path.insert(0, os.path.abspath('../../benchnirs')) # -- Project information ----------------------------------------------------- @@ -24,7 +24,7 @@ copyright = f'2021-{current_year}, Johann Benerradi' author = 'Johann Benerradi' # The full version, including alpha/beta/rc tags -release = '1.2.1' +release = '1.2.2' # -- General configuration --------------------------------------------------- diff --git a/docs/source/example.rst b/docs/source/example.rst deleted file mode 100644 index 1b674a15734aec6f1455bd0a3d5caf728b870d9d..0000000000000000000000000000000000000000 --- a/docs/source/example.rst +++ /dev/null @@ -1,149 +0,0 @@ -Example -======= - -Below is an example of how to use `BenchNIRS` with a custom convolutional neural network (CNN). - -.. code-block:: python - - import datetime - import matplotlib.pyplot as plt - import numpy as np - import os - import pandas as pd - import seaborn as sns - import torch - import torch.nn as nn - import torch.nn.functional as F - - from scipy import stats - - from benchnirs.load import load_dataset - from benchnirs.process import process_epochs - from benchnirs.learn import deep_learn - - - ALL_DATA_PATH = '/folder/with/datasets/' # path to the datasets - DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], - 'shin_2018_nb': ['0-back', '2-back', '3-back'], - 'shin_2018_wg': ['baseline', 'word generation'], - 'shin_2016_ma': ['baseline', 'mental arithmetic'], - 'bak_2019_me': ['right', 'left', 'foot']} - CONFIDENCE = 0.05 # stat confidence at 95 % - - - class CustomCNN(nn.Module): - - def __init__(self, n_classes): - super(CustomCNN, self).__init__() - self.conv1 = nn.Conv1d(4, 4, kernel_size=10, stride=2) # tempo conv - self.pool1 = nn.MaxPool1d(2) - self.conv2 = nn.Conv1d(4, 4, kernel_size=5, stride=2) # tempo conv - self.pool2 = nn.MaxPool1d(2) - self.fc1 = nn.Linear(20, 10) - self.fc2 = nn.Linear(10, n_classes) - - def forward(self, x): - batch_size = x.size(0) - x = F.relu(self.conv1(x)) - x = self.pool1(x) - x = F.relu(self.conv2(x)) - x = self.pool2(x) - x = x.view(batch_size, -1) - x = F.relu(self.fc1(x)) - x = self.fc2(x) - return x - - - start_time = datetime.datetime.now() - out_folder = f'../results/custom' - if not os.path.isdir(out_folder): - os.makedirs(out_folder) - print(f'Main output folder: {out_folder}/') - - print(f'Number of GPUs: {torch.cuda.device_count()}') - - with open(f'{out_folder}/summary.md', 'w') as w: - w.write('# Accuracy table\n\n(Standard deviation on the cross-validation)') - w.write('\n\n|Dataset|Chance level|Average accuracy (sd)|\n') - w.write('|:---:|:---:|:---:|\n') - - with open(f'{out_folder}/results.csv', 'w') as w: - w.write('dataset;fold;accuracy;hyperparameters;additional_metrics\n') - - - dict_accuracies = {'Accuracy': [], 'Dataset': []} - for dataset in DATASETS.keys(): - print(f'=====\n{dataset}\n=====') - data_path = f'{ALL_DATA_PATH}dataset_{dataset[:-3]}/' - out_path = f'{out_folder}/{dataset}_' - - # Load and preprocess data - epochs = load_dataset(dataset, path=data_path, bandpass=[0.01, 0.5], - baseline=(-2, 0), roi_sides=True, tddr=True) - classes = DATASETS[dataset] - epochs_lab = epochs[classes] - - # Run models - nirs, labels, groups = process_epochs(epochs_lab, 9.9) - cnn, hps_cnn, additional_metrics_cnn = deep_learn( - nirs, labels, groups, CustomCNN, features=None, - output_folder=f'{out_path}cnn') - - # Write results - results = {'CNN': [cnn, hps_cnn]} - chance_level = np.around(1/len(classes), decimals=3) - 
w_summary = open(f'{out_folder}/summary.md', 'a') - w_results = open(f'{out_folder}/results.csv', 'a') - w_summary.write(f'|{dataset}|{chance_level}|') - w_summary.write( - f'{np.around(np.mean(cnn), decimals=3)} ' - f'({np.around(np.std(cnn), decimals=3)})|') - for fold, accuracy in enumerate(cnn): - w_results.write(f'{dataset};{fold+1};{accuracy};"{hps_cnn[fold]}";') - w_results.write(f'"{additional_metrics_cnn[fold]}"\n') - w_summary.write('\n') - w_summary.close() - w_results.close() - dict_accuracies['Accuracy'] += cnn - dict_accuracies['Dataset'] += [dataset] * len(cnn) - - - df_accuracies = pd.DataFrame(dict_accuracies) - sns.barplot(data=df_accuracies, y='Accuracy', x='Dataset', capsize=.1, - palette='colorblind') - plt.savefig(f'{out_folder}/summary.png') - plt.close() - - - # Stats - print('Stats...') - with open(f'{out_folder}/stats.md', 'w') as w: - df = pd.read_csv(f'{out_folder}/results.csv', delimiter=';') - w.write('## Comparison of the model accuracy to chance level\n\n') - w.write('|Dataset|Shapiro p-value|Test|p-value|\n') - w.write('|:---:|:---:|:---:|:---:|\n') - for dataset in DATASETS.keys(): - dataset_accuracies = [] - chance_level = 1 / len(DATASETS[dataset]) - normality = True - w.write(f'|{dataset}|') - sub_df = df[df['dataset'] == dataset] - accuracies = sub_df['accuracy'].to_numpy() - dataset_accuracies.append(accuracies) - # Check normality of the distribution - _, p_shap = stats.shapiro(accuracies) - w.write(f'{p_shap}|') - if p_shap > CONFIDENCE: - # t-test - _, p_tt = stats.ttest_1samp(accuracies, chance_level) - w.write(f't-test|{p_tt}|\n') - else: - normality = False - # Wilcoxon - _, p_wilcox = stats.wilcoxon(accuracies-chance_level) - w.write(f'Wilcoxon|{p_wilcox}|\n') - - - end_time = datetime.datetime.now() - elapsed_time = end_time - start_time - print(f'===\nElapsed time: {elapsed_time}') diff --git a/docs/source/examples.rst b/docs/source/examples.rst new file mode 100644 index 0000000000000000000000000000000000000000..2c4c50fc397c48633d8dcb528ff3f22622201c5e --- /dev/null +++ b/docs/source/examples.rst @@ -0,0 +1,13 @@ +Examples +======== + +.. toctree:: + :maxdepth: 2 + + examples/custom-model + examples/generalised + examples/personalised + examples/sliding-window + examples/window-size + examples/dataset-size + examples/visualisation diff --git a/docs/source/examples/custom-model.rst b/docs/source/examples/custom-model.rst new file mode 100644 index 0000000000000000000000000000000000000000..5442ad46921b96b3dd18c90a62164fd2f1d40107 --- /dev/null +++ b/docs/source/examples/custom-model.rst @@ -0,0 +1,6 @@ +Custom model training +===================== + +Below is an example of how to use `BenchNIRS` to train a custom convolutional neural network (CNN) on one of the datasets. + +.. literalinclude:: ../../../examples/tailored_shin_nb.py diff --git a/docs/source/examples/dataset-size.rst b/docs/source/examples/dataset-size.rst new file mode 100644 index 0000000000000000000000000000000000000000..aaf6f589867a03616e457cc53a0ea3b4e4d963a6 --- /dev/null +++ b/docs/source/examples/dataset-size.rst @@ -0,0 +1,10 @@ +Dataset size benchmarking +========================= + +Below is a comparison of 6 machine learning models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a subject-independent approach (testing with unseen subjects), with a range of different dataset sizes (50% to 100% of the dataset) to study the influence of this parameter on the classification accuracy [#benerradi2023]_. + +.. 
literalinclude:: ../../../examples/dataset_size.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/generalised.rst b/docs/source/examples/generalised.rst
new file mode 100644
index 0000000000000000000000000000000000000000..991b6d183c68eb75d9e569f138bba11dfbd60eb7
--- /dev/null
+++ b/docs/source/examples/generalised.rst
@@ -0,0 +1,10 @@
+Subject-independent benchmarking
+================================
+
+Below is a comparison of 6 machine learning models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a subject-independent approach (testing with unseen subjects) [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/generalised.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/personalised.rst b/docs/source/examples/personalised.rst
new file mode 100644
index 0000000000000000000000000000000000000000..17aaacb0603bded96e7bf1e352f47b1d8d44fa50
--- /dev/null
+++ b/docs/source/examples/personalised.rst
@@ -0,0 +1,10 @@
+Subject-specific benchmarking
+=============================
+
+Below is a comparison of 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a subject-specific approach (training and testing with each subject individually) [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/personalised.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/sliding-window.rst b/docs/source/examples/sliding-window.rst
new file mode 100644
index 0000000000000000000000000000000000000000..60d98583cdfbb761bcf5d4d1a42c60fa0c9f5e32
--- /dev/null
+++ b/docs/source/examples/sliding-window.rst
@@ -0,0 +1,10 @@
+Sliding window benchmarking
+===========================
+
+Below is a comparison of 4 machine learning models (LDA, SVC, kNN, ANN) on the 5 datasets with a subject-independent approach (testing with unseen subjects), with a 2-second sliding window on the epochs to split the data into more examples [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/sliding_window.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/visualisation.rst b/docs/source/examples/visualisation.rst
new file mode 100644
index 0000000000000000000000000000000000000000..4ef275a89f6f20e0617d21e4f68087f5b324ace2
--- /dev/null
+++ b/docs/source/examples/visualisation.rst
@@ -0,0 +1,10 @@
+Epochs visualisation
+====================
+
+Below is a visualisation of the epochs from the 5 datasets using the MNE backend [#gramfort2013]_.
+
+.. literalinclude:: ../../../examples/visualisation.py
+
+
+.. rubric:: References
+.. [#gramfort2013] Gramfort, A., Luessi, M., Larson, E., Engemann, D. A., Strohmeier, D., Brodbeck, C., ... & Hämäläinen, M. (2013). 
MEG and EEG data analysis with MNE-Python. Frontiers in neuroscience, 7, 70133.
diff --git a/docs/source/examples/window-size.rst b/docs/source/examples/window-size.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f24e80d363070815b710177b6bc33d7186ae01a3
--- /dev/null
+++ b/docs/source/examples/window-size.rst
@@ -0,0 +1,10 @@
+Window size benchmarking
+========================
+
+Below is a comparison of 4 machine learning models (LDA, SVC, kNN, ANN) on the 5 datasets with a subject-independent approach (testing with unseen subjects), with a range of different durations per trial (2 to 10 seconds) to study the influence of this parameter on the classification accuracy [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/window_size.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 6256151aa8003228ec79859273fc0dbe4259a08e..08877a5038f075c4ba7ed2afea57df477c9bc0b2 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -22,6 +22,11 @@ Features:
 * supervised, self-supervised and transfer learning
 * much more!
 
+.. role:: raw-html(raw)
+   :format: html
+
+:raw-html:`→` `Source code on GitLab <https://gitlab.com/HanBnrd/benchnirs>`_
+
 .. image:: https://img.shields.io/badge/doi-10.3389%2Ffnrgo.2023.994969-blue
    :target: https://doi.org/10.3389/fnrgo.2023.994969
 
@@ -43,7 +48,7 @@
 
    install
    modules
-   example
+   examples
 
 
 Recommendation checklist
@@ -95,7 +100,7 @@ Please refer to `this tutorial `_.
 
-This project is licensed under the `GNU General Public License v3+ <https://www.gnu.org/licenses/gpl-3.0.html>`_.
+This project is licensed under the `GNU General Public License v3+ <https://www.gnu.org/licenses/gpl-3.0.html>`_. If you are using `BenchNIRS`, please cite `this article <https://doi.org/10.3389/fnrgo.2023.994969>`_.
 
 
 Indices and tables
diff --git a/docs/source/install.rst b/docs/source/install.rst
index c55c4e90aa8cdbdb7cdf01ff9114e0ad91532aeb..5847b0c86cfbc4920f8a19ef8412c521d013c5c6 100644
--- a/docs/source/install.rst
+++ b/docs/source/install.rst
@@ -14,6 +14,9 @@ Setting up BenchNIRS
 
 #. Download the datasets (see below).
 
+.. note::
+   Alternatively, to install from source in development mode, download and unzip the `repository <https://gitlab.com/HanBnrd/benchnirs/-/archive/main/benchnirs-main.zip>`_ (or clone it with Git), and run :code:`devinstall.py`.
+
 
 Downloading the datasets
 ------------------------
@@ -31,13 +34,3 @@ To update `BenchNIRS` to the latest version with `pip`, open a terminal (eg. Ana
 .. code-block:: console
 
    pip install --upgrade benchnirs
-
-
-
-.. note::
-   Alternatively to install from source, download and unzip the `repository <https://gitlab.com/HanBnrd/benchnirs/-/archive/main/benchnirs-main.zip>`_.
-   Then, in a terminal or command prompt (eg. Anaconda Prompt), navigate to the directory containing the :code:`requirements.txt` file and run:
-
-   .. code-block:: console
-
-      pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
index 052c72311dc82d9056b5a7194e5241972f16fbeb..4e2279e1e3154b222628c19f4d5e42c5c865760d 100644
--- a/docs/source/modules.rst
+++ b/docs/source/modules.rst
@@ -2,9 +2,9 @@ BenchNIRS API
 =============
 
 .. 
toctree:: - :maxdepth: 4 + :maxdepth: 2 - learn - load - process - viz + modules/learn + modules/load + modules/process + modules/viz diff --git a/docs/source/learn.rst b/docs/source/modules/learn.rst similarity index 100% rename from docs/source/learn.rst rename to docs/source/modules/learn.rst diff --git a/docs/source/load.rst b/docs/source/modules/load.rst similarity index 100% rename from docs/source/load.rst rename to docs/source/modules/load.rst diff --git a/docs/source/process.rst b/docs/source/modules/process.rst similarity index 100% rename from docs/source/process.rst rename to docs/source/modules/process.rst diff --git a/docs/source/viz.rst b/docs/source/modules/viz.rst similarity index 100% rename from docs/source/viz.rst rename to docs/source/modules/viz.rst diff --git a/example.png b/example.png deleted file mode 100644 index c5ce10112c4a490c420cfa77785ebd4f0f53fbcc..0000000000000000000000000000000000000000 Binary files a/example.png and /dev/null differ diff --git a/src/dataset_size.py b/examples/dataset_size.py similarity index 86% rename from src/dataset_size.py rename to examples/dataset_size.py index 23a444f56b30745c902e27d72cc766690f08ad18..341ddb42bb07a54f29a68ece98ee6ff181b4187f 100644 --- a/src/dataset_size.py +++ b/examples/dataset_size.py @@ -1,6 +1,5 @@ import datetime import matplotlib.pyplot as plt -import numpy as np import os import pandas as pd import seaborn as sns @@ -11,10 +10,11 @@ from sklearn.model_selection import train_test_split from sklearn.utils import shuffle from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -26,7 +26,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/dataset_size_{date}' +out_folder = f'./results/dataset_size_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -38,11 +38,12 @@ with open(f'{out_folder}/results.csv', 'w') as w: for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] @@ -59,26 +60,25 @@ for dataset in DATASETS.keys(): else: nirs, labels, groups = shuffle( all_nirs, all_labels, all_groups, random_state=42) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) # Run models lda, hps_lda, _ = machine_learn( - nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}{ts}_lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}{ts}_svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}{ts}_knn') ann, hps_ann, _ = 
deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}{ts}_ann') cnn, hps_cnn, _ = deep_learn( - nirs, labels, groups, 'cnn', features=None, - output_folder=f'{out_path}{ts}_cnn') + nirs, labels, groups, 'cnn', output_folder=f'{out_path}{ts}_cnn') lstm, hps_lstm, _ = deep_learn( - nirs, labels, groups, 'lstm', features=None, - output_folder=f'{out_path}{ts}_lstm') + nirs, labels, groups, 'lstm', output_folder=f'{out_path}{ts}_lstm') dict_train_size['Chance'] += [1/len(classes) for _ in lda] dict_train_size['LDA'] += lda dict_train_size['SVC'] += svc diff --git a/src/generalised.py b/examples/generalised.py similarity index 86% rename from src/generalised.py rename to examples/generalised.py index c5a41125339d6f84f0c5cdba1978c28a8daf0a35..e88d89882854d7dfe746ca5ccc70528aa43c2813 100644 --- a/src/generalised.py +++ b/examples/generalised.py @@ -9,10 +9,11 @@ import torch from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -23,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/generalised_{date}' +out_folder = f'./results/generalised_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -42,34 +43,30 @@ with open(f'{out_folder}/results.csv', 'w') as w: dict_accuracies = {} for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] # Run models nirs, labels, groups = process_epochs(epochs_lab, 9.9) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) lda, hps_lda, _ = machine_learn( - nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}lda') + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}svc') + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}knn') + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}knn') ann, hps_ann, _ = deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}ann') + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}ann') cnn, hps_cnn, _ = deep_learn( - nirs, labels, groups, 'cnn', features=None, - output_folder=f'{out_path}cnn') + nirs, labels, groups, 'cnn', output_folder=f'{out_path}cnn') lstm, hps_lstm, _ = deep_learn( - nirs, labels, groups, 'lstm', features=None, - output_folder=f'{out_path}lstm') + nirs, labels, groups, 'lstm', 
output_folder=f'{out_path}lstm') # Write results results = {'LDA': [lda, hps_lda], 'SVC': [svc, hps_svc], diff --git a/src/personalised.py b/examples/personalised.py similarity index 88% rename from src/personalised.py rename to examples/personalised.py index 1da19675861f4758ba791d52b118b46d243cc5a4..504e85a7ccece2a9ae023eee3cbfe25815d3954c 100644 --- a/src/personalised.py +++ b/examples/personalised.py @@ -9,10 +9,11 @@ import torch from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -23,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/personalised_{date}' +out_folder = f'./results/personalised_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -41,11 +42,12 @@ with open(f'{out_folder}/results.csv', 'w') as w: for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] @@ -58,25 +60,26 @@ for dataset in DATASETS.keys(): print(f'-----\nSubject {subj+1}\n-----') indices = [i for i, value in enumerate(all_groups) if value == subj] nirs, labels = all_nirs[indices], all_labels[indices] + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) # Run models lda, hps_lda, _ = machine_learn( - nirs, labels, None, 'lda', features=['mean', 'std', 'slope'], + nirs_features, labels, groups=None, model='lda', output_folder=f'{out_path}{subj+1}_lda') svc, hps_svc, _ = machine_learn( - nirs, labels, None, 'svc', features=['mean', 'std', 'slope'], + nirs_features, labels, groups=None, model='svc', output_folder=f'{out_path}{subj+1}_svc') knn, hps_knn, _ = machine_learn( - nirs, labels, None, 'knn', features=['mean', 'std', 'slope'], + nirs_features, labels, groups=None, model='knn', output_folder=f'{out_path}{subj+1}_knn') ann, hps_ann, _ = deep_learn( - nirs, labels, None, 'ann', features=['mean', 'std', 'slope'], + nirs_features, labels, groups=None, model_class='ann', output_folder=f'{out_path}{subj+1}_ann') cnn, hps_cnn, _ = deep_learn( - nirs, labels, None, 'cnn', features=None, + nirs, labels, groups=None, model_class='cnn', output_folder=f'{out_path}{subj+1}_cnn') lstm, hps_lstm, _ = deep_learn( - nirs, labels, None, 'lstm', features=None, + nirs, labels, groups=None, model_class='lstm', output_folder=f'{out_path}{subj+1}_lstm') # Write results diff --git a/src/sliding_window.py b/examples/sliding_window.py similarity index 87% rename from src/sliding_window.py rename to examples/sliding_window.py index 4b6669e70fad83d7d3283c0cb8ba76ceaeacd4c6..f69d272afadbabeed93397bbba384bce20369fb8 100644 --- a/src/sliding_window.py +++ b/examples/sliding_window.py @@ -9,10 +9,11 @@ import torch from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import 
process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -23,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/sliding_window_{date}' +out_folder = f'./results/sliding_window_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -42,28 +43,26 @@ with open(f'{out_folder}/results.csv', 'w') as w: dict_accuracies = {} for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] # Run models nirs, labels, groups = process_epochs(epochs_lab, 9.9, tslide=2) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) lda, hps_lda, _ = machine_learn( - nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}lda') + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}svc') + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}knn') + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}knn') ann, hps_ann, _ = deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}ann') + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}ann') # Write results results = {'LDA': [lda, hps_lda], 'SVC': [svc, hps_svc], diff --git a/src/stats/comparison_stats_dataset.py b/examples/stats/comparison_stats_dataset.py similarity index 95% rename from src/stats/comparison_stats_dataset.py rename to examples/stats/comparison_stats_dataset.py index 353ffa6c0160a70275c4cfb0cf638b156a0de6e0..c1f2afcd55ae7bd275fcc3229ea1fb1bbba3148c 100644 --- a/src/stats/comparison_stats_dataset.py +++ b/examples/stats/comparison_stats_dataset.py @@ -6,8 +6,8 @@ from scipy import stats CONFIDENCE = 0.05 # stat confidence at 95 % -new_results = './results_new_model.csv' -old_results = './results_old_model.csv' +new_results = '../results_new_model.csv' +old_results = '../results_old_model.csv' # Stats print('Stats...') diff --git a/src/stats/comparison_stats_task.py b/examples/stats/comparison_stats_task.py similarity index 97% rename from src/stats/comparison_stats_task.py rename to examples/stats/comparison_stats_task.py index b828140c2bb04f3c0e6a962cd66b85767f72a1c4..65cbad9e9f53a2b2f1ab4913d06b294e14335fce 100644 --- a/src/stats/comparison_stats_task.py +++ b/examples/stats/comparison_stats_task.py @@ -8,8 +8,8 @@ DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], CONFIDENCE = 0.05 # stat confidence at 95 % -new_results = './new_results.csv' -old_results = './old_results.csv' +new_results = '../new_results.csv' 
+old_results = '../old_results.csv' models = ['LDA', 'SVC', 'kNN', 'ANN', 'CNN', 'LSTM'] # Stats diff --git a/src/stats/extra_stats.py b/examples/stats/extra_stats.py similarity index 100% rename from src/stats/extra_stats.py rename to examples/stats/extra_stats.py diff --git a/src/tailored_generalised.py b/examples/tailored_generalised.py similarity index 87% rename from src/tailored_generalised.py rename to examples/tailored_generalised.py index 274639d2bc46f95f36844486d875df9e6d73fe57..c01e81bf90a74b6c5bc5efe9174976cfb0cd17a5 100644 --- a/src/tailored_generalised.py +++ b/examples/tailored_generalised.py @@ -11,10 +11,11 @@ import torch.nn.functional as F from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back']} CONFIDENCE = 0.05 # stat confidence at 95 % @@ -43,10 +44,9 @@ class _CNNnback(nn.Module): return x - start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/tailored_generalised_{date}' +out_folder = f'./results/tailored_generalised_{date}' os.makedirs(out_folder) @@ -67,34 +67,30 @@ with open(f'{out_folder}/results.csv', 'w') as w: dict_accuracies = {} for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] # Run models nirs, labels, groups = process_epochs(epochs_lab, 39.9) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) lda, hps_lda, _ = machine_learn( - nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}lda') + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}svc') + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}knn') + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}knn') ann, hps_ann, _ = deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}ann') + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}ann') cnn, hps_cnn, _ = deep_learn( - nirs, labels, groups, _CNNnback, features=None, - output_folder=f'{out_path}cnn') + nirs, labels, groups, _CNNnback, output_folder=f'{out_path}cnn') lstm, hps_lstm, _ = deep_learn( - nirs, labels, groups, 'lstm', features=None, - output_folder=f'{out_path}lstm') + nirs, labels, groups, 'lstm', output_folder=f'{out_path}lstm') # Write results results = {'LDA': [lda, hps_lda], 'SVC': [svc, hps_svc], diff --git a/src/tailored_shin_nb.py b/examples/tailored_shin_nb.py similarity index 92% rename from src/tailored_shin_nb.py rename to examples/tailored_shin_nb.py index 
080e19dc477a1695bdcff0ca16c20ff236caf87f..9e086bd3afe785688b697c69cc1c66edf49b1e7b 100644 --- a/src/tailored_shin_nb.py +++ b/examples/tailored_shin_nb.py @@ -12,6 +12,7 @@ from benchnirs.process import process_epochs from benchnirs.learn import deep_learn +DATA_PATH = '../../data/dataset_shin_2018/' # path to the dataset CLASSES = ['0-back', '2-back', '3-back'] CONFIDENCE = 0.05 # stat confidence at 95 % ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23], @@ -23,7 +24,7 @@ ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23], start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/tailored_shin_nb_{date}' +out_folder = f'./results/tailored_shin_nb_{date}' class CustomCNN(nn.Module): @@ -64,7 +65,7 @@ print(f'Number of GPUs: {torch.cuda.device_count()}') print(f'=====\nshin_2018_nb\n=====') # Load and preprocess data -epochs = load_dataset('shin_2018_nb', bandpass=[0.01, 0.5], +epochs = load_dataset('shin_2018_nb', DATA_PATH, bandpass=[0.01, 0.5], baseline=(-2, 0), tddr=True) ch_picks = [] for group in ROIS.values(): @@ -76,8 +77,7 @@ epochs_lab = epochs[CLASSES] nirs, labels, groups = process_epochs(epochs_lab, tmax=39.9, sort=True) print(nirs.shape) accuracies, hps, additional_metrics = deep_learn( - nirs, labels, groups, CustomCNN, - features=None, normalize=True, + nirs, labels, groups, CustomCNN, normalize=(0, 2), output_folder=f'{out_folder}') # Write results diff --git a/src/tailored_window_size.py b/examples/tailored_window_size.py similarity index 85% rename from src/tailored_window_size.py rename to examples/tailored_window_size.py index 9c5711984c746e62aa972f4ad72bd4b2c5fc3d6c..1214573f89f018657de7e7d3ebdfaeec01b19d16 100644 --- a/src/tailored_window_size.py +++ b/examples/tailored_window_size.py @@ -1,6 +1,5 @@ import datetime import matplotlib.pyplot as plt -import numpy as np import os import pandas as pd import seaborn as sns @@ -9,10 +8,11 @@ import torch from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back']} WINDOW_SIZES = [4.9, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9] @@ -21,7 +21,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/tailored_window_size_{date}' +out_folder = f'./results/tailored_window_size_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -32,11 +32,12 @@ with open(f'{out_folder}/results.csv', 'w') as w: for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] @@ -45,23 +46,23 @@ for dataset in DATASETS.keys(): for ws in WINDOW_SIZES: print(f'-----\nWindow size {ws}\n-----') nirs, labels, groups = process_epochs(epochs_lab, ws) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) # Run models lda, hps_lda, _ = machine_learn( - nirs, 
labels, groups, 'lda', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}{ws}_lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}{ws}_svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}{ws}_knn') ann, hps_ann, _ = deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}{ws}_ann') lstm, hps_lstm, _ = deep_learn( - nirs, labels, groups, 'lstm', features=None, - output_folder=f'{out_path}{ws}_lstm') + nirs, labels, groups, 'lstm', output_folder=f'{out_path}{ws}_lstm') dict_window_size['Chance'] += [1/len(classes) for _ in lda] dict_window_size['LDA'] += lda dict_window_size['SVC'] += svc diff --git a/src/transfer.py b/examples/transfer.py similarity index 93% rename from src/transfer.py rename to examples/transfer.py index 41b8f1eb25c13b39e34e65a146754c310efe8394..50041323dbe3546bfabdd68058556d26bbe3342a 100644 --- a/src/transfer.py +++ b/examples/transfer.py @@ -12,6 +12,8 @@ from benchnirs.process import process_epochs from benchnirs.learn import deep_transfer_learn +DATA_PATH = '../../data/dataset_shin_2018/' # path to the dataset +# CLASSES = ['0-back', '2-back', '3-back'] CLASSES = ['0-back', '2-back', '3-back', 'unlabelled'] CONFIDENCE = 0.05 # stat confidence at 95 % ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23], @@ -23,7 +25,7 @@ ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23], start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/transfer_{date}' +out_folder = f'./results/transfer_{date}' class HbEncoder(nn.Module): @@ -99,7 +101,7 @@ print(f'Number of GPUs: {torch.cuda.device_count()}') print(f'=====\nshin_2018_nb\n=====') # Load and preprocess data -epochs = load_dataset('shin_2018_nb', bandpass=[0.01, 0.5], +epochs = load_dataset('shin_2018_nb', DATA_PATH, bandpass=[0.01, 0.5], baseline=(-2, 0), tddr=True) print(epochs) ch_picks = [] @@ -112,8 +114,7 @@ epochs_lab = epochs[CLASSES] nirs, labels, groups = process_epochs(epochs_lab, tmax=39.9, sort=True) print(nirs.shape) accuracies, hps, additional_metrics = deep_transfer_learn( - nirs, labels, groups, HbEncoder, HbDecoder, Classifier, - features=None, normalize=True, + nirs, labels, groups, HbEncoder, HbDecoder, Classifier, normalize=(0, 2), output_folder=f'{out_folder}', max_epoch=500) # Write results diff --git a/src/visualisation.py b/examples/visualisation.py similarity index 77% rename from src/visualisation.py rename to examples/visualisation.py index f1c332f5ef3e1e14da7fdd2cfb615a66a710536b..87d3bcf0748c15fa02d52f9e224b7b37fbc113f1 100644 --- a/src/visualisation.py +++ b/examples/visualisation.py @@ -2,7 +2,7 @@ from benchnirs.load import load_dataset from benchnirs.viz import epochs_viz -ALL_DATA_PATH = '/folder/with/datasets/' # path to the datasets +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -12,10 +12,10 @@ DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') - path = f'{ALL_DATA_PATH}{dataset[:-3]}/' + data_path = 
f'{ALL_DATA_PATH}{dataset[:-3]}/' # Load and preprocess data - epochs = load_dataset(dataset, path=path, bandpass=[0.01, 0.5], + epochs = load_dataset(dataset, path=data_path, bandpass=[0.01, 0.5], baseline=(-1.99, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] diff --git a/src/window_size.py b/examples/window_size.py similarity index 87% rename from src/window_size.py rename to examples/window_size.py index 929cee9bb07dc1ab2879caee64559da56e42ef1e..387ae106e0ccded3993dd07476bad9ff3d5eb04b 100644 --- a/src/window_size.py +++ b/examples/window_size.py @@ -1,6 +1,5 @@ import datetime import matplotlib.pyplot as plt -import numpy as np import os import pandas as pd import seaborn as sns @@ -9,10 +8,11 @@ import torch from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -24,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/window_size_{date}' +out_folder = f'./results/window_size_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -35,11 +35,12 @@ with open(f'{out_folder}/results.csv', 'w') as w: for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] @@ -48,19 +49,20 @@ for dataset in DATASETS.keys(): for ws in WINDOW_SIZES: print(f'-----\nWindow size {ws}\n-----') nirs, labels, groups = process_epochs(epochs_lab, ws) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) # Run models lda, hps_lda, _ = machine_learn( - nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}{ws}_lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}{ws}_svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}{ws}_knn') ann, hps_ann, _ = deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}{ws}_ann') dict_window_size['Chance'] += [1/len(classes) for _ in lda] dict_window_size['LDA'] += lda diff --git a/requirements.txt b/requirements.txt index cf3ca65caa34b7666adc2f1327116e37c41d2250..06eb815cc6822bc6364579c9201105b52a5d3d13 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,16 @@ # To install all the required packages, run in a terminal or command prompt: # python -m pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html -matplotlib>=3.3.1 -mne>=0.23.4 -nirsimple>=0.1.2 -numpy>=1.19.5 -pandas>=1.0.5 
-scikit-learn>=0.24.2 -scipy>=1.8.1 -seaborn>=0.11.1 -statsmodels>=0.12.2 -torch>=1.5.1+cu101 -torchvision>=0.6.1+cu101 +importlib +lazy_loader +numpy +pandas +scipy +mne +matplotlib +seaborn +scikit-learn +torch +torchvision +nirsimple +statsmodels diff --git a/setup.py b/setup.py index 8f2cf0ec03b0da9c35ad86112023661fdd7c7137..1b7701995b92d1189d61ff07dc11df84a07bb745 100644 --- a/setup.py +++ b/setup.py @@ -5,17 +5,18 @@ with open("README.md", "r") as fh: setuptools.setup( name="benchnirs", - version="1.2.1", + version="1.2.2", author="Johann Benerradi", author_email="johann.benerradi@gmail.com", description="Benchmarking framework for machine learning with fNIRS", long_description=long_description, long_description_content_type="text/markdown", url="https://gitlab.com/HanBnrd/benchnirs", - license='GNU GPLv3+', - package_dir={"": "src"}, - packages=setuptools.find_packages(where="src"), + license="GNU GPLv3+", + packages=setuptools.find_packages(), install_requires=[ + "importlib", + "lazy_loader", "numpy", "pandas", "scipy", diff --git a/src/benchnirs/__init__.py b/src/benchnirs/__init__.py deleted file mode 100644 index ef6bcf6661395741d926ef75ef8a4a1fc2a7172d..0000000000000000000000000000000000000000 --- a/src/benchnirs/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -BenchNIRS -========= -Benchmarking framework for machine learning with fNIRS -""" - -from .load import load_dataset -from .viz import epochs_viz -from .process import process_epochs -from .learn import machine_learn, deep_learn diff --git a/src/custom_model.py b/src/custom_model.py deleted file mode 100644 index c5bd321cfa045bdec1ab4687dbaca74e8a25c8d2..0000000000000000000000000000000000000000 --- a/src/custom_model.py +++ /dev/null @@ -1,142 +0,0 @@ -import datetime -import matplotlib.pyplot as plt -import numpy as np -import os -import pandas as pd -import seaborn as sns -import torch -import torch.nn as nn -import torch.nn.functional as F - -from scipy import stats - -from benchnirs.load import load_dataset -from benchnirs.process import process_epochs -from benchnirs.learn import deep_learn - - -ALL_DATA_PATH = '/folder/with/datasets/' # path to the datasets -DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], - 'shin_2018_nb': ['0-back', '2-back', '3-back'], - 'shin_2018_wg': ['baseline', 'word generation'], - 'shin_2016_ma': ['baseline', 'mental arithmetic'], - 'bak_2019_me': ['right', 'left', 'foot']} -CONFIDENCE = 0.05 # stat confidence at 95 % - - -class CustomCNN(nn.Module): - - def __init__(self, n_classes): - super(CustomCNN, self).__init__() - self.conv1 = nn.Conv1d(4, 4, kernel_size=10, stride=2) # tempo conv - self.pool1 = nn.MaxPool1d(2) - self.conv2 = nn.Conv1d(4, 4, kernel_size=5, stride=2) # tempo conv - self.pool2 = nn.MaxPool1d(2) - self.fc1 = nn.Linear(20, 10) - self.fc2 = nn.Linear(10, n_classes) - - def forward(self, x): - batch_size = x.size(0) - x = F.relu(self.conv1(x)) - x = self.pool1(x) - x = F.relu(self.conv2(x)) - x = self.pool2(x) - x = x.view(batch_size, -1) - x = F.relu(self.fc1(x)) - x = self.fc2(x) - return x - - -start_time = datetime.datetime.now() -out_folder = f'../results/custom' -if not os.path.isdir(out_folder): - os.makedirs(out_folder) -print(f'Main output folder: {out_folder}/') - -print(f'Number of GPUs: {torch.cuda.device_count()}') - -with open(f'{out_folder}/summary.md', 'w') as w: - w.write('# Accuracy table\n\n(Standard deviation on the cross-validation)') - w.write('\n\n|Dataset|Chance level|Average accuracy (sd)|\n') - w.write('|:---:|:---:|:---:|\n') - 
-with open(f'{out_folder}/results.csv', 'w') as w: - w.write('dataset;fold;accuracy;hyperparameters;additional_metrics\n') - - -dict_accuracies = {'Accuracy': [], 'Dataset': []} -for dataset in DATASETS.keys(): - print(f'=====\n{dataset}\n=====') - data_path = f'{ALL_DATA_PATH}dataset_{dataset[:-3]}/' - out_path = f'{out_folder}/{dataset}_' - - # Load and preprocess data - epochs = load_dataset(dataset, path=data_path, bandpass=[0.01, 0.5], - baseline=(-2, 0), roi_sides=True, tddr=True) - classes = DATASETS[dataset] - epochs_lab = epochs[classes] - - # Run models - nirs, labels, groups = process_epochs(epochs_lab, 9.9) - cnn, hps_cnn, additional_metrics_cnn = deep_learn( - nirs, labels, groups, CustomCNN, features=None, - output_folder=f'{out_path}cnn') - - # Write results - results = {'CNN': [cnn, hps_cnn]} - chance_level = np.around(1/len(classes), decimals=3) - w_summary = open(f'{out_folder}/summary.md', 'a') - w_results = open(f'{out_folder}/results.csv', 'a') - w_summary.write(f'|{dataset}|{chance_level}|') - w_summary.write( - f'{np.around(np.mean(cnn), decimals=3)} ' - f'({np.around(np.std(cnn), decimals=3)})|') - for fold, accuracy in enumerate(cnn): - w_results.write(f'{dataset};{fold+1};{accuracy};"{hps_cnn[fold]}";') - w_results.write(f'"{additional_metrics_cnn[fold]}"\n') - w_summary.write('\n') - w_summary.close() - w_results.close() - dict_accuracies['Accuracy'] += cnn - dict_accuracies['Dataset'] += [dataset] * len(cnn) - - -df_accuracies = pd.DataFrame(dict_accuracies) -sns.barplot(data=df_accuracies, y='Accuracy', x='Dataset', capsize=.1, - palette='colorblind') -plt.savefig(f'{out_folder}/summary.png') -plt.close() - - -# Stats -print('Stats...') -with open(f'{out_folder}/stats.md', 'w') as w: - df = pd.read_csv(f'{out_folder}/results.csv', delimiter=';') - w.write('## Comparison of the model accuracy to chance level\n\n') - w.write('|Dataset|Shapiro p-value|Test|p-value|\n') - w.write('|:---:|:---:|:---:|:---:|\n') - for dataset in DATASETS.keys(): - dataset_accuracies = [] - chance_level = 1 / len(DATASETS[dataset]) - normality = True - w.write(f'|{dataset}|') - sub_df = df[df['dataset'] == dataset] - accuracies = sub_df['accuracy'].to_numpy() - dataset_accuracies.append(accuracies) - # Check normality of the distribution - _, p_shap = stats.shapiro(accuracies) - w.write(f'{p_shap}|') - if p_shap > CONFIDENCE: - # t-test - _, p_tt = stats.ttest_1samp(accuracies, chance_level) - w.write(f't-test|{p_tt}|\n') - else: - normality = False - # Wilcoxon - _, p_wilcox = stats.wilcoxon(accuracies-chance_level) - w.write(f'Wilcoxon|{p_wilcox}|\n') - - -end_time = datetime.datetime.now() -elapsed_time = end_time - start_time -print(f'===\nElapsed time: {elapsed_time}') diff --git a/src/transfer_no_unlab.py b/src/transfer_no_unlab.py deleted file mode 100644 index 7d4432c12009ee763a09151aa2b64a01c07f883a..0000000000000000000000000000000000000000 --- a/src/transfer_no_unlab.py +++ /dev/null @@ -1,140 +0,0 @@ -import datetime -import numpy as np -import os -import torch -import torch.nn as nn -import torch.nn.functional as F - -from scipy import stats - -from benchnirs.load import load_dataset -from benchnirs.process import process_epochs -from benchnirs.learn import deep_transfer_learn - - -CLASSES = ['0-back', '2-back', '3-back'] -CONFIDENCE = 0.05 # stat confidence at 95 % -ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23], - 'Right PFC HbR': [45, 46, 55, 56, 57, 58, 59], - 'Left PFC HbO': [0, 1, 2, 3, 4, 5, 6], - 'Left PFC HbR': [36, 37, 38, 39, 40, 41, 42], - 'Central PFC 
HbO': [7, 8], - 'Central PFC HbR': [43, 44]} - -start_time = datetime.datetime.now() -date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/transfer_no_unlab_{date}' - - -class HbEncoder(nn.Module): - - def __init__(self): - super(HbEncoder, self).__init__() - self.conv1 = nn.Conv1d(16, 8, kernel_size=15, stride=5) # tempo conv - self.conv2 = nn.Conv1d(8, 8, kernel_size=12, stride=6) # tempo conv - self.fc3 = nn.Linear(96, 56) - self.fc4 = nn.Linear(56, 16) - self.bn = nn.BatchNorm1d(8) - - def forward(self, x): - batch_size = x.size(0) - x = F.relu(self.conv1(x)) - x = F.relu(self.bn(self.conv2(x))) - x = x.view(batch_size, -1) # flatten - x = F.relu(self.fc3(x)) - x = F.relu(self.fc4(x)) - return x - - -class HbDecoder(nn.Module): - - def __init__(self): - super(HbDecoder, self).__init__() - self.fc4 = nn.Linear(16, 56) - self.fc3 = nn.Linear(56, 96) - self.tconv2 = nn.ConvTranspose1d(8, 8, kernel_size=12, stride=6) - self.tconv1 = nn.ConvTranspose1d(8, 16, kernel_size=15, stride=5) - - def forward(self, x): - batch_size = x.size(0) - x = F.relu(self.fc4(x)) - x = F.relu(self.fc3(x)) - x = x.view(batch_size, 8, -1) # un-flatten - x = F.relu(self.tconv2(x)) - x = self.tconv1(x) - return x - - -class Classifier(nn.Module): - """ - Classifier layers to connect with the encoder - """ - - def __init__(self, n_classes, encoder_hbo, encoder_hbr): - super(Classifier, self).__init__() - self.encoder_hbo = encoder_hbo - self.encoder_hbr = encoder_hbr - self.fc1 = nn.Linear(32, 16) - self.fc2 = nn.Linear(16, n_classes) - - def forward(self, x): - mid_idx = x.size(1) / 2 - if mid_idx.is_integer(): - mid_idx = int(mid_idx) - features_from_hbo = self.encoder_hbo(x[:, :mid_idx]) - features_from_hbr = self.encoder_hbr(x[:, mid_idx:]) - features_from_hb = torch.cat((features_from_hbo, - features_from_hbr), 1) - x = F.relu(self.fc1(features_from_hb.detach())) - x = self.fc2(x) - return x - - -if not os.path.isdir(out_folder): - os.makedirs(out_folder) -print(f'Main output folder: {out_folder}/') - -print(f'Number of GPUs: {torch.cuda.device_count()}') - -print(f'=====\nshin_2018_nb\n=====') - -# Load and preprocess data -epochs = load_dataset('shin_2018_nb', bandpass=[0.01, 0.5], - baseline=(-2, 0), tddr=True) -print(epochs) -ch_picks = [] -for group in ROIS.values(): - ch_picks += group -epochs.pick(ch_picks) -epochs_lab = epochs[CLASSES] - -# Run models -nirs, labels, groups = process_epochs(epochs_lab, tmax=39.9, sort=True) -print(nirs.shape) -accuracies, hps, additional_metrics = deep_transfer_learn( - nirs, labels, groups, HbEncoder, HbDecoder, Classifier, - features=None, normalize=True, - output_folder=f'{out_folder}', max_epoch=500) - -# Write results -with open(f'{out_folder}/results.csv', 'w') as w: - w.write('dataset;model;fold;accuracy;hyperparameters\n') - for fold, accuracy in enumerate(accuracies): - hp = hps[fold] - w.write(f'shin_2018_nb;CNN;{fold+1};{accuracy};"{hp}"\n') - -print(f'Average accuracy: {np.mean(accuracies)}') -_, p_shap = stats.shapiro(accuracies) -print(f'Shapiro p-value: {p_shap}') -if p_shap > CONFIDENCE: - s_tt, p_tt = stats.ttest_1samp(accuracies, 1/3, alternative='greater') - print(f't-test = {s_tt} (p-value = {p_tt})') -else: - s_wilcox, p_wilcox = stats.wilcoxon(accuracies - np.array(1/3), - alternative='greater') - print(f'Wilcoxon = {s_wilcox} (p-value = {p_wilcox})') - - -end_time = datetime.datetime.now() -elapsed_time = end_time - start_time -print(f'===\nElapsed time: {elapsed_time}')
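
Taken together, the hunks above migrate every example to a new calling convention: `load_dataset()` now takes the dataset path as an explicit argument, and feature extraction moves out of `machine_learn()`/`deep_learn()` into a separate `extract_features()` step. Below is a minimal sketch of the migrated pipeline, assembled only from calls that appear verbatim in this patch; the dataset name, data path, output folder and window length are illustrative, not prescriptive.

```python
import os

from benchnirs.load import load_dataset
from benchnirs.process import process_epochs, extract_features
from benchnirs.learn import machine_learn, deep_learn

DATA_PATH = '../../data/dataset_shin_2018/'  # path to the downloaded dataset

out_folder = './results'
os.makedirs(out_folder, exist_ok=True)

# Load and preprocess data (the dataset path is now passed explicitly)
epochs = load_dataset('shin_2018_nb', DATA_PATH, bandpass=[0.01, 0.5],
                      baseline=(-2, 0), roi_sides=True, tddr=True)
epochs_lab = epochs[['0-back', '2-back', '3-back']]

# Epoching and feature extraction are now two separate steps
nirs, labels, groups = process_epochs(epochs_lab, 9.9)
nirs_features = extract_features(nirs, ['mean', 'std', 'slope'])

# Feature-based models take the extracted features...
lda, hps_lda, _ = machine_learn(nirs_features, labels, groups, 'lda',
                                output_folder=f'{out_folder}/lda')

# ...while sequence models such as the LSTM keep the raw time series
lstm, hps_lstm, _ = deep_learn(nirs, labels, groups, 'lstm',
                               output_folder=f'{out_folder}/lstm')
```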
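
The patch also replaces the boolean `normalize=True` with a tuple, `normalize=(0, 2)`, in the `deep_learn()` and `deep_transfer_learn()` calls that use custom models. The diff does not spell out the new semantics; the sketch below assumes the tuple names the axes of the `(epochs, channels, time)` array over which normalisation is computed. It reuses `nirs`, `labels`, `groups` and `out_folder` from the sketch above, together with the `CustomCNN` architecture removed from `src/custom_model.py`.

```python
import torch.nn as nn
import torch.nn.functional as F

from benchnirs.learn import deep_learn


class CustomCNN(nn.Module):
    """CNN from the deleted src/custom_model.py (4 input channels, as used
    there with roi_sides=True and 9.9 s windows)."""

    def __init__(self, n_classes):
        super(CustomCNN, self).__init__()
        self.conv1 = nn.Conv1d(4, 4, kernel_size=10, stride=2)  # tempo conv
        self.pool1 = nn.MaxPool1d(2)
        self.conv2 = nn.Conv1d(4, 4, kernel_size=5, stride=2)  # tempo conv
        self.pool2 = nn.MaxPool1d(2)
        self.fc1 = nn.Linear(20, 10)
        self.fc2 = nn.Linear(10, n_classes)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = x.view(batch_size, -1)  # flatten before the dense layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# normalize=(0, 2) is assumed (not documented in this patch) to select the
# epoch and time axes of the (epochs, channels, time) input for normalisation
cnn, hps_cnn, _ = deep_learn(nirs, labels, groups, CustomCNN,
                             normalize=(0, 2),
                             output_folder=f'{out_folder}/custom_cnn')
```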