diff --git a/.gitignore b/.gitignore
index c80060ae1dd479b33753dcc9a42c0b8637779307..d06c9c187b4fcf04326cdacfcb6e4e88010609cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -128,9 +128,17 @@ dmypy.json
# Pyre type checker
.pyre/
-# Ignore some output files
+# Doc build
+public/*
+
+# Other files
*slurm*
*confusion_matrix*
*graph*
*.pickle
*.pt
+*.mat
+*.csv
+*.xlsx
+*.ods
+*.pdf
diff --git a/README.md b/README.md
index 8256c0e5abce749cac830bb874c606875fe97ebc..9163d1c3d5245329402d63ab4c2e5e54e3a0582d 100644
--- a/README.md
+++ b/README.md
@@ -2,11 +2,11 @@
-> Benchmarking framework for machine learning with fNIRS
+*Benchmarking framework for machine learning with fNIRS*
**Quick links**
→ [*Journal article*](https://www.frontiersin.org/articles/10.3389/fnrgo.2023.994969)
-→ [*BenchNIRS repository*](https://gitlab.com/HanBnrd/benchnirs)
+→ [*BenchNIRS source code*](https://gitlab.com/HanBnrd/benchnirs)
→ [*Install BenchNIRS*](https://hanbnrd.gitlab.io/benchnirs/install.html)
→ [*Documentation*](https://hanbnrd.gitlab.io/benchnirs)
→ [*Issue tracker*](https://gitlab.com/HanBnrd/benchnirs/-/issues)
@@ -40,20 +40,6 @@ The documentation of the framework with examples can be found [here](https://han
A checklist of recommendations towards good practice for machine learning with fNIRS (for brain-computer interface applications) can be found [here](./CHECKLIST.md). We welcome contributions from the community to improve it; please see below for more information on how to contribute.
-## Minimum tested requirements
-[**Python 3.8**](https://www.python.org/downloads/) with the following libraries:
-- [matplotlib 3.3](https://matplotlib.org/stable/)
-- [mne 0.23](https://mne.tools/stable/install/index.html)
-- [nirsimple 0.1](https://github.com/HanBnrd/NIRSimple#installation)
-- [numpy 1.19](https://numpy.org/install/)
-- [pandas 1.0](https://pandas.pydata.org/docs/getting_started/index.html#installation)
-- [scikit-learn 0.24](https://scikit-learn.org/stable/install.html)
-- [scipy 1.8](https://scipy.org/install/)
-- [seaborn 0.11](https://seaborn.pydata.org/installing.html)
-- [statsmodels 0.12.2](https://www.statsmodels.org/dev/install.html)
-- [torch 1.5](https://pytorch.org/get-started/locally/)
-
-
## Setting up *BenchNIRS*
1. Download and install Python 3.8 or greater, for example with [Miniconda](https://docs.conda.io/projects/miniconda/en/latest/index.html).
@@ -61,20 +47,17 @@ A checklist of recommendations towards good practice for machine learning with f
```bash
pip install benchnirs
```
-> Alternatively to install from source, download and unzip the [repository](https://gitlab.com/HanBnrd/benchnirs/-/archive/main/benchnirs-main.zip).
-> Then, in a terminal or command prompt (eg. Anaconda Prompt), navigate to the directory containing the `requirements.txt` file and run:
-> ```bash
-> python -m pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html
-> ```
3. Download the datasets (see below).
+> Alternatively, to install from source in development mode, download and unzip the [repository](https://gitlab.com/HanBnrd/benchnirs/-/archive/main/benchnirs-main.zip) (or clone it with Git) and run `devinstall.py`.
+
## Downloading the datasets
-- *Herff et al. 2014* (n-back task): you can download the dataset by making a request [here](http://www.csl.uni-bremen.de/CorpusData/download.php?crps=fNIRS). In the examples, the unzipped folder has been renamed to *dataset_herff_2014* for convenience.
-- *Shin et al. 2018* (n-back and word generation tasks): you can download the dataset [here](http://doc.ml.tu-berlin.de/simultaneous_EEG_NIRS/NIRS/NIRS_01-26_MATLAB.zip). In the examples, the unzipped folder has been renamed to *dataset_shin_2018* for convenience.
-- *Shin et al. 2016* (mental arithmetic task): you can download the dataset by filling the form [here](http://doc.ml.tu-berlin.de/hBCI). Then click on *NIRS_01-29* to download the fNIRS data. In the examples, the unzipped folder has been renamed to *dataset_shin_2016* for convenience.
-- *Bak et al. 2019* (motor execution task): you can download the dataset [here](https://figshare.com/ndownloader/files/18069143). In the examples, the unzipped folder has been renamed to *dataset_bak_2019* for convenience.
+- *Herff et al. 2014* (n-back task): you can download the dataset by making a request [here](http://www.csl.uni-bremen.de/CorpusData/download.php?crps=fNIRS).
+- *Shin et al. 2018* (n-back and word generation tasks): you can download the dataset [here](http://doc.ml.tu-berlin.de/simultaneous_EEG_NIRS/NIRS/NIRS_01-26_MATLAB.zip).
+- *Shin et al. 2016* (mental arithmetic task): you can download the dataset by filling in the form [here](http://doc.ml.tu-berlin.de/hBCI). Then click on *NIRS_01-29* to download the fNIRS data.
+- *Bak et al. 2019* (motor execution task): you can download the dataset [here](https://figshare.com/ndownloader/files/18069143).
## Keeping *BenchNIRS* up to date
@@ -84,8 +67,8 @@ pip install --upgrade benchnirs
```
-## Example
-A full example script showing how to use the framework with a custom deep learning model can be found [here](https://hanbnrd.gitlab.io/benchnirs/example.html).
+## Examples
+A set of example scripts showing how to use the framework can be found [here](https://hanbnrd.gitlab.io/benchnirs/examples.html).
## Simple use case
@@ -93,34 +76,14 @@ A full example script showing how to use the framework with a custom deep learni
```python
import benchnirs as bn
-epochs = bn.load_dataset('shin_2018_nb')
-data = bn.process_epochs(epochs['0-back', '2-back', '3-back'])
-results = bn.deep_learn(*data, my_model)
+dataset_path = './dataset_bak_2019/'  # path to the downloaded dataset
+epochs = bn.load_dataset('bak_2019_me', dataset_path)
+data = bn.process_epochs(epochs['right', 'left', 'foot'])
+results = bn.deep_learn(*data, 'lstm')
print(results)
```
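+
+For the classical machine learning models, features can first be extracted from the processed epochs and passed to `machine_learn`. Below is a minimal sketch reusing the variables from the snippet above:
+
+```python
+nirs, labels, groups = data
+features = bn.extract_features(nirs, ['mean', 'std', 'slope'])
+results = bn.machine_learn(features, labels, groups, 'lda')
+```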
-## Running main scripts
-- [`generalised.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/generalised.py) compares the 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a generalised approach (testing with unseen subjects)
-- [`dataset_size.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/dataset_size.py) reproduces `generalised.py` but with a range of different dataset sizes (50% to 100% of dataset) to study the influence of this parameter on the classification accuracy
-- [`window_size.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/window_size.py) reproduces `generalised.py` but with only the 4 models using feature extraction (LDA, SVC, kNN and ANN) and with a range of different window sizes (2 to 10 seconds) to study the influence of this parameter on the classification accuracy
-- [`sliding_window.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/sliding_window.py) reproduces `generalised.py` but with only the 4 models using feature extraction (LDA, SVC, kNN and ANN) and with a 2-second sliding window on the 10-second epochs
-- [`personalised.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/personalised.py) compares the 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a personalised approach (training and testing with each subject individually)
-- [`visualisation.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/visualisation.py) enables to visualise the data from the datasets with various signal processing
-
-
-## Extra scripts: n-back tailored
-- `tailored_generalised.py` compares the 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 2 n-back datasets with a generalised approach (testing with unseen subjects)
-- `tailored_window_size.py` reproduces `tailored_generalised.py` but with only 5 models (LDA, SVC, kNN, ANN and LSTM) and with a range of different window sizes (5 to 40 seconds) to study the influence of this parameter on the classification accuracy
-- `tailored_shin_nb.py` optimises and evaluates a tailored CNN on the *Shin et al. 2018* n-back dataset with a generalised approach (testing with unseen subjects)
-
-
-## Extra scripts: transfer learning
-- `transfer.py` optimises and evaluates a transfer learning model (pretext self-supervised representation learning task with unlabelled and labelled data using a CED, downstream supervised n-back classification task with labelled data) on the *Shin et al. 2018* n-back dataset with a generalised approach (testing with unseen subjects)
-- `transfer_no_unlab.py` reproduces `transfer.py` but with only labelled data for the pretext task.
-
-
## Contributing to the repository
Contributions from the community to this repository are highly appreciated. We are mainly interested in contributions to:
- improving the recommendation checklist
@@ -150,9 +113,9 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389
}
```
-> If you are using the datasets of the framework, please also cite those related works.
+> If you are using the datasets of the framework, please also cite the related works below:
>
-> *Herff et al. 2014*:
+> [*Herff et al. 2014*](https://doi.org/10.3389/fnhum.2013.00935)
> ```
> @article{herff2014mental,
> title={Mental workload during n-back task—quantified in the prefrontal cortex using fNIRS},
@@ -165,7 +128,7 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389
> }
> ```
>
-> *Shin et al. 2018*:
+> [*Shin et al. 2018*](https://doi.org/10.1038/sdata.2018.3)
> ```
> @article{shin2018simultaneous,
> title={Simultaneous acquisition of EEG and NIRS during cognitive tasks for an open access dataset},
@@ -178,7 +141,7 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389
> }
> ```
>
-> *Shin et al. 2016*:
+> [*Shin et al. 2016*](https://doi.org/10.1109/TNSRE.2016.2628057)
> ```
> @article{shin2016open,
> title={Open access dataset for EEG+NIRS single-trial classification},
@@ -192,7 +155,7 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389
> }
> ```
>
-> *Bak et al. 2019*:
+> [*Bak et al. 2019*](https://doi.org/10.3390/electronics8121486)
> ```
> @article{bak2019open,
> title={Open-Access fNIRS Dataset for Classification of Unilateral Finger-and Foot-Tapping},
diff --git a/benchnirs/__init__.py b/benchnirs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cae2e75397458346b6667e22037c83c22d3fe3bc
--- /dev/null
+++ b/benchnirs/__init__.py
@@ -0,0 +1,25 @@
+"""
+BenchNIRS
+=========
+Benchmarking framework for machine learning with fNIRS
+"""
+
+import lazy_loader as lazy
+
+from importlib.metadata import version
+
+
+try:
+ __version__ = version("benchnirs")
+except Exception:
+ __version__ = "dev"
+
+__getattr__, __dir__, __all__ = lazy.attach(
+ __name__,
+ submod_attrs={
+ 'load': ['load_dataset'],
+ 'viz': ['epochs_viz'],
+ 'process': ['process_epochs', 'extract_features'],
+ 'learn': ['machine_learn', 'deep_learn', 'deep_transfer_learn']
+ }
+)
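+
+# Note: `lazy.attach` defers the submodule imports, so `import benchnirs as bn`
+# stays cheap and e.g. `bn.load_dataset` only imports `benchnirs.load` (and its
+# dependencies) on first attribute access.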
diff --git a/src/benchnirs/learn.py b/benchnirs/learn.py
similarity index 89%
rename from src/benchnirs/learn.py
rename to benchnirs/learn.py
index 738335140eebdbe52f99dd9fd2d2e4e601248587..adbeb4fda62130b6d9046e3c745585864cf01de6 100644
--- a/src/benchnirs/learn.py
+++ b/benchnirs/learn.py
@@ -11,7 +11,6 @@ import torch.optim as optim
from pandas import DataFrame
from torch.utils.data import DataLoader, Dataset
-from scipy.stats import linregress
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import (accuracy_score, precision_recall_fscore_support,
confusion_matrix)
@@ -34,49 +33,7 @@ N_NEIGHBORS_LIST = list(range(1, 10))
PATIENCE = 5 # for early stopping
-def _extract_features(nirs, feature_list):
- """
- Perform feature extraction on NIRS data.
-
- Parameters
- ----------
- nirs : array of shape (n_epochs, n_channels, n_times)
- Processed NIRS data.
-
- feature_list : list of strings
- List of features to extract. The list can include ``'mean'`` for the
- mean along the time axis, ``'std'`` for standard deviation along the
- time axis and ``'slope'`` for the slope of the linear regression along
- the time axis.
-
- Returns
- -------
- nirs_features : array of shape (n_epochs, n_channels*n_features)
- Features extracted from NIRS data.
- """
- nirs_features = []
- for feature in feature_list:
- if feature == 'mean':
- feature = np.mean(nirs, axis=2)
- elif feature == 'std':
- feature = np.std(nirs, axis=2)
- elif feature == 'slope':
- x = range(nirs.shape[2])
- feature = []
- for epoch in nirs:
- ep_slopes = []
- for channel in epoch:
- ep_slopes.append(linregress(x, channel).slope)
- feature.append(ep_slopes)
- nirs_features.append(feature)
-
- nirs_features = np.stack(nirs_features, axis=2)
- nirs_features = nirs_features.reshape(len(nirs), -1) # flatten data
-
- return nirs_features
-
-
-def machine_learn(nirs, labels, groups, model, features, normalize=False,
+def machine_learn(nirs, labels, groups, model, normalize=None,
random_state=None, output_folder='./outputs'):
"""
Perform nested k-fold cross-validation for standard machine learning models
@@ -104,16 +61,11 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False,
discriminant analysis, ``'svc'`` for a linear support vector
classifier or ``'knn'`` for a k-nearest neighbors classifier.
- features : list of strings
- List of features to extract. The list can include ``'mean'`` for the
- mean along the time axis, ``'std'`` for standard deviation along the
- time axis and ``'slope'`` for the slope of the linear regression along
- the time axis.
-
- normalize : boolean
- Whether to normalize data before feeding to the model with min-max
- scaling based on the train set for each iteration of the outer
- cross-validation. Defaults to ``False`` for no normalization.
+ normalize : tuple of integers | None
+ Axes on which to normalize data before feeding to the model with
+ min-max scaling based on the train set for each iteration of the outer
+ cross-validation. For example (0, 2) to normalize across epochs and
+ time. Defaults to ``None`` for no normalization.
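+
+ For example, ``normalize=(0, 2)`` computes one minimum and one maximum per
+ channel on the train set (over epochs and time) and rescales it to the
+ [0, 1] range, equivalent to::
+
+ mins = nirs_train.min(axis=(0, 2), keepdims=True)
+ maxs = nirs_train.max(axis=(0, 2), keepdims=True)
+ nirs_train = (nirs_train - mins) / (maxs - mins)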
random_state : integer | None
Controls the shuffling applied to data. Pass an integer for
@@ -147,9 +99,6 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False,
if not os.path.isdir(output_folder):
os.makedirs(output_folder)
- # Feature extraction
- nirs = _extract_features(nirs, features)
-
# K-fold cross-validator
if groups is None:
out_kf = StratifiedKFold(n_splits=OUTER_K)
@@ -182,11 +131,14 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False,
# Min-max scaling
if normalize:
- maxs = nirs_train.max(axis=0)[np.newaxis, :]
- mins = nirs_train.min(axis=0)[np.newaxis, :]
+ maxs = nirs_train.max(axis=normalize, keepdims=True)
+ mins = nirs_train.min(axis=normalize, keepdims=True)
nirs_train = (nirs_train - mins) / (maxs - mins)
nirs_test = (nirs_test - mins) / (maxs - mins)
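+ # flatten each epoch to a 1D feature vector for the scikit-learn estimators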
+ nirs_train = nirs_train.reshape(len(nirs_train), -1)
+ nirs_test = nirs_test.reshape(len(nirs_test), -1)
+
in_split = in_kf.split(nirs_train, labels_train, groups_train)
# LDA
@@ -199,7 +151,7 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False,
# SVC
elif model == 'svc':
parameters = {'C': C_LIST}
- svc = LinearSVC(max_iter=MAX_ITER)
+ svc = LinearSVC(max_iter=MAX_ITER, dual='auto')
clf = GridSearchCV(svc, parameters, scoring='accuracy',
cv=in_split)
clf.fit(nirs_train, labels_train)
@@ -259,6 +211,8 @@ class _ANNClassifier(nn.Module):
self.fc3 = nn.Linear(4, n_classes)
def forward(self, x):
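+ # flatten each example to a 1D vector for the fully connected layers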
+ batch_size = x.size(0)
+ x = x.view(batch_size, -1)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
@@ -466,8 +420,8 @@ def _test_dl(nirs_test, labels_test, clf):
return results
-def deep_learn(nirs, labels, groups, model_class, features=None,
- normalize=False, batch_sizes=[4, 8, 16, 32, 64],
+def deep_learn(nirs, labels, groups, model_class, normalize=None,
+ batch_sizes=[4, 8, 16, 32, 64],
lrs=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1], max_epoch=100,
random_state=None, output_folder='./outputs'):
"""
@@ -496,17 +450,11 @@ def deep_learn(nirs, labels, groups, model_class, features=None,
``__init__()`` method must accept the number of classes as a parameter,
and this needs to be the number of output neurons.
- features : list of strings | None
- List of features to extract. The list can include ``'mean'`` for the
- mean along the time axis, ``'std'`` for standard deviation along the
- time axis and ``'slope'`` for the slope of the linear regression along
- the time axis. Defaults to ``None`` for no feature extration and using
- the raw data.
-
- normalize : boolean
- Whether to normalize data before feeding to the model with min-max
- scaling based on the train set for each iteration of the outer
- cross-validation. Defaults to ``False`` for no normalization.
+ normalize : tuple of integers | None
+ Axes on which to normalize data before feeding to the model with
+ min-max scaling based on the train set for each iteration of the outer
+ cross-validation. For example (0, 2) to normalize across epochs and
+ time. Defaults to ``None`` for no normalization.
batch_sizes : list of integers
List of batch sizes to test for optimization.
@@ -533,7 +481,7 @@ def deep_learn(nirs, labels, groups, model_class, features=None,
outer cross-validation).
all_hps : list of tuples
- List of hyperparameters (one tuple for each iteration of the outer
+ List of best hyperparameters (one tuple for each iteration of the outer
cross-validation). Each tuple will be `(batch size, learning rate)`.
additional_metrics : list of tuples
@@ -559,10 +507,6 @@ def deep_learn(nirs, labels, groups, model_class, features=None,
print(f'Deep learning: {model_class.__name__}')
- # Feature extraction
- if features is not None:
- nirs = _extract_features(nirs, features)
-
# Outer split
if os.path.isfile(f'{output_folder}/split.pickle'):
print('\tSaved k-fold split found, loading it...', end=' ')
@@ -604,14 +548,8 @@ def deep_learn(nirs, labels, groups, model_class, features=None,
# Min-max scaling
if normalize:
- if features is not None:
- maxs = nirs_train.max(axis=0)[np.newaxis, :]
- mins = nirs_train.min(axis=0)[np.newaxis, :]
- else:
- maxs = nirs_train.max(axis=(0, 2))
- maxs = maxs[np.newaxis, :, np.newaxis]
- mins = nirs_train.min(axis=(0, 2))
- mins = mins[np.newaxis, :, np.newaxis]
+ maxs = nirs_train.max(axis=normalize, keepdims=True)
+ mins = nirs_train.min(axis=normalize, keepdims=True)
nirs_train = (nirs_train - mins) / (maxs - mins)
if os.path.isfile(f'{output_folder}/model_k{k}.pt'):
@@ -687,14 +625,10 @@ def deep_learn(nirs, labels, groups, model_class, features=None,
nirs_train, nirs_test = nirs[out_idx[0]], nirs[out_idx[1]]
labels_test = labels[out_idx[1]]
- # Min-max scaling
+ # Min-max scaling of test set using training set only to avoid leakage
if normalize:
- if features is not None:
- maxs = nirs_train.max(axis=0)[np.newaxis, :]
- mins = nirs_train.min(axis=0)[np.newaxis, :]
- else:
- maxs = nirs_train.max(axis=(0, 2))[np.newaxis, :, np.newaxis]
- mins = nirs_train.min(axis=(0, 2))[np.newaxis, :, np.newaxis]
+ maxs = nirs_train.max(axis=normalize, keepdims=True)
+ mins = nirs_train.min(axis=normalize, keepdims=True)
nirs_test = (nirs_test - mins) / (maxs - mins)
# Load trained model, hyperparameters and training results
@@ -966,7 +900,7 @@ def _proxy_optim(nirs_train, targets_train, groups_train, enc_class, dec_class,
def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class,
- model_class, features=None, normalize=False,
+ model_class, normalize=None,
batch_sizes=[4, 8, 16, 32, 64],
lrs=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1], max_epoch=100,
random_state=None, output_folder='./outputs'):
@@ -1004,17 +938,11 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class,
parameters. The number of classes needs to be the number of output
neurons.
- features : list of strings | None
- List of features to extract. The list can include ``'mean'`` for the
- mean along the time axis, ``'std'`` for standard deviation along the
- time axis and ``'slope'`` for the slope of the linear regression along
- the time axis. Defaults to ``None`` for no feature extration and using
- the raw data.
-
- normalize : boolean
- Whether to normalize data before feeding to the model with min-max
- scaling based on the train set for each iteration of the outer
- cross-validation. Defaults to ``False`` for no normalization.
+ normalize : tuple of integers | None
+ Axes on which to normalize data before feeding to the model with
+ min-max scaling based on the train set for each iteration of the outer
+ cross-validation. For example (0, 2) to normalize across epochs and
+ time. Defaults to ``None`` for no normalization.
batch_sizes : list of integers
List of batch sizes to test for optimization.
@@ -1041,8 +969,8 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class,
each iteration of the outer cross-validation).
all_hps : list of tuples
- List of hyperparameters for the overall classifier (one tuple for each
- iteration of the outer cross-validation). Each tuple will be
+ List of best hyperparameters for the overall classifier (one tuple for
+ each iteration of the outer cross-validation). Each tuple will be
`(batch size, learning rate)`.
additional_metrics : list of tuples
@@ -1065,10 +993,6 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class,
print(f'Deep transfer learning: {enc_class.__name__}/'
f'{dec_class.__name__}-{model_class.__name__}')
- # Feature extraction
- if features is not None:
- nirs = _extract_features(nirs, features)
-
# Get index to split channel types
mid_idx = nirs.shape[1] / 2
if mid_idx.is_integer():
@@ -1115,14 +1039,8 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class,
# Min-max scaling
if normalize:
- if features is not None:
- maxs = nirs_train.max(axis=0)[np.newaxis, :]
- mins = nirs_train.min(axis=0)[np.newaxis, :]
- else:
- maxs = nirs_train.max(axis=(0, 2))
- maxs = maxs[np.newaxis, :, np.newaxis]
- mins = nirs_train.min(axis=(0, 2))
- mins = mins[np.newaxis, :, np.newaxis]
+ maxs = nirs_train.max(axis=normalize, keepdims=True)
+ mins = nirs_train.min(axis=normalize, keepdims=True)
nirs_train = (nirs_train - mins) / (maxs - mins)
# Train and optimise self-supervised models
@@ -1239,14 +1157,10 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class,
nirs_train, nirs_test = nirs[out_idx[0]], nirs[out_idx[1]]
labels_test = labels[out_idx[1]]
- # Min-max scaling
+ # Min-max scaling of test set using training set only to avoid leakage
if normalize:
- if features is not None:
- maxs = nirs_train.max(axis=0)[np.newaxis, :]
- mins = nirs_train.min(axis=0)[np.newaxis, :]
- else:
- maxs = nirs_train.max(axis=(0, 2))[np.newaxis, :, np.newaxis]
- mins = nirs_train.min(axis=(0, 2))[np.newaxis, :, np.newaxis]
+ maxs = nirs_train.max(axis=normalize, keepdims=True)
+ mins = nirs_train.min(axis=normalize, keepdims=True)
nirs_test = (nirs_test - mins) / (maxs - mins)
# Load trained model, hyperparameters and training results
diff --git a/src/benchnirs/load.py b/benchnirs/load.py
similarity index 98%
rename from src/benchnirs/load.py
rename to benchnirs/load.py
index 95a34adc50b3ac9e2a8412cff2a9128b00982fe5..a4df4db9147545dd44584566d6fc32cda13159ec 100644
--- a/src/benchnirs/load.py
+++ b/benchnirs/load.py
@@ -494,7 +494,7 @@ class _DatasetBak2019ME():
return data
-def load_dataset(dataset, path=None, bandpass=None, order=4, tddr=False,
+def load_dataset(dataset, path, bandpass=None, order=4, tddr=False,
baseline=(None, 0), roi_sides=False):
"""
Load and filter one of the open access datasets.
@@ -514,9 +514,8 @@ def load_dataset(dataset, path=None, bandpass=None, order=4, tddr=False,
``'bak_2019_me'`` for motor execution from Bak et al., 2019
(epoch interval: -2 to 10 seconds).
- path : string | None
- Path of the dataset selected with the ``dataset`` parameter. Defaults
- to ``None`` to use the default path.
+ path : string
+ Path of the dataset selected with the ``dataset`` parameter.
bandpass : list of floats | None
Cutoff frequencies of the bandpass Butterworth filter. Defaults to
@@ -566,10 +565,13 @@ def load_dataset(dataset, path=None, bandpass=None, order=4, tddr=False,
loader = None
for subj_id, subj in enumerate(loader.subject_list):
- if path is None:
- data = loader.load(subj)
- else:
+ try:
data = loader.load(subj, path)
+ except FileNotFoundError:
+ raise FileNotFoundError(
+ f"dataset not found, please make sure the dataset has been "
+ f"downloaded and the proper path has been provided (cf. "
+ f"https://hanbnrd.gitlab.io/benchnirs/install.html)")
# Create MNE raw object from delta_c
info = mne.create_info(ch_names=data['ch_names'], sfreq=data['sfreq'],
diff --git a/src/benchnirs/process.py b/benchnirs/process.py
similarity index 65%
rename from src/benchnirs/process.py
rename to benchnirs/process.py
index 409df1ef47b984ba1a95f7717ec91b56c401beac..89676ac2e3d3ab48322e4f7d82a3f8c4f1613084 100644
--- a/src/benchnirs/process.py
+++ b/benchnirs/process.py
@@ -1,3 +1,8 @@
+import numpy as np
+
+from scipy.stats import linregress
+
+
def process_epochs(mne_epochs, tmax=None, tslide=None, sort=False,
reject_criteria=None):
"""
@@ -76,3 +81,45 @@ def process_epochs(mne_epochs, tmax=None, tslide=None, sort=False,
print(f'Dataset shape: {nirs.shape}')
return nirs, labels, groups
+
+
+def extract_features(nirs, feature_list):
+ """
+ Perform feature extraction on NIRS data.
+
+ Parameters
+ ----------
+ nirs : array of shape (n_epochs, n_channels, n_times)
+ Processed NIRS data.
+
+ feature_list : list of strings
+ List of features to extract. The list can include ``'mean'`` for the
+ mean along the time axis, ``'std'`` for standard deviation along the
+ time axis and ``'slope'`` for the slope of the linear regression along
+ the time axis.
+
+ Returns
+ -------
+ nirs_features : array of shape (n_epochs, n_channels, n_features)
+ Features extracted from NIRS data.
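+
+ Examples
+ --------
+ A sketch with random data (10 epochs, 8 channels, 100 time points):
+
+ >>> nirs = np.random.rand(10, 8, 100)
+ >>> extract_features(nirs, ['mean', 'std', 'slope']).shape
+ (10, 8, 3)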
+ """
+ nirs_features = []
+ for feature in feature_list:
+ if feature == 'mean':
+ nirs_feature = np.mean(nirs, axis=-1, keepdims=True)
+ elif feature == 'std':
+ nirs_feature = np.std(nirs, axis=-1, keepdims=True)
+ elif feature == 'slope':
+ x = range(nirs.shape[-1])
+ nirs_feature = []
+ for epoch in nirs:
+ ep_slopes = []
+ for channel in epoch:
+ ep_slopes.append(linregress(x, channel).slope)
+ nirs_feature.append(ep_slopes)
+ nirs_feature = np.expand_dims(nirs_feature, -1)
+ nirs_features.append(nirs_feature)
+
+ nirs_features = np.concatenate(nirs_features, axis=-1)
+
+ return nirs_features
diff --git a/src/benchnirs/viz.py b/benchnirs/viz.py
similarity index 100%
rename from src/benchnirs/viz.py
rename to benchnirs/viz.py
diff --git a/devinstall.py b/devinstall.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0cf3dee4301a1f8eece5f2ae831cf3ebd781f65
--- /dev/null
+++ b/devinstall.py
@@ -0,0 +1,2 @@
+import os
+os.system('pip install -e .')
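+
+# Editable (development) install: local changes to the benchnirs/ sources take
+# effect without reinstalling. Assumes `pip` on PATH targets the active Python
+# interpreter; `python -m pip install -e .` is an equivalent invocation.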
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 7e8f1945d285d579e8c5c733c24a6cbe24fd97ab..59129b5245319cb5eabdb1bb032ebdaede64fedc 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -13,7 +13,7 @@ from datetime import datetime, timezone
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.insert(0, os.path.abspath('../../src/benchnirs'))
+sys.path.insert(0, os.path.abspath('../../benchnirs'))
# -- Project information -----------------------------------------------------
@@ -24,7 +24,7 @@ copyright = f'2021-{current_year}, Johann Benerradi'
author = 'Johann Benerradi'
# The full version, including alpha/beta/rc tags
-release = '1.2.1'
+release = '1.2.2'
# -- General configuration ---------------------------------------------------
diff --git a/docs/source/example.rst b/docs/source/example.rst
deleted file mode 100644
index 1b674a15734aec6f1455bd0a3d5caf728b870d9d..0000000000000000000000000000000000000000
--- a/docs/source/example.rst
+++ /dev/null
@@ -1,149 +0,0 @@
-Example
-=======
-
-Below is an example of how to use `BenchNIRS` with a custom convolutional neural network (CNN).
-
-.. code-block:: python
-
- import datetime
- import matplotlib.pyplot as plt
- import numpy as np
- import os
- import pandas as pd
- import seaborn as sns
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
-
- from scipy import stats
-
- from benchnirs.load import load_dataset
- from benchnirs.process import process_epochs
- from benchnirs.learn import deep_learn
-
-
- ALL_DATA_PATH = '/folder/with/datasets/' # path to the datasets
- DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
- 'shin_2018_nb': ['0-back', '2-back', '3-back'],
- 'shin_2018_wg': ['baseline', 'word generation'],
- 'shin_2016_ma': ['baseline', 'mental arithmetic'],
- 'bak_2019_me': ['right', 'left', 'foot']}
- CONFIDENCE = 0.05 # stat confidence at 95 %
-
-
- class CustomCNN(nn.Module):
-
- def __init__(self, n_classes):
- super(CustomCNN, self).__init__()
- self.conv1 = nn.Conv1d(4, 4, kernel_size=10, stride=2) # tempo conv
- self.pool1 = nn.MaxPool1d(2)
- self.conv2 = nn.Conv1d(4, 4, kernel_size=5, stride=2) # tempo conv
- self.pool2 = nn.MaxPool1d(2)
- self.fc1 = nn.Linear(20, 10)
- self.fc2 = nn.Linear(10, n_classes)
-
- def forward(self, x):
- batch_size = x.size(0)
- x = F.relu(self.conv1(x))
- x = self.pool1(x)
- x = F.relu(self.conv2(x))
- x = self.pool2(x)
- x = x.view(batch_size, -1)
- x = F.relu(self.fc1(x))
- x = self.fc2(x)
- return x
-
-
- start_time = datetime.datetime.now()
- out_folder = f'../results/custom'
- if not os.path.isdir(out_folder):
- os.makedirs(out_folder)
- print(f'Main output folder: {out_folder}/')
-
- print(f'Number of GPUs: {torch.cuda.device_count()}')
-
- with open(f'{out_folder}/summary.md', 'w') as w:
- w.write('# Accuracy table\n\n(Standard deviation on the cross-validation)')
- w.write('\n\n|Dataset|Chance level|Average accuracy (sd)|\n')
- w.write('|:---:|:---:|:---:|\n')
-
- with open(f'{out_folder}/results.csv', 'w') as w:
- w.write('dataset;fold;accuracy;hyperparameters;additional_metrics\n')
-
-
- dict_accuracies = {'Accuracy': [], 'Dataset': []}
- for dataset in DATASETS.keys():
- print(f'=====\n{dataset}\n=====')
- data_path = f'{ALL_DATA_PATH}dataset_{dataset[:-3]}/'
- out_path = f'{out_folder}/{dataset}_'
-
- # Load and preprocess data
- epochs = load_dataset(dataset, path=data_path, bandpass=[0.01, 0.5],
- baseline=(-2, 0), roi_sides=True, tddr=True)
- classes = DATASETS[dataset]
- epochs_lab = epochs[classes]
-
- # Run models
- nirs, labels, groups = process_epochs(epochs_lab, 9.9)
- cnn, hps_cnn, additional_metrics_cnn = deep_learn(
- nirs, labels, groups, CustomCNN, features=None,
- output_folder=f'{out_path}cnn')
-
- # Write results
- results = {'CNN': [cnn, hps_cnn]}
- chance_level = np.around(1/len(classes), decimals=3)
- w_summary = open(f'{out_folder}/summary.md', 'a')
- w_results = open(f'{out_folder}/results.csv', 'a')
- w_summary.write(f'|{dataset}|{chance_level}|')
- w_summary.write(
- f'{np.around(np.mean(cnn), decimals=3)} '
- f'({np.around(np.std(cnn), decimals=3)})|')
- for fold, accuracy in enumerate(cnn):
- w_results.write(f'{dataset};{fold+1};{accuracy};"{hps_cnn[fold]}";')
- w_results.write(f'"{additional_metrics_cnn[fold]}"\n')
- w_summary.write('\n')
- w_summary.close()
- w_results.close()
- dict_accuracies['Accuracy'] += cnn
- dict_accuracies['Dataset'] += [dataset] * len(cnn)
-
-
- df_accuracies = pd.DataFrame(dict_accuracies)
- sns.barplot(data=df_accuracies, y='Accuracy', x='Dataset', capsize=.1,
- palette='colorblind')
- plt.savefig(f'{out_folder}/summary.png')
- plt.close()
-
-
- # Stats
- print('Stats...')
- with open(f'{out_folder}/stats.md', 'w') as w:
- df = pd.read_csv(f'{out_folder}/results.csv', delimiter=';')
- w.write('## Comparison of the model accuracy to chance level\n\n')
- w.write('|Dataset|Shapiro p-value|Test|p-value|\n')
- w.write('|:---:|:---:|:---:|:---:|\n')
- for dataset in DATASETS.keys():
- dataset_accuracies = []
- chance_level = 1 / len(DATASETS[dataset])
- normality = True
- w.write(f'|{dataset}|')
- sub_df = df[df['dataset'] == dataset]
- accuracies = sub_df['accuracy'].to_numpy()
- dataset_accuracies.append(accuracies)
- # Check normality of the distribution
- _, p_shap = stats.shapiro(accuracies)
- w.write(f'{p_shap}|')
- if p_shap > CONFIDENCE:
- # t-test
- _, p_tt = stats.ttest_1samp(accuracies, chance_level)
- w.write(f't-test|{p_tt}|\n')
- else:
- normality = False
- # Wilcoxon
- _, p_wilcox = stats.wilcoxon(accuracies-chance_level)
- w.write(f'Wilcoxon|{p_wilcox}|\n')
-
-
- end_time = datetime.datetime.now()
- elapsed_time = end_time - start_time
- print(f'===\nElapsed time: {elapsed_time}')
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
new file mode 100644
index 0000000000000000000000000000000000000000..2c4c50fc397c48633d8dcb528ff3f22622201c5e
--- /dev/null
+++ b/docs/source/examples.rst
@@ -0,0 +1,13 @@
+Examples
+========
+
+.. toctree::
+ :maxdepth: 2
+
+ examples/custom-model
+ examples/generalised
+ examples/personalised
+ examples/sliding-window
+ examples/window-size
+ examples/dataset-size
+ examples/visualisation
diff --git a/docs/source/examples/custom-model.rst b/docs/source/examples/custom-model.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5442ad46921b96b3dd18c90a62164fd2f1d40107
--- /dev/null
+++ b/docs/source/examples/custom-model.rst
@@ -0,0 +1,6 @@
+Custom model training
+=====================
+
+Below is an example of how to use `BenchNIRS` to train a custom convolutional neural network (CNN) on one of the datasets.
+
+.. literalinclude:: ../../../examples/tailored_shin_nb.py
diff --git a/docs/source/examples/dataset-size.rst b/docs/source/examples/dataset-size.rst
new file mode 100644
index 0000000000000000000000000000000000000000..aaf6f589867a03616e457cc53a0ea3b4e4d963a6
--- /dev/null
+++ b/docs/source/examples/dataset-size.rst
@@ -0,0 +1,10 @@
+Dataset size benchmarking
+=========================
+
+Below is a comparison of 6 machine learning models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a subject-independent approach (testing with unseen subjects), with a range of different dataset sizes (50% to 100% of the dataset) to study the influence of this parameter on the classification accuracy [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/dataset_size.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/generalised.rst b/docs/source/examples/generalised.rst
new file mode 100644
index 0000000000000000000000000000000000000000..991b6d183c68eb75d9e569f138bba11dfbd60eb7
--- /dev/null
+++ b/docs/source/examples/generalised.rst
@@ -0,0 +1,10 @@
+Subject-independent benchmarking
+================================
+
+Below is a comparison of 6 machine learning models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a subject-independent approach (testing with unseen subjects) [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/generalised.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/personalised.rst b/docs/source/examples/personalised.rst
new file mode 100644
index 0000000000000000000000000000000000000000..17aaacb0603bded96e7bf1e352f47b1d8d44fa50
--- /dev/null
+++ b/docs/source/examples/personalised.rst
@@ -0,0 +1,10 @@
+Subject-specific benchmarking
+=============================
+
+Below is a comparison of 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a subject-specific approach (training and testing with each subject individually) [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/personalised.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/sliding-window.rst b/docs/source/examples/sliding-window.rst
new file mode 100644
index 0000000000000000000000000000000000000000..60d98583cdfbb761bcf5d4d1a42c60fa0c9f5e32
--- /dev/null
+++ b/docs/source/examples/sliding-window.rst
@@ -0,0 +1,10 @@
+Sliding window benchmarking
+===========================
+
+Below is a comparison of 4 machine learning models (LDA, SVC, kNN, ANN) on the 5 datasets with a subject-independent approach (testing with unseen subjects), with a 2-second sliding window on the epochs to split the data into more examples [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/sliding_window.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/visualisation.rst b/docs/source/examples/visualisation.rst
new file mode 100644
index 0000000000000000000000000000000000000000..4ef275a89f6f20e0617d21e4f68087f5b324ace2
--- /dev/null
+++ b/docs/source/examples/visualisation.rst
@@ -0,0 +1,10 @@
+Epochs visualisation
+====================
+
+Below is a visualisation of the epochs from the 5 datasets using the MNE backend [#gramfort2013]_.
+
+.. literalinclude:: ../../../examples/visualisation.py
+
+
+.. rubric:: References
+.. [#gramfort2013] Gramfort, A., Luessi, M., Larson, E., Engemann, D. A., Strohmeier, D., Brodbeck, C., ... & Hämäläinen, M. (2013). MEG and EEG data analysis with MNE-Python. Frontiers in neuroscience, 7, 70133.
diff --git a/docs/source/examples/window-size.rst b/docs/source/examples/window-size.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f24e80d363070815b710177b6bc33d7186ae01a3
--- /dev/null
+++ b/docs/source/examples/window-size.rst
@@ -0,0 +1,10 @@
+Window size benchmarking
+========================
+
+Below is a comparison of 4 machine learning models (LDA, SVC, kNN, ANN) on the 5 datasets with a subject-independent approach (testing with unseen subjects), with a range of different durations per trial (2 to 10 seconds) to study the influence of this parameter on the classification accuracy [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/window_size.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 6256151aa8003228ec79859273fc0dbe4259a08e..08877a5038f075c4ba7ed2afea57df477c9bc0b2 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -22,6 +22,11 @@ Features:
* supervised, self-supervised and transfer learning
* much more!
+.. role:: raw-html(raw)
+ :format: html
+
+:raw-html:`→` `Source code on GitLab <https://gitlab.com/HanBnrd/benchnirs>`_
+
.. image:: https://img.shields.io/badge/doi-10.3389%2Ffnrgo.2023.994969-blue
:target: https://doi.org/10.3389/fnrgo.2023.994969
@@ -43,7 +48,7 @@ Features:
install
modules
- example
+ examples
Recommendation checklist
@@ -95,7 +100,7 @@ Please refer to `this tutorial `_.
+This project is licensed under the `GNU General Public License v3+ <https://www.gnu.org/licenses/gpl-3.0.html>`_. If you are using `BenchNIRS`, please cite `this article <https://doi.org/10.3389/fnrgo.2023.994969>`_.
Indices and tables
diff --git a/docs/source/install.rst b/docs/source/install.rst
index c55c4e90aa8cdbdb7cdf01ff9114e0ad91532aeb..5847b0c86cfbc4920f8a19ef8412c521d013c5c6 100644
--- a/docs/source/install.rst
+++ b/docs/source/install.rst
@@ -14,6 +14,9 @@ Setting up BenchNIRS
#. Download the datasets (see below).
+.. note::
+ Alternatively, to install from source in development mode, download and unzip the `repository <https://gitlab.com/HanBnrd/benchnirs/-/archive/main/benchnirs-main.zip>`_ (or clone it with Git) and run :code:`devinstall.py`.
+
Downloading the datasets
------------------------
@@ -31,13 +34,3 @@ To update `BenchNIRS` to the latest version with `pip`, open a terminal (eg. Ana
.. code-block:: console
pip install --upgrade benchnirs
-
-
-
-.. note::
- Alternatively to install from source, download and unzip the `repository `_.
- Then, in a terminal or command prompt (eg. Anaconda Prompt), navigate to the directory containing the :code:`requirements.txt` file and run:
-
- .. code-block:: console
-
- pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
index 052c72311dc82d9056b5a7194e5241972f16fbeb..4e2279e1e3154b222628c19f4d5e42c5c865760d 100644
--- a/docs/source/modules.rst
+++ b/docs/source/modules.rst
@@ -2,9 +2,9 @@ BenchNIRS API
=============
.. toctree::
- :maxdepth: 4
+ :maxdepth: 2
- learn
- load
- process
- viz
+ modules/learn
+ modules/load
+ modules/process
+ modules/viz
diff --git a/docs/source/learn.rst b/docs/source/modules/learn.rst
similarity index 100%
rename from docs/source/learn.rst
rename to docs/source/modules/learn.rst
diff --git a/docs/source/load.rst b/docs/source/modules/load.rst
similarity index 100%
rename from docs/source/load.rst
rename to docs/source/modules/load.rst
diff --git a/docs/source/process.rst b/docs/source/modules/process.rst
similarity index 100%
rename from docs/source/process.rst
rename to docs/source/modules/process.rst
diff --git a/docs/source/viz.rst b/docs/source/modules/viz.rst
similarity index 100%
rename from docs/source/viz.rst
rename to docs/source/modules/viz.rst
diff --git a/example.png b/example.png
deleted file mode 100644
index c5ce10112c4a490c420cfa77785ebd4f0f53fbcc..0000000000000000000000000000000000000000
Binary files a/example.png and /dev/null differ
diff --git a/src/dataset_size.py b/examples/dataset_size.py
similarity index 86%
rename from src/dataset_size.py
rename to examples/dataset_size.py
index 23a444f56b30745c902e27d72cc766690f08ad18..341ddb42bb07a54f29a68ece98ee6ff181b4187f 100644
--- a/src/dataset_size.py
+++ b/examples/dataset_size.py
@@ -1,6 +1,5 @@
import datetime
import matplotlib.pyplot as plt
-import numpy as np
import os
import pandas as pd
import seaborn as sns
@@ -11,10 +10,11 @@ from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from benchnirs.load import load_dataset
-from benchnirs.process import process_epochs
+from benchnirs.process import process_epochs, extract_features
from benchnirs.learn import machine_learn, deep_learn
+ALL_DATA_PATH = '../../data/dataset_' # path to the datasets
DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
'shin_2018_nb': ['0-back', '2-back', '3-back'],
'shin_2018_wg': ['baseline', 'word generation'],
@@ -26,7 +26,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 %
start_time = datetime.datetime.now()
date = start_time.strftime('%Y_%m_%d_%H%M')
-out_folder = f'../results/dataset_size_{date}'
+out_folder = f'./results/dataset_size_{date}'
os.makedirs(out_folder)
print(f'Main output folder: {out_folder}/')
@@ -38,11 +38,12 @@ with open(f'{out_folder}/results.csv', 'w') as w:
for dataset in DATASETS.keys():
print(f'=====\n{dataset}\n=====')
+ data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/'
out_path = f'{out_folder}/{dataset}_'
# Load and preprocess data
- epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0),
- roi_sides=True, tddr=True)
+ epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5],
+ baseline=(-2, 0), roi_sides=True, tddr=True)
classes = DATASETS[dataset]
epochs_lab = epochs[classes]
@@ -59,26 +60,25 @@ for dataset in DATASETS.keys():
else:
nirs, labels, groups = shuffle(
all_nirs, all_labels, all_groups, random_state=42)
+ nirs_features = extract_features(nirs, ['mean', 'std', 'slope'])
# Run models
lda, hps_lda, _ = machine_learn(
- nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'lda',
output_folder=f'{out_path}{ts}_lda')
svc, hps_svc, _ = machine_learn(
- nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'svc',
output_folder=f'{out_path}{ts}_svc')
knn, hps_knn, _ = machine_learn(
- nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'knn',
output_folder=f'{out_path}{ts}_knn')
ann, hps_ann, _ = deep_learn(
- nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'ann',
output_folder=f'{out_path}{ts}_ann')
cnn, hps_cnn, _ = deep_learn(
- nirs, labels, groups, 'cnn', features=None,
- output_folder=f'{out_path}{ts}_cnn')
+ nirs, labels, groups, 'cnn', output_folder=f'{out_path}{ts}_cnn')
lstm, hps_lstm, _ = deep_learn(
- nirs, labels, groups, 'lstm', features=None,
- output_folder=f'{out_path}{ts}_lstm')
+ nirs, labels, groups, 'lstm', output_folder=f'{out_path}{ts}_lstm')
dict_train_size['Chance'] += [1/len(classes) for _ in lda]
dict_train_size['LDA'] += lda
dict_train_size['SVC'] += svc
diff --git a/src/generalised.py b/examples/generalised.py
similarity index 86%
rename from src/generalised.py
rename to examples/generalised.py
index c5a41125339d6f84f0c5cdba1978c28a8daf0a35..e88d89882854d7dfe746ca5ccc70528aa43c2813 100644
--- a/src/generalised.py
+++ b/examples/generalised.py
@@ -9,10 +9,11 @@ import torch
from scipy import stats
from benchnirs.load import load_dataset
-from benchnirs.process import process_epochs
+from benchnirs.process import process_epochs, extract_features
from benchnirs.learn import machine_learn, deep_learn
+ALL_DATA_PATH = '../../data/dataset_' # path to the datasets
DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
'shin_2018_nb': ['0-back', '2-back', '3-back'],
'shin_2018_wg': ['baseline', 'word generation'],
@@ -23,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 %
start_time = datetime.datetime.now()
date = start_time.strftime('%Y_%m_%d_%H%M')
-out_folder = f'../results/generalised_{date}'
+out_folder = f'./results/generalised_{date}'
os.makedirs(out_folder)
print(f'Main output folder: {out_folder}/')
@@ -42,34 +43,30 @@ with open(f'{out_folder}/results.csv', 'w') as w:
dict_accuracies = {}
for dataset in DATASETS.keys():
print(f'=====\n{dataset}\n=====')
+ data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/'
out_path = f'{out_folder}/{dataset}_'
# Load and preprocess data
- epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0),
- roi_sides=True, tddr=True)
+ epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5],
+ baseline=(-2, 0), roi_sides=True, tddr=True)
classes = DATASETS[dataset]
epochs_lab = epochs[classes]
# Run models
nirs, labels, groups = process_epochs(epochs_lab, 9.9)
+ nirs_features = extract_features(nirs, ['mean', 'std', 'slope'])
lda, hps_lda, _ = machine_learn(
- nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}lda')
+ nirs_features, labels, groups, 'lda', output_folder=f'{out_path}lda')
svc, hps_svc, _ = machine_learn(
- nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}svc')
+ nirs_features, labels, groups, 'svc', output_folder=f'{out_path}svc')
knn, hps_knn, _ = machine_learn(
- nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}knn')
+ nirs_features, labels, groups, 'knn', output_folder=f'{out_path}knn')
ann, hps_ann, _ = deep_learn(
- nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}ann')
+ nirs_features, labels, groups, 'ann', output_folder=f'{out_path}ann')
cnn, hps_cnn, _ = deep_learn(
- nirs, labels, groups, 'cnn', features=None,
- output_folder=f'{out_path}cnn')
+ nirs, labels, groups, 'cnn', output_folder=f'{out_path}cnn')
lstm, hps_lstm, _ = deep_learn(
- nirs, labels, groups, 'lstm', features=None,
- output_folder=f'{out_path}lstm')
+ nirs, labels, groups, 'lstm', output_folder=f'{out_path}lstm')
# Write results
results = {'LDA': [lda, hps_lda], 'SVC': [svc, hps_svc],
diff --git a/src/personalised.py b/examples/personalised.py
similarity index 88%
rename from src/personalised.py
rename to examples/personalised.py
index 1da19675861f4758ba791d52b118b46d243cc5a4..504e85a7ccece2a9ae023eee3cbfe25815d3954c 100644
--- a/src/personalised.py
+++ b/examples/personalised.py
@@ -9,10 +9,11 @@ import torch
from scipy import stats
from benchnirs.load import load_dataset
-from benchnirs.process import process_epochs
+from benchnirs.process import process_epochs, extract_features
from benchnirs.learn import machine_learn, deep_learn
+ALL_DATA_PATH = '../../data/dataset_' # path to the datasets
DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
'shin_2018_nb': ['0-back', '2-back', '3-back'],
'shin_2018_wg': ['baseline', 'word generation'],
@@ -23,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 %
start_time = datetime.datetime.now()
date = start_time.strftime('%Y_%m_%d_%H%M')
-out_folder = f'../results/personalised_{date}'
+out_folder = f'./results/personalised_{date}'
os.makedirs(out_folder)
print(f'Main output folder: {out_folder}/')
@@ -41,11 +42,12 @@ with open(f'{out_folder}/results.csv', 'w') as w:
for dataset in DATASETS.keys():
print(f'=====\n{dataset}\n=====')
+ data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/'
out_path = f'{out_folder}/{dataset}_'
# Load and preprocess data
- epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0),
- roi_sides=True, tddr=True)
+ epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5],
+ baseline=(-2, 0), roi_sides=True, tddr=True)
classes = DATASETS[dataset]
epochs_lab = epochs[classes]
@@ -58,25 +60,26 @@ for dataset in DATASETS.keys():
print(f'-----\nSubject {subj+1}\n-----')
indices = [i for i, value in enumerate(all_groups) if value == subj]
nirs, labels = all_nirs[indices], all_labels[indices]
+ nirs_features = extract_features(nirs, ['mean', 'std', 'slope'])
# Run models
lda, hps_lda, _ = machine_learn(
- nirs, labels, None, 'lda', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups=None, model='lda',
output_folder=f'{out_path}{subj+1}_lda')
svc, hps_svc, _ = machine_learn(
- nirs, labels, None, 'svc', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups=None, model='svc',
output_folder=f'{out_path}{subj+1}_svc')
knn, hps_knn, _ = machine_learn(
- nirs, labels, None, 'knn', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups=None, model='knn',
output_folder=f'{out_path}{subj+1}_knn')
ann, hps_ann, _ = deep_learn(
- nirs, labels, None, 'ann', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups=None, model_class='ann',
output_folder=f'{out_path}{subj+1}_ann')
cnn, hps_cnn, _ = deep_learn(
- nirs, labels, None, 'cnn', features=None,
+ nirs, labels, groups=None, model_class='cnn',
output_folder=f'{out_path}{subj+1}_cnn')
lstm, hps_lstm, _ = deep_learn(
- nirs, labels, None, 'lstm', features=None,
+ nirs, labels, groups=None, model_class='lstm',
output_folder=f'{out_path}{subj+1}_lstm')
# Write results
diff --git a/src/sliding_window.py b/examples/sliding_window.py
similarity index 87%
rename from src/sliding_window.py
rename to examples/sliding_window.py
index 4b6669e70fad83d7d3283c0cb8ba76ceaeacd4c6..f69d272afadbabeed93397bbba384bce20369fb8 100644
--- a/src/sliding_window.py
+++ b/examples/sliding_window.py
@@ -9,10 +9,11 @@ import torch
from scipy import stats
from benchnirs.load import load_dataset
-from benchnirs.process import process_epochs
+from benchnirs.process import process_epochs, extract_features
from benchnirs.learn import machine_learn, deep_learn
+ALL_DATA_PATH = '../../data/dataset_' # path to the datasets
DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
'shin_2018_nb': ['0-back', '2-back', '3-back'],
'shin_2018_wg': ['baseline', 'word generation'],
@@ -23,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 %
start_time = datetime.datetime.now()
date = start_time.strftime('%Y_%m_%d_%H%M')
-out_folder = f'../results/sliding_window_{date}'
+out_folder = f'./results/sliding_window_{date}'
os.makedirs(out_folder)
print(f'Main output folder: {out_folder}/')
@@ -42,28 +43,26 @@ with open(f'{out_folder}/results.csv', 'w') as w:
dict_accuracies = {}
for dataset in DATASETS.keys():
print(f'=====\n{dataset}\n=====')
+ data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/'
out_path = f'{out_folder}/{dataset}_'
# Load and preprocess data
- epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0),
- roi_sides=True, tddr=True)
+ epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5],
+ baseline=(-2, 0), roi_sides=True, tddr=True)
classes = DATASETS[dataset]
epochs_lab = epochs[classes]
# Run models
nirs, labels, groups = process_epochs(epochs_lab, 9.9, tslide=2)
+ nirs_features = extract_features(nirs, ['mean', 'std', 'slope'])
lda, hps_lda, _ = machine_learn(
- nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}lda')
+ nirs_features, labels, groups, 'lda', output_folder=f'{out_path}lda')
svc, hps_svc, _ = machine_learn(
- nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}svc')
+ nirs_features, labels, groups, 'svc', output_folder=f'{out_path}svc')
knn, hps_knn, _ = machine_learn(
- nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}knn')
+ nirs_features, labels, groups, 'knn', output_folder=f'{out_path}knn')
ann, hps_ann, _ = deep_learn(
- nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}ann')
+ nirs_features, labels, groups, 'ann', output_folder=f'{out_path}ann')
# Write results
results = {'LDA': [lda, hps_lda], 'SVC': [svc, hps_svc],
diff --git a/src/stats/comparison_stats_dataset.py b/examples/stats/comparison_stats_dataset.py
similarity index 95%
rename from src/stats/comparison_stats_dataset.py
rename to examples/stats/comparison_stats_dataset.py
index 353ffa6c0160a70275c4cfb0cf638b156a0de6e0..c1f2afcd55ae7bd275fcc3229ea1fb1bbba3148c 100644
--- a/src/stats/comparison_stats_dataset.py
+++ b/examples/stats/comparison_stats_dataset.py
@@ -6,8 +6,8 @@ from scipy import stats
CONFIDENCE = 0.05 # stat confidence at 95 %
-new_results = './results_new_model.csv'
-old_results = './results_old_model.csv'
+new_results = '../results_new_model.csv'
+old_results = '../results_old_model.csv'
# Stats
print('Stats...')
diff --git a/src/stats/comparison_stats_task.py b/examples/stats/comparison_stats_task.py
similarity index 97%
rename from src/stats/comparison_stats_task.py
rename to examples/stats/comparison_stats_task.py
index b828140c2bb04f3c0e6a962cd66b85767f72a1c4..65cbad9e9f53a2b2f1ab4913d06b294e14335fce 100644
--- a/src/stats/comparison_stats_task.py
+++ b/examples/stats/comparison_stats_task.py
@@ -8,8 +8,8 @@ DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
CONFIDENCE = 0.05 # stat confidence at 95 %
-new_results = './new_results.csv'
-old_results = './old_results.csv'
+new_results = '../new_results.csv'
+old_results = '../old_results.csv'
models = ['LDA', 'SVC', 'kNN', 'ANN', 'CNN', 'LSTM']
# Stats
diff --git a/src/stats/extra_stats.py b/examples/stats/extra_stats.py
similarity index 100%
rename from src/stats/extra_stats.py
rename to examples/stats/extra_stats.py
diff --git a/src/tailored_generalised.py b/examples/tailored_generalised.py
similarity index 87%
rename from src/tailored_generalised.py
rename to examples/tailored_generalised.py
index 274639d2bc46f95f36844486d875df9e6d73fe57..c01e81bf90a74b6c5bc5efe9174976cfb0cd17a5 100644
--- a/src/tailored_generalised.py
+++ b/examples/tailored_generalised.py
@@ -11,10 +11,11 @@ import torch.nn.functional as F
from scipy import stats
from benchnirs.load import load_dataset
-from benchnirs.process import process_epochs
+from benchnirs.process import process_epochs, extract_features
from benchnirs.learn import machine_learn, deep_learn
+ALL_DATA_PATH = '../../data/dataset_' # path to the datasets
DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
'shin_2018_nb': ['0-back', '2-back', '3-back']}
CONFIDENCE = 0.05 # stat confidence at 95 %
@@ -43,10 +44,9 @@ class _CNNnback(nn.Module):
return x
-
start_time = datetime.datetime.now()
date = start_time.strftime('%Y_%m_%d_%H%M')
-out_folder = f'../results/tailored_generalised_{date}'
+out_folder = f'./results/tailored_generalised_{date}'
os.makedirs(out_folder)
@@ -67,34 +67,30 @@ with open(f'{out_folder}/results.csv', 'w') as w:
dict_accuracies = {}
for dataset in DATASETS.keys():
print(f'=====\n{dataset}\n=====')
+ data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/'
out_path = f'{out_folder}/{dataset}_'
# Load and preprocess data
- epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0),
- roi_sides=True, tddr=True)
+ epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5],
+ baseline=(-2, 0), roi_sides=True, tddr=True)
classes = DATASETS[dataset]
epochs_lab = epochs[classes]
# Run models
nirs, labels, groups = process_epochs(epochs_lab, 39.9)
+ nirs_features = extract_features(nirs, ['mean', 'std', 'slope'])
lda, hps_lda, _ = machine_learn(
- nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}lda')
+ nirs_features, labels, groups, 'lda', output_folder=f'{out_path}lda')
svc, hps_svc, _ = machine_learn(
- nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}svc')
+ nirs_features, labels, groups, 'svc', output_folder=f'{out_path}svc')
knn, hps_knn, _ = machine_learn(
- nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}knn')
+ nirs_features, labels, groups, 'knn', output_folder=f'{out_path}knn')
ann, hps_ann, _ = deep_learn(
- nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'],
- output_folder=f'{out_path}ann')
+ nirs_features, labels, groups, 'ann', output_folder=f'{out_path}ann')
cnn, hps_cnn, _ = deep_learn(
- nirs, labels, groups, _CNNnback, features=None,
- output_folder=f'{out_path}cnn')
+ nirs, labels, groups, _CNNnback, output_folder=f'{out_path}cnn')
lstm, hps_lstm, _ = deep_learn(
- nirs, labels, groups, 'lstm', features=None,
- output_folder=f'{out_path}lstm')
+ nirs, labels, groups, 'lstm', output_folder=f'{out_path}lstm')
# Write results
results = {'LDA': [lda, hps_lda], 'SVC': [svc, hps_svc],
diff --git a/src/tailored_shin_nb.py b/examples/tailored_shin_nb.py
similarity index 92%
rename from src/tailored_shin_nb.py
rename to examples/tailored_shin_nb.py
index 080e19dc477a1695bdcff0ca16c20ff236caf87f..9e086bd3afe785688b697c69cc1c66edf49b1e7b 100644
--- a/src/tailored_shin_nb.py
+++ b/examples/tailored_shin_nb.py
@@ -12,6 +12,7 @@ from benchnirs.process import process_epochs
from benchnirs.learn import deep_learn
+DATA_PATH = '../../data/dataset_shin_2018/' # path to the dataset
CLASSES = ['0-back', '2-back', '3-back']
CONFIDENCE = 0.05 # stat confidence at 95 %
ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23],
@@ -23,7 +24,7 @@ ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23],
start_time = datetime.datetime.now()
date = start_time.strftime('%Y_%m_%d_%H%M')
-out_folder = f'../results/tailored_shin_nb_{date}'
+out_folder = f'./results/tailored_shin_nb_{date}'
class CustomCNN(nn.Module):
@@ -64,7 +65,7 @@ print(f'Number of GPUs: {torch.cuda.device_count()}')
print('=====\nshin_2018_nb\n=====')
# Load and preprocess data
-epochs = load_dataset('shin_2018_nb', bandpass=[0.01, 0.5],
+epochs = load_dataset('shin_2018_nb', DATA_PATH, bandpass=[0.01, 0.5],
baseline=(-2, 0), tddr=True)
ch_picks = []
for group in ROIS.values():
@@ -76,8 +77,7 @@ epochs_lab = epochs[CLASSES]
nirs, labels, groups = process_epochs(epochs_lab, tmax=39.9, sort=True)
print(nirs.shape)
accuracies, hps, additional_metrics = deep_learn(
- nirs, labels, groups, CustomCNN,
- features=None, normalize=True,
+ nirs, labels, groups, CustomCNN, normalize=(0, 2),
output_folder=f'{out_folder}')
# Write results
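
The `normalize` argument changes here from a boolean to a tuple of axes. The exact semantics live in `benchnirs.learn`, but a plausible reading of `normalize=(0, 2)` is per-channel standardisation across epochs and time; a self-contained NumPy illustration of that operation (the interpretation is an assumption):

```python
import numpy as np

rng = np.random.default_rng(0)
nirs = rng.normal(size=(60, 16, 400))  # (epochs, channels, time samples)

# Standardise over axes (0, 2): one mean and one std per channel
mean = nirs.mean(axis=(0, 2), keepdims=True)
std = nirs.std(axis=(0, 2), keepdims=True)
nirs_norm = (nirs - mean) / std
print(np.allclose(nirs_norm.mean(axis=(0, 2)), 0))  # True
```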
diff --git a/src/tailored_window_size.py b/examples/tailored_window_size.py
similarity index 85%
rename from src/tailored_window_size.py
rename to examples/tailored_window_size.py
index 9c5711984c746e62aa972f4ad72bd4b2c5fc3d6c..1214573f89f018657de7e7d3ebdfaeec01b19d16 100644
--- a/src/tailored_window_size.py
+++ b/examples/tailored_window_size.py
@@ -1,6 +1,5 @@
import datetime
import matplotlib.pyplot as plt
-import numpy as np
import os
import pandas as pd
import seaborn as sns
@@ -9,10 +8,11 @@ import torch
from scipy import stats
from benchnirs.load import load_dataset
-from benchnirs.process import process_epochs
+from benchnirs.process import process_epochs, extract_features
from benchnirs.learn import machine_learn, deep_learn
+ALL_DATA_PATH = '../../data/dataset_' # common prefix of the dataset folders
DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
'shin_2018_nb': ['0-back', '2-back', '3-back']}
WINDOW_SIZES = [4.9, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9]
@@ -21,7 +21,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 %
start_time = datetime.datetime.now()
date = start_time.strftime('%Y_%m_%d_%H%M')
-out_folder = f'../results/tailored_window_size_{date}'
+out_folder = f'./results/tailored_window_size_{date}'
os.makedirs(out_folder)
print(f'Main output folder: {out_folder}/')
@@ -32,11 +32,12 @@ with open(f'{out_folder}/results.csv', 'w') as w:
for dataset in DATASETS.keys():
print(f'=====\n{dataset}\n=====')
+ data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/'
out_path = f'{out_folder}/{dataset}_'
# Load and preprocess data
- epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0),
- roi_sides=True, tddr=True)
+ epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5],
+ baseline=(-2, 0), roi_sides=True, tddr=True)
classes = DATASETS[dataset]
epochs_lab = epochs[classes]
@@ -45,23 +46,23 @@ for dataset in DATASETS.keys():
for ws in WINDOW_SIZES:
print(f'-----\nWindow size {ws}\n-----')
nirs, labels, groups = process_epochs(epochs_lab, ws)
+ nirs_features = extract_features(nirs, ['mean', 'std', 'slope'])
# Run models
lda, hps_lda, _ = machine_learn(
- nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'lda',
output_folder=f'{out_path}{ws}_lda')
svc, hps_svc, _ = machine_learn(
- nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'svc',
output_folder=f'{out_path}{ws}_svc')
knn, hps_knn, _ = machine_learn(
- nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'knn',
output_folder=f'{out_path}{ws}_knn')
ann, hps_ann, _ = deep_learn(
- nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'ann',
output_folder=f'{out_path}{ws}_ann')
lstm, hps_lstm, _ = deep_learn(
- nirs, labels, groups, 'lstm', features=None,
- output_folder=f'{out_path}{ws}_lstm')
+ nirs, labels, groups, 'lstm', output_folder=f'{out_path}{ws}_lstm')
dict_window_size['Chance'] += [1/len(classes) for _ in lda]
dict_window_size['LDA'] += lda
dict_window_size['SVC'] += svc
diff --git a/src/transfer.py b/examples/transfer.py
similarity index 93%
rename from src/transfer.py
rename to examples/transfer.py
index 41b8f1eb25c13b39e34e65a146754c310efe8394..50041323dbe3546bfabdd68058556d26bbe3342a 100644
--- a/src/transfer.py
+++ b/examples/transfer.py
@@ -12,6 +12,8 @@ from benchnirs.process import process_epochs
from benchnirs.learn import deep_transfer_learn
+DATA_PATH = '../../data/dataset_shin_2018/' # path to the dataset
+# CLASSES = ['0-back', '2-back', '3-back']  # variant without unlabelled epochs
CLASSES = ['0-back', '2-back', '3-back', 'unlabelled']
CONFIDENCE = 0.05 # stat confidence at 95 %
ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23],
@@ -23,7 +25,7 @@ ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23],
start_time = datetime.datetime.now()
date = start_time.strftime('%Y_%m_%d_%H%M')
-out_folder = f'../results/transfer_{date}'
+out_folder = f'./results/transfer_{date}'
class HbEncoder(nn.Module):
@@ -99,7 +101,7 @@ print(f'Number of GPUs: {torch.cuda.device_count()}')
print('=====\nshin_2018_nb\n=====')
# Load and preprocess data
-epochs = load_dataset('shin_2018_nb', bandpass=[0.01, 0.5],
+epochs = load_dataset('shin_2018_nb', DATA_PATH, bandpass=[0.01, 0.5],
baseline=(-2, 0), tddr=True)
print(epochs)
ch_picks = []
@@ -112,8 +114,7 @@ epochs_lab = epochs[CLASSES]
nirs, labels, groups = process_epochs(epochs_lab, tmax=39.9, sort=True)
print(nirs.shape)
accuracies, hps, additional_metrics = deep_transfer_learn(
- nirs, labels, groups, HbEncoder, HbDecoder, Classifier,
- features=None, normalize=True,
+ nirs, labels, groups, HbEncoder, HbDecoder, Classifier, normalize=(0, 2),
output_folder=f'{out_folder}', max_epoch=500)
# Write results
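
`deep_transfer_learn` wires a classifier head onto pretrained encoders; as the deleted `transfer_no_unlab.py` further down shows, the head detaches the encoder features so that classifier training leaves the encoder weights untouched. A minimal self-contained illustration of that pattern (toy layer sizes, not the actual architecture):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class TinyEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(32, 16)

    def forward(self, x):
        return F.relu(self.fc(x))


class TinyClassifier(nn.Module):
    def __init__(self, n_classes, encoder):
        super().__init__()
        self.encoder = encoder
        self.fc = nn.Linear(16, n_classes)

    def forward(self, x):
        features = self.encoder(x)
        # detach(): gradients from the classification loss stop here,
        # so the encoder is not updated during classifier training
        return self.fc(features.detach())


model = TinyClassifier(3, TinyEncoder())
print(model(torch.randn(8, 32)).shape)  # torch.Size([8, 3])
```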
diff --git a/src/visualisation.py b/examples/visualisation.py
similarity index 77%
rename from src/visualisation.py
rename to examples/visualisation.py
index f1c332f5ef3e1e14da7fdd2cfb615a66a710536b..87d3bcf0748c15fa02d52f9e224b7b37fbc113f1 100644
--- a/src/visualisation.py
+++ b/examples/visualisation.py
@@ -2,7 +2,7 @@ from benchnirs.load import load_dataset
from benchnirs.viz import epochs_viz
-ALL_DATA_PATH = '/folder/with/datasets/' # path to the datasets
+ALL_DATA_PATH = '../../data/dataset_' # common prefix of the dataset folders
DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
'shin_2018_nb': ['0-back', '2-back', '3-back'],
'shin_2018_wg': ['baseline', 'word generation'],
@@ -12,10 +12,10 @@ DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
for dataset in DATASETS.keys():
print(f'=====\n{dataset}\n=====')
- path = f'{ALL_DATA_PATH}{dataset[:-3]}/'
+ data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/'
# Load and preprocess data
- epochs = load_dataset(dataset, path=path, bandpass=[0.01, 0.5],
+ epochs = load_dataset(dataset, path=data_path, bandpass=[0.01, 0.5],
baseline=(-1.99, 0), roi_sides=True, tddr=True)
classes = DATASETS[dataset]
epochs_lab = epochs[classes]
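
The `data_path` construction above relies on each dataset key ending in a 3-character task suffix (`_nb`, `_wg`, ...), which `dataset[:-3]` strips before appending the study name to the common prefix. For illustration:

```python
ALL_DATA_PATH = '../../data/dataset_'

for dataset in ['herff_2014_nb', 'shin_2018_nb', 'shin_2018_wg']:
    data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/'
    print(f'{dataset} -> {data_path}')

# herff_2014_nb -> ../../data/dataset_herff_2014/
# shin_2018_nb -> ../../data/dataset_shin_2018/
# shin_2018_wg -> ../../data/dataset_shin_2018/
```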
diff --git a/src/window_size.py b/examples/window_size.py
similarity index 87%
rename from src/window_size.py
rename to examples/window_size.py
index 929cee9bb07dc1ab2879caee64559da56e42ef1e..387ae106e0ccded3993dd07476bad9ff3d5eb04b 100644
--- a/src/window_size.py
+++ b/examples/window_size.py
@@ -1,6 +1,5 @@
import datetime
import matplotlib.pyplot as plt
-import numpy as np
import os
import pandas as pd
import seaborn as sns
@@ -9,10 +8,11 @@ import torch
from scipy import stats
from benchnirs.load import load_dataset
-from benchnirs.process import process_epochs
+from benchnirs.process import process_epochs, extract_features
from benchnirs.learn import machine_learn, deep_learn
+ALL_DATA_PATH = '../../data/dataset_' # common prefix of the dataset folders
DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
'shin_2018_nb': ['0-back', '2-back', '3-back'],
'shin_2018_wg': ['baseline', 'word generation'],
@@ -24,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 %
start_time = datetime.datetime.now()
date = start_time.strftime('%Y_%m_%d_%H%M')
-out_folder = f'../results/window_size_{date}'
+out_folder = f'./results/window_size_{date}'
os.makedirs(out_folder)
print(f'Main output folder: {out_folder}/')
@@ -35,11 +35,12 @@ with open(f'{out_folder}/results.csv', 'w') as w:
for dataset in DATASETS.keys():
print(f'=====\n{dataset}\n=====')
+ data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/'
out_path = f'{out_folder}/{dataset}_'
# Load and preprocess data
- epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0),
- roi_sides=True, tddr=True)
+ epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5],
+ baseline=(-2, 0), roi_sides=True, tddr=True)
classes = DATASETS[dataset]
epochs_lab = epochs[classes]
@@ -48,19 +49,20 @@ for dataset in DATASETS.keys():
for ws in WINDOW_SIZES:
print(f'-----\nWindow size {ws}\n-----')
nirs, labels, groups = process_epochs(epochs_lab, ws)
+ nirs_features = extract_features(nirs, ['mean', 'std', 'slope'])
# Run models
lda, hps_lda, _ = machine_learn(
- nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'lda',
output_folder=f'{out_path}{ws}_lda')
svc, hps_svc, _ = machine_learn(
- nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'svc',
output_folder=f'{out_path}{ws}_svc')
knn, hps_knn, _ = machine_learn(
- nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'knn',
output_folder=f'{out_path}{ws}_knn')
ann, hps_ann, _ = deep_learn(
- nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'],
+ nirs_features, labels, groups, 'ann',
output_folder=f'{out_path}{ws}_ann')
dict_window_size['Chance'] += [1/len(classes) for _ in lda]
dict_window_size['LDA'] += lda
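
After the sweep, `dict_window_size` collects per-fold accuracies for each model, which the script (judging by its pandas, seaborn and matplotlib imports) aggregates into a DataFrame for plotting. A sketch with toy numbers of how such a sweep can be visualised (the column layout is an assumption):

```python
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Toy per-fold accuracies for two window sizes and two models
df = pd.DataFrame({
    'Window size': [4.9] * 5 + [9.9] * 5,
    'Chance': [1 / 3] * 10,
    'LDA': [.35, .38, .33, .40, .36, .41, .39, .44, .42, .40],
    'SVC': [.34, .37, .36, .39, .35, .40, .42, .41, .43, .39],
})
long = df.melt(id_vars='Window size', var_name='Model', value_name='Accuracy')
sns.lineplot(data=long, x='Window size', y='Accuracy', hue='Model')
plt.savefig('window_size_summary.png')
```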
diff --git a/requirements.txt b/requirements.txt
index cf3ca65caa34b7666adc2f1327116e37c41d2250..06eb815cc6822bc6364579c9201105b52a5d3d13 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,16 @@
# To install all the required packages, run in a terminal or command prompt:
# python -m pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html
-matplotlib>=3.3.1
-mne>=0.23.4
-nirsimple>=0.1.2
-numpy>=1.19.5
-pandas>=1.0.5
-scikit-learn>=0.24.2
-scipy>=1.8.1
-seaborn>=0.11.1
-statsmodels>=0.12.2
-torch>=1.5.1+cu101
-torchvision>=0.6.1+cu101
+importlib
+lazy_loader
+numpy
+pandas
+scipy
+mne
+matplotlib
+seaborn
+scikit-learn
+torch
+torchvision
+nirsimple
+statsmodels
diff --git a/setup.py b/setup.py
index 8f2cf0ec03b0da9c35ad86112023661fdd7c7137..1b7701995b92d1189d61ff07dc11df84a07bb745 100644
--- a/setup.py
+++ b/setup.py
@@ -5,17 +5,18 @@ with open("README.md", "r") as fh:
setuptools.setup(
name="benchnirs",
- version="1.2.1",
+ version="1.2.2",
author="Johann Benerradi",
author_email="johann.benerradi@gmail.com",
description="Benchmarking framework for machine learning with fNIRS",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://gitlab.com/HanBnrd/benchnirs",
- license='GNU GPLv3+',
- package_dir={"": "src"},
- packages=setuptools.find_packages(where="src"),
+ license="GNU GPLv3+",
+ packages=setuptools.find_packages(),
install_requires=[
+ "importlib",
+ "lazy_loader",
"numpy",
"pandas",
"scipy",
diff --git a/src/benchnirs/__init__.py b/src/benchnirs/__init__.py
deleted file mode 100644
index ef6bcf6661395741d926ef75ef8a4a1fc2a7172d..0000000000000000000000000000000000000000
--- a/src/benchnirs/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-"""
-BenchNIRS
-=========
-Benchmarking framework for machine learning with fNIRS
-"""
-
-from .load import load_dataset
-from .viz import epochs_viz
-from .process import process_epochs
-from .learn import machine_learn, deep_learn
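
The eager imports in `src/benchnirs/__init__.py` are deleted while `lazy_loader` joins the dependencies, which suggests the package namespace is now attached lazily. A hypothetical replacement `__init__.py` (the actual file is not shown in this diff, so the attribute map is an assumption based on the imports above and the functions used in the examples):

```python
"""
BenchNIRS
=========
Benchmarking framework for machine learning with fNIRS
"""

import lazy_loader as lazy

# Hypothetical lazy namespace; mirrors what the old file imported eagerly
__getattr__, __dir__, __all__ = lazy.attach(
    __name__,
    submod_attrs={
        'load': ['load_dataset'],
        'viz': ['epochs_viz'],
        'process': ['process_epochs', 'extract_features'],
        'learn': ['machine_learn', 'deep_learn', 'deep_transfer_learn'],
    },
)
```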
diff --git a/src/custom_model.py b/src/custom_model.py
deleted file mode 100644
index c5bd321cfa045bdec1ab4687dbaca74e8a25c8d2..0000000000000000000000000000000000000000
--- a/src/custom_model.py
+++ /dev/null
@@ -1,142 +0,0 @@
-import datetime
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import pandas as pd
-import seaborn as sns
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-from scipy import stats
-
-from benchnirs.load import load_dataset
-from benchnirs.process import process_epochs
-from benchnirs.learn import deep_learn
-
-
-ALL_DATA_PATH = '/folder/with/datasets/' # path to the datasets
-DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
- 'shin_2018_nb': ['0-back', '2-back', '3-back'],
- 'shin_2018_wg': ['baseline', 'word generation'],
- 'shin_2016_ma': ['baseline', 'mental arithmetic'],
- 'bak_2019_me': ['right', 'left', 'foot']}
-CONFIDENCE = 0.05 # stat confidence at 95 %
-
-
-class CustomCNN(nn.Module):
-
- def __init__(self, n_classes):
- super(CustomCNN, self).__init__()
- self.conv1 = nn.Conv1d(4, 4, kernel_size=10, stride=2) # tempo conv
- self.pool1 = nn.MaxPool1d(2)
- self.conv2 = nn.Conv1d(4, 4, kernel_size=5, stride=2) # tempo conv
- self.pool2 = nn.MaxPool1d(2)
- self.fc1 = nn.Linear(20, 10)
- self.fc2 = nn.Linear(10, n_classes)
-
- def forward(self, x):
- batch_size = x.size(0)
- x = F.relu(self.conv1(x))
- x = self.pool1(x)
- x = F.relu(self.conv2(x))
- x = self.pool2(x)
- x = x.view(batch_size, -1)
- x = F.relu(self.fc1(x))
- x = self.fc2(x)
- return x
-
-
-start_time = datetime.datetime.now()
-out_folder = f'../results/custom'
-if not os.path.isdir(out_folder):
- os.makedirs(out_folder)
-print(f'Main output folder: {out_folder}/')
-
-print(f'Number of GPUs: {torch.cuda.device_count()}')
-
-with open(f'{out_folder}/summary.md', 'w') as w:
- w.write('# Accuracy table\n\n(Standard deviation on the cross-validation)')
- w.write('\n\n|Dataset|Chance level|Average accuracy (sd)|\n')
- w.write('|:---:|:---:|:---:|\n')
-
-with open(f'{out_folder}/results.csv', 'w') as w:
- w.write('dataset;fold;accuracy;hyperparameters;additional_metrics\n')
-
-
-dict_accuracies = {'Accuracy': [], 'Dataset': []}
-for dataset in DATASETS.keys():
- print(f'=====\n{dataset}\n=====')
- data_path = f'{ALL_DATA_PATH}dataset_{dataset[:-3]}/'
- out_path = f'{out_folder}/{dataset}_'
-
- # Load and preprocess data
- epochs = load_dataset(dataset, path=data_path, bandpass=[0.01, 0.5],
- baseline=(-2, 0), roi_sides=True, tddr=True)
- classes = DATASETS[dataset]
- epochs_lab = epochs[classes]
-
- # Run models
- nirs, labels, groups = process_epochs(epochs_lab, 9.9)
- cnn, hps_cnn, additional_metrics_cnn = deep_learn(
- nirs, labels, groups, CustomCNN, features=None,
- output_folder=f'{out_path}cnn')
-
- # Write results
- results = {'CNN': [cnn, hps_cnn]}
- chance_level = np.around(1/len(classes), decimals=3)
- w_summary = open(f'{out_folder}/summary.md', 'a')
- w_results = open(f'{out_folder}/results.csv', 'a')
- w_summary.write(f'|{dataset}|{chance_level}|')
- w_summary.write(
- f'{np.around(np.mean(cnn), decimals=3)} '
- f'({np.around(np.std(cnn), decimals=3)})|')
- for fold, accuracy in enumerate(cnn):
- w_results.write(f'{dataset};{fold+1};{accuracy};"{hps_cnn[fold]}";')
- w_results.write(f'"{additional_metrics_cnn[fold]}"\n')
- w_summary.write('\n')
- w_summary.close()
- w_results.close()
- dict_accuracies['Accuracy'] += cnn
- dict_accuracies['Dataset'] += [dataset] * len(cnn)
-
-
-df_accuracies = pd.DataFrame(dict_accuracies)
-sns.barplot(data=df_accuracies, y='Accuracy', x='Dataset', capsize=.1,
- palette='colorblind')
-plt.savefig(f'{out_folder}/summary.png')
-plt.close()
-
-
-# Stats
-print('Stats...')
-with open(f'{out_folder}/stats.md', 'w') as w:
- df = pd.read_csv(f'{out_folder}/results.csv', delimiter=';')
- w.write('## Comparison of the model accuracy to chance level\n\n')
- w.write('|Dataset|Shapiro p-value|Test|p-value|\n')
- w.write('|:---:|:---:|:---:|:---:|\n')
- for dataset in DATASETS.keys():
- dataset_accuracies = []
- chance_level = 1 / len(DATASETS[dataset])
- normality = True
- w.write(f'|{dataset}|')
- sub_df = df[df['dataset'] == dataset]
- accuracies = sub_df['accuracy'].to_numpy()
- dataset_accuracies.append(accuracies)
- # Check normality of the distribution
- _, p_shap = stats.shapiro(accuracies)
- w.write(f'{p_shap}|')
- if p_shap > CONFIDENCE:
- # t-test
- _, p_tt = stats.ttest_1samp(accuracies, chance_level)
- w.write(f't-test|{p_tt}|\n')
- else:
- normality = False
- # Wilcoxon
- _, p_wilcox = stats.wilcoxon(accuracies-chance_level)
- w.write(f'Wilcoxon|{p_wilcox}|\n')
-
-
-end_time = datetime.datetime.now()
-elapsed_time = end_time - start_time
-print(f'===\nElapsed time: {elapsed_time}')
diff --git a/src/transfer_no_unlab.py b/src/transfer_no_unlab.py
deleted file mode 100644
index 7d4432c12009ee763a09151aa2b64a01c07f883a..0000000000000000000000000000000000000000
--- a/src/transfer_no_unlab.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import datetime
-import numpy as np
-import os
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-from scipy import stats
-
-from benchnirs.load import load_dataset
-from benchnirs.process import process_epochs
-from benchnirs.learn import deep_transfer_learn
-
-
-CLASSES = ['0-back', '2-back', '3-back']
-CONFIDENCE = 0.05 # stat confidence at 95 %
-ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23],
- 'Right PFC HbR': [45, 46, 55, 56, 57, 58, 59],
- 'Left PFC HbO': [0, 1, 2, 3, 4, 5, 6],
- 'Left PFC HbR': [36, 37, 38, 39, 40, 41, 42],
- 'Central PFC HbO': [7, 8],
- 'Central PFC HbR': [43, 44]}
-
-start_time = datetime.datetime.now()
-date = start_time.strftime('%Y_%m_%d_%H%M')
-out_folder = f'../results/transfer_no_unlab_{date}'
-
-
-class HbEncoder(nn.Module):
-
- def __init__(self):
- super(HbEncoder, self).__init__()
- self.conv1 = nn.Conv1d(16, 8, kernel_size=15, stride=5) # tempo conv
- self.conv2 = nn.Conv1d(8, 8, kernel_size=12, stride=6) # tempo conv
- self.fc3 = nn.Linear(96, 56)
- self.fc4 = nn.Linear(56, 16)
- self.bn = nn.BatchNorm1d(8)
-
- def forward(self, x):
- batch_size = x.size(0)
- x = F.relu(self.conv1(x))
- x = F.relu(self.bn(self.conv2(x)))
- x = x.view(batch_size, -1) # flatten
- x = F.relu(self.fc3(x))
- x = F.relu(self.fc4(x))
- return x
-
-
-class HbDecoder(nn.Module):
-
- def __init__(self):
- super(HbDecoder, self).__init__()
- self.fc4 = nn.Linear(16, 56)
- self.fc3 = nn.Linear(56, 96)
- self.tconv2 = nn.ConvTranspose1d(8, 8, kernel_size=12, stride=6)
- self.tconv1 = nn.ConvTranspose1d(8, 16, kernel_size=15, stride=5)
-
- def forward(self, x):
- batch_size = x.size(0)
- x = F.relu(self.fc4(x))
- x = F.relu(self.fc3(x))
- x = x.view(batch_size, 8, -1) # un-flatten
- x = F.relu(self.tconv2(x))
- x = self.tconv1(x)
- return x
-
-
-class Classifier(nn.Module):
- """
- Classifier layers to connect with the encoder
- """
-
- def __init__(self, n_classes, encoder_hbo, encoder_hbr):
- super(Classifier, self).__init__()
- self.encoder_hbo = encoder_hbo
- self.encoder_hbr = encoder_hbr
- self.fc1 = nn.Linear(32, 16)
- self.fc2 = nn.Linear(16, n_classes)
-
- def forward(self, x):
- mid_idx = x.size(1) / 2
- if mid_idx.is_integer():
- mid_idx = int(mid_idx)
- features_from_hbo = self.encoder_hbo(x[:, :mid_idx])
- features_from_hbr = self.encoder_hbr(x[:, mid_idx:])
- features_from_hb = torch.cat((features_from_hbo,
- features_from_hbr), 1)
- x = F.relu(self.fc1(features_from_hb.detach()))
- x = self.fc2(x)
- return x
-
-
-if not os.path.isdir(out_folder):
- os.makedirs(out_folder)
-print(f'Main output folder: {out_folder}/')
-
-print(f'Number of GPUs: {torch.cuda.device_count()}')
-
-print(f'=====\nshin_2018_nb\n=====')
-
-# Load and preprocess data
-epochs = load_dataset('shin_2018_nb', bandpass=[0.01, 0.5],
- baseline=(-2, 0), tddr=True)
-print(epochs)
-ch_picks = []
-for group in ROIS.values():
- ch_picks += group
-epochs.pick(ch_picks)
-epochs_lab = epochs[CLASSES]
-
-# Run models
-nirs, labels, groups = process_epochs(epochs_lab, tmax=39.9, sort=True)
-print(nirs.shape)
-accuracies, hps, additional_metrics = deep_transfer_learn(
- nirs, labels, groups, HbEncoder, HbDecoder, Classifier,
- features=None, normalize=True,
- output_folder=f'{out_folder}', max_epoch=500)
-
-# Write results
-with open(f'{out_folder}/results.csv', 'w') as w:
- w.write('dataset;model;fold;accuracy;hyperparameters\n')
- for fold, accuracy in enumerate(accuracies):
- hp = hps[fold]
- w.write(f'shin_2018_nb;CNN;{fold+1};{accuracy};"{hp}"\n')
-
-print(f'Average accuracy: {np.mean(accuracies)}')
-_, p_shap = stats.shapiro(accuracies)
-print(f'Shapiro p-value: {p_shap}')
-if p_shap > CONFIDENCE:
- s_tt, p_tt = stats.ttest_1samp(accuracies, 1/3, alternative='greater')
- print(f't-test = {s_tt} (p-value = {p_tt})')
-else:
- s_wilcox, p_wilcox = stats.wilcoxon(accuracies - np.array(1/3),
- alternative='greater')
- print(f'Wilcoxon = {s_wilcox} (p-value = {p_wilcox})')
-
-
-end_time = datetime.datetime.now()
-elapsed_time = end_time - start_time
-print(f'===\nElapsed time: {elapsed_time}')
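
The accuracy-vs-chance test at the end of this deleted script uses Shapiro to choose between a one-sided t-test and a one-sided Wilcoxon test. For reference, a self-contained version of that pattern with toy numbers:

```python
import numpy as np
from scipy import stats

CONFIDENCE = 0.05
rng = np.random.default_rng(1)
accuracies = rng.normal(loc=0.4, scale=0.03, size=10)  # toy fold accuracies
chance = 1 / 3

# Check normality of the accuracy distribution to pick the test
_, p_shap = stats.shapiro(accuracies)
if p_shap > CONFIDENCE:
    s, p = stats.ttest_1samp(accuracies, chance, alternative='greater')
    print(f't-test = {s} (p-value = {p})')
else:
    s, p = stats.wilcoxon(accuracies - chance, alternative='greater')
    print(f'Wilcoxon = {s} (p-value = {p})')
```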