diff --git a/.gitignore b/.gitignore
index c80060ae1dd479b33753dcc9a42c0b8637779307..d06c9c187b4fcf04326cdacfcb6e4e88010609cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -128,9 +128,17 @@ dmypy.json
 # Pyre type checker
 .pyre/
 
-# Ignore some output files
+# Doc build
+public/*
+
+# Other files
 *slurm*
 *confusion_matrix*
 *graph*
 *.pickle
 *.pt
+*.mat
+*.csv
+*.xlsx
+*.ods
+*.pdf
diff --git a/README.md b/README.md
index 8256c0e5abce749cac830bb874c606875fe97ebc..9163d1c3d5245329402d63ab4c2e5e54e3a0582d 100644
--- a/README.md
+++ b/README.md
@@ -2,11 +2,11 @@
 BenchNIRS
 
-> Benchmarking framework for machine learning with fNIRS
+*Benchmarking framework for machine learning with fNIRS*
 
 **Quick links**
 → [*Journal article*](https://www.frontiersin.org/articles/10.3389/fnrgo.2023.994969)
-→ [*BenchNIRS repository*](https://gitlab.com/HanBnrd/benchnirs)
+→ [*BenchNIRS source code*](https://gitlab.com/HanBnrd/benchnirs)
 → [*Install BenchNIRS*](https://hanbnrd.gitlab.io/benchnirs/install.html)
 → [*Documentation*](https://hanbnrd.gitlab.io/benchnirs)
 → [*Issue tracker*](https://gitlab.com/HanBnrd/benchnirs/-/issues)
@@ -40,20 +40,6 @@ The documentation of the framework with examples can be found [here](https://han
 A checklist of recommendations towards good practice for machine learning with fNIRS (for brain-computer interface applications) can be found [here](./CHECKLIST.md). We welcome contributions from the community in order to improve it, please see below for more information on how to contribute.
 
-## Minimum tested requirements
-[**Python 3.8**](https://www.python.org/downloads/) with the following libraries:
-- [matplotlib 3.3](https://matplotlib.org/stable/)
-- [mne 0.23](https://mne.tools/stable/install/index.html)
-- [nirsimple 0.1](https://github.com/HanBnrd/NIRSimple#installation)
-- [numpy 1.19](https://numpy.org/install/)
-- [pandas 1.0](https://pandas.pydata.org/docs/getting_started/index.html#installation)
-- [scikit-learn 0.24](https://scikit-learn.org/stable/install.html)
-- [scipy 1.8](https://scipy.org/install/)
-- [seaborn 0.11](https://seaborn.pydata.org/installing.html)
-- [statsmodels 0.12.2](https://www.statsmodels.org/dev/install.html)
-- [torch 1.5](https://pytorch.org/get-started/locally/)
-
-
 ## Setting up *BenchNIRS*
 1. Download and install Python 3.8 or greater, for example with [Miniconda](https://docs.conda.io/projects/miniconda/en/latest/index.html).
@@ -61,20 +47,17 @@ A checklist of recommendations towards good practice for machine learning with f
 ```bash
 pip install benchnirs
 ```
-> Alternatively to install from source, download and unzip the [repository](https://gitlab.com/HanBnrd/benchnirs/-/archive/main/benchnirs-main.zip).
-> Then, in a terminal or command prompt (eg. Anaconda Prompt), navigate to the directory containing the `requirements.txt` file and run:
-> ```bash
-> python -m pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html
-> ```
 3. Download the datasets (see below).
+> Alternatively, to install from source in development mode, download and unzip the [repository](https://gitlab.com/HanBnrd/benchnirs/-/archive/main/benchnirs-main.zip) (or clone it with Git), and run `devinstall.py`.
+
 
 ## Downloading the datasets
-- *Herff et al. 2014* (n-back task): you can download the dataset by making a request [here](http://www.csl.uni-bremen.de/CorpusData/download.php?crps=fNIRS). In the examples, the unzipped folder has been renamed to *dataset_herff_2014* for convenience.
-- *Shin et al. 
2018* (n-back and word generation tasks): you can download the dataset [here](http://doc.ml.tu-berlin.de/simultaneous_EEG_NIRS/NIRS/NIRS_01-26_MATLAB.zip). In the examples, the unzipped folder has been renamed to *dataset_shin_2018* for convenience. -- *Shin et al. 2016* (mental arithmetic task): you can download the dataset by filling the form [here](http://doc.ml.tu-berlin.de/hBCI). Then click on *NIRS_01-29* to download the fNIRS data. In the examples, the unzipped folder has been renamed to *dataset_shin_2016* for convenience. -- *Bak et al. 2019* (motor execution task): you can download the dataset [here](https://figshare.com/ndownloader/files/18069143). In the examples, the unzipped folder has been renamed to *dataset_bak_2019* for convenience. +- *Herff et al. 2014* (n-back task): you can download the dataset by making a request [here](http://www.csl.uni-bremen.de/CorpusData/download.php?crps=fNIRS). +- *Shin et al. 2018* (n-back and word generation tasks): you can download the dataset [here](http://doc.ml.tu-berlin.de/simultaneous_EEG_NIRS/NIRS/NIRS_01-26_MATLAB.zip). +- *Shin et al. 2016* (mental arithmetic task): you can download the dataset by filling the form [here](http://doc.ml.tu-berlin.de/hBCI). Then click on *NIRS_01-29* to download the fNIRS data. +- *Bak et al. 2019* (motor execution task): you can download the dataset [here](https://figshare.com/ndownloader/files/18069143). ## Keeping *BenchNIRS* up to date @@ -84,8 +67,8 @@ pip install --upgrade benchnirs ``` -## Example -A full example script showing how to use the framework with a custom deep learning model can be found [here](https://hanbnrd.gitlab.io/benchnirs/example.html). +## Examples +A set of example scripts showing how to use the framework can be found [here](https://hanbnrd.gitlab.io/benchnirs/examples.html). 
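+
+For instance, with `extract_features` now part of the public API, a classical model can be trained on simple statistical features. The snippet below is a minimal sketch: `dataset_path` is an assumed placeholder for the folder containing the downloaded *Bak et al. 2019* dataset.
+
+```python
+import benchnirs as bn
+
+dataset_path = './dataset_bak_2019/'  # assumed path to the downloaded dataset
+
+epochs = bn.load_dataset('bak_2019_me', dataset_path)
+nirs, labels, groups = bn.process_epochs(epochs['right', 'left', 'foot'])
+features = bn.extract_features(nirs, ['mean', 'std', 'slope'])
+results = bn.machine_learn(features, labels, groups, 'lda')
+print(results)
+```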
## Simple use case @@ -93,34 +76,14 @@ A full example script showing how to use the framework with a custom deep learni ```python import benchnirs as bn -epochs = bn.load_dataset('shin_2018_nb') -data = bn.process_epochs(epochs['0-back', '2-back', '3-back']) -results = bn.deep_learn(*data, my_model) +epochs = bn.load_dataset('bak_2019_me', dataset_path) +data = bn.process_epochs(epochs['right', 'left', 'foot']) +results = bn.deep_learn(*data, 'lstm') print(results) ``` -## Running main scripts -- [`generalised.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/generalised.py) compares the 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a generalised approach (testing with unseen subjects) -- [`dataset_size.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/dataset_size.py) reproduces `generalised.py` but with a range of different dataset sizes (50% to 100% of dataset) to study the influence of this parameter on the classification accuracy -- [`window_size.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/window_size.py) reproduces `generalised.py` but with only the 4 models using feature extraction (LDA, SVC, kNN and ANN) and with a range of different window sizes (2 to 10 seconds) to study the influence of this parameter on the classification accuracy -- [`sliding_window.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/sliding_window.py) reproduces `generalised.py` but with only the 4 models using feature extraction (LDA, SVC, kNN and ANN) and with a 2-second sliding window on the 10-second epochs -- [`personalised.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/personalised.py) compares the 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a personalised approach (training and testing with each subject individually) -- [`visualisation.py`](https://gitlab.com/HanBnrd/benchnirs/-/blob/main/src/visualisation.py) enables to visualise the data from the datasets with various signal processing - - -## Extra scripts: n-back tailored -- `tailored_generalised.py` compares the 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 2 n-back datasets with a generalised approach (testing with unseen subjects) -- `tailored_window_size.py` reproduces `tailored_generalised.py` but with only 5 models (LDA, SVC, kNN, ANN and LSTM) and with a range of different window sizes (5 to 40 seconds) to study the influence of this parameter on the classification accuracy -- `tailored_shin_nb.py` optimises and evaluates a tailored CNN on the *Shin et al. 2018* n-back dataset with a generalised approach (testing with unseen subjects) - - -## Extra scripts: transfer learning -- `transfer.py` optimises and evaluates a transfer learning model (pretext self-supervised representation learning task with unlabelled and labelled data using a CED, downstream supervised n-back classification task with labelled data) on the *Shin et al. 2018* n-back dataset with a generalised approach (testing with unseen subjects) -- `transfer_no_unlab.py` reproduces `transfer.py` but with only labelled data for the pretext task. - - ## Contributing to the repository Contributions from the community to this repository are highly appreciated. We are mainly interested in contributions to: - improving the recommendation checklist @@ -150,9 +113,9 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389 } ``` -> If you are using the datasets of the framework, please also cite those related works. 
+> If you are using the datasets of the framework, please also cite those related works: > -> *Herff et al. 2014*: +> [*Herff et al. 2014*](https://doi.org/10.3389/fnhum.2013.00935) > ``` > @article{herff2014mental, > title={Mental workload during n-back task—quantified in the prefrontal cortex using fNIRS}, @@ -165,7 +128,7 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389 > } > ``` > -> *Shin et al. 2018*: +> [*Shin et al. 2018*](https://doi.org/10.1038/sdata.2018.3) > ``` > @article{shin2018simultaneous, > title={Simultaneous acquisition of EEG and NIRS during cognitive tasks for an open access dataset}, @@ -178,7 +141,7 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389 > } > ``` > -> *Shin et al. 2016*: +> [*Shin et al. 2016*](https://doi.org/10.1109/TNSRE.2016.2628057) > ``` > @article{shin2016open, > title={Open access dataset for EEG+NIRS single-trial classification}, @@ -192,7 +155,7 @@ If you are using *BenchNIRS*, please cite [this article](https://doi.org/10.3389 > } > ``` > -> *Bak et al. 2019*: +> [*Bak et al. 2019*](https://doi.org/10.3390/electronics8121486) > ``` > @article{bak2019open, > title={Open-Access fNIRS Dataset for Classification of Unilateral Finger-and Foot-Tapping}, diff --git a/benchnirs/__init__.py b/benchnirs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cae2e75397458346b6667e22037c83c22d3fe3bc --- /dev/null +++ b/benchnirs/__init__.py @@ -0,0 +1,25 @@ +""" +BenchNIRS +========= +Benchmarking framework for machine learning with fNIRS +""" + +import lazy_loader as lazy + +from importlib.metadata import version + + +try: + __version__ = version("benchnirs") +except Exception: + __version__ = "dev" + +__getattr__, __dir__, __all__ = lazy.attach( + __name__, + submod_attrs={ + 'load': ['load_dataset'], + 'viz': ['epochs_viz'], + 'process': ['process_epochs', 'extract_features'], + 'learn': ['machine_learn', 'deep_learn', 'deep_transfer_learn'] + } +) diff --git a/src/benchnirs/learn.py b/benchnirs/learn.py similarity index 89% rename from src/benchnirs/learn.py rename to benchnirs/learn.py index 738335140eebdbe52f99dd9fd2d2e4e601248587..adbeb4fda62130b6d9046e3c745585864cf01de6 100644 --- a/src/benchnirs/learn.py +++ b/benchnirs/learn.py @@ -11,7 +11,6 @@ import torch.optim as optim from pandas import DataFrame from torch.utils.data import DataLoader, Dataset -from scipy.stats import linregress from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.metrics import (accuracy_score, precision_recall_fscore_support, confusion_matrix) @@ -34,49 +33,7 @@ N_NEIGHBORS_LIST = list(range(1, 10)) PATIENCE = 5 # for early stopping -def _extract_features(nirs, feature_list): - """ - Perform feature extraction on NIRS data. - - Parameters - ---------- - nirs : array of shape (n_epochs, n_channels, n_times) - Processed NIRS data. - - feature_list : list of strings - List of features to extract. The list can include ``'mean'`` for the - mean along the time axis, ``'std'`` for standard deviation along the - time axis and ``'slope'`` for the slope of the linear regression along - the time axis. - - Returns - ------- - nirs_features : array of shape (n_epochs, n_channels*n_features) - Features extracted from NIRS data. 
- """ - nirs_features = [] - for feature in feature_list: - if feature == 'mean': - feature = np.mean(nirs, axis=2) - elif feature == 'std': - feature = np.std(nirs, axis=2) - elif feature == 'slope': - x = range(nirs.shape[2]) - feature = [] - for epoch in nirs: - ep_slopes = [] - for channel in epoch: - ep_slopes.append(linregress(x, channel).slope) - feature.append(ep_slopes) - nirs_features.append(feature) - - nirs_features = np.stack(nirs_features, axis=2) - nirs_features = nirs_features.reshape(len(nirs), -1) # flatten data - - return nirs_features - - -def machine_learn(nirs, labels, groups, model, features, normalize=False, +def machine_learn(nirs, labels, groups, model, normalize=None, random_state=None, output_folder='./outputs'): """ Perform nested k-fold cross-validation for standard machine learning models @@ -104,16 +61,11 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False, discriminant analysis, ``'svc'`` for a linear support vector classifier or ``'knn'`` for a k-nearest neighbors classifier. - features : list of strings - List of features to extract. The list can include ``'mean'`` for the - mean along the time axis, ``'std'`` for standard deviation along the - time axis and ``'slope'`` for the slope of the linear regression along - the time axis. - - normalize : boolean - Whether to normalize data before feeding to the model with min-max - scaling based on the train set for each iteration of the outer - cross-validation. Defaults to ``False`` for no normalization. + normalize : tuple of integers | None + Axes on which to normalize data before feeding to the model with + min-max scaling based on the train set for each iteration of the outer + cross-validation. For example (0, 2) to normalize across epochs and + time. Defaults to ``None`` for no normalization. random_state : integer | None Controls the shuffling applied to data. 
Pass an integer for @@ -147,9 +99,6 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False, if not os.path.isdir(output_folder): os.makedirs(output_folder) - # Feature extraction - nirs = _extract_features(nirs, features) - # K-fold cross-validator if groups is None: out_kf = StratifiedKFold(n_splits=OUTER_K) @@ -182,11 +131,14 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False, # Min-max scaling if normalize: - maxs = nirs_train.max(axis=0)[np.newaxis, :] - mins = nirs_train.min(axis=0)[np.newaxis, :] + maxs = nirs_train.max(axis=normalize, keepdims=True) + mins = nirs_train.min(axis=normalize, keepdims=True) nirs_train = (nirs_train - mins) / (maxs - mins) nirs_test = (nirs_test - mins) / (maxs - mins) + nirs_train = nirs_train.reshape(len(nirs_train), -1) + nirs_test = nirs_test.reshape(len(nirs_test), -1) + in_split = in_kf.split(nirs_train, labels_train, groups_train) # LDA @@ -199,7 +151,7 @@ def machine_learn(nirs, labels, groups, model, features, normalize=False, # SVC elif model == 'svc': parameters = {'C': C_LIST} - svc = LinearSVC(max_iter=MAX_ITER) + svc = LinearSVC(max_iter=MAX_ITER, dual='auto') clf = GridSearchCV(svc, parameters, scoring='accuracy', cv=in_split) clf.fit(nirs_train, labels_train) @@ -259,6 +211,8 @@ class _ANNClassifier(nn.Module): self.fc3 = nn.Linear(4, n_classes) def forward(self, x): + batch_size = x.size(0) + x = x.view(batch_size, -1) x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = self.fc3(x) @@ -466,8 +420,8 @@ def _test_dl(nirs_test, labels_test, clf): return results -def deep_learn(nirs, labels, groups, model_class, features=None, - normalize=False, batch_sizes=[4, 8, 16, 32, 64], +def deep_learn(nirs, labels, groups, model_class, normalize=None, + batch_sizes=[4, 8, 16, 32, 64], lrs=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1], max_epoch=100, random_state=None, output_folder='./outputs'): """ @@ -496,17 +450,11 @@ def deep_learn(nirs, labels, groups, model_class, features=None, ``__init__()`` method must accept the number of classes as a parameter, and this needs to be the number of output neurons. - features : list of strings | None - List of features to extract. The list can include ``'mean'`` for the - mean along the time axis, ``'std'`` for standard deviation along the - time axis and ``'slope'`` for the slope of the linear regression along - the time axis. Defaults to ``None`` for no feature extration and using - the raw data. - - normalize : boolean - Whether to normalize data before feeding to the model with min-max - scaling based on the train set for each iteration of the outer - cross-validation. Defaults to ``False`` for no normalization. + normalize : tuple of integers | None + Axes on which to normalize data before feeding to the model with + min-max scaling based on the train set for each iteration of the outer + cross-validation. For example (0, 2) to normalize across epochs and + time. Defaults to ``None`` for no normalization. batch_sizes : list of integers List of batch sizes to test for optimization. @@ -533,7 +481,7 @@ def deep_learn(nirs, labels, groups, model_class, features=None, outer cross-validation). all_hps : list of tuples - List of hyperparameters (one tuple for each iteration of the outer + List of best hyperparameters (one tuple for each iteration of the outer cross-validation). Each tuple will be `(batch size, learning rate)`. 
additional_metrics : list of tuples @@ -559,10 +507,6 @@ def deep_learn(nirs, labels, groups, model_class, features=None, print(f'Deep learning: {model_class.__name__}') - # Feature extraction - if features is not None: - nirs = _extract_features(nirs, features) - # Outer split if os.path.isfile(f'{output_folder}/split.pickle'): print('\tSaved k-fold split found, loading it...', end=' ') @@ -604,14 +548,8 @@ def deep_learn(nirs, labels, groups, model_class, features=None, # Min-max scaling if normalize: - if features is not None: - maxs = nirs_train.max(axis=0)[np.newaxis, :] - mins = nirs_train.min(axis=0)[np.newaxis, :] - else: - maxs = nirs_train.max(axis=(0, 2)) - maxs = maxs[np.newaxis, :, np.newaxis] - mins = nirs_train.min(axis=(0, 2)) - mins = mins[np.newaxis, :, np.newaxis] + maxs = nirs_train.max(axis=normalize, keepdims=True) + mins = nirs_train.min(axis=normalize, keepdims=True) nirs_train = (nirs_train - mins) / (maxs - mins) if os.path.isfile(f'{output_folder}/model_k{k}.pt'): @@ -687,14 +625,10 @@ def deep_learn(nirs, labels, groups, model_class, features=None, nirs_train, nirs_test = nirs[out_idx[0]], nirs[out_idx[1]] labels_test = labels[out_idx[1]] - # Min-max scaling + # Min-max scaling of test set using training set only to avoid leakage if normalize: - if features is not None: - maxs = nirs_train.max(axis=0)[np.newaxis, :] - mins = nirs_train.min(axis=0)[np.newaxis, :] - else: - maxs = nirs_train.max(axis=(0, 2))[np.newaxis, :, np.newaxis] - mins = nirs_train.min(axis=(0, 2))[np.newaxis, :, np.newaxis] + maxs = nirs_train.max(axis=normalize, keepdims=True) + mins = nirs_train.min(axis=normalize, keepdims=True) nirs_test = (nirs_test - mins) / (maxs - mins) # Load trained model, hyperparameters and training results @@ -966,7 +900,7 @@ def _proxy_optim(nirs_train, targets_train, groups_train, enc_class, dec_class, def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, - model_class, features=None, normalize=False, + model_class, normalize=None, batch_sizes=[4, 8, 16, 32, 64], lrs=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1], max_epoch=100, random_state=None, output_folder='./outputs'): @@ -1004,17 +938,11 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, parameters. The number of classes needs to be the number of output neurons. - features : list of strings | None - List of features to extract. The list can include ``'mean'`` for the - mean along the time axis, ``'std'`` for standard deviation along the - time axis and ``'slope'`` for the slope of the linear regression along - the time axis. Defaults to ``None`` for no feature extration and using - the raw data. - - normalize : boolean - Whether to normalize data before feeding to the model with min-max - scaling based on the train set for each iteration of the outer - cross-validation. Defaults to ``False`` for no normalization. + normalize : tuple of integers | None + Axes on which to normalize data before feeding to the model with + min-max scaling based on the train set for each iteration of the outer + cross-validation. For example (0, 2) to normalize across epochs and + time. Defaults to ``None`` for no normalization. batch_sizes : list of integers List of batch sizes to test for optimization. @@ -1041,8 +969,8 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, each iteration of the outer cross-validation). all_hps : list of tuples - List of hyperparameters for the overall classifier (one tuple for each - iteration of the outer cross-validation). 
Each tuple will be + List of best hyperparameters for the overall classifier (one tuple for + each iteration of the outer cross-validation). Each tuple will be `(batch size, learning rate)`. additional_metrics : list of tuples @@ -1065,10 +993,6 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, print(f'Deep transfer learning: {enc_class.__name__}/' f'{dec_class.__name__}-{model_class.__name__}') - # Feature extraction - if features is not None: - nirs = _extract_features(nirs, features) - # Get index to split channel types mid_idx = nirs.shape[1] / 2 if mid_idx.is_integer(): @@ -1115,14 +1039,8 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, # Min-max scaling if normalize: - if features is not None: - maxs = nirs_train.max(axis=0)[np.newaxis, :] - mins = nirs_train.min(axis=0)[np.newaxis, :] - else: - maxs = nirs_train.max(axis=(0, 2)) - maxs = maxs[np.newaxis, :, np.newaxis] - mins = nirs_train.min(axis=(0, 2)) - mins = mins[np.newaxis, :, np.newaxis] + maxs = nirs_train.max(axis=normalize, keepdims=True) + mins = nirs_train.min(axis=normalize, keepdims=True) nirs_train = (nirs_train - mins) / (maxs - mins) # Train and optimise self-supervised models @@ -1239,14 +1157,10 @@ def deep_transfer_learn(nirs, labels, groups, enc_class, dec_class, nirs_train, nirs_test = nirs[out_idx[0]], nirs[out_idx[1]] labels_test = labels[out_idx[1]] - # Min-max scaling + # Min-max scaling of test set using training set only to avoid leakage if normalize: - if features is not None: - maxs = nirs_train.max(axis=0)[np.newaxis, :] - mins = nirs_train.min(axis=0)[np.newaxis, :] - else: - maxs = nirs_train.max(axis=(0, 2))[np.newaxis, :, np.newaxis] - mins = nirs_train.min(axis=(0, 2))[np.newaxis, :, np.newaxis] + maxs = nirs_train.max(axis=normalize, keepdims=True) + mins = nirs_train.min(axis=normalize, keepdims=True) nirs_test = (nirs_test - mins) / (maxs - mins) # Load trained model, hyperparameters and training results diff --git a/src/benchnirs/load.py b/benchnirs/load.py similarity index 98% rename from src/benchnirs/load.py rename to benchnirs/load.py index 95a34adc50b3ac9e2a8412cff2a9128b00982fe5..a4df4db9147545dd44584566d6fc32cda13159ec 100644 --- a/src/benchnirs/load.py +++ b/benchnirs/load.py @@ -494,7 +494,7 @@ class _DatasetBak2019ME(): return data -def load_dataset(dataset, path=None, bandpass=None, order=4, tddr=False, +def load_dataset(dataset, path, bandpass=None, order=4, tddr=False, baseline=(None, 0), roi_sides=False): """ Load and filter one of the open access dataset. @@ -514,9 +514,8 @@ def load_dataset(dataset, path=None, bandpass=None, order=4, tddr=False, ``'bak_2019_me'`` for motor execution from Bak et al., 2019 (epoch interval: -2 to 10 seconds). - path : string | None - Path of the dataset selected with the ``dataset`` parameter. Defaults - to ``None`` to use the default path. + path : string + Path of the dataset selected with the ``dataset`` parameter. bandpass : list of floats | None Cutoff frequencies of the bandpass Butterworth filter. Defaults to @@ -566,10 +565,13 @@ def load_dataset(dataset, path=None, bandpass=None, order=4, tddr=False, loader = None for subj_id, subj in enumerate(loader.subject_list): - if path is None: - data = loader.load(subj) - else: + try: data = loader.load(subj, path) + except FileNotFoundError: + raise FileNotFoundError( + f"dataset not found, please make sure the dataset has been " + f"downloaded and the proper path has been provided (cf. 
" + f"https://hanbnrd.gitlab.io/benchnirs/install.html)") # Create MNE raw object from delta_c info = mne.create_info(ch_names=data['ch_names'], sfreq=data['sfreq'], diff --git a/src/benchnirs/process.py b/benchnirs/process.py similarity index 65% rename from src/benchnirs/process.py rename to benchnirs/process.py index 409df1ef47b984ba1a95f7717ec91b56c401beac..89676ac2e3d3ab48322e4f7d82a3f8c4f1613084 100644 --- a/src/benchnirs/process.py +++ b/benchnirs/process.py @@ -1,3 +1,8 @@ +import numpy as np + +from scipy.stats import linregress + + def process_epochs(mne_epochs, tmax=None, tslide=None, sort=False, reject_criteria=None): """ @@ -76,3 +81,45 @@ def process_epochs(mne_epochs, tmax=None, tslide=None, sort=False, print(f'Dataset shape: {nirs.shape}') return nirs, labels, groups + + +def extract_features(nirs, feature_list): + """ + Perform feature extraction on NIRS data. + + Parameters + ---------- + nirs : array of shape (n_epochs, n_channels, n_times) + Processed NIRS data. + + feature_list : list of strings + List of features to extract. The list can include ``'mean'`` for the + mean along the time axis, ``'std'`` for standard deviation along the + time axis and ``'slope'`` for the slope of the linear regression along + the time axis. + + Returns + ------- + nirs_features : array of shape (n_epochs, n_channels, n_features) + Features extracted from NIRS data. + """ + nirs_features = [] + for feature in feature_list: + if feature == 'mean': + nirs_feature = np.mean(nirs, axis=-1, keepdims=True) + elif feature == 'std': + nirs_feature = np.std(nirs, axis=-1, keepdims=True) + elif feature == 'slope': + x = range(nirs.shape[-1]) + nirs_feature = [] + for epoch in nirs: + ep_slopes = [] + for channel in epoch: + ep_slopes.append(linregress(x, channel).slope) + nirs_feature.append(ep_slopes) + nirs_feature = np.expand_dims(nirs_feature, -1) + nirs_features.append(nirs_feature) + + nirs_features = np.concatenate(nirs_features, axis=-1) + + return nirs_features diff --git a/src/benchnirs/viz.py b/benchnirs/viz.py similarity index 100% rename from src/benchnirs/viz.py rename to benchnirs/viz.py diff --git a/devinstall.py b/devinstall.py new file mode 100644 index 0000000000000000000000000000000000000000..a0cf3dee4301a1f8eece5f2ae831cf3ebd781f65 --- /dev/null +++ b/devinstall.py @@ -0,0 +1,2 @@ +import os +os.system('pip install -e .') diff --git a/docs/source/conf.py b/docs/source/conf.py index 7e8f1945d285d579e8c5c733c24a6cbe24fd97ab..59129b5245319cb5eabdb1bb032ebdaede64fedc 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,7 +13,7 @@ from datetime import datetime, timezone # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
-sys.path.insert(0, os.path.abspath('../../src/benchnirs')) +sys.path.insert(0, os.path.abspath('../../benchnirs')) # -- Project information ----------------------------------------------------- @@ -24,7 +24,7 @@ copyright = f'2021-{current_year}, Johann Benerradi' author = 'Johann Benerradi' # The full version, including alpha/beta/rc tags -release = '1.2.1' +release = '1.2.2' # -- General configuration --------------------------------------------------- diff --git a/docs/source/example.rst b/docs/source/example.rst deleted file mode 100644 index 1b674a15734aec6f1455bd0a3d5caf728b870d9d..0000000000000000000000000000000000000000 --- a/docs/source/example.rst +++ /dev/null @@ -1,149 +0,0 @@ -Example -======= - -Below is an example of how to use `BenchNIRS` with a custom convolutional neural network (CNN). - -.. code-block:: python - - import datetime - import matplotlib.pyplot as plt - import numpy as np - import os - import pandas as pd - import seaborn as sns - import torch - import torch.nn as nn - import torch.nn.functional as F - - from scipy import stats - - from benchnirs.load import load_dataset - from benchnirs.process import process_epochs - from benchnirs.learn import deep_learn - - - ALL_DATA_PATH = '/folder/with/datasets/' # path to the datasets - DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], - 'shin_2018_nb': ['0-back', '2-back', '3-back'], - 'shin_2018_wg': ['baseline', 'word generation'], - 'shin_2016_ma': ['baseline', 'mental arithmetic'], - 'bak_2019_me': ['right', 'left', 'foot']} - CONFIDENCE = 0.05 # stat confidence at 95 % - - - class CustomCNN(nn.Module): - - def __init__(self, n_classes): - super(CustomCNN, self).__init__() - self.conv1 = nn.Conv1d(4, 4, kernel_size=10, stride=2) # tempo conv - self.pool1 = nn.MaxPool1d(2) - self.conv2 = nn.Conv1d(4, 4, kernel_size=5, stride=2) # tempo conv - self.pool2 = nn.MaxPool1d(2) - self.fc1 = nn.Linear(20, 10) - self.fc2 = nn.Linear(10, n_classes) - - def forward(self, x): - batch_size = x.size(0) - x = F.relu(self.conv1(x)) - x = self.pool1(x) - x = F.relu(self.conv2(x)) - x = self.pool2(x) - x = x.view(batch_size, -1) - x = F.relu(self.fc1(x)) - x = self.fc2(x) - return x - - - start_time = datetime.datetime.now() - out_folder = f'../results/custom' - if not os.path.isdir(out_folder): - os.makedirs(out_folder) - print(f'Main output folder: {out_folder}/') - - print(f'Number of GPUs: {torch.cuda.device_count()}') - - with open(f'{out_folder}/summary.md', 'w') as w: - w.write('# Accuracy table\n\n(Standard deviation on the cross-validation)') - w.write('\n\n|Dataset|Chance level|Average accuracy (sd)|\n') - w.write('|:---:|:---:|:---:|\n') - - with open(f'{out_folder}/results.csv', 'w') as w: - w.write('dataset;fold;accuracy;hyperparameters;additional_metrics\n') - - - dict_accuracies = {'Accuracy': [], 'Dataset': []} - for dataset in DATASETS.keys(): - print(f'=====\n{dataset}\n=====') - data_path = f'{ALL_DATA_PATH}dataset_{dataset[:-3]}/' - out_path = f'{out_folder}/{dataset}_' - - # Load and preprocess data - epochs = load_dataset(dataset, path=data_path, bandpass=[0.01, 0.5], - baseline=(-2, 0), roi_sides=True, tddr=True) - classes = DATASETS[dataset] - epochs_lab = epochs[classes] - - # Run models - nirs, labels, groups = process_epochs(epochs_lab, 9.9) - cnn, hps_cnn, additional_metrics_cnn = deep_learn( - nirs, labels, groups, CustomCNN, features=None, - output_folder=f'{out_path}cnn') - - # Write results - results = {'CNN': [cnn, hps_cnn]} - chance_level = np.around(1/len(classes), decimals=3) - 
w_summary = open(f'{out_folder}/summary.md', 'a') - w_results = open(f'{out_folder}/results.csv', 'a') - w_summary.write(f'|{dataset}|{chance_level}|') - w_summary.write( - f'{np.around(np.mean(cnn), decimals=3)} ' - f'({np.around(np.std(cnn), decimals=3)})|') - for fold, accuracy in enumerate(cnn): - w_results.write(f'{dataset};{fold+1};{accuracy};"{hps_cnn[fold]}";') - w_results.write(f'"{additional_metrics_cnn[fold]}"\n') - w_summary.write('\n') - w_summary.close() - w_results.close() - dict_accuracies['Accuracy'] += cnn - dict_accuracies['Dataset'] += [dataset] * len(cnn) - - - df_accuracies = pd.DataFrame(dict_accuracies) - sns.barplot(data=df_accuracies, y='Accuracy', x='Dataset', capsize=.1, - palette='colorblind') - plt.savefig(f'{out_folder}/summary.png') - plt.close() - - - # Stats - print('Stats...') - with open(f'{out_folder}/stats.md', 'w') as w: - df = pd.read_csv(f'{out_folder}/results.csv', delimiter=';') - w.write('## Comparison of the model accuracy to chance level\n\n') - w.write('|Dataset|Shapiro p-value|Test|p-value|\n') - w.write('|:---:|:---:|:---:|:---:|\n') - for dataset in DATASETS.keys(): - dataset_accuracies = [] - chance_level = 1 / len(DATASETS[dataset]) - normality = True - w.write(f'|{dataset}|') - sub_df = df[df['dataset'] == dataset] - accuracies = sub_df['accuracy'].to_numpy() - dataset_accuracies.append(accuracies) - # Check normality of the distribution - _, p_shap = stats.shapiro(accuracies) - w.write(f'{p_shap}|') - if p_shap > CONFIDENCE: - # t-test - _, p_tt = stats.ttest_1samp(accuracies, chance_level) - w.write(f't-test|{p_tt}|\n') - else: - normality = False - # Wilcoxon - _, p_wilcox = stats.wilcoxon(accuracies-chance_level) - w.write(f'Wilcoxon|{p_wilcox}|\n') - - - end_time = datetime.datetime.now() - elapsed_time = end_time - start_time - print(f'===\nElapsed time: {elapsed_time}') diff --git a/docs/source/examples.rst b/docs/source/examples.rst new file mode 100644 index 0000000000000000000000000000000000000000..2c4c50fc397c48633d8dcb528ff3f22622201c5e --- /dev/null +++ b/docs/source/examples.rst @@ -0,0 +1,13 @@ +Examples +======== + +.. toctree:: + :maxdepth: 2 + + examples/custom-model + examples/generalised + examples/personalised + examples/sliding-window + examples/window-size + examples/dataset-size + examples/visualisation diff --git a/docs/source/examples/custom-model.rst b/docs/source/examples/custom-model.rst new file mode 100644 index 0000000000000000000000000000000000000000..5442ad46921b96b3dd18c90a62164fd2f1d40107 --- /dev/null +++ b/docs/source/examples/custom-model.rst @@ -0,0 +1,6 @@ +Custom model training +===================== + +Below is an example of how to use `BenchNIRS` to train a custom convolutional neural network (CNN) on one of the datasets. + +.. literalinclude:: ../../../examples/tailored_shin_nb.py diff --git a/docs/source/examples/dataset-size.rst b/docs/source/examples/dataset-size.rst new file mode 100644 index 0000000000000000000000000000000000000000..aaf6f589867a03616e457cc53a0ea3b4e4d963a6 --- /dev/null +++ b/docs/source/examples/dataset-size.rst @@ -0,0 +1,10 @@ +Dataset size benchmarking +========================= + +Below is a comparison of 6 machine learning models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a subject-independent approach (testing with unseen subjects), with a range of different dataset sizes (50% to 100% of the dataset) to study the influence of this parameter on the classification accuracy [#benerradi2023]_. + +.. 
literalinclude:: ../../../examples/dataset_size.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/generalised.rst b/docs/source/examples/generalised.rst
new file mode 100644
index 0000000000000000000000000000000000000000..991b6d183c68eb75d9e569f138bba11dfbd60eb7
--- /dev/null
+++ b/docs/source/examples/generalised.rst
@@ -0,0 +1,10 @@
+Subject-independent benchmarking
+================================
+
+Below is a comparison of 6 machine learning models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a subject-independent approach (testing with unseen subjects) [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/generalised.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/personalised.rst b/docs/source/examples/personalised.rst
new file mode 100644
index 0000000000000000000000000000000000000000..17aaacb0603bded96e7bf1e352f47b1d8d44fa50
--- /dev/null
+++ b/docs/source/examples/personalised.rst
@@ -0,0 +1,10 @@
+Subject-specific benchmarking
+=============================
+
+Below is a comparison of 6 models (LDA, SVC, kNN, ANN, CNN and LSTM) on the 5 datasets with a subject-specific approach (training and testing with each subject individually) [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/personalised.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/sliding-window.rst b/docs/source/examples/sliding-window.rst
new file mode 100644
index 0000000000000000000000000000000000000000..60d98583cdfbb761bcf5d4d1a42c60fa0c9f5e32
--- /dev/null
+++ b/docs/source/examples/sliding-window.rst
@@ -0,0 +1,10 @@
+Sliding window benchmarking
+===========================
+
+Below is a comparison of 4 machine learning models (LDA, SVC, kNN, ANN) on the 5 datasets with a subject-independent approach (testing with unseen subjects), with a 2-second sliding window on the epochs to split the data into more examples [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/sliding_window.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/examples/visualisation.rst b/docs/source/examples/visualisation.rst
new file mode 100644
index 0000000000000000000000000000000000000000..4ef275a89f6f20e0617d21e4f68087f5b324ace2
--- /dev/null
+++ b/docs/source/examples/visualisation.rst
@@ -0,0 +1,10 @@
+Epochs visualisation
+====================
+
+Below is a visualisation of the epochs from the 5 datasets using the MNE backend [#gramfort2013]_.
+
+.. literalinclude:: ../../../examples/visualisation.py
+
+
+.. rubric:: References
+.. [#gramfort2013] Gramfort, A., Luessi, M., Larson, E., Engemann, D. A., Strohmeier, D., Brodbeck, C., ... & Hämäläinen, M. (2013). 
MEG and EEG data analysis with MNE-Python. Frontiers in neuroscience, 7, 70133.
diff --git a/docs/source/examples/window-size.rst b/docs/source/examples/window-size.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f24e80d363070815b710177b6bc33d7186ae01a3
--- /dev/null
+++ b/docs/source/examples/window-size.rst
@@ -0,0 +1,10 @@
+Window size benchmarking
+========================
+
+Below is a comparison of 4 machine learning models (LDA, SVC, kNN, ANN) on the 5 datasets with a subject-independent approach (testing with unseen subjects), with a range of different durations per trial (2 to 10 seconds) to study the influence of this parameter on the classification accuracy [#benerradi2023]_.
+
+.. literalinclude:: ../../../examples/window_size.py
+
+
+.. rubric:: References
+.. [#benerradi2023] Benerradi, J., Clos, J., Landowska, A., Valstar, M. F., & Wilson, M. L. (2023). Benchmarking framework for machine learning classification from fNIRS data. Frontiers in Neuroergonomics, 4, 994969.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 6256151aa8003228ec79859273fc0dbe4259a08e..08877a5038f075c4ba7ed2afea57df477c9bc0b2 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -22,6 +22,11 @@ Features:
 * supervised, self-supervised and transfer learning
 * much more!
 
+.. role:: raw-html(raw)
+   :format: html
+
+:raw-html:`→` `Source code on GitLab <https://gitlab.com/HanBnrd/benchnirs>`_
+
 .. image:: https://img.shields.io/badge/doi-10.3389%2Ffnrgo.2023.994969-blue
    :target: https://doi.org/10.3389/fnrgo.2023.994969
 
@@ -43,7 +48,7 @@
 
    install
    modules
-   example
+   examples
 
 
 Recommendation checklist
@@ -95,7 +100,7 @@ Please refer to `this tutorial `_.
 
-This project is licensed under the `GNU General Public License v3+ <https://www.gnu.org/licenses/gpl-3.0.html>`_.
+This project is licensed under the `GNU General Public License v3+ <https://www.gnu.org/licenses/gpl-3.0.html>`_. If you are using `BenchNIRS`, please cite `this article <https://doi.org/10.3389/fnrgo.2023.994969>`_.
 
 
 Indices and tables
diff --git a/docs/source/install.rst b/docs/source/install.rst
index c55c4e90aa8cdbdb7cdf01ff9114e0ad91532aeb..5847b0c86cfbc4920f8a19ef8412c521d013c5c6 100644
--- a/docs/source/install.rst
+++ b/docs/source/install.rst
@@ -14,6 +14,9 @@ Setting up BenchNIRS
 
 #. Download the datasets (see below).
 
+.. note::
+   Alternatively, to install from source in development mode, download and unzip the `repository <https://gitlab.com/HanBnrd/benchnirs/-/archive/main/benchnirs-main.zip>`_ (or clone it with Git), and run :code:`devinstall.py`.
+
 
 Downloading the datasets
 ------------------------
@@ -31,13 +34,3 @@ To update `BenchNIRS` to the latest version with `pip`, open a terminal (eg. Ana
 .. code-block:: console
 
    pip install --upgrade benchnirs
-
-
-
-.. note::
-   Alternatively to install from source, download and unzip the `repository <https://gitlab.com/HanBnrd/benchnirs/-/archive/main/benchnirs-main.zip>`_.
-   Then, in a terminal or command prompt (eg. Anaconda Prompt), navigate to the directory containing the :code:`requirements.txt` file and run:
-
-   .. code-block:: console
-
-      pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
index 052c72311dc82d9056b5a7194e5241972f16fbeb..4e2279e1e3154b222628c19f4d5e42c5c865760d 100644
--- a/docs/source/modules.rst
+++ b/docs/source/modules.rst
@@ -2,9 +2,9 @@ BenchNIRS API
 =============
 
 .. 
toctree:: - :maxdepth: 4 + :maxdepth: 2 - learn - load - process - viz + modules/learn + modules/load + modules/process + modules/viz diff --git a/docs/source/learn.rst b/docs/source/modules/learn.rst similarity index 100% rename from docs/source/learn.rst rename to docs/source/modules/learn.rst diff --git a/docs/source/load.rst b/docs/source/modules/load.rst similarity index 100% rename from docs/source/load.rst rename to docs/source/modules/load.rst diff --git a/docs/source/process.rst b/docs/source/modules/process.rst similarity index 100% rename from docs/source/process.rst rename to docs/source/modules/process.rst diff --git a/docs/source/viz.rst b/docs/source/modules/viz.rst similarity index 100% rename from docs/source/viz.rst rename to docs/source/modules/viz.rst diff --git a/example.png b/example.png deleted file mode 100644 index c5ce10112c4a490c420cfa77785ebd4f0f53fbcc..0000000000000000000000000000000000000000 Binary files a/example.png and /dev/null differ diff --git a/src/dataset_size.py b/examples/dataset_size.py similarity index 86% rename from src/dataset_size.py rename to examples/dataset_size.py index 23a444f56b30745c902e27d72cc766690f08ad18..341ddb42bb07a54f29a68ece98ee6ff181b4187f 100644 --- a/src/dataset_size.py +++ b/examples/dataset_size.py @@ -1,6 +1,5 @@ import datetime import matplotlib.pyplot as plt -import numpy as np import os import pandas as pd import seaborn as sns @@ -11,10 +10,11 @@ from sklearn.model_selection import train_test_split from sklearn.utils import shuffle from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -26,7 +26,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/dataset_size_{date}' +out_folder = f'./results/dataset_size_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -38,11 +38,12 @@ with open(f'{out_folder}/results.csv', 'w') as w: for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] @@ -59,26 +60,25 @@ for dataset in DATASETS.keys(): else: nirs, labels, groups = shuffle( all_nirs, all_labels, all_groups, random_state=42) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) # Run models lda, hps_lda, _ = machine_learn( - nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}{ts}_lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}{ts}_svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}{ts}_knn') ann, hps_ann, _ = 
deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}{ts}_ann') cnn, hps_cnn, _ = deep_learn( - nirs, labels, groups, 'cnn', features=None, - output_folder=f'{out_path}{ts}_cnn') + nirs, labels, groups, 'cnn', output_folder=f'{out_path}{ts}_cnn') lstm, hps_lstm, _ = deep_learn( - nirs, labels, groups, 'lstm', features=None, - output_folder=f'{out_path}{ts}_lstm') + nirs, labels, groups, 'lstm', output_folder=f'{out_path}{ts}_lstm') dict_train_size['Chance'] += [1/len(classes) for _ in lda] dict_train_size['LDA'] += lda dict_train_size['SVC'] += svc diff --git a/src/generalised.py b/examples/generalised.py similarity index 86% rename from src/generalised.py rename to examples/generalised.py index c5a41125339d6f84f0c5cdba1978c28a8daf0a35..e88d89882854d7dfe746ca5ccc70528aa43c2813 100644 --- a/src/generalised.py +++ b/examples/generalised.py @@ -9,10 +9,11 @@ import torch from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -23,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/generalised_{date}' +out_folder = f'./results/generalised_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -42,34 +43,30 @@ with open(f'{out_folder}/results.csv', 'w') as w: dict_accuracies = {} for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] # Run models nirs, labels, groups = process_epochs(epochs_lab, 9.9) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) lda, hps_lda, _ = machine_learn( - nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}lda') + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}svc') + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}knn') + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}knn') ann, hps_ann, _ = deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}ann') + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}ann') cnn, hps_cnn, _ = deep_learn( - nirs, labels, groups, 'cnn', features=None, - output_folder=f'{out_path}cnn') + nirs, labels, groups, 'cnn', output_folder=f'{out_path}cnn') lstm, hps_lstm, _ = deep_learn( - nirs, labels, groups, 'lstm', features=None, - output_folder=f'{out_path}lstm') + nirs, labels, groups, 'lstm', 
output_folder=f'{out_path}lstm') # Write results results = {'LDA': [lda, hps_lda], 'SVC': [svc, hps_svc], diff --git a/src/personalised.py b/examples/personalised.py similarity index 88% rename from src/personalised.py rename to examples/personalised.py index 1da19675861f4758ba791d52b118b46d243cc5a4..504e85a7ccece2a9ae023eee3cbfe25815d3954c 100644 --- a/src/personalised.py +++ b/examples/personalised.py @@ -9,10 +9,11 @@ import torch from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -23,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/personalised_{date}' +out_folder = f'./results/personalised_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -41,11 +42,12 @@ with open(f'{out_folder}/results.csv', 'w') as w: for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] @@ -58,25 +60,26 @@ for dataset in DATASETS.keys(): print(f'-----\nSubject {subj+1}\n-----') indices = [i for i, value in enumerate(all_groups) if value == subj] nirs, labels = all_nirs[indices], all_labels[indices] + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) # Run models lda, hps_lda, _ = machine_learn( - nirs, labels, None, 'lda', features=['mean', 'std', 'slope'], + nirs_features, labels, groups=None, model='lda', output_folder=f'{out_path}{subj+1}_lda') svc, hps_svc, _ = machine_learn( - nirs, labels, None, 'svc', features=['mean', 'std', 'slope'], + nirs_features, labels, groups=None, model='svc', output_folder=f'{out_path}{subj+1}_svc') knn, hps_knn, _ = machine_learn( - nirs, labels, None, 'knn', features=['mean', 'std', 'slope'], + nirs_features, labels, groups=None, model='knn', output_folder=f'{out_path}{subj+1}_knn') ann, hps_ann, _ = deep_learn( - nirs, labels, None, 'ann', features=['mean', 'std', 'slope'], + nirs_features, labels, groups=None, model_class='ann', output_folder=f'{out_path}{subj+1}_ann') cnn, hps_cnn, _ = deep_learn( - nirs, labels, None, 'cnn', features=None, + nirs, labels, groups=None, model_class='cnn', output_folder=f'{out_path}{subj+1}_cnn') lstm, hps_lstm, _ = deep_learn( - nirs, labels, None, 'lstm', features=None, + nirs, labels, groups=None, model_class='lstm', output_folder=f'{out_path}{subj+1}_lstm') # Write results diff --git a/src/sliding_window.py b/examples/sliding_window.py similarity index 87% rename from src/sliding_window.py rename to examples/sliding_window.py index 4b6669e70fad83d7d3283c0cb8ba76ceaeacd4c6..f69d272afadbabeed93397bbba384bce20369fb8 100644 --- a/src/sliding_window.py +++ b/examples/sliding_window.py @@ -9,10 +9,11 @@ import torch from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import 
process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -23,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/sliding_window_{date}' +out_folder = f'./results/sliding_window_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -42,28 +43,26 @@ with open(f'{out_folder}/results.csv', 'w') as w: dict_accuracies = {} for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] # Run models nirs, labels, groups = process_epochs(epochs_lab, 9.9, tslide=2) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) lda, hps_lda, _ = machine_learn( - nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}lda') + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}svc') + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}knn') + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}knn') ann, hps_ann, _ = deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}ann') + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}ann') # Write results results = {'LDA': [lda, hps_lda], 'SVC': [svc, hps_svc], diff --git a/src/stats/comparison_stats_dataset.py b/examples/stats/comparison_stats_dataset.py similarity index 95% rename from src/stats/comparison_stats_dataset.py rename to examples/stats/comparison_stats_dataset.py index 353ffa6c0160a70275c4cfb0cf638b156a0de6e0..c1f2afcd55ae7bd275fcc3229ea1fb1bbba3148c 100644 --- a/src/stats/comparison_stats_dataset.py +++ b/examples/stats/comparison_stats_dataset.py @@ -6,8 +6,8 @@ from scipy import stats CONFIDENCE = 0.05 # stat confidence at 95 % -new_results = './results_new_model.csv' -old_results = './results_old_model.csv' +new_results = '../results_new_model.csv' +old_results = '../results_old_model.csv' # Stats print('Stats...') diff --git a/src/stats/comparison_stats_task.py b/examples/stats/comparison_stats_task.py similarity index 97% rename from src/stats/comparison_stats_task.py rename to examples/stats/comparison_stats_task.py index b828140c2bb04f3c0e6a962cd66b85767f72a1c4..65cbad9e9f53a2b2f1ab4913d06b294e14335fce 100644 --- a/src/stats/comparison_stats_task.py +++ b/examples/stats/comparison_stats_task.py @@ -8,8 +8,8 @@ DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], CONFIDENCE = 0.05 # stat confidence at 95 % -new_results = './new_results.csv' -old_results = './old_results.csv' +new_results = '../new_results.csv' 
+old_results = '../old_results.csv' models = ['LDA', 'SVC', 'kNN', 'ANN', 'CNN', 'LSTM'] # Stats diff --git a/src/stats/extra_stats.py b/examples/stats/extra_stats.py similarity index 100% rename from src/stats/extra_stats.py rename to examples/stats/extra_stats.py diff --git a/src/tailored_generalised.py b/examples/tailored_generalised.py similarity index 87% rename from src/tailored_generalised.py rename to examples/tailored_generalised.py index 274639d2bc46f95f36844486d875df9e6d73fe57..c01e81bf90a74b6c5bc5efe9174976cfb0cd17a5 100644 --- a/src/tailored_generalised.py +++ b/examples/tailored_generalised.py @@ -11,10 +11,11 @@ import torch.nn.functional as F from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back']} CONFIDENCE = 0.05 # stat confidence at 95 % @@ -43,10 +44,9 @@ class _CNNnback(nn.Module): return x - start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/tailored_generalised_{date}' +out_folder = f'./results/tailored_generalised_{date}' os.makedirs(out_folder) @@ -67,34 +67,30 @@ with open(f'{out_folder}/results.csv', 'w') as w: dict_accuracies = {} for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] # Run models nirs, labels, groups = process_epochs(epochs_lab, 39.9) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) lda, hps_lda, _ = machine_learn( - nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}lda') + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}svc') + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}knn') + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}knn') ann, hps_ann, _ = deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], - output_folder=f'{out_path}ann') + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}ann') cnn, hps_cnn, _ = deep_learn( - nirs, labels, groups, _CNNnback, features=None, - output_folder=f'{out_path}cnn') + nirs, labels, groups, _CNNnback, output_folder=f'{out_path}cnn') lstm, hps_lstm, _ = deep_learn( - nirs, labels, groups, 'lstm', features=None, - output_folder=f'{out_path}lstm') + nirs, labels, groups, 'lstm', output_folder=f'{out_path}lstm') # Write results results = {'LDA': [lda, hps_lda], 'SVC': [svc, hps_svc], diff --git a/src/tailored_shin_nb.py b/examples/tailored_shin_nb.py similarity index 92% rename from src/tailored_shin_nb.py rename to examples/tailored_shin_nb.py index 
080e19dc477a1695bdcff0ca16c20ff236caf87f..9e086bd3afe785688b697c69cc1c66edf49b1e7b 100644 --- a/src/tailored_shin_nb.py +++ b/examples/tailored_shin_nb.py @@ -12,6 +12,7 @@ from benchnirs.process import process_epochs from benchnirs.learn import deep_learn +DATA_PATH = '../../data/dataset_shin_2018/' # path to the dataset CLASSES = ['0-back', '2-back', '3-back'] CONFIDENCE = 0.05 # stat confidence at 95 % ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23], @@ -23,7 +24,7 @@ ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23], start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/tailored_shin_nb_{date}' +out_folder = f'./results/tailored_shin_nb_{date}' class CustomCNN(nn.Module): @@ -64,7 +65,7 @@ print(f'Number of GPUs: {torch.cuda.device_count()}') print(f'=====\nshin_2018_nb\n=====') # Load and preprocess data -epochs = load_dataset('shin_2018_nb', bandpass=[0.01, 0.5], +epochs = load_dataset('shin_2018_nb', DATA_PATH, bandpass=[0.01, 0.5], baseline=(-2, 0), tddr=True) ch_picks = [] for group in ROIS.values(): @@ -76,8 +77,7 @@ epochs_lab = epochs[CLASSES] nirs, labels, groups = process_epochs(epochs_lab, tmax=39.9, sort=True) print(nirs.shape) accuracies, hps, additional_metrics = deep_learn( - nirs, labels, groups, CustomCNN, - features=None, normalize=True, + nirs, labels, groups, CustomCNN, normalize=(0, 2), output_folder=f'{out_folder}') # Write results diff --git a/src/tailored_window_size.py b/examples/tailored_window_size.py similarity index 85% rename from src/tailored_window_size.py rename to examples/tailored_window_size.py index 9c5711984c746e62aa972f4ad72bd4b2c5fc3d6c..1214573f89f018657de7e7d3ebdfaeec01b19d16 100644 --- a/src/tailored_window_size.py +++ b/examples/tailored_window_size.py @@ -1,6 +1,5 @@ import datetime import matplotlib.pyplot as plt -import numpy as np import os import pandas as pd import seaborn as sns @@ -9,10 +8,11 @@ import torch from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back']} WINDOW_SIZES = [4.9, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9] @@ -21,7 +21,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/tailored_window_size_{date}' +out_folder = f'./results/tailored_window_size_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -32,11 +32,12 @@ with open(f'{out_folder}/results.csv', 'w') as w: for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] @@ -45,23 +46,23 @@ for dataset in DATASETS.keys(): for ws in WINDOW_SIZES: print(f'-----\nWindow size {ws}\n-----') nirs, labels, groups = process_epochs(epochs_lab, ws) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) # Run models lda, hps_lda, _ = machine_learn( - nirs, 
labels, groups, 'lda', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}{ws}_lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}{ws}_svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}{ws}_knn') ann, hps_ann, _ = deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}{ws}_ann') lstm, hps_lstm, _ = deep_learn( - nirs, labels, groups, 'lstm', features=None, - output_folder=f'{out_path}{ws}_lstm') + nirs, labels, groups, 'lstm', output_folder=f'{out_path}{ws}_lstm') dict_window_size['Chance'] += [1/len(classes) for _ in lda] dict_window_size['LDA'] += lda dict_window_size['SVC'] += svc diff --git a/src/transfer.py b/examples/transfer.py similarity index 93% rename from src/transfer.py rename to examples/transfer.py index 41b8f1eb25c13b39e34e65a146754c310efe8394..50041323dbe3546bfabdd68058556d26bbe3342a 100644 --- a/src/transfer.py +++ b/examples/transfer.py @@ -12,6 +12,8 @@ from benchnirs.process import process_epochs from benchnirs.learn import deep_transfer_learn +DATA_PATH = '../../data/dataset_shin_2018/' # path to the dataset +# CLASSES = ['0-back', '2-back', '3-back'] CLASSES = ['0-back', '2-back', '3-back', 'unlabelled'] CONFIDENCE = 0.05 # stat confidence at 95 % ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23], @@ -23,7 +25,7 @@ ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23], start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/transfer_{date}' +out_folder = f'./results/transfer_{date}' class HbEncoder(nn.Module): @@ -99,7 +101,7 @@ print(f'Number of GPUs: {torch.cuda.device_count()}') print(f'=====\nshin_2018_nb\n=====') # Load and preprocess data -epochs = load_dataset('shin_2018_nb', bandpass=[0.01, 0.5], +epochs = load_dataset('shin_2018_nb', DATA_PATH, bandpass=[0.01, 0.5], baseline=(-2, 0), tddr=True) print(epochs) ch_picks = [] @@ -112,8 +114,7 @@ epochs_lab = epochs[CLASSES] nirs, labels, groups = process_epochs(epochs_lab, tmax=39.9, sort=True) print(nirs.shape) accuracies, hps, additional_metrics = deep_transfer_learn( - nirs, labels, groups, HbEncoder, HbDecoder, Classifier, - features=None, normalize=True, + nirs, labels, groups, HbEncoder, HbDecoder, Classifier, normalize=(0, 2), output_folder=f'{out_folder}', max_epoch=500) # Write results diff --git a/src/visualisation.py b/examples/visualisation.py similarity index 77% rename from src/visualisation.py rename to examples/visualisation.py index f1c332f5ef3e1e14da7fdd2cfb615a66a710536b..87d3bcf0748c15fa02d52f9e224b7b37fbc113f1 100644 --- a/src/visualisation.py +++ b/examples/visualisation.py @@ -2,7 +2,7 @@ from benchnirs.load import load_dataset from benchnirs.viz import epochs_viz -ALL_DATA_PATH = '/folder/with/datasets/' # path to the datasets +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -12,10 +12,10 @@ DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') - path = f'{ALL_DATA_PATH}{dataset[:-3]}/' + data_path = 
f'{ALL_DATA_PATH}{dataset[:-3]}/' # Load and preprocess data - epochs = load_dataset(dataset, path=path, bandpass=[0.01, 0.5], + epochs = load_dataset(dataset, path=data_path, bandpass=[0.01, 0.5], baseline=(-1.99, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] diff --git a/src/window_size.py b/examples/window_size.py similarity index 87% rename from src/window_size.py rename to examples/window_size.py index 929cee9bb07dc1ab2879caee64559da56e42ef1e..387ae106e0ccded3993dd07476bad9ff3d5eb04b 100644 --- a/src/window_size.py +++ b/examples/window_size.py @@ -1,6 +1,5 @@ import datetime import matplotlib.pyplot as plt -import numpy as np import os import pandas as pd import seaborn as sns @@ -9,10 +8,11 @@ import torch from scipy import stats from benchnirs.load import load_dataset -from benchnirs.process import process_epochs +from benchnirs.process import process_epochs, extract_features from benchnirs.learn import machine_learn, deep_learn +ALL_DATA_PATH = '../../data/dataset_' # path to the datasets DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], 'shin_2018_nb': ['0-back', '2-back', '3-back'], 'shin_2018_wg': ['baseline', 'word generation'], @@ -24,7 +24,7 @@ CONFIDENCE = 0.05 # stat confidence at 95 % start_time = datetime.datetime.now() date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/window_size_{date}' +out_folder = f'./results/window_size_{date}' os.makedirs(out_folder) print(f'Main output folder: {out_folder}/') @@ -35,11 +35,12 @@ with open(f'{out_folder}/results.csv', 'w') as w: for dataset in DATASETS.keys(): print(f'=====\n{dataset}\n=====') + data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/' out_path = f'{out_folder}/{dataset}_' # Load and preprocess data - epochs = load_dataset(dataset, bandpass=[0.01, 0.5], baseline=(-2, 0), - roi_sides=True, tddr=True) + epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5], + baseline=(-2, 0), roi_sides=True, tddr=True) classes = DATASETS[dataset] epochs_lab = epochs[classes] @@ -48,19 +49,20 @@ for dataset in DATASETS.keys(): for ws in WINDOW_SIZES: print(f'-----\nWindow size {ws}\n-----') nirs, labels, groups = process_epochs(epochs_lab, ws) + nirs_features = extract_features(nirs, ['mean', 'std', 'slope']) # Run models lda, hps_lda, _ = machine_learn( - nirs, labels, groups, 'lda', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'lda', output_folder=f'{out_path}{ws}_lda') svc, hps_svc, _ = machine_learn( - nirs, labels, groups, 'svc', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'svc', output_folder=f'{out_path}{ws}_svc') knn, hps_knn, _ = machine_learn( - nirs, labels, groups, 'knn', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'knn', output_folder=f'{out_path}{ws}_knn') ann, hps_ann, _ = deep_learn( - nirs, labels, groups, 'ann', features=['mean', 'std', 'slope'], + nirs_features, labels, groups, 'ann', output_folder=f'{out_path}{ws}_ann') dict_window_size['Chance'] += [1/len(classes) for _ in lda] dict_window_size['LDA'] += lda diff --git a/requirements.txt b/requirements.txt index cf3ca65caa34b7666adc2f1327116e37c41d2250..06eb815cc6822bc6364579c9201105b52a5d3d13 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,16 @@ # To install all the required packages, run in a terminal or command prompt: # python -m pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html -matplotlib>=3.3.1 -mne>=0.23.4 -nirsimple>=0.1.2 -numpy>=1.19.5 -pandas>=1.0.5 
-scikit-learn>=0.24.2 -scipy>=1.8.1 -seaborn>=0.11.1 -statsmodels>=0.12.2 -torch>=1.5.1+cu101 -torchvision>=0.6.1+cu101 +importlib +lazy_loader +numpy +pandas +scipy +mne +matplotlib +seaborn +scikit-learn +torch +torchvision +nirsimple +statsmodels diff --git a/setup.py b/setup.py index 8f2cf0ec03b0da9c35ad86112023661fdd7c7137..1b7701995b92d1189d61ff07dc11df84a07bb745 100644 --- a/setup.py +++ b/setup.py @@ -5,17 +5,18 @@ with open("README.md", "r") as fh: setuptools.setup( name="benchnirs", - version="1.2.1", + version="1.2.2", author="Johann Benerradi", author_email="johann.benerradi@gmail.com", description="Benchmarking framework for machine learning with fNIRS", long_description=long_description, long_description_content_type="text/markdown", url="https://gitlab.com/HanBnrd/benchnirs", - license='GNU GPLv3+', - package_dir={"": "src"}, - packages=setuptools.find_packages(where="src"), + license="GNU GPLv3+", + packages=setuptools.find_packages(), install_requires=[ + "importlib", + "lazy_loader", "numpy", "pandas", "scipy", diff --git a/src/benchnirs/__init__.py b/src/benchnirs/__init__.py deleted file mode 100644 index ef6bcf6661395741d926ef75ef8a4a1fc2a7172d..0000000000000000000000000000000000000000 --- a/src/benchnirs/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -BenchNIRS -========= -Benchmarking framework for machine learning with fNIRS -""" - -from .load import load_dataset -from .viz import epochs_viz -from .process import process_epochs -from .learn import machine_learn, deep_learn diff --git a/src/custom_model.py b/src/custom_model.py deleted file mode 100644 index c5bd321cfa045bdec1ab4687dbaca74e8a25c8d2..0000000000000000000000000000000000000000 --- a/src/custom_model.py +++ /dev/null @@ -1,142 +0,0 @@ -import datetime -import matplotlib.pyplot as plt -import numpy as np -import os -import pandas as pd -import seaborn as sns -import torch -import torch.nn as nn -import torch.nn.functional as F - -from scipy import stats - -from benchnirs.load import load_dataset -from benchnirs.process import process_epochs -from benchnirs.learn import deep_learn - - -ALL_DATA_PATH = '/folder/with/datasets/' # path to the datasets -DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'], - 'shin_2018_nb': ['0-back', '2-back', '3-back'], - 'shin_2018_wg': ['baseline', 'word generation'], - 'shin_2016_ma': ['baseline', 'mental arithmetic'], - 'bak_2019_me': ['right', 'left', 'foot']} -CONFIDENCE = 0.05 # stat confidence at 95 % - - -class CustomCNN(nn.Module): - - def __init__(self, n_classes): - super(CustomCNN, self).__init__() - self.conv1 = nn.Conv1d(4, 4, kernel_size=10, stride=2) # tempo conv - self.pool1 = nn.MaxPool1d(2) - self.conv2 = nn.Conv1d(4, 4, kernel_size=5, stride=2) # tempo conv - self.pool2 = nn.MaxPool1d(2) - self.fc1 = nn.Linear(20, 10) - self.fc2 = nn.Linear(10, n_classes) - - def forward(self, x): - batch_size = x.size(0) - x = F.relu(self.conv1(x)) - x = self.pool1(x) - x = F.relu(self.conv2(x)) - x = self.pool2(x) - x = x.view(batch_size, -1) - x = F.relu(self.fc1(x)) - x = self.fc2(x) - return x - - -start_time = datetime.datetime.now() -out_folder = f'../results/custom' -if not os.path.isdir(out_folder): - os.makedirs(out_folder) -print(f'Main output folder: {out_folder}/') - -print(f'Number of GPUs: {torch.cuda.device_count()}') - -with open(f'{out_folder}/summary.md', 'w') as w: - w.write('# Accuracy table\n\n(Standard deviation on the cross-validation)') - w.write('\n\n|Dataset|Chance level|Average accuracy (sd)|\n') - w.write('|:---:|:---:|:---:|\n') - 
-with open(f'{out_folder}/results.csv', 'w') as w: - w.write('dataset;fold;accuracy;hyperparameters;additional_metrics\n') - - -dict_accuracies = {'Accuracy': [], 'Dataset': []} -for dataset in DATASETS.keys(): - print(f'=====\n{dataset}\n=====') - data_path = f'{ALL_DATA_PATH}dataset_{dataset[:-3]}/' - out_path = f'{out_folder}/{dataset}_' - - # Load and preprocess data - epochs = load_dataset(dataset, path=data_path, bandpass=[0.01, 0.5], - baseline=(-2, 0), roi_sides=True, tddr=True) - classes = DATASETS[dataset] - epochs_lab = epochs[classes] - - # Run models - nirs, labels, groups = process_epochs(epochs_lab, 9.9) - cnn, hps_cnn, additional_metrics_cnn = deep_learn( - nirs, labels, groups, CustomCNN, features=None, - output_folder=f'{out_path}cnn') - - # Write results - results = {'CNN': [cnn, hps_cnn]} - chance_level = np.around(1/len(classes), decimals=3) - w_summary = open(f'{out_folder}/summary.md', 'a') - w_results = open(f'{out_folder}/results.csv', 'a') - w_summary.write(f'|{dataset}|{chance_level}|') - w_summary.write( - f'{np.around(np.mean(cnn), decimals=3)} ' - f'({np.around(np.std(cnn), decimals=3)})|') - for fold, accuracy in enumerate(cnn): - w_results.write(f'{dataset};{fold+1};{accuracy};"{hps_cnn[fold]}";') - w_results.write(f'"{additional_metrics_cnn[fold]}"\n') - w_summary.write('\n') - w_summary.close() - w_results.close() - dict_accuracies['Accuracy'] += cnn - dict_accuracies['Dataset'] += [dataset] * len(cnn) - - -df_accuracies = pd.DataFrame(dict_accuracies) -sns.barplot(data=df_accuracies, y='Accuracy', x='Dataset', capsize=.1, - palette='colorblind') -plt.savefig(f'{out_folder}/summary.png') -plt.close() - - -# Stats -print('Stats...') -with open(f'{out_folder}/stats.md', 'w') as w: - df = pd.read_csv(f'{out_folder}/results.csv', delimiter=';') - w.write('## Comparison of the model accuracy to chance level\n\n') - w.write('|Dataset|Shapiro p-value|Test|p-value|\n') - w.write('|:---:|:---:|:---:|:---:|\n') - for dataset in DATASETS.keys(): - dataset_accuracies = [] - chance_level = 1 / len(DATASETS[dataset]) - normality = True - w.write(f'|{dataset}|') - sub_df = df[df['dataset'] == dataset] - accuracies = sub_df['accuracy'].to_numpy() - dataset_accuracies.append(accuracies) - # Check normality of the distribution - _, p_shap = stats.shapiro(accuracies) - w.write(f'{p_shap}|') - if p_shap > CONFIDENCE: - # t-test - _, p_tt = stats.ttest_1samp(accuracies, chance_level) - w.write(f't-test|{p_tt}|\n') - else: - normality = False - # Wilcoxon - _, p_wilcox = stats.wilcoxon(accuracies-chance_level) - w.write(f'Wilcoxon|{p_wilcox}|\n') - - -end_time = datetime.datetime.now() -elapsed_time = end_time - start_time -print(f'===\nElapsed time: {elapsed_time}') diff --git a/src/transfer_no_unlab.py b/src/transfer_no_unlab.py deleted file mode 100644 index 7d4432c12009ee763a09151aa2b64a01c07f883a..0000000000000000000000000000000000000000 --- a/src/transfer_no_unlab.py +++ /dev/null @@ -1,140 +0,0 @@ -import datetime -import numpy as np -import os -import torch -import torch.nn as nn -import torch.nn.functional as F - -from scipy import stats - -from benchnirs.load import load_dataset -from benchnirs.process import process_epochs -from benchnirs.learn import deep_transfer_learn - - -CLASSES = ['0-back', '2-back', '3-back'] -CONFIDENCE = 0.05 # stat confidence at 95 % -ROIS = {'Right PFC HbO': [9, 10, 19, 20, 21, 22, 23], - 'Right PFC HbR': [45, 46, 55, 56, 57, 58, 59], - 'Left PFC HbO': [0, 1, 2, 3, 4, 5, 6], - 'Left PFC HbR': [36, 37, 38, 39, 40, 41, 42], - 'Central PFC 
HbO': [7, 8], - 'Central PFC HbR': [43, 44]} - -start_time = datetime.datetime.now() -date = start_time.strftime('%Y_%m_%d_%H%M') -out_folder = f'../results/transfer_no_unlab_{date}' - - -class HbEncoder(nn.Module): - - def __init__(self): - super(HbEncoder, self).__init__() - self.conv1 = nn.Conv1d(16, 8, kernel_size=15, stride=5) # tempo conv - self.conv2 = nn.Conv1d(8, 8, kernel_size=12, stride=6) # tempo conv - self.fc3 = nn.Linear(96, 56) - self.fc4 = nn.Linear(56, 16) - self.bn = nn.BatchNorm1d(8) - - def forward(self, x): - batch_size = x.size(0) - x = F.relu(self.conv1(x)) - x = F.relu(self.bn(self.conv2(x))) - x = x.view(batch_size, -1) # flatten - x = F.relu(self.fc3(x)) - x = F.relu(self.fc4(x)) - return x - - -class HbDecoder(nn.Module): - - def __init__(self): - super(HbDecoder, self).__init__() - self.fc4 = nn.Linear(16, 56) - self.fc3 = nn.Linear(56, 96) - self.tconv2 = nn.ConvTranspose1d(8, 8, kernel_size=12, stride=6) - self.tconv1 = nn.ConvTranspose1d(8, 16, kernel_size=15, stride=5) - - def forward(self, x): - batch_size = x.size(0) - x = F.relu(self.fc4(x)) - x = F.relu(self.fc3(x)) - x = x.view(batch_size, 8, -1) # un-flatten - x = F.relu(self.tconv2(x)) - x = self.tconv1(x) - return x - - -class Classifier(nn.Module): - """ - Classifier layers to connect with the encoder - """ - - def __init__(self, n_classes, encoder_hbo, encoder_hbr): - super(Classifier, self).__init__() - self.encoder_hbo = encoder_hbo - self.encoder_hbr = encoder_hbr - self.fc1 = nn.Linear(32, 16) - self.fc2 = nn.Linear(16, n_classes) - - def forward(self, x): - mid_idx = x.size(1) / 2 - if mid_idx.is_integer(): - mid_idx = int(mid_idx) - features_from_hbo = self.encoder_hbo(x[:, :mid_idx]) - features_from_hbr = self.encoder_hbr(x[:, mid_idx:]) - features_from_hb = torch.cat((features_from_hbo, - features_from_hbr), 1) - x = F.relu(self.fc1(features_from_hb.detach())) - x = self.fc2(x) - return x - - -if not os.path.isdir(out_folder): - os.makedirs(out_folder) -print(f'Main output folder: {out_folder}/') - -print(f'Number of GPUs: {torch.cuda.device_count()}') - -print(f'=====\nshin_2018_nb\n=====') - -# Load and preprocess data -epochs = load_dataset('shin_2018_nb', bandpass=[0.01, 0.5], - baseline=(-2, 0), tddr=True) -print(epochs) -ch_picks = [] -for group in ROIS.values(): - ch_picks += group -epochs.pick(ch_picks) -epochs_lab = epochs[CLASSES] - -# Run models -nirs, labels, groups = process_epochs(epochs_lab, tmax=39.9, sort=True) -print(nirs.shape) -accuracies, hps, additional_metrics = deep_transfer_learn( - nirs, labels, groups, HbEncoder, HbDecoder, Classifier, - features=None, normalize=True, - output_folder=f'{out_folder}', max_epoch=500) - -# Write results -with open(f'{out_folder}/results.csv', 'w') as w: - w.write('dataset;model;fold;accuracy;hyperparameters\n') - for fold, accuracy in enumerate(accuracies): - hp = hps[fold] - w.write(f'shin_2018_nb;CNN;{fold+1};{accuracy};"{hp}"\n') - -print(f'Average accuracy: {np.mean(accuracies)}') -_, p_shap = stats.shapiro(accuracies) -print(f'Shapiro p-value: {p_shap}') -if p_shap > CONFIDENCE: - s_tt, p_tt = stats.ttest_1samp(accuracies, 1/3, alternative='greater') - print(f't-test = {s_tt} (p-value = {p_tt})') -else: - s_wilcox, p_wilcox = stats.wilcoxon(accuracies - np.array(1/3), - alternative='greater') - print(f'Wilcoxon = {s_wilcox} (p-value = {p_wilcox})') - - -end_time = datetime.datetime.now() -elapsed_time = end_time - start_time -print(f'===\nElapsed time: {elapsed_time}')
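
Taken together, the hunks above migrate every example to a new calling convention: `load_dataset()` now takes the dataset path as an explicit argument, and feature extraction moves out of `machine_learn()`/`deep_learn()` into a separate `extract_features()` step. Below is a minimal sketch of the migrated pipeline, assembled only from calls that appear verbatim in this patch; the dataset name, data path, output folder and window length are illustrative, not prescriptive.

```python
import os

from benchnirs.load import load_dataset
from benchnirs.process import process_epochs, extract_features
from benchnirs.learn import machine_learn, deep_learn

DATA_PATH = '../../data/dataset_shin_2018/'  # path to the downloaded dataset

out_folder = './results'
os.makedirs(out_folder, exist_ok=True)

# Load and preprocess data (the dataset path is now passed explicitly)
epochs = load_dataset('shin_2018_nb', DATA_PATH, bandpass=[0.01, 0.5],
                      baseline=(-2, 0), roi_sides=True, tddr=True)
epochs_lab = epochs[['0-back', '2-back', '3-back']]

# Epoching and feature extraction are now two separate steps
nirs, labels, groups = process_epochs(epochs_lab, 9.9)
nirs_features = extract_features(nirs, ['mean', 'std', 'slope'])

# Feature-based models take the extracted features...
lda, hps_lda, _ = machine_learn(nirs_features, labels, groups, 'lda',
                                output_folder=f'{out_folder}/lda')

# ...while sequence models such as the LSTM keep the raw time series
lstm, hps_lstm, _ = deep_learn(nirs, labels, groups, 'lstm',
                               output_folder=f'{out_folder}/lstm')
```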
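
The patch also replaces the boolean `normalize=True` with a tuple, `normalize=(0, 2)`, in the `deep_learn()` and `deep_transfer_learn()` calls that use custom models. The diff does not spell out the new semantics; the sketch below assumes the tuple names the axes of the `(epochs, channels, time)` array over which normalisation is computed. It reuses `nirs`, `labels`, `groups` and `out_folder` from the sketch above, together with the `CustomCNN` architecture removed from `src/custom_model.py`.

```python
import torch.nn as nn
import torch.nn.functional as F

from benchnirs.learn import deep_learn


class CustomCNN(nn.Module):
    """CNN from the deleted src/custom_model.py (4 input channels, as used
    there with roi_sides=True and 9.9 s windows)."""

    def __init__(self, n_classes):
        super(CustomCNN, self).__init__()
        self.conv1 = nn.Conv1d(4, 4, kernel_size=10, stride=2)  # tempo conv
        self.pool1 = nn.MaxPool1d(2)
        self.conv2 = nn.Conv1d(4, 4, kernel_size=5, stride=2)  # tempo conv
        self.pool2 = nn.MaxPool1d(2)
        self.fc1 = nn.Linear(20, 10)
        self.fc2 = nn.Linear(10, n_classes)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = x.view(batch_size, -1)  # flatten before the dense layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# normalize=(0, 2) is assumed (not documented in this patch) to select the
# epoch and time axes of the (epochs, channels, time) input for normalisation
cnn, hps_cnn, _ = deep_learn(nirs, labels, groups, CustomCNN,
                             normalize=(0, 2),
                             output_folder=f'{out_folder}/custom_cnn')
```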