From 9c0fa6673ab41018f928d5f1992b47ba08ec7783 Mon Sep 17 00:00:00 2001
From: Johann Benerradi
Date: Tue, 22 Apr 2025 12:53:19 +0100
Subject: [PATCH 1/3] Convert data into float32 to speed up training

---
 benchnirs/learn.py   | 59 ++++++++++++++++++++++----------------
 benchnirs/process.py |  4 +++
 2 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/benchnirs/learn.py b/benchnirs/learn.py
index af30f92..edc4da3 100644
--- a/benchnirs/learn.py
+++ b/benchnirs/learn.py
@@ -281,11 +281,11 @@ class _LSTMClassifier(nn.Module):
             x = x[:, :, :-r]  # crop to fit unit size
         x = x.reshape(x.size(0), 4, -1, self.unit_size)  # (b, ch, seq, tpts)
         x = x.permute(0, 2, 1, 3)  # (b, seq, ch, tpts)
-        x = x.reshape(x.size(0), x.size(1), -1).double()
+        x = x.reshape(x.size(0), x.size(1), -1)

         # Initialise hidden and cell states
-        h0 = torch.zeros(1, x.size(0), self.hidden_size).double().to(x.device)
-        c0 = torch.zeros(1, x.size(0), self.hidden_size).double().to(x.device)
+        h0 = torch.zeros(1, x.size(0), self.hidden_size, device=x.device)
+        c0 = torch.zeros(1, x.size(0), self.hidden_size, device=x.device)

         # Feed to model
         x, _ = self.lstm(x, (h0, c0))
@@ -359,7 +359,7 @@ def _train_dl(clf, nirs_train, labels_train, batch_size, lr, max_epochs,
             optimizer.step()

             # Get statistics
-            running_loss += loss.item()
+            running_loss += loss.detach().item()
             total += y.size(0)
             correct += (predicted == y).sum()
             correct = int(correct)
@@ -382,7 +382,7 @@ def _train_dl(clf, nirs_train, labels_train, batch_size, lr, max_epochs,
                 outputs = clf(x)
                 _, predicted = torch.max(outputs, 1)
                 loss = criterion(outputs, y)
-                running_loss += loss.item()
+                running_loss += loss.detach().item()
                 total += y.size(0)
                 correct += (predicted == y).sum()
                 correct = int(correct)
@@ -434,10 +434,10 @@ def _test_dl(clf, nirs_test, labels_test, device,
             total += y.size(0)
             correct += (predicted == y).sum()
             correct = int(correct)
-            y_true.append(y.item())
-            y_pred.append(predicted.item())
+            y_true.append(y.detach().item())
+            y_pred.append(predicted.detach().item())
             loss = criterion(outputs, y)
-            running_loss += loss.item()
+            running_loss += loss.detach().item()
     results = {'test_loss': running_loss / (i+1),
                'test_accuracy': correct / total,
                'y_true': y_true, 'y_pred': y_pred}
@@ -621,7 +621,7 @@ def deep_learn(model_class, nirs, labels, groups, normalize=None,
                 nirs_val = nirs_train[in_idx[1]]
                 labels_val = labels_train[in_idx[1]]

-                clf = model_class(n_classes).double()
+                clf = model_class(n_classes)
                 clf, _ = _train_dl(clf, nirs_in_train, labels_in_train,
                                    batch_size, lr, max_epochs, None,
                                    random_state, device,
@@ -638,7 +638,7 @@ def deep_learn(model_class, nirs, labels, groups, normalize=None,
         best_hps = (batch_sizes[0], lrs[0])

        # Retrain with best hyperparameters
-        clf = model_class(n_classes).double()
+        clf = model_class(n_classes)
         clf, results = _train_dl(clf, nirs_train, labels_train, best_hps[0],
                                  best_hps[1], max_epochs, min_epochs,
                                  random_state, device,
@@ -709,7 +709,7 @@ def deep_learn(model_class, nirs, labels, groups, normalize=None,
             nirs_test = (nirs_test - mins) / (maxs - mins)

         # Load trained model, hyperparameters and training results
-        clf = model_class(n_classes).double()
+        clf = model_class(n_classes)
         clf.load_state_dict(torch.load(f'{output_folder}/model_k{k}.pt'))
         with open(f'{output_folder}/hps_k{k}.pickle', 'rb') as f:
             best_hps = pickle.load(f)
@@ -835,7 +835,7 @@ def _train_encdec(encoder, decoder, x_train, y_train, batch_size, lr,
             optimizer.step()

             # Get statistics
-            running_loss += loss.item()
+            running_loss += loss.detach().item()
         train_losses.append(running_loss / (i+1))

         # if epoch % 5 == 0:
@@ -852,7 +852,7 @@ def _train_encdec(encoder, decoder, x_train, y_train, batch_size, lr,
                 features = encoder(x)
                 outputs = decoder(features)
                 loss = criterion(outputs, y)
-                running_loss += loss.item()
+                running_loss += loss.detach().item()
         val_losses.append(running_loss / (i+1))
         last_sorted = sorted(val_losses[-PATIENCE:])
         if (epoch >= max(earliest_stop, PATIENCE) and
@@ -895,7 +895,7 @@ def _test_encdec(encoder, decoder, x_test, y_test, device):
             features = encoder(x)
             outputs = decoder(features)
             loss = criterion(outputs, y)
-            running_loss += loss.item()
+            running_loss += loss.detach().item()

     results = {'test_loss': running_loss / (i+1)}

     return results
@@ -994,8 +994,8 @@ def select_proxy(enc_class, dec_class, nirs_train, targets_train, groups_train,
                 nirs_val = nirs_train[in_idx[1]]
                 targets_val = targets_train[in_idx[1]]

-                encoder = enc_class().double()
-                decoder = dec_class().double()
+                encoder = enc_class()
+                decoder = dec_class()
                 encoder, decoder, _ = _train_encdec(
                     encoder, decoder, nirs_in_train, targets_in_train,
                     batch_size, lr, max_epochs, None, random_state, device)
@@ -1012,8 +1012,8 @@ def select_proxy(enc_class, dec_class, nirs_train, targets_train, groups_train,
        best_hps = (batch_sizes[0], lrs[0])

    # Retrain with best hyperparameters
-    encoder = enc_class().double()
-    decoder = dec_class().double()
+    encoder = enc_class()
+    decoder = dec_class()
     encoder, decoder, results = _train_encdec(
         encoder, decoder, nirs_train, targets_train, best_hps[0],
         best_hps[1], max_epochs, min_epochs, random_state, device)
@@ -1038,8 +1038,9 @@ def select_proxy(enc_class, dec_class, nirs_train, targets_train, groups_train,
     df_losses = df_losses.melt(id_vars=['Epoch'],
                                value_vars=['Training', 'Validation'],
                                var_name='Condition', value_name='Loss')
-    sns.lineplot(ax=ax, data=df_losses, y='Loss', x='Epoch',
-                 hue='Condition', estimator=None)
+    plot = sns.lineplot(ax=ax, data=df_losses, y='Loss', x='Epoch',
+                        hue='Condition', estimator=None)
+    # plot.set(yscale='log')
     plt.savefig(f'{output_folder}/graph.png', bbox_inches='tight')
     plt.close()

@@ -1218,7 +1219,7 @@ def deep_transfer_learn(enc_class, dec_class, model_class, nirs, labels,
         # HbO -> HbR
         if os.path.isfile(f'{output_folder}/k{k}/hbo/encoder.pt'):
             print(' > HbO encoder checkpoint found, loading it...', end=' ')
-            enc_hbo = enc_class().double()
+            enc_hbo = enc_class()
             enc_hbo.load_state_dict(
                 torch.load(f'{output_folder}/k{k}/hbo/encoder.pt'))
             print('Done!')
@@ -1232,7 +1233,7 @@ def deep_transfer_learn(enc_class, dec_class, model_class, nirs, labels,
         # HbR -> HbO
         if os.path.isfile(f'{output_folder}/k{k}/hbr/encoder.pt'):
             print(' > HbR encoder checkpoint found, loading it...', end=' ')
-            enc_hbr = enc_class().double()
+            enc_hbr = enc_class()
             enc_hbr.load_state_dict(
                 torch.load(f'{output_folder}/k{k}/hbr/encoder.pt'))
             print('Done!')
@@ -1271,7 +1272,7 @@ def deep_transfer_learn(enc_class, dec_class, model_class, nirs, labels,
                 labels_val = labels_val[idx_val]
                 nirs_val = nirs_val[idx_val]

-                clf = model_class(n_classes, enc_hbo, enc_hbr).double()
+                clf = model_class(n_classes, enc_hbo, enc_hbr)
                 clf, _ = _train_dl(clf, nirs_in_train, labels_in_train,
                                    batch_size, lr, max_epochs, None,
                                    random_state, device)
@@ -1289,7 +1290,7 @@ def deep_transfer_learn(enc_class, dec_class, model_class, nirs, labels,
         idx_train = np.where(np.array(labels_train) != 999)
         labels_train = labels_train[idx_train]
         nirs_train = nirs_train[idx_train]
-        clf = model_class(n_classes, enc_hbo, enc_hbr).double()
+        clf = model_class(n_classes, enc_hbo, enc_hbr)
         clf, results = _train_dl(clf, nirs_train, labels_train, best_hps[0],
                                  best_hps[1], max_epochs, min_epochs,
                                  random_state, device)
@@ -1350,13 +1351,13 @@ def deep_transfer_learn(enc_class, dec_class, model_class, nirs, labels,
             nirs_test = (nirs_test - mins) / (maxs - mins)

         # Load trained model, hyperparameters and training results
-        enc_hbo = enc_class().double()
-        enc_hbr = enc_class().double()
+        enc_hbo = enc_class()
+        enc_hbr = enc_class()
         enc_hbo.load_state_dict(
             torch.load(f'{output_folder}/k{k}/hbo/encoder.pt'))
         enc_hbr.load_state_dict(
             torch.load(f'{output_folder}/k{k}/hbr/encoder.pt'))
-        clf = model_class(n_classes, enc_hbo, enc_hbr).double()
+        clf = model_class(n_classes, enc_hbo, enc_hbr)
         clf.load_state_dict(torch.load(f'{output_folder}/k{k}/clf.pt'))
         with open(f'{output_folder}/k{k}/hps.pickle', 'rb') as f:
             best_hps = pickle.load(f)
@@ -1486,7 +1487,7 @@ def train_final(model_class, nirs, labels, batch_size, lr, n_epochs,

     # Instantiate the model
     n_classes = len(set(labels))
-    clf = model_class(n_classes).double()
+    clf = model_class(n_classes)

     print(f'Final training: {model_class.__name__}', end=' ')

@@ -1555,7 +1556,7 @@ def train_final(model_class, nirs, labels, batch_size, lr, n_epochs,
             optimizer.step()

             # Get statistics
-            running_loss += loss.item()
+            running_loss += loss.detach().item()
         train_losses.append(running_loss / (i+1))

         # scheduler.step()
diff --git a/benchnirs/process.py b/benchnirs/process.py
index 2523a97..e57c667 100644
--- a/benchnirs/process.py
+++ b/benchnirs/process.py
@@ -103,6 +103,8 @@ def process_epochs(mne_epochs, tmin=0, tmax=None, tslide=None, sort=False,
             nirs = nirs.swapaxes(1, 2)
             nirs = nirs.reshape(-1, nirs.shape[2], nirs.shape[3])

+    nirs = np.single(nirs)
+
     print(f'Dataset shape: {nirs.shape}')

     return nirs, labels, groups
@@ -167,4 +169,6 @@ def extract_features(nirs, feature_list):

     nirs_features = np.concatenate(nirs_features, axis=-1)

+    nirs_features = np.single(nirs_features)
+
     return nirs_features
--
GitLab

From 67659de55d74e7fdbb174fd9463c05d1f1f71786 Mon Sep 17 00:00:00 2001
From: Johann Benerradi
Date: Tue, 22 Apr 2025 12:59:47 +0100
Subject: [PATCH 2/3] Fix main scripts

---
 examples/dataset_size.py         | 12 ++++++------
 examples/generalised.py          |  2 +-
 examples/sliding_window.py       |  2 +-
 examples/tailored_generalised.py |  2 +-
 examples/tailored_window_size.py | 10 +++++-----
 examples/window_size.py          |  8 ++++----
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/examples/dataset_size.py b/examples/dataset_size.py
index e73492e..d4fd6a8 100644
--- a/examples/dataset_size.py
+++ b/examples/dataset_size.py
@@ -80,12 +80,12 @@ for dataset in DATASETS.keys():
         lstm, hps_lstm, _ = deep_learn(
             'lstm', nirs, labels, groups, output_folder=f'{out_path}{ts}_lstm')
         dict_train_size['Chance'] += [1/len(classes) for _ in lda]
-        dict_train_size['LDA'] += lda
-        dict_train_size['SVC'] += svc
-        dict_train_size['kNN'] += knn
-        dict_train_size['ANN'] += ann
-        dict_train_size['CNN'] += cnn
-        dict_train_size['LSTM'] += lstm
+        dict_train_size['LDA'] += list(lda)
+        dict_train_size['SVC'] += list(svc)
+        dict_train_size['kNN'] += list(knn)
+        dict_train_size['ANN'] += list(ann)
+        dict_train_size['CNN'] += list(cnn)
+        dict_train_size['LSTM'] += list(lstm)
         dict_train_size['Dataset size'] += [ts for _ in lda]

 # Write results
diff --git a/examples/generalised.py b/examples/generalised.py
index 84e6994..48ec6c7 100644
--- a/examples/generalised.py
+++ b/examples/generalised.py
@@ -86,7 +86,7 @@ for dataset in DATASETS.keys():
         w_summary.write('\n')
     w_summary.close()
     w_results.close()
-    dict_accuracies[dataset] = lda + svc + knn + ann + cnn + lstm
+    dict_accuracies[dataset] = np.concatenate((lda, svc, knn, ann, cnn, lstm))
 dict_accuracies['Model'] = list(np.repeat(list(results.keys()), len(lda)))
 df_accuracies = pd.DataFrame(dict_accuracies)

diff --git a/examples/sliding_window.py b/examples/sliding_window.py
index d39aaff..79f0812 100644
--- a/examples/sliding_window.py
+++ b/examples/sliding_window.py
@@ -81,7 +81,7 @@ for dataset in DATASETS.keys():
         w_summary.write('\n')
     w_summary.close()
     w_results.close()
-    dict_accuracies[dataset] = lda + svc + knn + ann
+    dict_accuracies[dataset] = np.concatenate((lda, svc, knn, ann))
 dict_accuracies['Model'] = list(np.repeat(list(results.keys()), len(lda)))
 df_accuracies = pd.DataFrame(dict_accuracies)

diff --git a/examples/tailored_generalised.py b/examples/tailored_generalised.py
index 4e2eedb..e285783 100644
--- a/examples/tailored_generalised.py
+++ b/examples/tailored_generalised.py
@@ -110,7 +110,7 @@ for dataset in DATASETS.keys():
         w_summary.write('\n')
     w_summary.close()
     w_results.close()
-    dict_accuracies[dataset] = lda + svc + knn + ann + cnn + lstm
+    dict_accuracies[dataset] = np.concatenate((lda, svc, knn, ann, cnn, lstm))
 dict_accuracies['Model'] = list(np.repeat(list(results.keys()), len(lda)))
 df_accuracies = pd.DataFrame(dict_accuracies)

diff --git a/examples/tailored_window_size.py b/examples/tailored_window_size.py
index 9215831..3cfb240 100644
--- a/examples/tailored_window_size.py
+++ b/examples/tailored_window_size.py
@@ -64,11 +64,11 @@ for dataset in DATASETS.keys():
         lstm, hps_lstm, _ = deep_learn(
             'lstm', nirs, labels, groups, output_folder=f'{out_path}{ws}_lstm')
         dict_window_size['Chance'] += [1/len(classes) for _ in lda]
-        dict_window_size['LDA'] += lda
-        dict_window_size['SVC'] += svc
-        dict_window_size['kNN'] += knn
-        dict_window_size['ANN'] += ann
-        dict_window_size['LSTM'] += lstm
+        dict_window_size['LDA'] += list(lda)
+        dict_window_size['SVC'] += list(svc)
+        dict_window_size['kNN'] += list(knn)
+        dict_window_size['ANN'] += list(ann)
+        dict_window_size['LSTM'] += list(lstm)
         dict_window_size['Window size'] += [ws for _ in lda]

 # Write results
diff --git a/examples/window_size.py b/examples/window_size.py
index 7a6ad62..f3ac165 100644
--- a/examples/window_size.py
+++ b/examples/window_size.py
@@ -65,10 +65,10 @@ for dataset in DATASETS.keys():
             'ann', nirs_features, labels, groups,
             output_folder=f'{out_path}{ws}_ann')
         dict_window_size['Chance'] += [1/len(classes) for _ in lda]
-        dict_window_size['LDA'] += lda
-        dict_window_size['SVC'] += svc
-        dict_window_size['kNN'] += knn
-        dict_window_size['ANN'] += ann
+        dict_window_size['LDA'] += list(lda)
+        dict_window_size['SVC'] += list(svc)
+        dict_window_size['kNN'] += list(knn)
+        dict_window_size['ANN'] += list(ann)
         dict_window_size['Window size'] += [ws for _ in lda]

 # Write results
--
GitLab

From 55db71b162a93660645ee027ffe314d70ea61d3a Mon Sep 17 00:00:00 2001
From: Johann Benerradi
Date: Tue, 22 Apr 2025 13:36:00 +0100
Subject: [PATCH 3/3] Fix linting

---
 benchnirs/learn.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/benchnirs/learn.py b/benchnirs/learn.py
index edc4da3..91987ea 100644
--- a/benchnirs/learn.py
+++ b/benchnirs/learn.py
@@ -1038,9 +1038,8 @@ def select_proxy(enc_class, dec_class, nirs_train, targets_train, groups_train,
     df_losses = df_losses.melt(id_vars=['Epoch'],
                                value_vars=['Training', 'Validation'],
                                var_name='Condition', value_name='Loss')
-    plot = sns.lineplot(ax=ax, data=df_losses, y='Loss', x='Epoch',
-                        hue='Condition', estimator=None)
-    # plot.set(yscale='log')
+    sns.lineplot(ax=ax, data=df_losses, y='Loss', x='Epoch',
+                 hue='Condition', estimator=None)
     plt.savefig(f'{output_folder}/graph.png', bbox_inches='tight')
     plt.close()

--
GitLab
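
Note on the pattern this series applies: PyTorch modules and tensors default to
float32, so dropping the .double() casts and feeding the network float32 arrays
(numpy produces float64 by default) halves memory traffic, which is where the
speed-up comes from. The sketch below illustrates the idea only; the data,
shapes, and stand-in model here are made up and are not part of the patches,
where the real model comes from model_class in benchnirs/learn.py.

import numpy as np
import torch
import torch.nn as nn

# Hypothetical data: numpy creates float64 arrays by default
nirs = np.random.randn(8, 4, 100)   # (batch, channels, timepoints)
nirs = np.single(nirs)              # cast to float32, as process.py now does

# Stand-in classifier; nn layers are float32 by default, so no .double() needed
clf = nn.Sequential(nn.Flatten(), nn.Linear(4 * 100, 2))

x = torch.from_numpy(nirs)          # float32 tensor, matches the model dtype
outputs = clf(x)
labels = torch.zeros(8, dtype=torch.long)
loss = nn.CrossEntropyLoss()(outputs, labels)
running_loss = loss.detach().item()  # detach from the graph before reading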