# **Inner Evaluation 4**

> **Explore an approach based on sequential data (using MFCC time-varying features), with Recurrent Neural Networks.**



## **Requirements**

In [17]:
import numpy  as np
import polars as pl
from torch import nn
from skorch import NeuralNetClassifier
import torch
import sys
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
import pickle

In [18]:
sys.path.insert(0, r"C:\Users\fscielzo\Documents\Packages\PyAudio_Package_Private")
from PyAudio.preprocessing import get_X_tensor_audio_features

In [19]:
sys.path.insert(0, r'C:\Users\fscielzo\Documents\Packages\PyML_Package_Private')
from PyML.evaluation import SimpleEvaluation

In [20]:
sys.path.insert(0, r'C:\Users\fscielzo\Documents\Packages\PyDL_Package_Private')
from PyDL.models import RNN
from PyDL.preprocessing import TensorStandardScaler

In [21]:
# Baseline skorch classifier wrapping the project's RNN module.
# NOTE(review): the module sizes given here look like placeholders; the HPO grid
# (param_grid_RNN) suggests its own module__* values - confirm they override these.
RNN_model = NeuralNetClassifier(
    module=RNN(hidden_size=128, num_layers=3, input_dim=10, output_dim=2),
    # Loss and optimizer are passed as classes, not instances.
    criterion=nn.CrossEntropyLoss,
    optimizer=torch.optim.Adam,
    # Training schedule
    max_epochs=15,
    batch_size=50,
    lr=0.001,
    verbose=False,
    # Use the GPU when one is visible to torch, otherwise fall back to the CPU.
    device='cuda' if torch.cuda.is_available() else 'cpu',
)

## **Data definition**

In this section we define the data to be used. Specifically, we define the response variable and a set of predictor matrices to be used as alternatives, each one associated with a combination of feature extraction methods and statistics.

In [22]:
# Listing of the audio files: tab-separated, no header row; the two columns are
# named 'path' and 'level' on load.
# NOTE(review): this is an absolute, machine-specific path - consider a configurable data directory.
files_list_name = r'C:\Users\fscielzo\Documents\DataScience-GitHub\Audio Analysis\Parkinson_Severity_Classification\Data\Files_List.txt'
files_df = pl.read_csv(
    files_list_name,
    has_header=False,
    separator='\t',
    new_columns=['path', 'level'],
)

In [23]:
# Settings shared by the feature-extraction calls
fs = 16000                      # sampling frequency (samples per second)
wst, fpt = 0.032, 0.008         # window size and frame period, both in seconds
nfft = int(np.ceil(fs * wst))   # window size converted to samples
fp = int(np.ceil(fs * fpt))     # frame period converted to samples
nbands, ncomp = 40, 20          # filterbank filters, MFCC components

We define the response and the tensor with the time-varying features extracted with the MFCC method.

In [24]:
# Response vector: the 'level' column as a NumPy array.
Y = files_df['level'].to_numpy()

# Time-varying MFCC features for every listed file, extracted with the
# configuration values defined earlier (fs, nfft, fp, nbands, ncomp).
X_MFCC_tensor = get_X_tensor_audio_features(
    paths=files_df['path'],
    method='MFCC',
    sr=fs,
    n_fft=nfft,
    hop_length=fp,
    n_mels=nbands,
    n_mfcc=ncomp,
)

In [25]:
# Chroma features for the same files.
# NOTE(review): n_mels / n_mfcc are passed although method='chroma' - confirm the helper
# ignores them; X_chroma_tensor is also not used in the visible part of this notebook.
X_chroma_tensor = get_X_tensor_audio_features(
    paths=files_df['path'],
    method='chroma',
    sr=fs,
    n_fft=nfft,
    hop_length=fp,
    n_mels=nbands,
    n_mfcc=ncomp,
)

## **Outer validation method: train-test split**

We split our data (response and predictors) into two partitions: training and testing. The training partition will be used in the inner evaluation to select the best approach for predicting the PD level, and the test partition will only be used at the very end to estimate the future performance of the best approach, that is, to estimate how well this approach will classify the PD level of new patients.

In [26]:
# Outer hold-out split: 25% of the samples go to the test partition, stratified on
# the response and seeded for reproducibility.
X_MFCC_tensor_train, X_MFCC_tensor_test, Y_train, Y_test = train_test_split(
    X_MFCC_tensor,
    Y,
    test_size=0.25,
    stratify=Y,
    random_state=123,
)

In [27]:
# Shape of the full MFCC tensor before splitting.
X_MFCC_tensor.shape

(240, 20, 4403)

In [28]:
# Shape of the training partition.
X_MFCC_tensor_train.shape

(180, 20, 4403)

In [29]:
# Shape of the test partition.
X_MFCC_tensor_test.shape

(60, 20, 4403)

- Standardizing the data, since this seems to work well with RNNs

In [30]:
# Flatten every sample (3D -> 2D) so that StandardScaler gets one column per
# (coefficient, frame) position.
n_samples, n_mfcc, max_length = X_MFCC_tensor_train.shape
X_MFCC_tensor_train_flatten = X_MFCC_tensor_train.reshape(n_samples, n_mfcc * max_length)

# Learn the scaling statistics from the training partition only, then apply them to it.
scaler = StandardScaler()
scaler.fit(X_MFCC_tensor_train_flatten)
X_MFCC_train_standardized = scaler.transform(X_MFCC_tensor_train_flatten)

# Restore the 3D layout; note this overwrites the unscaled training tensor.
X_MFCC_tensor_train = X_MFCC_train_standardized.reshape(n_samples, n_mfcc, max_length)

In [31]:
# Flatten the test samples the same way the training samples were flattened (3D -> 2D).
n_samples, n_mfcc, max_length = X_MFCC_tensor_test.shape
X_MFCC_tensor_test_flatten = X_MFCC_tensor_test.reshape(n_samples, n_mfcc * max_length)

# Reuse the scaler fitted on the training partition; it is not refitted here.
X_MFCC_test_standardized = scaler.transform(X_MFCC_tensor_test_flatten)

# Restore the 3D layout; note this overwrites the unscaled test tensor.
X_MFCC_tensor_test = X_MFCC_test_standardized.reshape(n_samples, n_mfcc, max_length)

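A better approach to do this without incurring data leakage is shown below. Note that `X_MFCC_tensor_train` and `X_MFCC_tensor_test` were already standardized by the cells above, which is why the outputs below coincide with the previous ones: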

In [32]:
# Display the standardized training tensor.
# NOTE(review): this dumps the whole array; a small slice would be enough for inspection.
X_MFCC_tensor_train

array([[[ 0.77925549,  0.75966389,  0.75801119, ...,  0.07474351,
          0.07474351,  0.07474351],
        [ 0.49572165,  0.11072004, -0.31598378, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [ 1.34147712,  1.38633399,  1.4529877 , ..., -0.07474351,
         -0.07474351, -0.07474351],
        ...,
        [ 0.48452161, -0.07294603,  0.47874218, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [ 1.382128  ,  1.93450536,  2.12793082, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [-0.97325322, -1.423122  , -1.50599919, ..., -0.07474351,
         -0.07474351, -0.07474351]],

       [[-1.09754581, -1.2406114 , -1.43178652, ...,  0.07474351,
          0.07474351,  0.07474351],
        [-0.21227887, -0.53582309, -1.00587868, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [ 0.01043724, -0.08198521, -0.06441753, ..., -0.07474351,
         -0.07474351, -0.07474351],
        ...,
        [ 0.63225567,  0.53115129,  0.74958524, ..., -

In [33]:
# The shape is unchanged after the flatten / standardize / reshape round trip.
X_MFCC_tensor_train.shape

(180, 20, 4403)

In [35]:
# Fit the project's TensorStandardScaler on the training tensor and display the transformed result.
# This rebinds `scaler`, replacing the StandardScaler fitted earlier, and the input
# X_MFCC_tensor_train was already overwritten with its standardized version in a previous cell.
# NOTE(review): the transformed array is only displayed, not assigned - confirm that is intended.
scaler = TensorStandardScaler(apply=True)
scaler.fit(X_MFCC_tensor_train)
scaler.transform(X_MFCC_tensor_train)

array([[[ 0.77925549,  0.75966389,  0.75801119, ...,  0.07474351,
          0.07474351,  0.07474351],
        [ 0.49572165,  0.11072004, -0.31598378, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [ 1.34147712,  1.38633399,  1.4529877 , ..., -0.07474351,
         -0.07474351, -0.07474351],
        ...,
        [ 0.48452161, -0.07294603,  0.47874218, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [ 1.382128  ,  1.93450536,  2.12793082, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [-0.97325322, -1.423122  , -1.50599919, ..., -0.07474351,
         -0.07474351, -0.07474351]],

       [[-1.09754581, -1.2406114 , -1.43178652, ...,  0.07474351,
          0.07474351,  0.07474351],
        [-0.21227887, -0.53582309, -1.00587868, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [ 0.01043724, -0.08198521, -0.06441753, ..., -0.07474351,
         -0.07474351, -0.07474351],
        ...,
        [ 0.63225567,  0.53115129,  0.74958524, ..., -

In [36]:
# Check that the transformed training tensor keeps the 3D shape.
scaler.transform(X_MFCC_tensor_train).shape

(180, 20, 4403)

In [37]:
# Apply the scaler fitted on the training tensor to the test tensor (displayed only, not assigned).
# The test tensor was already standardized in a previous cell.
scaler.transform(X_MFCC_tensor_test)

array([[[-1.1088349 , -1.21068496, -1.24170116, ...,  0.07474351,
          0.07474351,  0.07474351],
        [-1.29952919, -1.45907204, -1.43475645, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [-0.07323975, -0.41990087, -0.55769708, ..., -0.07474351,
         -0.07474351, -0.07474351],
        ...,
        [ 0.74406594,  0.77746824,  0.67277322, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [ 0.69785958,  0.69918349,  0.65714235, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [ 0.57196627,  0.64494054,  0.76531219, ..., -0.07474351,
         -0.07474351, -0.07474351]],

       [[-0.3483037 , -0.15471678,  0.15473919, ...,  0.07474351,
          0.07474351,  0.07474351],
        [ 0.66910327,  0.9586072 ,  1.04969775, ..., -0.07474351,
         -0.07474351, -0.07474351],
        [-1.33066569, -1.04541998, -0.74294924, ..., -0.07474351,
         -0.07474351, -0.07474351],
        ...,
        [ 2.24923983,  2.19150319,  1.54300361, ..., -

- Converting to the `PyTorch` tensor data type

In [38]:
# Convert the predictor arrays to float32 torch tensors, rebinding the same names.
# The source tuple is built before any name is rebound, so each conversion reads the
# value the name held before this cell.
X_MFCC_tensor, X_MFCC_tensor_train, X_MFCC_tensor_test = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_MFCC_tensor, X_MFCC_tensor_train, X_MFCC_tensor_test)
)

# The responses are stored as float32 as well; the HPO cell casts the training
# labels with .long() before fitting.
Y_tensor, Y_train_tensor, Y_test_tensor = (
    torch.tensor(labels, dtype=torch.float32)
    for labels in (Y, Y_train, Y_test)
)

## **Applying Inner Evaluation**

In this section we apply round four of the inner evaluation.

### **Inner validation method: KFold Cross Validation**

We define the validation method to be used in the inner evaluation, which will be Stratified KFold Cross Validation.

In [39]:
# Inner validation scheme: 5 stratified folds, shuffled with a fixed seed.
inner = StratifiedKFold(
    n_splits=5,
    random_state=123,
    shuffle=True,
)

We define dictionaries to save important results that will be gathered in the inner evaluation.

In [None]:
# Containers for the inner-evaluation outputs; the HPO cell fills them keyed by model name.
inner_score = {}
best_params = {}
inner_results = {}

### **Grids for HPO** 

#### Grid for RNN (PyTorch)

In [40]:
def param_grid_RNN(trial, input_dim, output_dim):
    """Build the hyper-parameter suggestions for one HPO trial of the RNN classifier.

    Parameters
    ----------
    trial : object
        Must expose ``suggest_categorical``, ``suggest_int`` and ``suggest_float``
        (an Optuna trial, judging by ``search_method='optuna'`` at the call site).
    input_dim : int
        Offered as the single choice for ``module__input_dim``.
    output_dim : int
        Offered as the single choice for ``module__output_dim``.

    Returns
    -------
    dict
        Maps each parameter name to the value returned by ``trial`` for it.
    """
    hidden_size_choices = [50, 70, 100, 120, 150, 175, 200, 250]
    epoch_choices = [5, 7, 10, 15, 20, 25, 30, 40, 50, 75, 100]
    batch_size_choices = [15, 30, 50, 70, 100]

    # The suggest_* calls are issued one by one, in the same order as the keys of the
    # returned dictionary.
    grid = {}

    # Fixed dimensions: a one-element categorical keeps them in the trial's parameters.
    grid['module__input_dim'] = trial.suggest_categorical('module__input_dim', [input_dim])
    grid['module__output_dim'] = trial.suggest_categorical('module__output_dim', [output_dim])

    # Architecture
    grid['module__num_layers'] = trial.suggest_int('module__num_layers', 1, 10)
    grid['module__hidden_size'] = trial.suggest_categorical('module__hidden_size', hidden_size_choices)
    grid['module__dropout_rate'] = trial.suggest_float('module__dropout_rate', 0.05, 0.95, log=True)

    # Optimisation
    grid['lr'] = trial.suggest_float('lr', 0.0001, 0.01, log=True)
    grid['max_epochs'] = trial.suggest_categorical('max_epochs', epoch_choices)
    grid['batch_size'] = trial.suggest_categorical('batch_size', batch_size_choices)

    return grid

### **HPO**

Applying HPO over Recurrent Neural Networks using the MFCC sequences of features.

#### HPO for RNN (PyTorch)

In [42]:
model = 'RNN'

# shape[2] is the last axis of the training tensor: the shapes displayed earlier give
# (180, 20, 4403), and the standardization cell unpacks that axis as `max_length`,
# so input_dim is 4403 here rather than the 20 MFCC coefficients.
# NOTE(review): if PyDL's RNN expects inputs laid out as (batch, sequence, features),
# each sample is being read as 20 steps of 4403 features - confirm against the RNN
# implementation, since this may explain the low inner score.
input_dim = X_MFCC_tensor_train.shape[2]
# Number of distinct classes in the training response.
output_dim = len(np.unique(Y_train))

simple_eval = SimpleEvaluation(
    estimator=RNN_model,
    param_grid=param_grid_RNN,
    inner=inner,
    search_method='optuna',
    scoring='balanced_accuracy',
    direction='maximize',
    n_trials=5,
    random_state=123,
    framework='PyTorch',
    input_dim=input_dim,
    output_dim=output_dim,
)

# The labels were stored as float32 tensors, so they are cast to integer class indices here.
simple_eval.fit(X=X_MFCC_tensor_train, y=Y_train_tensor.long())

# Keep the outcome of the search under this model's name.
inner_score[model] = simple_eval.inner_score
best_params[model] = simple_eval.inner_best_params
inner_results[model] = simple_eval.inner_results

[I 2024-04-16 17:55:50,367] A new study created in memory with name: no-name-5987d063-91d8-4e87-bb8a-845f53459244


#### Saving the results

In [19]:
# Saving is currently disabled: the code below is wrapped in a string literal, so
# running this cell writes nothing and merely evaluates (and displays) the string.
'''
with open('results/best_params_4', 'wb') as file:
    pickle.dump(best_params, file)

with open('results/inner_scores_4', 'wb') as file:
    pickle.dump(inner_score, file)

with open('results/inner_results_4', 'wb') as file:
    pickle.dump(inner_results, file)
'''

#### Opening the results

In [20]:
# Load the results of a previously saved run from disk. These assignments replace
# whatever `best_params`, `inner_score` and `inner_results` held before this cell,
# including values just computed by the HPO cell.
# SECURITY: pickle.load can execute arbitrary code while deserializing; only open
# files that were produced by this project.
with open('results/best_params_4', 'rb') as file:
    best_params = pickle.load(file)

with open('results/inner_scores_4', 'rb') as file:
    inner_score = pickle.load(file)

with open('results/inner_results_4', 'rb') as file:
    inner_results = pickle.load(file)

### **Selecting the best pipeline**

In this case we do not have several alternatives to compare, since there is only one: an RNN using sequential features extracted with the MFCC method.

The balanced accuracy obtained in the inner evaluation for the optimal RNN is quite poor, which could be due to a bad specification of the model or of the input data.

In [22]:
# Inner-evaluation score per model, as loaded from the saved results.
inner_score

{'RNN': 0.4431818181818182}