# **Final Model**

> **The next step is to save the best model and leave it ready to predict the Parkinson's disease level of new patients.**

## **Requirements**

In [14]:
import numpy  as np
import polars as pl
import sys
import pickle
from sklearn.neural_network import MLPClassifier
import seaborn as sns
sns.set_style('whitegrid')
from itertools import combinations
import joblib


In [15]:
# Folder holding the local PyAudio package; it is put first on sys.path so
# that the import below resolves to it.
# NOTE(review): the folder name suggests a private package, not the PyPI
# "PyAudio" distribution — confirm.
pyaudio_package_dir = r'C:\Users\fscielzo\Documents\Packages\PyAudio_Package_Private'
sys.path.insert(0, pyaudio_package_dir)

from PyAudio import get_X_audio_features

In [16]:
# Folder that holds the previously pickled results loaded by this cell.
RESULTS_DIR = r'C:\Users\fscielzo\Documents\DataScience-GitHub\Audio Analysis\Parkinson_Severity_Classification\results'


def load_pickled_result(name, results_dir=RESULTS_DIR):
    """Unpickle and return the object stored in the file `name` inside `results_dir`.

    Parameters
    ----------
    name : str
        File name (no extension is added) of the pickled object.
    results_dir : str
        Folder containing the file; defaults to RESULTS_DIR.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Notes
    -----
    pickle.load can execute arbitrary code while deserialising, so this must
    only be pointed at files that are trusted.
    """
    # Joined with a backslash so the resulting path is the same Windows path
    # that was previously written out in full for every file.
    with open(results_dir + '\\' + name, 'rb') as file:
        return pickle.load(file)


# The first three objects are used further down as the successive keys into
# best_params (method -> stats -> model).
final_best_method = load_pickled_result('final_best_method')
final_best_stats = load_pickled_result('final_best_stats')
final_best_model = load_pickled_result('final_best_model')

# Nested mapping of hyper-parameters; note the file is named 'best_params_3'.
best_params = load_pickled_result('best_params_3')

## **Data definition**

In [17]:
# Tab-separated, header-less text file; its two columns are named 'path'
# and 'level' when read.
files_list_name = r'C:\Users\fscielzo\Documents\DataScience-GitHub\Audio Analysis\Parkinson_Severity_Classification\Data\Files_List.txt'
files_df = pl.read_csv(
    files_list_name,
    separator='\t',
    has_header=False,
    new_columns=['path', 'level'],
)

In [18]:
# Analysis settings passed to every get_X_audio_features call in this notebook.
fs = 16000      # sampling frequency
wst = 0.032     # window size, in seconds
fpt = 0.008     # frame period, in seconds
nbands = 40     # number of filters in the filterbank
ncomp = 20      # number of MFCC components

# Durations converted to sample counts, rounded up to a whole sample.
nfft = int(np.ceil(fs * wst))   # window size, in samples
fp = int(np.ceil(fs * fpt))     # frame period, in samples

In [21]:
# Response vector: the 'level' column of the file list.
Y = files_df['level'].to_numpy()

# Individual feature-extraction methods.
simple_methods = [
    'MFCC', 'spectral_centroid', 'chroma', 'spectral_bandwidth',
    'spectral_contrast', 'spectral_rolloff', 'zero_crossing_rate', 'tempogram',
]

# Statistic sets passed to get_X_audio_features through its `stats` argument.
stats = ['mean-std', 'median-std', 'mean-median-std', 'mean-Q25-median-Q75-std']

# Every combination of two or more simple methods. A combination is named by
# joining its sorted member names with '-', so the name can later be split
# back into its members.
sizes = range(2, len(simple_methods) + 1)
combined_methods = [
    '-'.join(sorted(combi))
    for size in sizes
    for combi in combinations(simple_methods, size)
]

# One empty {stat: matrix} dict per method, for each of the three containers.
all_methods = simple_methods + combined_methods
X_stats = {method: {} for method in all_methods}
X_stats_train = {method: {} for method in all_methods}
X_stats_test = {method: {} for method in all_methods}

# Settings that are the same for every extraction call.
extraction_kwargs = dict(sr=fs, n_fft=nfft, hop_length=fp, n_mels=nbands, n_mfcc=ncomp)

# Features of each simple method, for each statistic set.
for method in simple_methods:
    for stat in stats:
        X_stats[method][stat] = get_X_audio_features(
            paths=files_df['path'], method=method, stats=stat, **extraction_kwargs
        )

# Combined methods reuse the matrices computed above: the members' matrices
# are stacked column-wise, in the order in which they appear in the name.
for method in combined_methods:
    members = method.split('-')
    for stat in stats:
        X_stats[method][stat] = np.column_stack([X_stats[member][stat] for member in members])

## **Saving the final model**

In [None]:
# Hyper-parameters stored for the selected method / statistics / model keys
selected_params = best_params[final_best_method][final_best_stats][final_best_model]
# Predictor matrix obtained with the best feature-extraction method and statistics
X_final = X_stats[final_best_method][final_best_stats]

# Initialise the best model with its best parameters
final_model = MLPClassifier(random_state=123)
final_model.set_params(**selected_params)
# Train on all the available data (no train/test split at this stage)
final_model.fit(X=X_final, y=Y)
# Persist the initialised and trained model
joblib.dump(final_model, r"C:\Users\fscielzo\Documents\DataScience-GitHub\Audio Analysis\Parkinson_Severity_Classification\results\final_model.joblib")

## **Predicting new data**

- Loading the saved model

In [24]:
# Same file that joblib.dump wrote in the "Saving the final model" section.
final_model_path = r"C:\Users\fscielzo\Documents\DataScience-GitHub\Audio Analysis\Parkinson_Severity_Classification\results\final_model.joblib"
final_model = joblib.load(final_model_path)

In [49]:
# Show the selected feature-extraction method name (a '-'-joined combination).
final_best_method

'MFCC-chroma-spectral_bandwidth-spectral_contrast-zero_crossing_rate'

In [50]:
# Show the selected statistic set used to summarise the features.
final_best_stats

'mean-Q25-median-Q75-std'

- Loading a new audio file and extracting the features that will be used to predict its class.

In [45]:
# Imagine that this recording is the audio of a new patient
new_audio_file = 'PDSpeechData/loc17/loc17_s01.wav'

# The selected method name is a '-'-joined list of individual methods.
individual_methods = final_best_method.split('-')

# Same statistic set and analysis settings as used for the training features.
extraction_settings = dict(
    stats=final_best_stats, sr=fs, n_fft=nfft, hop_length=fp, n_mels=nbands, n_mfcc=ncomp
)

# Features of the single new file, one entry per individual method.
X_stats_new = {}
for method in individual_methods:
    X_stats_new[method] = get_X_audio_features(
        paths=[new_audio_file], method=method, **extraction_settings
    )

# Stack the per-method features column-wise, in the order given by the method
# name, and store the result under the combined method name.
feature_blocks = [X_stats_new[name] for name in individual_methods]
X_stats_new[final_best_method] = np.column_stack(feature_blocks)

In [51]:
# Display the combined feature array computed for the new audio file.
X_stats_new[final_best_method]

array([[-1.85570816e+02,  9.09455872e+01, -2.44680901e+01,
        -7.04029322e+00, -1.81235161e+01,  1.39041967e+01,
        -2.44391179e+00, -5.16143751e+00,  2.11006179e-01,
        -7.74400330e+00,  2.35691810e+00, -2.67633553e+01,
         5.93925667e+00, -1.15721340e+01, -4.94519901e+00,
         5.14253676e-01, -5.60667944e+00, -6.44442844e+00,
        -1.17365682e+00, -7.13852310e+00, -1.89633648e+02,
         8.78949642e+01, -2.86219001e+01, -8.75667405e+00,
        -2.10318441e+01,  1.22900267e+01, -4.22333467e+00,
        -7.48853457e+00, -2.43984455e+00, -9.48855925e+00,
         1.24283183e+00, -2.94770994e+01,  4.33485985e+00,
        -1.30440550e+01, -6.32022858e+00, -8.08254302e-01,
        -6.70881236e+00, -8.22138071e+00, -2.99997491e+00,
        -8.18231726e+00, -1.78737259e+02,  9.08615875e+01,
        -2.59062710e+01, -7.23408175e+00, -1.86913033e+01,
         1.43382187e+01, -1.84077740e+00, -5.62563133e+00,
         9.03636396e-01, -7.57154369e+00,  2.89028454e+0

In [52]:
# Check the dimensions of the combined feature array for the new audio file.
X_stats_new[final_best_method].shape

(1, 205)

- We predict the class of the new audio with the pre-trained model.

In [53]:
# Combined feature array built for the new audio file.
new_patient_features = X_stats_new[final_best_method]
final_model.predict(X=new_patient_features)

array([0], dtype=int64)

The new patient is predicted as class 0 (normal).