Requirements

Libraries, functions, and classes used throughout the project:
from scipy.io import loadmat, savemat
import numpy as np
import sys
import pickle
from scipy.stats import mode
from sklearn.model_selection import GroupKFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from PIL import Image
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
sys.path.insert(0, r'C:\Users\fscielzo\Documents\Packages\PyMachineLearning_Package_Private')
from PyMachineLearning.evaluation import SimpleEvaluation
from PyMachineLearning.preprocessing import scaler
Function to extract signal features based on block segmentation, statistics, and concatenation.
It takes raw signal data as input and returns a predictors matrix as output, suitable for use with classic Machine Learning models.
def get_X_features_HAR(X_HAR, blocks_size, statistics, verbose=False):
    """
    Extract a predictors matrix from raw HAR signal data via block segmentation.

    Each individual's signal matrix is split into consecutive blocks of
    `blocks_size` samples; the requested summary statistics are computed per
    block and per signal, and concatenated into one feature row per block.

    Parameters
    ----------
    X_HAR : list of np.ndarray
        One matrix per individual, shaped (n_signals, n_samples); it is
        transposed internally so rows become time samples.
    blocks_size : int
        Number of samples per block. NOTE(review): the verbose message assumes
        this equals the sampling frequency (one block ~ one second) — confirm.
    statistics : str
        '-'-joined statistic names drawn from {'mean', 'median', 'std',
        'Q25', 'Q75'}, e.g. 'mean-std' or 'mean-Q25-median-Q75-std'.
        Features are concatenated in the given order.
    verbose : bool, default False
        If True, print the recording time of each individual.

    Returns
    -------
    X : np.ndarray
        Feature matrix stacked over all individuals (one row per block).
    X_individual : dict
        Per-individual feature matrices, keyed by individual index.
    signals_block : dict
        Per-individual list of raw signal blocks.

    Raises
    ------
    ValueError
        If `statistics` contains an unrecognized statistic name.
    """
    # Per-column statistic functions, keyed by the tokens accepted in `statistics`.
    stat_funcs = {
        'mean': lambda a: np.mean(a, axis=0),
        'median': lambda a: np.median(a, axis=0),
        'std': lambda a: np.std(a, axis=0),
        'Q25': lambda a: np.quantile(a, q=0.25, axis=0),
        'Q75': lambda a: np.quantile(a, q=0.75, axis=0),
    }
    tokens = statistics.split('-')
    unknown = [t for t in tokens if t not in stat_funcs]
    if unknown:
        # The original silently produced empty features for unknown statistics;
        # failing fast surfaces the typo instead.
        raise ValueError(f'Unknown statistics: {unknown}. Valid tokens: {sorted(stat_funcs)}')

    n_individuals = len(X_HAR)
    X_individual, signals_block = {}, {}
    for i in range(n_individuals):
        # Signals matrix of the i-th individual: rows = time samples, cols = signals.
        signals = X_HAR[i].T
        n_samples = len(signals)
        if verbose:
            print(f'Time during which the activity of individual {i+1} of X_HAR was recorded:', np.round(n_samples/(blocks_size*60),3), 'mins.')
        # Number of full blocks and samples left over after segmentation.
        n_blocks = int(np.floor(n_samples/blocks_size))
        n_remaining_samples = n_samples - n_blocks*blocks_size
        # Segment the i-th individual's data into blocks.
        blocks = [signals[b*blocks_size:(b+1)*blocks_size, :] for b in range(n_blocks)]
        # BUGFIX: only append leftovers when there are any. The original sliced
        # signals[-0:, :] when the division was exact, which is the WHOLE matrix,
        # so the entire recording was stacked onto the last block.
        if n_remaining_samples > 0:
            if blocks:
                blocks[-1] = np.vstack([blocks[-1], signals[-n_remaining_samples:, :]])
            else:
                # Fewer samples than one block: use everything as a single block
                # (the original raised IndexError in this case).
                blocks = [signals]
        signals_block[i] = blocks
        # One feature row per block: the requested statistics concatenated in order.
        X_individual[i] = np.array([
            np.hstack([stat_funcs[t](block) for t in tokens]) for block in blocks
        ])
    # Rows are blocks (observations); columns are per-signal statistics (features).
    X = np.vstack([X_individual[i] for i in X_individual.keys()])
    return X, X_individual, signals_block
Function to process the given response data, making it compatible with the predictors matrix returned by the above function. This function follows a very similar approach to get_X_features_HAR — in a nutshell, it is based on block segmentation, the mode (as the statistic), and concatenation. It takes activity classes as input and returns a response array with those classes processed accordingly. This output can be used along with the predictors matrix given by get_X_features_HAR as input to classic Machine Learning models.
def get_y_features_HAR(Y_HAR, blocks_size, verbose=False):
    """
    Build a per-block response vector aligned with `get_X_features_HAR` output.

    Each person's label vector is segmented with the same block scheme used
    for the signals; each block is summarized by its most frequent label
    (the mode), yielding one label per feature row.

    Parameters
    ----------
    Y_HAR : list of array-like
        One label vector per person, same sample count as the signals.
    blocks_size : int
        Number of samples per block (must match the value used for the
        predictors matrix so rows stay aligned).
    verbose : bool, default False
        If True, print the recording time of each person.

    Returns
    -------
    Y : np.ndarray
        Concatenated per-block labels over all persons.
    Y_individual : dict
        Per-person label arrays, keyed by person index.
    classes_block : dict
        Per-person list of raw label blocks.
    """
    n_individuals = len(Y_HAR)
    Y_individual, classes_block = {}, {}
    for i in range(n_individuals):
        # Response vector of the i-th person.
        classes = np.asarray(Y_HAR[i])
        n_samples = len(classes)
        if verbose:
            print(f'Time during which the activity of person {i+1} of Y_HAR was recorded:', np.round(n_samples/(blocks_size*60),3), 'mins.')
        # Number of full blocks and samples left over after segmentation.
        n_blocks = int(np.floor(n_samples/blocks_size))
        n_remaining_samples = n_samples - n_blocks*blocks_size
        # Segment the labels into blocks mirroring the signal segmentation.
        blocks = [classes[b*blocks_size:(b+1)*blocks_size] for b in range(n_blocks)]
        # BUGFIX: only append leftovers when there are any. The original sliced
        # classes[-0:] when the division was exact, which is the WHOLE vector,
        # so every label was stacked onto the last block, biasing its mode.
        if n_remaining_samples > 0:
            if blocks:
                blocks[-1] = np.hstack([blocks[-1], classes[-n_remaining_samples:]])
            else:
                # Fewer samples than one block: use everything as a single block
                # (the original raised IndexError in this case).
                blocks = [classes]
        classes_block[i] = blocks
        # Most frequent label per block is the block's response value.
        Y_individual[i] = np.array([mode(block)[0] for block in blocks])
    Y = np.concatenate([Y_individual[i] for i in Y_individual.keys()])
    return Y, Y_individual, classes_block
A class that implements get_X_features_HAR following the Sklearn transformer rules. FeaturesExtractionHAR is indeed a Sklearn transformer.
class FeaturesExtractionHAR(BaseEstimator, TransformerMixin):
    """Sklearn-compatible transformer wrapping ``get_X_features_HAR``.

    Converts a list of raw per-individual signal matrices into a single
    block-statistics feature matrix, so the extraction step can be placed
    inside an sklearn ``Pipeline``.

    Parameters
    ----------
    blocks_size : int
        Number of samples per segmentation block.
    statistics : str
        Statistics specification forwarded to ``get_X_features_HAR``.
    verbose : bool, default False
        Forwarded to ``get_X_features_HAR``.
    """

    def __init__(self, blocks_size, statistics, verbose=False):
        # sklearn convention: store constructor args under identical names
        # so get_params/set_params (and cloning) work.
        self.blocks_size = blocks_size
        self.statistics = statistics
        self.verbose = verbose

    def fit(self, X, y=None):
        # Stateless transformer: nothing is learned from the data.
        return self

    def transform(self, X):
        # Delegate the extraction; keep the per-individual intermediates
        # as attributes for later inspection.
        features, self.X_individual, self.signals_block = get_X_features_HAR(
            X_HAR=X,
            blocks_size=self.blocks_size,
            statistics=self.statistics,
            verbose=self.verbose,
        )
        return features