Quick Start#
Imports#
import polars as pl
import numpy as np
Installation#
# pip install robust-mixed-dist
To see the available versions of the package, check the release history on PyPI: https://pypi.org/project/robust_mixed_dist/#history
Data#
| Variable | Description | Variable Type | Possible Categories / Range |
|---|---|---|---|
| latitude | Latitude of the house | Quantitative | 24.86 - 25.27 |
| longitude | Longitude of the house | Quantitative | 55.06 - 55.44 |
| price | Market price of the house | Quantitative | 220000 - 35000000 |
| price_per_sqft | Price per square foot | Quantitative | 361.87 - 4805.87 |
| size_in_sqft | Size in square feet | Quantitative | 294 - 9576 |
| no_of_bedrooms | Number of bedrooms in the house | Multiclass | 0, 1, 2, 3, 4, 5 |
| no_of_bathrooms | Number of bathrooms in the house | Multiclass | 0, 1, 2, 3, 4, 5, 6 |
| quality | Quality level of the house (response variable) | Binary | Low (0), Medium-High-UltraHigh (1) |
| balcony | Indicates if the house has a balcony | Binary | true (1), false (0) |
| barbecue_area | Indicates if the house has a barbecue area | Binary | true (1), false (0) |
| private_pool | Indicates if the house has a private pool | Binary | true (1), false (0) |
| private_garden | Indicates if the house has a private garden | Binary | true (1), false (0) |
# Download the processed Dubai houses dataset into a polars DataFrame.
data_url = "https://raw.githubusercontent.com/FabioScielzoOrtiz/robust_mixed_dist-docu/refs/heads/main/data/dubai_houses_processed.csv"
df = pl.read_csv(data_url)

# Column groups by variable type.
quant_variables = ['latitude', 'longitude', 'price', 'size_in_sqft', 'price_per_sqft']
binary_variables = ['quality', 'balcony', 'barbecue_area', 'private_pool', 'private_garden']
multiclass_variables = ['no_of_bedrooms', 'no_of_bathrooms']

# Number of columns in each group (quantitative, binary, multiclass).
p1, p2, p3 = len(quant_variables), len(binary_variables), len(multiclass_variables)

# X holds the columns ordered quantitative -> binary -> multiclass.
ordered_columns = quant_variables + binary_variables + multiclass_variables
X = df[ordered_columns]
X.head()
| latitude | longitude | price | size_in_sqft | price_per_sqft | quality | balcony | barbecue_area | private_pool | private_garden | no_of_bedrooms | no_of_bathrooms |
|---|---|---|---|---|---|---|---|---|---|---|---|
| f64 | f64 | i64 | i64 | f64 | i64 | i64 | i64 | i64 | i64 | i64 | i64 |
| 25.113208 | 55.138932 | 2700000 | 1079 | 2502.32 | 1 | 1 | 1 | 0 | 0 | 1 | 2 |
| 25.106809 | 55.151201 | 2850000 | 1582 | 1801.52 | 1 | 1 | 0 | 0 | 0 | 2 | 2 |
| 25.063302 | 55.137728 | 1150000 | 1951 | 589.44 | 1 | 1 | 0 | 0 | 0 | 3 | 5 |
| 25.227295 | 55.341761 | 2850000 | 2020 | 1410.89 | 0 | 1 | 0 | 0 | 0 | 2 | 3 |
| 25.114275 | 55.139764 | 1729200 | 507 | 3410.65 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
# Simulated observation weights: one uniform random draw per row of X,
# rescaled so that the weights sum to 1.
# NOTE(review): no seed is set in this snippet, so the weighted outputs shown
# further down presumably change from run to run — confirm / consider seeding.
n = len(X)
raw_weights = np.random.rand(n)
simulated_weights = raw_weights / np.sum(raw_weights)
robust_mixed_dist.mixed#
generalized_gower_dist_matrix#
from robust_mixed_dist.mixed import generalized_gower_dist_matrix
# Generalized Gower distance matrix without observation weights.
# d1, d2 and d3 name the distances used for the quantitative, binary and
# multiclass column groups, whose sizes are p1, p2 and p3 respectively.
d1 = 'euclidean'
d2 = 'sokal'
d3 = 'hamming'
D = generalized_gower_dist_matrix(
    X=X,
    p1=p1, p2=p2, p3=p3,  # p3 (multiclass column count), not p2
    d1=d1, d2=d2, d3=d3,
    weights=None,
)
D
array([[0. , 1.41503568, 2.14046114, ..., 2.07437985, 2.02688788,
0.66580485],
[1.41503568, 0. , 1.84542512, ..., 1.75076502, 1.59918112,
1.5856242 ],
[2.14046114, 1.84542512, 0. , ..., 0.60087177, 2.07964796,
2.07754328],
...,
[2.07437985, 1.75076502, 0.60087177, ..., 0. , 2.20952014,
2.1994978 ],
[2.02688788, 1.59918112, 2.07964796, ..., 2.20952014, 0. ,
1.9041296 ],
[0.66580485, 1.5856242 , 2.07754328, ..., 2.1994978 , 1.9041296 ,
0. ]], shape=(1905, 1905))
# Same distances as the unweighted example, now passing the simulated
# observation weights.
d1 = 'euclidean'
d2 = 'sokal'
d3 = 'hamming'
weights = simulated_weights
D = generalized_gower_dist_matrix(
    X=X,
    p1=p1, p2=p2, p3=p3,  # p3 (multiclass column count), not p2
    d1=d1, d2=d2, d3=d3,
    weights=weights,
)
D
array([[0. , 1.40622841, 2.13589776, ..., 2.06477834, 2.02102281,
0.68955799],
[1.40622841, 0. , 1.84669396, ..., 1.74503306, 1.60400845,
1.58950663],
[2.13589776, 1.84669396, 0. , ..., 0.62230837, 2.07045472,
2.06818707],
...,
[2.06477834, 1.74503306, 0.62230837, ..., 0. , 2.21004103,
2.19929159],
[2.02102281, 1.60400845, 2.07045472, ..., 2.21004103, 0. ,
1.88862973],
[0.68955799, 1.58950663, 2.06818707, ..., 2.19929159, 1.88862973,
0. ]], shape=(1905, 1905))
# Minkowski distance (with parameter q=1) for the quantitative columns.
d1 = 'minkowski'
d2 = 'sokal'
d3 = 'hamming'
D = generalized_gower_dist_matrix(
    X=X,
    p1=p1, p2=p2, p3=p3,  # p3 (multiclass column count), not p2
    d1=d1, d2=d2, d3=d3,
    q=1,
)
D
array([[0. , 1.41504995, 2.14064286, ..., 2.07439996, 2.02701417,
0.66615139],
[1.41504995, 0. , 1.8455187 , ..., 1.75076714, 1.5993112 ,
1.58574048],
[2.14064286, 1.8455187 , 0. , ..., 0.60093846, 2.07968384,
2.07757304],
...,
[2.07439996, 1.75076714, 0.60093846, ..., 0. , 2.20961978,
2.19958686],
[2.02701417, 1.5993112 , 2.07968384, ..., 2.20961978, 0. ,
1.90412993],
[0.66615139, 1.58574048, 2.07757304, ..., 2.19958686, 1.90412993,
0. ]], shape=(1905, 1905))
# Canberra distance for the quantitative columns.
d1 = 'canberra'
d2 = 'sokal'
d3 = 'hamming'
D = generalized_gower_dist_matrix(
    X=X,
    p1=p1, p2=p2, p3=p3,  # p3 (multiclass column count), not p2
    d1=d1, d2=d2, d3=d3,
)
D
array([[0. , 1.51698365, 2.81057962, ..., 2.23259929, 2.63105196,
1.67845893],
[1.51698365, 0. , 2.30710265, ..., 1.77496107, 2.36500717,
2.25919371],
[2.81057962, 2.30710265, 0. , ..., 1.27280892, 2.46948878,
2.41664979],
...,
[2.23259929, 1.77496107, 1.27280892, ..., 0. , 2.81661747,
2.73099007],
[2.63105196, 2.36500717, 2.46948878, ..., 2.81661747, 0. ,
1.91188048],
[1.67845893, 2.25919371, 2.41664979, ..., 2.73099007, 1.91188048,
0. ]], shape=(1905, 1905))
# Pearson distance for the quantitative columns.
d1 = 'pearson'
d2 = 'sokal'
d3 = 'hamming'
D = generalized_gower_dist_matrix(
    X=X,
    p1=p1, p2=p2, p3=p3,  # p3 (multiclass column count), not p2
    d1=d1, d2=d2, d3=d3,
)
D
array([[0. , 1.51402981, 2.51238026, ..., 2.6775671 , 2.30460266,
1.12050558],
[1.51402981, 0. , 1.98226696, ..., 2.28716057, 1.7206908 ,
1.61863728],
[2.51238026, 1.98226696, 0. , ..., 1.81898487, 2.25477183,
2.17232953],
...,
[2.6775671 , 2.28716057, 1.81898487, ..., 0. , 2.52705657,
2.68755742],
[2.30460266, 1.7206908 , 2.25477183, ..., 2.52705657, 0. ,
1.96638818],
[1.12050558, 1.61863728, 2.17232953, ..., 2.68755742, 1.96638818,
0. ]], shape=(1905, 1905))
# Mahalanobis distance for the quantitative columns.
d1 = 'mahalanobis'
d2 = 'sokal'
d3 = 'hamming'
D = generalized_gower_dist_matrix(
    X=X,
    p1=p1, p2=p2, p3=p3,  # p3 (multiclass column count), not p2
    d1=d1, d2=d2, d3=d3,
)
D
array([[0. , 1.66836681, 2.9344599 , ..., 2.43712289, 2.6009022 ,
1.71120297],
[1.66836681, 0. , 2.13804234, ..., 1.80120445, 1.81351956,
1.76764702],
[2.9344599 , 2.13804234, 0. , ..., 0.84835547, 2.40741939,
2.38131804],
...,
[2.43712289, 1.80120445, 0.84835547, ..., 0. , 2.36869851,
2.33260135],
[2.6009022 , 1.81351956, 2.40741939, ..., 2.36869851, 0. ,
1.90552695],
[1.71120297, 1.76764702, 2.38131804, ..., 2.33260135, 1.90552695,
0. ]], shape=(1905, 1905))
# Robust Mahalanobis distance for the quantitative columns, using the
# 'trimmed' robust method with the settings passed below.
d1 = 'robust_mahalanobis'
d2 = 'sokal'
d3 = 'hamming'
D = generalized_gower_dist_matrix(
    X=X,
    p1=p1, p2=p2, p3=p3,  # p3 (multiclass column count), not p2
    d1=d1, d2=d2, d3=d3,
    robust_method='trimmed', alpha=0.05, epsilon=0.05, n_iters=20,
)
D
array([[0. , 1.64946389, 2.79008972, ..., 2.36223596, 2.87529178,
2.06208371],
[1.64946389, 0. , 2.04187428, ..., 1.78060429, 2.02786949,
1.95803076],
[2.79008972, 2.04187428, 0. , ..., 0.77870059, 2.3599971 ,
2.32341031],
...,
[2.36223596, 1.78060429, 0.77870059, ..., 0. , 2.50576595,
2.45118981],
[2.87529178, 2.02786949, 2.3599971 , ..., 2.50576595, 0. ,
1.90657364],
[2.06208371, 1.95803076, 2.32341031, ..., 2.45118981, 1.90657364,
0. ]], shape=(1905, 1905))
# Robust Mahalanobis ('trimmed') for the quantitative columns, with Jaccard
# instead of Sokal for the binary columns.
d1 = 'robust_mahalanobis'
d2 = 'jaccard'
d3 = 'hamming'
D = generalized_gower_dist_matrix(
    X=X,
    p1=p1, p2=p2, p3=p3,  # p3 (multiclass column count), not p2
    d1=d1, d2=d2, d3=d3,
    robust_method='trimmed', alpha=0.05, epsilon=0.05, n_iters=20,
)
D
# Time: 0.2 secs
array([[0. , 1.41733191, 2.6594547 , ..., 2.20641278, 2.41162867,
2.06208371],
[1.41733191, 0. , 2.04187428, ..., 1.78060429, 1.84400469,
1.76691343],
[2.6594547 , 2.04187428, 0. , ..., 0.77870059, 2.20401564,
2.16479423],
...,
[2.20641278, 1.78060429, 0.77870059, ..., 0. , 2.35944095,
2.30139742],
[2.41162867, 1.84400469, 2.20401564, ..., 2.35944095, 0. ,
1.08796741],
[2.06208371, 1.76691343, 2.16479423, ..., 2.30139742, 1.08796741,
0. ]], shape=(1905, 1905))
# Robust Mahalanobis / Jaccard / Hamming combination, now passing the
# simulated observation weights.
d1 = 'robust_mahalanobis'
d2 = 'jaccard'
d3 = 'hamming'
weights = simulated_weights
D = generalized_gower_dist_matrix(
    X=X,
    p1=p1, p2=p2, p3=p3,  # p3 (multiclass column count), not p2
    d1=d1, d2=d2, d3=d3,
    robust_method='trimmed', alpha=0.05, epsilon=0.05, n_iters=20,
    weights=weights,
)
D
array([[0. , 1.22346879, 2.20797324, ..., 2.01505549, 1.22110179,
0.58290575],
[1.22346879, 0. , 1.92665926, ..., 1.76188422, 1.2039458 ,
1.20253553],
[2.20797324, 1.92665926, 0. , ..., 0.64911354, 1.97428359,
1.9726795 ],
...,
[2.01505549, 1.76188422, 0.64911354, ..., 0. , 1.94068 ,
1.93898147],
[1.22110179, 1.2039458 , 1.97428359, ..., 1.94068 , 0. ,
1.07482551],
[0.58290575, 1.20253553, 1.9726795 , ..., 1.93898147, 1.07482551,
0. ]], shape=(1905, 1905))
# With return_combined_distances=True the call returns four matrices, unpacked
# here as the combined matrix D plus D1, D2 and D3 (displayed below; presumably
# the quantitative, binary and multiclass components, in that order).
D, D1, D2, D3 = generalized_gower_dist_matrix(
    X=X,
    p1=p1, p2=p2, p3=p3,  # p3 (multiclass column count), not p2
    d1=d1, d2=d2, d3=d3,
    robust_method='trimmed', alpha=0.05, epsilon=0.05, n_iters=20,
    weights=weights,
    return_combined_distances=True,
)
D1
array([[ 0. , 73.42302734, 178.72422328, ..., 110.36938173,
90.26428032, 90.7829134 ],
[ 73.42302734, 0. , 127.21288122, ..., 37.96138528,
65.12649349, 64.49140866],
[178.72422328, 127.21288122, 0. , ..., 101.0942474 ,
90.7532933 , 89.90323136],
...,
[110.36938173, 37.96138528, 101.0942474 , ..., 0. ,
71.0293845 , 69.89518059],
[ 90.26428032, 65.12649349, 90.7532933 , ..., 71.0293845 ,
0. , 1.28659351],
[ 90.7829134 , 64.49140866, 89.90323136, ..., 69.89518059,
1.28659351, 0. ]], shape=(1905, 1905))
D2
array([[0. , 0.33333333, 0.33333333, ..., 0.33333333, 0.5 ,
0. ],
[0.33333333, 0. , 0. , ..., 0. , 0.33333333,
0.33333333],
[0.33333333, 0. , 0. , ..., 0. , 0.33333333,
0.33333333],
...,
[0.33333333, 0. , 0. , ..., 0. , 0.33333333,
0.33333333],
[0.5 , 0.33333333, 0.33333333, ..., 0.33333333, 0. ,
0.5 ],
[0. , 0.33333333, 0.33333333, ..., 0.33333333, 0.5 ,
0. ]], shape=(1905, 1905))
D3
array([[0. , 0.5, 1. , ..., 1. , 0. , 0. ],
[0.5, 0. , 1. , ..., 1. , 0.5, 0.5],
[1. , 1. , 0. , ..., 0. , 1. , 1. ],
...,
[1. , 1. , 0. , ..., 0. , 1. , 1. ],
[0. , 0.5, 1. , ..., 1. , 0. , 0. ],
[0. , 0.5, 1. , ..., 1. , 0. , 0. ]], shape=(1905, 1905))
generalized_gower_dist#
from robust_mixed_dist.mixed import generalized_gower_dist, compute_geometric_var
from robust_mixed_dist.quantitative import S_robust
# Distance between two single observations: rows 0 and 2 of X.
xi = X[0,:]
xr = X[2,:]
d1 = 'euclidean'
d2 = 'sokal'
d3 = 'hamming'
# Geometric variabilities of the three column groups, computed on the full
# dataset and passed to the pairwise call below.
gv1, gv2, gv3 = compute_geometric_var(
X=X,
p1=p1, p2=p2, p3=p3,
d1=d1, d2=d2, d3=d3,
)
d = generalized_gower_dist(
xi=xi, xr=xr,
p1=p1, p2=p2, p3=p3,
d1=d1, d2=d2, d3=d3,
q=1, S=None,
geom_var_1=gv1,
geom_var_2=gv2,
geom_var_3=gv3
)
# The value shown below agrees with entry [0, 2] of the euclidean/sokal/hamming
# matrix D displayed earlier (2.14046114).
d
np.float64(2.1404611423448294)
d1 = 'euclidean'
d2 = 'sokal'
d3 = 'hamming'
weights = simulated_weights
gv1, gv2, gv3 = compute_geometric_var(
X=X,
p1=p1, p2=p2, p3=p3,
d1=d1, d2=d2, d3=d3,
weights=weights
)
d = generalized_gower_dist(
xi=xi, xr=xr,
p1=p1, p2=p2, p3=p3,
d1=d1, d2=d2, d3=d3,
q=1, S=None,
geom_var_1=gv1,
geom_var_2=gv2,
geom_var_3=gv3
)
d
np.float64(2.135897759374307)
d1 = 'mahalanobis'
d2 = 'sokal'
d3 = 'hamming'
gv1, gv2, gv3 = compute_geometric_var(
X=X,
p1=p1, p2=p2, p3=p3,
d1=d1, d2=d2, d3=d3
)
X_quant = X[:, :p1]
S_estimation = np.cov(X_quant, rowvar=False)
d = generalized_gower_dist(
xi=xi, xr=xr,
p1=p1, p2=p2, p3=p3,
d1=d1, d2=d2, d3=d3,
S=S_estimation,
geom_var_1=gv1,
geom_var_2=gv2,
geom_var_3=gv3
)
d
np.float64(2.9588226438900027)
d1 = 'robust_mahalanobis'
d2 = 'jaccard'
d3 = 'hamming'
gv1, gv2, gv3 = compute_geometric_var(
X=X,
p1=p1, p2=p2, p3=p3,
d1=d1, d2=d2, d3=d3,
robust_method='trimmed', epsilon=0.05, alpha=0.05, n_iters=20
)
X_quant = X[:, :p1]
S_estimation = S_robust(X=X_quant, method='trimmed', epsilon=0.05, alpha=0.05, n_iters=20)
d = generalized_gower_dist(
xi=xi, xr=xr,
p1=p1, p2=p2, p3=p3,
d1=d1, d2=d2, d3=d3,
S=S_estimation,
geom_var_1=gv1,
geom_var_2=gv2,
geom_var_3=gv3
)
d
np.float64(2.6594546971734747)
d1 = 'robust_mahalanobis'
d2 = 'jaccard'
d3 = 'hamming'
weights = simulated_weights
gv1, gv2, gv3 = compute_geometric_var(
X=X,
p1=p1, p2=p2, p3=p3,
d1=d1, d2=d2, d3=d3,
robust_method='trimmed', epsilon=0.05, alpha=0.05, n_iters=20, weights=weights
)
X_quant = X[:, :p1]
S_estimation = S_robust(X=X_quant, method='trimmed', epsilon=0.05, alpha=0.05, n_iters=20, weights=weights)
d = generalized_gower_dist(
xi=xi, xr=xr,
p1=p1, p2=p2, p3=p3,
d1=d1, d2=d2, d3=d3,
q=1, S=S_estimation,
geom_var_1=gv1,
geom_var_2=gv2,
geom_var_3=gv3
)
d
np.float64(2.207973237508286)
robust_mixed_dist.quantitative#
euclidean_dist_matrix#
from robust_mixed_dist.quantitative import euclidean_dist_matrix
euclidean_dist_matrix(X=X[quant_variables])
array([[ 0. , 150002.48041163, 1550001.42564254, ...,
200004.30910639, 2025000.65272331, 1939113.64052303],
[ 150002.48041163, 0. , 1700000.47214668, ...,
50002.10458763, 2175000.34481037, 2089113.31944683],
[1550001.42564254, 1700000.47214668, 0. , ...,
1750000.23836684, 475001.65333313, 389114.87041128],
...,
[ 200004.30910639, 50002.10458763, 1750000.23836684, ...,
0. , 2225000.39656347, 2139113.36955273],
[2025000.65272331, 2175000.34481037, 475001.65333313, ...,
2225000.39656347, 0. , 85887.02978977],
[1939113.64052303, 2089113.31944683, 389114.87041128, ...,
2139113.36955273, 85887.02978977, 0. ]],
shape=(1905, 1905))
euclidean_dist#
from robust_mixed_dist.quantitative import euclidean_dist
xi = X[quant_variables][2,:]
xr = X[quant_variables][10,:]
euclidean_dist(xi=xi, xr=xr)
2400000.3317385474
minkowski_dist_matrix#
from robust_mixed_dist.quantitative import minkowski_dist_matrix
minkowski_dist_matrix(X=X[quant_variables], q=1)
array([[ 0. , 151203.818668, 1552784.93111 , ...,
201851.029416, 2026929.290262, 1940943.259859],
[ 151203.818668, 0. , 1701581.13698 , ...,
50647.223546, 2176731.471594, 2090745.441191],
[1552784.93111 , 1701581.13698 , 0. , ...,
1750934.500526, 476533.822672, 390625.702813],
...,
[ 201851.029416, 50647.223546, 1750934.500526, ...,
0. , 2226780.677854, 2140794.797713],
[2026929.290262, 2176731.471594, 476533.822672, ...,
2226780.677854, 0. , 85986.030403],
[1940943.259859, 2090745.441191, 390625.702813, ...,
2140794.797713, 85986.030403, 0. ]],
shape=(1905, 1905))
minkowski_dist#
from robust_mixed_dist.quantitative import minkowski_dist
xi = X[quant_variables][2,:]
xr = X[quant_variables][10,:]
minkowski_dist(xi=xi, xr=xr, q=1)
np.float64(2401294.7191080004)
canberra_dist_matrix#
from robust_mixed_dist.quantitative import canberra_dist_matrix
canberra_dist_matrix(X=X[quant_variables])
array([[0. , 0.3791237 , 1.31009432, ..., 0.57187821, 1.25368465,
1.15877842],
[0.3791237 , 0. , 1.03737598, ..., 0.20199247, 1.30874091,
1.21638296],
[1.31009432, 1.03737598, 0. , ..., 0.87872481, 0.92625515,
0.85725929],
...,
[0.57187821, 0.20199247, 0.87872481, ..., 0. , 1.31626027,
1.22724486],
[1.25368465, 1.30874091, 0.92625515, ..., 1.31626027, 0. ,
0.12046549],
[1.15877842, 1.21638296, 0.85725929, ..., 1.22724486, 0.12046549,
0. ]], shape=(1905, 1905))
canberra_dist#
from robust_mixed_dist.quantitative import canberra_dist
xi = X[quant_variables][2,:]
xr = X[quant_variables][10,:]
canberra_dist(xi=xi, xr=xr)
np.float64(1.0399913488310089)
pearson_dist_matrix#
from robust_mixed_dist.quantitative import pearson_dist_matrix
pearson_dist_matrix(X=X[quant_variables])
array([[0. , 1.20925465, 3.17227178, ..., 3.78782432, 2.9029884 ,
2.50486894],
[1.20925465, 0. , 2.0785152 , ..., 3.2901998 , 2.19157364,
1.76066423],
[3.17227178, 2.0785152 , 0. , ..., 4.06630612, 1.98147384,
1.44992911],
...,
[3.78782432, 3.2901998 , 4.06630612, ..., 0. , 3.23000242,
3.82301831],
[2.9029884 , 2.19157364, 1.98147384, ..., 3.23000242, 0. ,
1.09935355],
[2.50486894, 1.76066423, 1.44992911, ..., 3.82301831, 1.09935355,
0. ]], shape=(1905, 1905))
mahalanobis_dist_matrix#
from robust_mixed_dist.quantitative import mahalanobis_dist_matrix
mahalanobis_dist_matrix(X=X[quant_variables])
array([[0. , 1.53300007, 3.59602077, ..., 2.2183579 , 3.06832405,
2.96311246],
[1.53300007, 0. , 2.1252418 , ..., 0.73353908, 1.96608988,
1.83655561],
[3.59602077, 2.1252418 , 0. , ..., 1.46900905, 2.11887678,
2.0285113 ],
...,
[2.2183579 , 0.73353908, 1.46900905, ..., 0. , 1.98371456,
1.85102134],
[3.06832405, 1.96608988, 2.11887678, ..., 1.98371456, 0. ,
0.13626976],
[2.96311246, 1.83655561, 2.0285113 , ..., 1.85102134, 0.13626976,
0. ]], shape=(1905, 1905))
mahalanobis_dist#
from robust_mixed_dist.quantitative import mahalanobis_dist
import numpy as np
xi = X[quant_variables][2,:]
xr = X[quant_variables][10,:]
S = np.cov(X[quant_variables], rowvar=False)
mahalanobis_dist(xi=xi, xr=xr, S=S)
np.float64(2.7759092403090455)
robust_mahalanobis_dist_matrix#
from robust_mixed_dist.quantitative import robust_mahalanobis_dist_matrix, S_robust
S_robust_trimmed = S_robust(X=X[quant_variables], method="trimmed",
epsilon=0.05, n_iters=20, alpha=0.07,
weights=None)
robust_mahalanobis_dist_matrix(X=X[quant_variables],
S_robust=S_robust_trimmed)
array([[0. , 1.81864545, 3.91631623, ..., 2.35363057, 4.98194325,
4.75458207],
[1.81864545, 0. , 2.16959868, ..., 0.6471128 , 3.43994713,
3.20084918],
[3.91631623, 2.16959868, 0. , ..., 1.64792972, 2.60025908,
2.39706372],
...,
[2.35363057, 0.6471128 , 1.64792972, ..., 0. , 3.35699379,
3.11692523],
[4.98194325, 3.43994713, 2.60025908, ..., 3.35699379, 0. ,
0.24221216],
[4.75458207, 3.20084918, 2.39706372, ..., 3.11692523, 0.24221216,
0. ]], shape=(1905, 1905))
S_robust_winsorized = S_robust(X=X[quant_variables],
method="winsorized", epsilon=0.05,
n_iters=20, alpha=0.07, weights=None)
robust_mahalanobis_dist_matrix(X=X[quant_variables],
S_robust=S_robust_winsorized)
array([[0. , 1.63032279, 3.65293571, ..., 2.22371162, 3.90611218,
3.74688562],
[1.63032279, 0. , 2.08532543, ..., 0.66249147, 2.59842998,
2.42433088],
[3.65293571, 2.08532543, 0. , ..., 1.52111172, 2.10402449,
1.9736514 ],
...,
[2.22371162, 0.66249147, 1.52111172, ..., 0. , 2.51448355,
2.33923374],
[3.90611218, 2.59842998, 2.10402449, ..., 2.51448355, 0. ,
0.17812079],
[3.74688562, 2.42433088, 1.9736514 , ..., 2.33923374, 0.17812079,
0. ]], shape=(1905, 1905))
S_robust_MAD = S_robust(X=X[quant_variables], method="MAD", epsilon=0.05,
n_iters=20, alpha=None, weights=None)
robust_mahalanobis_dist_matrix(X=X[quant_variables], S_robust=S_robust_MAD)
array([[0. , 1.86757181, 3.95871434, ..., 2.36822465, 5.39713175,
5.13977515],
[1.86757181, 0. , 2.17155508, ..., 0.64240264, 3.80101401,
3.5324399 ],
[3.95871434, 2.17155508, 0. , ..., 1.67434971, 2.92445535,
2.68698538],
...,
[2.36822465, 0.64240264, 1.67434971, ..., 0. , 3.75496147,
3.48534267],
[5.39713175, 3.80101401, 2.92445535, ..., 3.75496147, 0. ,
0.27150497],
[5.13977515, 3.5324399 , 2.68698538, ..., 3.48534267, 0.27150497,
0. ]], shape=(1905, 1905))
robust_mahalanobis_dist#
from robust_mixed_dist.quantitative import robust_mahalanobis_dist, S_robust
xi = X[quant_variables][0,:]
xr = X[quant_variables][2,:]
S_robust_trimmed = S_robust(X=X[quant_variables], method="trimmed",
epsilon=0.05, n_iters=20, alpha=0.07,
weights=None)
robust_mahalanobis_dist(xi=xi, xr=xr, S_robust=S_robust_trimmed)
np.float64(3.9163162282135637)
S_robust_winsorized = S_robust(X=X[quant_variables],
method="winsorized", epsilon=0.05,
n_iters=20, alpha=0.07, weights=None)
robust_mahalanobis_dist(xi=xi, xr=xr, S_robust=S_robust_winsorized)
np.float64(3.6529357075358972)
S_robust_MAD = S_robust(X=X[quant_variables], method="MAD",
epsilon=0.05, n_iters=20, alpha=None,
weights=None)
robust_mahalanobis_dist(xi=xi, xr=xr, S_robust=S_robust_MAD)
np.float64(3.9587143448190836)
robust_mixed_dist.binary#
sokal_dist_matrix#
from robust_mixed_dist.binary import sokal_dist_matrix
sokal_dist_matrix(X=X[binary_variables])
array([[0. , 0.33333333, 0.33333333, ..., 0.33333333, 0.57142857,
0. ],
[0.33333333, 0. , 0. , ..., 0. , 0.33333333,
0.33333333],
[0.33333333, 0. , 0. , ..., 0. , 0.33333333,
0.33333333],
...,
[0.33333333, 0. , 0. , ..., 0. , 0.33333333,
0.33333333],
[0.57142857, 0.33333333, 0.33333333, ..., 0.33333333, 0. ,
0.57142857],
[0. , 0.33333333, 0.33333333, ..., 0.33333333, 0.57142857,
0. ]], shape=(1905, 1905))
sokal_dist#
from robust_mixed_dist.binary import sokal_dist
# sokal_dist belongs to the binary module, so compare the two rows on the
# binary columns (the same columns given to sokal_dist_matrix above).
xi = X[binary_variables][2,:]
xr = X[binary_variables][10,:]
sokal_dist(xi=xi, xr=xr)
# NOTE(review): the output previously displayed here (2.000000000001225) was
# produced with the quantitative columns; re-run this cell to refresh it.
jaccard_dist_matrix#
from robust_mixed_dist.binary import jaccard_dist_matrix
jaccard_dist_matrix(X=X[binary_variables])
array([[0. , 0.33333333, 0.33333333, ..., 0.33333333, 0.5 ,
0. ],
[0.33333333, 0. , 0. , ..., 0. , 0.33333333,
0.33333333],
[0.33333333, 0. , 0. , ..., 0. , 0.33333333,
0.33333333],
...,
[0.33333333, 0. , 0. , ..., 0. , 0.33333333,
0.33333333],
[0.5 , 0.33333333, 0.33333333, ..., 0.33333333, 0. ,
0.5 ],
[0. , 0.33333333, 0.33333333, ..., 0.33333333, 0.5 ,
0. ]], shape=(1905, 1905))
jaccard_dist#
from robust_mixed_dist.binary import jaccard_dist
# jaccard_dist belongs to the binary module, so compare the two rows on the
# binary columns (the same columns given to jaccard_dist_matrix above).
xi = X[binary_variables][2,:]
xr = X[binary_variables][10,:]
jaccard_dist(xi=xi, xr=xr)
# NOTE(review): the output previously displayed here (np.float64(0.0)) was
# produced with the quantitative columns; re-run this cell to refresh it.
robust_mixed_dist.multiclass#
hamming_dist_matrix#
from robust_mixed_dist.multiclass import hamming_dist_matrix
hamming_dist_matrix(X=X[multiclass_variables])
array([[0. , 0.5, 1. , ..., 1. , 0. , 0. ],
[0.5, 0. , 1. , ..., 1. , 0.5, 0.5],
[1. , 1. , 0. , ..., 0. , 1. , 1. ],
...,
[1. , 1. , 0. , ..., 0. , 1. , 1. ],
[0. , 0.5, 1. , ..., 1. , 0. , 0. ],
[0. , 0.5, 1. , ..., 1. , 0. , 0. ]], shape=(1905, 1905))
hamming_dist#
from robust_mixed_dist.multiclass import hamming_dist
# hamming_dist belongs to the multiclass module, so compare rows 0 and 2 on the
# multiclass columns (the same columns given to hamming_dist_matrix above).
# The displayed result matches entry [0, 2] of that matrix (1.0).
xi = X[multiclass_variables][0,:]
xr = X[multiclass_variables][2,:]
hamming_dist(xi=xi, xr=xr)
np.float64(1.0)