Quick start

Requirements

General requirements

# Development Python Version: 3.12.0
PyDistances
polars
numpy
pandas
scipy

Quantitative requirements

polars
pandas
numpy
scipy

Binary requirements

polars
pandas
scipy

Multiclass requirements

polars
pandas
scipy

Mixed requirements

polars
pandas
numpy
scipy

Installation

pip install PyDistances

To see the available versions of the package, go to the release history on PyPI: https://pypi.org/project/PyDistances/#history

Example

# pandas is required for `pd.read_csv` below; without this import the
# example fails with `NameError: name 'pd' is not defined`.
import pandas as pd
from PyDistances.mixed import GGowerDistMatrix

# Load the demo data set straight from the PyDistances-demo repository.
data_url = "https://raw.githubusercontent.com/FabioScielzoOrtiz/PyDistances-demo/refs/heads/main/data/madrid_houses_processed.csv"

data = pd.read_csv(data_url)

# Column names grouped by variable type. Only the size of each group is
# passed on to GGowerDistMatrix (as p1, p2, p3).
# NOTE(review): presumably the columns of `data` must already be ordered
# quantitative -> binary -> multiclass; confirm against the PyDistances docs.
quant_cols = ['sq_mt_built', 'n_rooms', 'n_bathrooms', 'n_floors', 'buy_price']
binary_cols = ['is_renewal_needed', 'has_lift', 'is_exterior', 'has_parking']
multiclass_cols = ['energy_certificate', 'house_type']

p1 = len(quant_cols)       # number of quantitative columns
p2 = len(binary_cols)      # number of binary columns
p3 = len(multiclass_cols)  # number of multiclass columns

# Configure the distance-matrix object for the mixed data set.
ggower_dist_matrix = GGowerDistMatrix(p1=p1, p2=p2, p3=p3,
                                      d1="robust_mahalanobis", d2="jaccard", d3="hamming",
                                      robust_method="trimmed", alpha=0.07, epsilon=0.05,
                                      n_iters=20, weights=None)

# Compute the distance matrix for `data`; the printed result follows below.
ggower_dist_matrix.compute(X=data)
array([[0.        , 2.21871457, 1.93429293, ..., 1.94305438, 3.1223396 ,
        2.26768279],
       [2.21871457, 0.        , 1.22327246, ..., 2.38753004, 2.64304949,
        2.00865696],
       [1.93429293, 1.22327246, 0.        , ..., 2.36077974, 2.50019632,
        1.63811682],
       ...,
       [1.94305438, 2.38753004, 2.36077974, ..., 0.        , 2.9036275 ,
        1.75869492],
       [3.1223396 , 2.64304949, 2.50019632, ..., 2.9036275 , 0.        ,
        3.03987403],
       [2.26768279, 2.00865696, 1.63811682, ..., 1.75869492, 3.03987403,
        0.        ]])