Feature importance per form type

This notebook analyses per-type feature importance of form-based clusters.

import numpy as np
import pandas as pd
import geopandas as gpd
import dask.dataframe
import matplotlib.pyplot as plt
import urbangrammar_graphics as ugg

from matplotlib.lines import Line2D
from sklearn.ensemble import RandomForestClassifier

Form signatures

# Open the standardized form parquet dataset with dask and index it by 'hindex'.
%time data = dask.dataframe.read_parquet("../../urbangrammar_samba/spatial_signatures/clustering_data/form/standardized/").set_index('hindex')
# Turn +/-inf into NaN, then fill every NaN with 0 so no non-finite values remain.
%time data = data.replace([np.inf, -np.inf], np.nan).fillna(0)
# Evaluate the dask graph; from here on `data` is held fully in memory.
%time data = data.compute()
CPU times: user 38.8 s, sys: 35.3 s, total: 1min 14s
Wall time: 1min 44s
CPU times: user 27.6 ms, sys: 0 ns, total: 27.6 ms
Wall time: 23.8 ms
CPU times: user 1min 7s, sys: 43.2 s, total: 1min 50s
Wall time: 1min 51s
# Display the loaded table (the notebook renders a truncated preview with its shape).
data
sdbAre_q1 sdbAre_q2 sdbAre_q3 sdbPer_q1 sdbPer_q2 sdbPer_q3 sdbCoA_q1 sdbCoA_q2 sdbCoA_q3 ssbCCo_q1 ... lseCWA_q3 lteOri_q1 lteOri_q2 lteOri_q3 lteWNB_q1 lteWNB_q2 lteWNB_q3 lieWCe_q1 lieWCe_q2 lieWCe_q3
hindex
c000e094707t0000 -0.947406 -0.371977 0.020285 -0.901199 -0.237045 -0.023143 -0.000419 -0.001515 -0.010221 -0.046170 ... 0.073064 0.031571 0.196520 0.424415 -0.611681 -0.502087 -0.311160 -0.022562 -0.002733 -0.004738
c000e094763t0000 -0.913567 -0.420861 -0.271703 -0.903627 -0.428003 -0.336729 -0.000419 -0.001515 -0.010221 -0.035325 ... -0.254395 -1.016655 -0.161371 0.011093 0.148709 0.214520 -0.125943 -0.019044 -0.002654 -0.004738
c000e094763t0001 -0.878137 -0.411587 -0.284021 -0.900393 -0.416250 -0.350010 -0.000419 -0.001515 -0.010221 -0.034917 ... -0.285074 -0.897625 -0.161371 0.103698 0.148709 0.250036 -0.200268 -0.019323 -0.002654 -0.004738
c000e094763t0002 -0.952475 -0.421566 -0.283919 -0.968400 -0.429947 -0.343165 -0.000419 -0.001515 -0.010221 -0.065649 ... -0.280682 -0.927382 -0.161371 0.177408 0.302689 0.250036 -0.120718 -0.018825 -0.002654 -0.004738
c000e094764t0000 -0.964878 -0.420861 -0.271703 -0.972440 -0.420006 -0.315861 -0.000419 -0.001515 -0.010221 -0.066832 ... -0.280682 -1.016655 -0.104108 0.185348 0.302689 0.250036 -0.105044 -0.018825 -0.002659 -0.004738
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
c102e644989t0111 -0.311466 -0.431706 -0.373463 -0.082269 -0.459270 -0.389532 -0.000419 -0.001515 -0.010221 0.132837 ... 0.199797 1.288365 0.440493 0.197816 -0.351250 -0.580453 -0.704637 -0.000200 -0.002667 -0.004738
c102e644989t0112 -0.326671 -0.461825 -0.371855 -0.149873 -0.528701 -0.386678 -0.000419 -0.001515 -0.010221 0.136559 ... 0.195101 1.288365 0.440493 0.290197 -0.351250 -0.580453 -0.693005 -0.000669 -0.002667 -0.004738
c102e644989t0113 -0.094236 -0.364761 -0.304254 0.024972 -0.347371 -0.283669 -0.000419 -0.001515 -0.010221 0.021411 ... 0.199797 1.288365 0.440493 0.197816 -0.351250 -0.580453 -0.704637 -0.000843 -0.002667 -0.004738
c102e644989t0114 -0.477667 -0.568464 -0.390033 -0.600170 -0.646516 -0.472676 -0.000419 -0.001515 -0.010221 0.424887 ... 0.008079 -0.855901 0.189420 -0.560233 -0.341815 -0.580453 -0.704637 -0.000898 -0.002667 -0.004738
c102e644989t0115 -0.413094 -0.545952 -0.382834 -0.400108 -0.610332 -0.440413 -0.000419 -0.001515 -0.010221 0.160613 ... 0.008079 -0.203827 0.440493 0.231812 -0.351250 -0.580453 -0.638809 -0.000200 -0.002543 -0.004738

14539578 rows × 177 columns

# First-level labels (column 'k8') and the two second-level label tables.
# NOTE(review): the file names suggest the second-level tables subdivide
# clusters 4 and 2 respectively -- confirm against the clustering notebooks.
labels_l1 = pd.read_parquet(
    "../../urbangrammar_samba/spatial_signatures/clustering_data/k8_form_labels.pq"
)
labels_l2_4 = pd.read_parquet(
    "../../urbangrammar_samba/spatial_signatures/clustering_data/clustergram_c4_form_labels.pq"
)
labels_l2_2 = pd.read_parquet(
    "../../urbangrammar_samba/spatial_signatures/clustering_data/clustergram_c2_form_labels.pq"
)

# Work on a copy so the first-level labels stay available unchanged.
labels = labels_l1.copy()

# Rows labelled 4 receive the values of column '9' offset by 40 (-> 40, 41, ...).
# The assignment is positional, so the second-level table must list the rows
# in the same order as they appear in `labels`.
in_cluster_4 = labels['k8'] == 4
labels.loc[in_cluster_4, 'k8'] = labels_l2_4['9'].values + 40

# Rows labelled 2 receive the values of column '8' offset by 20 (-> 20, 21, ...).
in_cluster_2 = labels['k8'] == 2
labels.loc[in_cluster_2, 'k8'] = labels_l2_2['8'].values + 20

# Number of observations per combined label.
labels['k8'].value_counts()
1     5544712
5     3774274
0     1554431
25     681861
22     664692
21     511727
23     502758
26     425631
27     275571
20     250872
24     138339
40     111659
3       62701
42      31800
45       5105
41       1680
47       1128
7         477
43         79
46         65
48          9
44          4
6           3
Name: k8, dtype: int64
# Labels treated as outliers and excluded from the analysis below.
# NOTE(review): these are among the smallest groups in the counts above -- confirm the selection rule.
outliers = [6, 44, 48, 43]
# Boolean selector: True for every observation whose label is NOT an outlier.
mask = ~labels['k8'].isin(outliers)

Overall similarity

The similarity of clusters can be represented by a hierarchical dendrogram generated using Ward’s agglomerative clustering.

from scipy.cluster import hierarchy

# Cluster centroids: mean of every feature over the non-outlier members of each cluster.
kept_cluster_ids = labels.loc[mask, 'k8'].values
group = data.loc[mask].groupby(kept_cluster_ids).mean()

# Ward linkage computed on the centroids.
Z = hierarchy.linkage(group, method='ward')

# Draw the dendrogram on an explicit axes, with horizontal grid lines to read merge heights.
fig, ax = plt.subplots(figsize=(25, 15))
dn = hierarchy.dendrogram(Z, labels=group.index, ax=ax)
ax.grid(True, axis='y', which='both')
../_images/form_feature_importance_11_0.png

Feature importance per cluster

# List the distinct labels in order of first appearance (outlier labels are still included here).
labels.k8.unique()
array([ 0,  5, 20, 24,  1, 21, 25, 22, 26, 23, 27, 40, 42,  3, 48, 45,  7,
        6, 41, 47, 46, 44, 43], dtype=int32)
# Per-cluster feature importance.
#
# For every non-outlier cluster a binary random forest is trained to separate the
# members of that cluster (1) from all remaining observations (0). Features are then
# ranked by the forest's `feature_importances_` and the top `top_n` are stored in
# `imps` as two columns per cluster:
#   'cluster_<id>'       feature names, most important first
#   'cluster_<id>_vals'  the matching importance values
top_n = 50  # number of top-ranked features kept per cluster

# Loop-invariant inputs: slice once instead of re-slicing the full table for every cluster.
features = data.loc[mask].values
kept_labels = labels.loc[mask, 'k8']

ranked = {}
# `unique()` keeps first-appearance order, so clusters are processed in the same
# order as when iterating over all labels and skipping the outliers.
for cluster in kept_labels.unique():
    # Vectorised 0/1 target (replaces a Python-level `.apply` over every row).
    cluster_bool = (kept_labels == cluster).astype(int)

    # Fixed random_state keeps the ranking reproducible between runs.
    clf = RandomForestClassifier(n_estimators=10, n_jobs=-1, random_state=42, verbose=1)
    clf = clf.fit(features, cluster_bool.values)

    importances = pd.Series(clf.feature_importances_, index=data.columns).sort_values(ascending=False)
    top = importances.head(top_n)

    ranked[f'cluster_{cluster}'] = top.index.values
    ranked[f'cluster_{cluster}_vals'] = top.values

# Build the result table in one step; column order follows insertion order.
imps = pd.DataFrame(ranked)

# Release the large feature matrix; nothing below needs it.
del features
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed: 10.5min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  9.6min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  6.1min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  5.1min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.6min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  5.1min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  5.6min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  5.5min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  5.8min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  5.2min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  5.3min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  5.0min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.5min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  6.6min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.8min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  1.3min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.7min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.0min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:   57.7s finished
# Keep only the feature-name columns (those without 'vals' in their name)
# and display them ordered alphabetically by column name.
chars = list(filter(lambda column: 'vals' not in column, imps.columns))
imps.loc[:, sorted(chars)]
cluster_0 cluster_1 cluster_20 cluster_21 cluster_22 cluster_23 cluster_24 cluster_25 cluster_26 cluster_27 cluster_3 cluster_40 cluster_41 cluster_42 cluster_45 cluster_46 cluster_47 cluster_5 cluster_7
0 sdcLAL_q2 stbCeA_q1 lteWNB_q3 ssbCCM_q3 stcOri_q2 stcOri_q1 mtdDeg_q3 linPDE_q2 lisCel_q2 sdbAre_q3 mtcWNe_q1 ssbCor_q3 ssbCCD_q2 ssbCor_q3 ssbCCD_q2 sdbCoA_q2 sdbAre_q3 sicCAR_q1 sddAre_q2
1 sdcAre_q2 sicCAR_q1 mtdDeg_q3 sdbPer_q3 stbOri_q3 stcOri_q2 linP4W_q1 linPDE_q3 ltcWRE_q3 ssbCCM_q3 sicCAR_q1 ssbERI_q1 ssbSqu_q1 ssbCCD_q2 ssbERI_q2 sdbAre_q2 sdbCoA_q3 mtcWNe_q2 mdsAre_q2
2 sisBpM_q2 mtcWNe_q1 ldePer_q2 sdbAre_q3 stbSAl_q1 stbOri_q1 linP4W_q3 ssbCCo_q2 ltcRea_q1 sdbPer_q3 sdcLAL_q3 sdbAre_q3 sdbAre_q3 ssbSqu_q3 ssbCor_q2 ldePer_q2 ssbCCM_q3 mtcWNe_q1 sdsAre_q2
3 sisBpM_q3 stbCeA_q2 lteWNB_q2 ssbCCo_q1 stbCeA_q1 stbOri_q2 linP4W_q2 stcOri_q2 misCel_q2 sdbAre_q2 ltcAre_q3 ssbCCD_q3 ssbCor_q3 sdbPer_q3 sdbAre_q2 sdbAre_q3 sdbPer_q3 sdcLAL_q3 ldsAre_q2
4 ltcAre_q1 mtbNDi_q2 ltcWRE_q3 ssbElo_q1 stcOri_q3 ssbSqu_q1 linP3W_q2 linPDE_q1 ldePer_q2 ssbCCD_q3 ltbIBD_q2 sdbPer_q3 sdbPer_q2 sdbAre_q3 sdbAre_q3 sicCAR_q1 sicCAR_q1 ltbIBD_q2 lseCWA_q1
5 mtcWNe_q1 stbSAl_q1 linPDE_q2 ssbCCM_q2 ldePer_q2 ldeAre_q1 linP3W_q3 stbOri_q2 linPDE_q3 stbCeA_q1 sdsAre_q2 ssbCCD_q2 sdsSWD_q2 ssbERI_q2 ssbSqu_q2 sdbCoA_q3 ssbSqu_q3 mtbNDi_q2 ldsAre_q1
6 mtbNDi_q2 sdcAre_q2 lteWNB_q1 sdbPer_q2 ssbCCo_q2 stbCeA_q1 xcnSCl_q2 lcdMes_q2 lisCel_q1 ssbCCD_q2 ltcAre_q1 ssbSqu_q3 ssbSqu_q2 sdbAre_q2 sdbPer_q3 sdsAre_q2 sdbAre_q2 sdsAre_q2 mtdMDi_q1
7 ltcWRE_q3 ldePer_q2 linPDE_q3 ssbCCo_q2 stbCeA_q3 stbSAl_q2 lcdMes_q1 stcOri_q3 linPDE_q2 ssbCCM_q2 ltcWRE_q3 sdbPer_q2 ssbCCD_q1 ssbERI_q1 sdbPer_q1 sdsLen_q2 misCel_q3 ldsAre_q2 ltcWRE_q1
8 ldsAre_q1 linPDE_q3 ldeAre_q2 stbCeA_q3 lteOri_q2 linP4W_q2 lcdMes_q2 ssbCCM_q3 ltcRea_q2 sdbPer_q2 ldsAre_q2 sdbAre_q2 sdbCoA_q3 ssbCor_q2 ssbSqu_q3 ssbCCM_q1 lseERI_q2 sisBpM_q2 sddAre_q3
9 sscERI_q2 sdcLAL_q3 sdbAre_q3 linPDE_q3 stcOri_q1 ssbCCM_q3 ldsMSL_q2 stbCeA_q1 mtdDeg_q3 ssbCCo_q1 lcnClo_q2 ssbCCM_q2 ssbCor_q2 ssbCCD_q3 ssbERI_q3 ssbSqu_q2 stcOri_q1 sddAre_q3 sddAre_q1
10 mdcAre_q2 ldsAre_q2 lcnClo_q3 lcdMes_q1 linPDE_q2 stbOri_q3 stbSAl_q1 ssbCCo_q1 ldeAre_q2 ssbCor_q3 sdsAre_q3 ssbCCo_q3 sdbAre_q2 ssbSqu_q2 ssbCCM_q2 ldeAre_q3 ssbCCD_q2 sddAre_q2 mtdMDi_q3
11 ltbIBD_q2 ltcAre_q1 sdbPer_q2 ldePer_q2 lcdMes_q1 sdbAre_q3 linPDE_q3 stbSAl_q1 ltcAre_q1 ssbERI_q1 mdsAre_q2 ssbCCM_q3 stbSAl_q1 ssbCCM_q3 ssbCCM_q3 sdsLen_q3 sicCAR_q2 sscERI_q1 mdsAre_q3
12 sicCAR_q3 sdsAre_q2 linP4W_q2 sdbAre_q2 linPDE_q3 stbCeA_q2 ltcWRE_q1 ssbElo_q1 linP4W_q2 ssbElo_q1 lseCWA_q3 misCel_q3 ssbERI_q2 misCel_q3 misCel_q3 sddAre_q2 lisCel_q1 mtcWNe_q3 ltcWRE_q2
13 sicCAR_q2 ldePer_q1 ltcWRE_q2 lseCWA_q2 ltcWRE_q1 stbCeA_q3 linP3W_q1 stbOri_q3 lcdMes_q3 lcdMes_q2 ltcAre_q2 ssbSqu_q1 stbCeA_q1 misCel_q2 sdsSPO_q1 misCel_q3 stbSAl_q1 mdcAre_q1 ldsCDL_q2
14 sdsAre_q1 sddAre_q2 ldePer_q1 ltcWRE_q2 ltcWRE_q3 sssLin_q2 stbCeA_q1 linP3W_q2 sdcLAL_q3 ssbSqu_q3 ldeAre_q3 ssbElo_q1 sdcAre_q2 ssbCCo_q3 sicCAR_q3 linP3W_q2 sdsSWD_q3 sicCAR_q3 lisCel_q1
15 mdcAre_q1 ldsAre_q1 stbCeA_q1 ssbElo_q2 stbOri_q1 stcOri_q3 ldsMSL_q3 linP3W_q3 misCel_q1 stbCeA_q3 mdcAre_q2 sdbAre_q1 lcnClo_q3 ldsAre_q1 ssbCCD_q3 sdsAre_q1 lisCel_q2 mtbNDi_q3 ltbIBD_q2
16 mtcWNe_q2 stbCeA_q3 lcnClo_q1 stcOri_q2 lcdMes_q2 sdsAre_q3 misCel_q3 ldePer_q2 sddAre_q1 stbSAl_q1 ltbIBD_q3 misCel_q2 ldsAre_q1 ssbSqu_q1 ldsAre_q1 misCel_q1 ssbERI_q2 mdsAre_q2 misCel_q2
17 ltcAre_q2 ldsCDL_q2 mtcWNe_q1 stbOri_q1 stbCeA_q2 lteOri_q1 linPDE_q2 sdsSPO_q3 lcdMes_q2 ldePer_q1 ltcWRE_q2 ssbERI_q2 sicCAR_q2 sscERI_q1 ssbSqu_q1 ssbERI_q1 misCel_q1 mtdMDi_q1 lcnClo_q3
18 sdsAre_q3 linPDE_q2 ssbCCM_q2 linP4W_q1 ssbCCo_q1 linP4W_q1 ssbCCD_q3 ltcWRE_q3 lisCel_q3 ssbERI_q2 sdcAre_q3 lisCel_q1 sscERI_q2 lisCel_q1 ssbElo_q2 stbCeA_q3 sicCAR_q3 sdcLAL_q2 lddNDe_q3
19 ltbIBD_q1 ldsCDL_q3 stcOri_q2 ldeAre_q2 stbSAl_q2 stbSAl_q1 lisCel_q3 stbCeA_q2 sisBpM_q1 ssbSqu_q1 sdsSPO_q2 sicCAR_q2 misCel_q2 lisCel_q2 misCel_q2 lcnClo_q1 sdbCoA_q2 sicCAR_q2 sdsAre_q3
20 sdcAre_q3 ssbCCo_q2 lcnClo_q2 sicCAR_q2 lteOri_q1 linPDE_q3 lisCel_q2 stbSAl_q2 ldsMSL_q1 ldeAre_q1 mdsAre_q3 ssbCCM_q1 stbOri_q3 sdbPer_q2 ssbElo_q1 sdsSPO_q2 sdcAre_q3 sdcAre_q2 lcnClo_q2
21 mtbNDi_q3 stbSAl_q2 ssbCCo_q2 stbCeA_q1 stbOri_q2 ssbCCo_q1 sdbAre_q3 ssbElo_q2 sdcAre_q2 linP4W_q2 mtcWNe_q2 ltcRea_q2 misCel_q3 mtbAli_q3 lisCel_q1 sdcAre_q1 lseCWA_q2 sdsSPO_q2 ssbCCD_q3
22 sicCAR_q1 lcdMes_q2 lcdMes_q2 ssbCCD_q2 ssbCCM_q3 ssbElo_q2 stbCeA_q2 ssbCCo_q3 linPDE_q1 ldeAre_q2 mdcAre_q3 sscERI_q1 lisCel_q1 sicCAR_q2 ssbCor_q3 ltcAre_q2 ldePer_q2 sdcAre_q3 sdsAre_q1
23 mdsAre_q3 ldeAre_q2 linP4W_q1 misCel_q3 sdbPer_q3 mdcAre_q2 misCel_q2 ssbCCM_q2 lcdMes_q1 sdsSPW_q2 sddAre_q3 ssbERI_q3 ssbCCo_q1 ssbCCM_q1 ssbCCM_q1 ssbERI_q2 ldeAre_q2 lcnClo_q3 linWID_q3
24 mtdMDi_q1 sicCAR_q2 stbOri_q2 ssbCCo_q3 ssbElo_q1 ltcWRE_q2 mdcAre_q2 ldsCDL_q2 misCel_q3 sdbPer_q1 ldePer_q3 ssbCCo_q2 sdsSPW_q1 ssbCCM_q2 sscCCo_q1 sdsSPW_q2 ssbElo_q3 mdcAre_q2 linP3W_q2
25 linWID_q3 ldeAre_q1 ssbCCM_q3 ldePer_q1 misCel_q2 linPDE_q1 ssbCCM_q3 stbOri_q1 stbSAl_q2 ltcWRE_q2 lteWNB_q1 lisCel_q3 sdbAre_q1 mdcAre_q2 linPDE_q2 ssbCCo_q1 sdbPer_q1 ltcWRE_q2 lddNDe_q2
26 mtdMDi_q2 mdsAre_q2 misCel_q2 lisCel_q2 linPDE_q1 lteOri_q2 mtcWNe_q1 stcOri_q1 stbSAl_q1 ssbCCo_q2 ldeAre_q2 sicCAR_q3 ssbSqu_q3 sicCAR_q3 lisCel_q2 mdcAre_q3 sdsLen_q2 ltbIBD_q1 ldsAre_q3
27 sdsSPO_q2 sdbPer_q3 stcOri_q3 stbOri_q2 ltcWRE_q2 sdbPer_q3 xcnSCl_q3 linP3W_q1 ssbElo_q1 linP4W_q3 sicCAR_q2 ssbCCo_q1 mtbNDi_q1 sdcAre_q2 ssbCCD_q1 stbCeA_q2 stcOri_q2 linWID_q2 sdsLen_q2
28 mdcAre_q3 sdcAre_q3 linP4W_q3 sdbPer_q1 ssbCCM_q2 lteOri_q3 lcdMes_q3 sicCAR_q2 ldsAre_q1 linP4W_q1 lseCWA_q1 sdbPer_q1 ssbCCM_q2 sdcLAL_q3 sdbPer_q2 ssbCCM_q3 sdbPer_q2 sisBpM_q3 stbOri_q3
29 sdsLen_q1 ssbElo_q1 ldsCDL_q2 ldeAre_q1 linP4W_q2 linP4W_q3 lisCel_q1 sicCAR_q3 mdcAre_q3 ssbCCM_q1 ltcWRE_q1 mdcAre_q3 ssbERI_q1 sdbAre_q1 sscCCo_q2 lisCel_q2 lseERI_q1 ldsMSL_q2 sdcLAL_q3
30 mtcWNe_q3 ssbCCM_q2 ldeAre_q1 mtdMDi_q3 lteOri_q3 sdsSPW_q1 lseERI_q2 sdbPer_q3 linP4W_q1 sisBpM_q2 sddAre_q2 ssbSqu_q2 mtbAli_q1 mdcAre_q3 mdcAre_q1 ssbCCM_q2 lseCCo_q3 stbCeA_q1 sicCAR_q1
31 sdcLAL_q3 sisBpM_q2 lcdMes_q3 ssbElo_q3 lteWNB_q2 sdsSPW_q2 sdcAre_q3 lseCWA_q1 ldsCDL_q3 linPDE_q3 ldePer_q2 sdcAre_q2 ssbCCM_q3 misCel_q1 ssbCCo_q3 mtbNDi_q1 stbCeA_q1 sdsLen_q2 ssbCCM_q2
32 sdsSPO_q1 lcdMes_q3 linPDE_q1 linP4W_q2 ltbIBD_q2 ssbElo_q1 ssbCCM_q2 ltcWRE_q2 ssbCCo_q2 ssbCCo_q3 sdcAre_q2 ltcAre_q1 mtbAli_q2 ltcAre_q2 lteWNB_q3 sdbPer_q3 lteWNB_q1 mtdMDi_q2 mtbAli_q1
33 sdsLen_q3 ssbCCo_q3 stbOri_q1 stbSAl_q2 ssbCCo_q3 misCel_q1 sssLin_q2 sdsSPW_q1 linP3W_q2 mtdDeg_q3 mtdMDi_q3 lisCel_q2 lteWNB_q3 lteWNB_q2 ssbERI_q1 sdsSPO_q3 lseCWA_q3 mtdMDi_q3 mdsAre_q1
34 ldeAre_q3 linWID_q2 ssbCCo_q1 lcdMes_q2 sssLin_q2 ltcWRE_q3 ltcWRE_q2 ldsCDL_q1 ldsCDL_q2 stbCeA_q2 lseCWA_q2 mdcAre_q1 mtcWNe_q3 ltcRea_q1 mdcAre_q3 sscERI_q1 mdcAre_q2 lddNDe_q2 sicCAR_q2
35 sdcLAL_q1 mtbNDi_q3 lteOri_q2 lisCel_q3 ldeAre_q1 ssbCCo_q2 ltcAre_q3 lcdMes_q3 sdsSWD_q3 ldsCDL_q3 mtdMDi_q1 sdsSWD_q2 lcnClo_q1 sdcLAL_q2 ldeAre_q2 ldeAre_q2 sdcAre_q1 ltcAre_q2 ltbIBD_q3
36 sddAre_q3 sdsSPO_q2 stbOri_q3 stcOri_q1 ldeAre_q2 sisBpM_q1 sdbPer_q2 ssbElo_q3 ldsMSL_q2 ltcAre_q1 ltbIBD_q1 lcnClo_q1 sdbPer_q3 sdsSWD_q1 lseCWA_q1 ssbCCD_q2 lteOri_q1 mdcAre_q3 ltcAre_q1
37 mdsAre_q1 ssbCCM_q3 sdcAre_q2 stbSAl_q1 lisCel_q1 sddAre_q3 sdcLAL_q3 ltcWRE_q1 sdsSWD_q2 sicCAR_q2 mtcWNe_q3 sdsSWD_q1 lisCel_q2 ssbElo_q3 mtbAli_q2 lseCCo_q1 linP3W_q2 sscCCo_q3 sdsSPW_q3
38 mdsAre_q2 lteWNB_q2 lcdMes_q1 linPDE_q2 lcdMes_q3 ssbCCD_q2 lseERI_q1 linP4W_q1 ssbCCo_q1 ssbElo_q2 lisCel_q2 ssbElo_q2 ltcAre_q1 ltcWRE_q3 sicCAR_q2 ldsCDL_q2 stbSAl_q3 sdcLAL_q1 ssbElo_q1
39 mtbNDi_q1 ssbElo_q2 xcnSCl_q3 stcOri_q3 ssbElo_q2 ssbSqu_q2 sssLin_q1 lisCel_q2 stcOri_q2 mdcAre_q2 lseERI_q1 mdcAre_q2 sdsSWD_q1 lcnClo_q1 sscERI_q3 mtcWNe_q3 ssbCCM_q2 ltcWRE_q3 sdbPer_q3
40 ldsMSL_q1 linP4W_q1 sdsAre_q1 misCel_q2 linP4W_q3 linPDE_q2 stbSAl_q2 lcdMes_q1 stbOri_q2 lcdMes_q3 sdbAre_q2 sisBpM_q2 mdcAre_q3 sisBpM_q1 mtbNDi_q2 sdbPer_q2 mtdMDi_q1 linWID_q1 mtdMDi_q2
41 sdcAre_q1 ssbCCo_q1 lisCel_q2 stbOri_q3 mtcWNe_q1 linP3W_q3 ldsMSL_q1 misCel_q2 ltcWRE_q1 ldePer_q2 lcnClo_q1 ssbCCD_q1 sicCAR_q3 lddNDe_q1 lseCCo_q3 ltbIBD_q3 linP4W_q3 lddNDe_q1 mdcAre_q2
42 ltcAre_q3 sddAre_q3 ldsCDL_q3 ltcWRE_q3 lisCel_q2 sdbPer_q2 stbCeA_q3 sdbAre_q3 ssbElo_q2 sdbAre_q1 sdsSPO_q1 ssbElo_q3 lcdMes_q2 sdsLen_q2 lcdMes_q2 ldsCDL_q3 sdsSWD_q2 sisBpM_q1 stbOri_q2
43 ltcWRE_q2 lseCWA_q2 lteOri_q1 ssbCCD_q3 sisBpM_q3 lcdMes_q2 lcnClo_q1 lteOri_q2 stcOri_q3 sicCAR_q3 lteWNB_q3 lteWNB_q2 linPDE_q2 mtcWNe_q2 ltcRea_q2 ltbIBD_q1 lteOri_q3 sdsLen_q3 ssbCCo_q2
44 stbCeA_q1 sdsSPO_q3 ldeAre_q3 ldsCDL_q2 linP4W_q1 ldePer_q2 mdcAre_q1 sisBpM_q3 sdsSPO_q3 ssbElo_q3 mtbNDi_q3 linPDE_q2 sdsLen_q1 ldsMSL_q2 ssbCCo_q2 lseCCo_q3 sdsAre_q1 stbCeA_q2 stbCeA_q3
45 ldsMSL_q2 sdbAre_q3 mtdMDi_q3 stbCeA_q2 sdcLAL_q3 misCel_q3 sdcLAL_q2 mdcAre_q1 sssLin_q3 sdcLAL_q2 mdcAre_q1 misCel_q1 ssbERI_q3 stbCeA_q1 mtbNDi_q3 sdbPer_q1 mtbAli_q1 ldsAre_q1 sdcLAL_q2
46 ldsAre_q2 sdcLAL_q2 sdbPer_q3 ltcWRE_q1 ssbSqu_q2 ldeAre_q2 mdsAre_q2 linP4W_q2 linP4W_q3 linPDE_q2 sisBpM_q2 lcnClo_q3 sdbPer_q1 sdsAre_q1 sdcLAL_q3 sscERI_q3 linP3W_q3 mdsAre_q3 lcdMes_q2
47 mtdMDi_q3 ltcWRE_q2 stbSAl_q1 lisCel_q1 ssbElo_q3 lddNDe_q2 sdbPer_q3 sdbPer_q2 stbSAl_q3 ltcWRE_q3 mtdMDi_q2 ldsAre_q1 ssbCCD_q3 lteWNB_q3 mdcAre_q2 sdsSWD_q2 linP4W_q2 sdsSPO_q1 mtcWNe_q3
48 sdsLen_q2 sdbPer_q2 misCel_q1 lseCWA_q1 ldsCDL_q1 lisCel_q1 lcnClo_q2 mtbNDi_q1 stbCeA_q3 stbSAl_q2 sddAre_q1 ldeAre_q2 linP4W_q2 mdcAre_q1 ltcAre_q2 mdcAre_q2 ldsMSL_q1 mtbAli_q1 ssbElo_q2
49 mtbAli_q1 linPDE_q1 stbSAl_q2 ldePer_q3 sdcAre_q2 ssbCCM_q2 ldsAre_q2 sssLin_q2 stbCeA_q2 linPDE_q1 lseERI_q2 mtbAli_q2 ssbCCM_q1 sscERI_q3 sdcAre_q2 ssbSqu_q3 ltcAre_q1 ldsMSL_q3 ssbERI_q3
# Persist the per-cluster ranking (feature names and importance values) as parquet.
imps.to_parquet("../../urbangrammar_samba/spatial_signatures/clustering_data/per_cluster_importance_form.pq")

Extremes

import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heatmap of the cluster centroids: one row per feature, one column per cluster.
# The diverging palette is centred on 0.
fig, ax = plt.subplots(figsize=(20, 200))
sns.heatmap(
    group.T,
    cmap="vlag",
    center=0,
    annot=True,
    cbar=False,
    ax=ax,
)
# plt.savefig("../../urbangrammar_samba/spatial_signatures/clustering_data/spsig_heatmap.pdf", bbox_inches="tight")
<AxesSubplot:>
../_images/form_feature_importance_19_1.png
# Same centroid heatmap with clusters 7, 41, 45, 46 and 47 left out.
# NOTE(review): presumably removed so their values do not dominate the colour scale -- confirm.
fig, ax = plt.subplots(figsize=(20, 200))
centroids_subset = group.drop(index=[7, 41, 45, 46, 47]).T
sns.heatmap(
    centroids_subset,
    cmap="vlag",
    center=0,
    annot=True,
    cbar=False,
    ax=ax,
)
# plt.savefig("../../urbangrammar_samba/spatial_signatures/clustering_data/spsig_heatmap.pdf", bbox_inches="tight")
<AxesSubplot:>
../_images/form_feature_importance_20_1.png