Generate hierarchical unique IDs

Each tessellation cell will have a unique hierarchical index (hindex) in the form c000e000000t0000, where c000 represents a chunk, e000000 represents an enclosure, and t0000 represents a cell.

Each enclosure then has a unique hierarchical index in the form c000e000000.

import geopandas as gpd
import pandas as pd
from tqdm import tqdm
import warnings

# Ignore warnings whose message mentions the "initial implementation of
# Parquet" (presumably the geopandas Parquet I/O notice), which would
# otherwise repeat on every chunk.
warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')

# Directory holding one `cells_{c}.pq` file per chunk; also used by the
# convolution step further down.
folder = "../../urbangrammar_samba/spatial_signatures/morphometrics/cells/"

# For each of the 103 chunks, give every tessellation cell a hierarchical
# index of the form c000e000000t0000 (chunk / enclosure / cell within the
# enclosure), store it as the first column of the cells file, and export a
# slim copy with only the index and the two geometry-related columns.
for c in tqdm(range(103), total=103):
    cells_path = folder + f'cells_{c}.pq'
    tess = gpd.read_parquet(cells_path)

    # Number the cells inside each enclosure 0, 1, 2, ... The frame is sorted
    # by enclosure first so the numbering follows the sorted row order, then
    # put back into index order.
    # NOTE(review): assumes enclosureID has no missing values - confirm.
    tess = tess.sort_values('enclosureID')
    tess['tess_ID'] = tess.groupby('enclosureID').cumcount()
    tess = tess.sort_index()

    # Left-pad each component with zeros to its fixed width (3 / 6 / 4);
    # values that are already wider are kept as they are.
    hindex = (
        f"c{c:03d}"
        + "e"
        + tess.enclosureID.astype(str).str.rjust(6, "0")
        + "t"
        + tess.tess_ID.astype(str).str.rjust(4, "0")
    )

    tess.insert(0, 'hindex', hindex)

    # Write the full table (helper ID columns included) back to the cells file.
    # A previous version first wrote a copy without uID/enclosureID/tess_ID to
    # this same path and then immediately overwrote it with the full table, so
    # that first write never survived; it is dropped here and the file left on
    # disk is unchanged.
    # NOTE(review): if the helper columns were meant to be removed from the
    # cells file, this cell could no longer be re-run (it needs enclosureID).
    tess.to_parquet(cells_path)
    tess[['hindex', 'tessellation', 'buildings']].to_parquet(f"../../urbangrammar_samba/spatial_signatures/tessellation/tess_{c}.pq")
100%|██████████| 103/103 [16:51<00:00,  9.82s/it]
# For each of the 103 chunks, replace the enclosureID column of the
# enclosures file with a hierarchical index of the form c000e000000
# (chunk / enclosure), stored as the first column. The file is rewritten
# in place.
for c in tqdm(range(103), total=103):
    encl_path = f"../../urbangrammar_samba/spatial_signatures/enclosures/encl_{c}.pq"
    encl = gpd.read_parquet(encl_path)

    # Zero-pad the chunk number to 3 characters and the enclosure ID to 6;
    # longer values are left untouched.
    chunk_code = f"c{c:03d}"
    enclosure_code = encl.enclosureID.astype(str).str.rjust(6, "0")
    hindex = chunk_code + "e" + enclosure_code

    encl.insert(0, 'hindex', hindex)

    # enclosureID is now encoded in hindex, so it is not written back.
    trimmed = encl.drop(columns='enclosureID')
    trimmed.to_parquet(encl_path)
100%|██████████| 103/103 [00:29<00:00,  3.45it/s]
# For each of the 103 chunks, copy the hierarchical index of the cells file
# into the matching convolutions file as its first column. The convolutions
# file is rewritten in place.
for c in tqdm(range(103), total=103):
    conv_path = f"../../urbangrammar_samba/spatial_signatures/morphometrics/convolutions/conv_{c}.pq"
    conv = pd.read_parquet(conv_path)

    # Only the hindex column is needed, so read just that column with pandas
    # rather than loading the whole GeoDataFrame (geometries included).
    # The index stored in the file is expected to come back with it.
    cell_index = pd.read_parquet(folder + f'cells_{c}.pq', columns=['hindex'])

    # insert() aligns the Series on the index, so rows are matched by index
    # label, not by position.
    # NOTE(review): assumes conv and the cells file share the same index -
    # labels missing from the cells file would yield NaN here; confirm.
    conv.insert(0, 'hindex', cell_index['hindex'])
    conv.to_parquet(conv_path)
100%|██████████| 103/103 [12:18<00:00,  7.17s/it]