Generate hierarchical unique IDs
Each tessellation cell will have a unique hierarchical index (`hindex`) in the form `c000e000000t0000`, where `c000` represents a chunk, `e000000` represents an enclosure, and `t0000` represents a cell.
Each enclosure then has a unique hierarchical index of the form `c000e000000`.
import geopandas as gpd
import pandas as pd
from tqdm import tqdm
import warnings
# Suppress warnings whose message mentions the "initial implementation of
# Parquet" (presumably the notice geopandas raises on Parquet I/O -- confirm).
warnings.filterwarnings(
    action='ignore',
    message='.*initial implementation of Parquet.*',
)

# Directory holding the per-chunk tessellation cell files (cells_<chunk>.pq).
folder = "../../urbangrammar_samba/spatial_signatures/morphometrics/cells/"
# Give every tessellation cell a hierarchical index of the form
# c000e000000t0000 (chunk, enclosure, cell within the enclosure), one chunk
# file at a time, and write the results back to disk.
for c in tqdm(range(103), total=103):
    cells_path = folder + f'cells_{c}.pq'
    tess = gpd.read_parquet(cells_path)

    # Number the cells inside each enclosure 0, 1, 2, ... following the row
    # order of the frame sorted by enclosureID, then restore index order.
    # NOTE(review): assumes enclosureID has no missing values -- confirm.
    tess = tess.sort_values('enclosureID')
    tess['tess_ID'] = tess.groupby('enclosureID').cumcount()
    tess = tess.sort_index()

    # Left-pad each component with zeros to a fixed width (3 / 6 / 4 chars);
    # values that are already wider are left unchanged.
    hindex = (
        "c" + str(c).rjust(3, "0")
        + "e" + tess.enclosureID.astype(str).str.rjust(6, "0")
        + "t" + tess.tess_ID.astype(str).str.rjust(4, "0")
    )
    tess.insert(0, 'hindex', hindex)

    # NOTE(review): the full table, including 'uID', 'enclosureID' and
    # 'tess_ID', is what is stored in the cells file. An extra write of a copy
    # without those columns to this same path used to precede this one and was
    # overwritten immediately, so it has been removed. If dropping the old ID
    # columns was the intent, drop them here instead.
    tess.to_parquet(cells_path)

    # Slim copy with only the hindex and the two geometry-named columns.
    tess[['hindex', 'tessellation', 'buildings']].to_parquet(f"../../urbangrammar_samba/spatial_signatures/tessellation/tess_{c}.pq")
100%|██████████| 103/103 [16:51<00:00, 9.82s/it]
# Give every enclosure a hierarchical index of the form c000e000000 (chunk,
# enclosure) and rewrite each chunk file without the old enclosureID column.
for c in tqdm(range(103), total=103):
    encl_path = f"../../urbangrammar_samba/spatial_signatures/enclosures/encl_{c}.pq"
    encl = gpd.read_parquet(encl_path)

    # Left-pad with zeros to 3 characters (chunk) and 6 characters
    # (enclosure); values that are already wider are left unchanged.
    chunk_part = "c" + str(c).rjust(3, "0")
    enclosure_part = encl.enclosureID.astype(str).str.rjust(6, "0")
    hindex = chunk_part + "e" + enclosure_part

    encl.insert(0, 'hindex', hindex)
    without_old_id = encl.drop(columns='enclosureID')
    without_old_id.to_parquet(encl_path)
100%|██████████| 103/103 [00:29<00:00, 3.45it/s]
# Copy each cell's hindex into the matching chunk of convolution values and
# rewrite the convolution file in place.
for c in tqdm(range(103), total=103):
    conv_path = f"../../urbangrammar_samba/spatial_signatures/morphometrics/convolutions/conv_{c}.pq"
    conv = pd.read_parquet(conv_path)

    # Only the 'hindex' column is needed, so read just that column with plain
    # pandas instead of loading the whole GeoDataFrame with its geometries.
    cell_ids = pd.read_parquet(folder + f'cells_{c}.pq', columns=['hindex'])

    # Inserting a Series aligns it on the index labels of `conv`.
    # NOTE(review): assumes conv and the cells table share the same index -- confirm.
    conv.insert(0, 'hindex', cell_ids['hindex'])
    conv.to_parquet(conv_path)
100%|██████████| 103/103 [12:18<00:00, 7.17s/it]