Download data for a functional layer of Spatial Signatures¶
This notebook downloads and prepares data for a functional layer of Spatial Signatures.
from download import download
import geopandas as gpd
import pandas as pd
import osmnx as ox
from tqdm import tqdm
from glob import glob
import rioxarray as ra
import pyproj
import zipfile
import tarfile
from shapely.geometry import box, mapping
import requests
import datetime
Population estimates¶
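Population estimates for England, Scotland and Wales. The English data are published per region, so each region is downloaded separately. Note that the downloads below do not include a file for the London region.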
ONS data¶
# ONS mid-2019 output-area population estimates: South West of England.
# The zip archive is unpacked into its own regional folder.
download(
    'https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinthesouthwestregionofengland%2fmid2019sape22dt10g/sape22dt10gmid2019southwest.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/south_west_england',
    kind='zip',
)
Creating data folder...
Downloading data from https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinthesouthwestregionofengland%2fmid2019sape22dt10g/sape22dt10gmid2019southwest.zip (1 byte)
file_sizes: 17.4MB [00:00, 72.8MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/south_west_england
'../../urbangrammar_samba/functional_data/population_estimates/south_west_england'
# ONS mid-2019 output-area population estimates: Yorkshire and the Humber.
# The zip archive is unpacked into its own regional folder.
download(
    'https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesintheyorkshireandthehumberregionofengland%2fmid2019sape22dt10c/sape22dt10cmid2019yorkshireandthehumber.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/yorkshire_humber_england',
    kind='zip',
)
Creating data folder...
Downloading data from https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesintheyorkshireandthehumberregionofengland%2fmid2019sape22dt10c/sape22dt10cmid2019yorkshireandthehumber.zip (1 byte)
file_sizes: 16.9MB [00:00, 72.4MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/yorkshire_humber_england
'../../urbangrammar_samba/functional_data/population_estimates/yorkshire_humber_england'
# ONS mid-2019 output-area population estimates: South East of England.
# The zip archive is unpacked into its own regional folder.
download(
    'https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinthesoutheastregionofengland%2fmid2019sape22dt10i/sape22dt10imid2019southeast.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/south_east_england',
    kind='zip',
)
Creating data folder...
Downloading data from https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinthesoutheastregionofengland%2fmid2019sape22dt10i/sape22dt10imid2019southeast.zip (1 byte)
file_sizes: 27.6MB [00:00, 71.8MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/south_east_england
'../../urbangrammar_samba/functional_data/population_estimates/south_east_england'
# ONS mid-2019 output-area population estimates: East Midlands.
# The zip archive is unpacked into its own regional folder.
download(
    'https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesintheeastmidlandsregionofengland%2fmid2019sape22dt10f/sape22dt10fmid2019eastmidlands.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/east_midlands_england',
    kind='zip',
)
Creating data folder...
Downloading data from https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesintheeastmidlandsregionofengland%2fmid2019sape22dt10f/sape22dt10fmid2019eastmidlands.zip (1 byte)
file_sizes: 14.5MB [00:00, 104MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/east_midlands_england
'../../urbangrammar_samba/functional_data/population_estimates/east_midlands_england'
# ONS mid-2019 output-area population estimates: North West of England.
# The zip archive is unpacked into its own regional folder.
download(
    'https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinthenorthwestregionofengland%2fmid2019sape22dt10b/sape22dt10bmid2019northwest.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/north_west_england',
    kind='zip',
)
Creating data folder...
Downloading data from https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinthenorthwestregionofengland%2fmid2019sape22dt10b/sape22dt10bmid2019northwest.zip (1 byte)
file_sizes: 23.0MB [00:00, 67.3MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/north_west_england
'../../urbangrammar_samba/functional_data/population_estimates/north_west_england'
# ONS mid-2019 output-area population estimates: East of England.
# The zip archive is unpacked into its own regional folder.
download(
    'https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesintheeastregionofengland%2fmid2019sape22dt10h/sape22dt10hmid2019east.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/east_england',
    kind='zip',
)
Creating data folder...
Downloading data from https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesintheeastregionofengland%2fmid2019sape22dt10h/sape22dt10hmid2019east.zip (1 byte)
file_sizes: 18.8MB [00:00, 111MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/east_england
'../../urbangrammar_samba/functional_data/population_estimates/east_england'
# ONS mid-2019 output-area population estimates: Wales.
# The zip archive is unpacked into its own folder.
download(
    'https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinwales%2fmid2019sape22dt10j/sape22dt10jmid2019wales.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/wales',
    kind='zip',
)
Creating data folder...
Downloading data from https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinwales%2fmid2019sape22dt10j/sape22dt10jmid2019wales.zip (1 byte)
file_sizes: 9.60MB [00:00, 97.4MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/wales
'../../urbangrammar_samba/functional_data/population_estimates/wales'
# ONS mid-2019 output-area population estimates: North East of England.
# The zip archive is unpacked into its own regional folder.
download(
    'https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinthenortheastregionofengland%2fmid2019sape22dt10d/sape22dt10dmid2019northeast.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/north_east_england',
    kind='zip',
)
Creating data folder...
Downloading data from https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinthenortheastregionofengland%2fmid2019sape22dt10d/sape22dt10dmid2019northeast.zip (1 byte)
file_sizes: 8.39MB [00:00, 68.0MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/north_east_england
'../../urbangrammar_samba/functional_data/population_estimates/north_east_england'
# ONS mid-2019 output-area population estimates: West Midlands.
# The zip archive is unpacked into its own regional folder.
download(
    'https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinthewestmidlandsregionofengland%2fmid2019sape22dt10e/sape22dt10emid2019westmidlands.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/west_midlands_england',
    kind='zip',
)
Creating data folder...
Downloading data from https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fcensusoutputareaestimatesinthewestmidlandsregionofengland%2fmid2019sape22dt10e/sape22dt10emid2019westmidlands.zip (1 byte)
file_sizes: 17.7MB [00:00, 69.1MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/west_midlands_england
'../../urbangrammar_samba/functional_data/population_estimates/west_midlands_england'
Geometries¶
# 2011 output-area boundaries for England (zipped shapefile from the UK Data Service).
download(
    'https://borders.ukdataservice.ac.uk/ukborders/easy_download/prebuilt/shape/England_oa_2011.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/oa_geometry_england',
    kind='zip',
)
Creating data folder...
Downloading data from https://borders.ukdataservice.ac.uk/ukborders/easy_download/prebuilt/shape/England_oa_2011.zip (388.3 MB)
file_sizes: 100%|████████████████████████████| 407M/407M [00:08<00:00, 49.9MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/oa_geometry_england
'../../urbangrammar_samba/functional_data/population_estimates/oa_geometry_england'
# 2011 boundaries for Wales (zipped shapefile from the UK Data Service).
# Note that this is the `Wales_oac_2011` archive, not an `_oa_` one as for England.
download(
    'https://borders.ukdataservice.ac.uk/ukborders/easy_download/prebuilt/shape/Wales_oac_2011.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/oa_geometry_wales',
    kind='zip',
)
Creating data folder...
Downloading data from https://borders.ukdataservice.ac.uk/ukborders/easy_download/prebuilt/shape/Wales_oac_2011.zip (30.0 MB)
file_sizes: 100%|██████████████████████████| 31.4M/31.4M [00:00<00:00, 32.5MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/oa_geometry_wales
'../../urbangrammar_samba/functional_data/population_estimates/oa_geometry_wales'
Data cleaning and processing¶
# Load the output-area boundaries for England and Wales from the shapefiles
# extracted by the two geometry downloads.
england = gpd.read_file('../../urbangrammar_samba/functional_data/population_estimates/oa_geometry_england/england_oa_2011.shp')
wales = gpd.read_file('../../urbangrammar_samba/functional_data/population_estimates/oa_geometry_wales/wales_oac_2011.shp')
# Stack both nations into one table, keeping only four of the Welsh columns.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0;
# `pd.concat` produces the same result (original row labels are preserved).
oa = pd.concat([england, wales[['code', 'label', 'name', 'geometry']]])
files = glob('../../urbangrammar_samba/functional_data/population_estimates/*/*.xlsx', recursive=True)
%time merged = pd.concat([pd.read_excel(f, sheet_name='Mid-2019 Persons', header=0, skiprows=4) for f in files])
CPU times: user 12min 2s, sys: 640 ms, total: 12min 2s
Wall time: 12min 5s
# Attach the population table to the boundaries by output-area code.
# A left join keeps every geometry, including those without a matching estimate.
population_est = oa.merge(
    merged,
    how='left',
    left_on='code',
    right_on='OA11CD',
)
Add Scotland¶
Scottish data are shipped differently.
Data¶
# Scottish small-area population estimates (SAPE) archive from statistics.gov.scot.
download(
    'http://statistics.gov.scot/downloads/file?id=438c9dc6-dca0-48d5-995c-e3bb1d34e29e%2FSAPE_2011DZ_2001-2019_Five_and_broad_age_groups.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/scotland',
    kind='zip',
)
Creating data folder...
Downloading data from https://scottish-government-files.s3.amazonaws.com/438c9dc6-dca0-48d5-995c-e3bb1d34e29e/SAPE_2011DZ_2001-2019_Five_and_broad_age_groups.zip (35.8 MB)
file_sizes: 100%|██████████████████████████| 37.5M/37.5M [00:02<00:00, 14.0MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/scotland
'../../urbangrammar_samba/functional_data/population_estimates/scotland'
# Load the 2019 table from the extracted Scottish archive.
pop_scot = pd.read_csv(
    '../../urbangrammar_samba/functional_data/population_estimates/scotland/data - statistics.gov.scot - SAPE_2011DZ_2019_Five.csv'
)
# Keep only the rows reported for both sexes combined.
pop_scot = pop_scot.loc[pop_scot['Sex'] == 'All']
# Sum the remaining values per geography code.
# NOTE(review): this adds up every remaining row of a code; if the CSV also
# carries an all-ages total row next to the age bands, the result would be
# double-counted -- confirm against the file.
counts = pop_scot.groupby('GeographyCode')[['Value']].sum()
Geometry¶
# 2011 data zone boundaries for Scotland (zipped shapefile).
download(
    'http://sedsh127.sedsh.gov.uk/Atom_data/ScotGov/ZippedShapefiles/SG_DataZoneBdry_2011.zip',
    '../../urbangrammar_samba/functional_data/population_estimates/dz_geometry_scotland',
    kind='zip',
)
Creating data folder...
Downloading data from http://sedsh127.sedsh.gov.uk/Atom_data/ScotGov/ZippedShapefiles/SG_DataZoneBdry_2011.zip (18.2 MB)
file_sizes: 100%|██████████████████████████| 19.1M/19.1M [00:06<00:00, 3.05MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/population_estimates/dz_geometry_scotland
'../../urbangrammar_samba/functional_data/population_estimates/dz_geometry_scotland'
# Read the Scottish data zone boundaries from the extracted folder.
data_zones = gpd.read_file('../../urbangrammar_samba/functional_data/population_estimates/dz_geometry_scotland')
# Join the per-zone totals (indexed by geography code) onto the boundaries;
# the default inner join drops zones without a count.
scotland = data_zones.merge(counts, left_on='DataZone', right_index=True)
# Reduce to the shared schema: code, population, geometry.
scotland = scotland[['DataZone', 'Value', 'geometry']].rename(columns={'DataZone': 'code', 'Value': 'population'})
# Bring England & Wales to the same schema and stack Scotland underneath.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0;
# `pd.concat` produces the same result (original row labels are preserved).
population_est = pd.concat([
    population_est[['code', 'All Ages', 'geometry']].rename(columns={'All Ages': 'population'}),
    scotland,
])
population_est.to_parquet('../../urbangrammar_samba/functional_data/population_estimates/gb_population_estimates.pq')
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:1: UserWarning: this is an initial implementation of Parquet/Feather file support and associated metadata. This is tracking version 0.1.0 of the metadata specification at https://github.com/geopandas/geo-arrow-spec
This metadata specification does not yet make stability promises. We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.
To further ignore this warning, you can do:
import warnings; warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')
"""Entry point for launching an IPython kernel.
WorldPop¶
The data are downloaded already clipped to GB, so we only have to reproject them to OSGB.
# WorldPop 2020 constrained population raster for GBR, saved as a single GeoTIFF.
download(
    'ftp://ftp.worldpop.org.uk/GIS/Population/Global_2000_2020_Constrained/2020/BSGM/GBR/gbr_ppp_2020_constrained.tif',
    '../../urbangrammar_samba/functional_data/population_estimates/world_pop/gbr_ppp_2020_constrained.tif',
)
Downloading data from ftp://ftp.worldpop.org.uk/GIS/Population/Global_2000_2020_Constrained/2020/BSGM/GBR/gbr_ppp_2020_constrained.tif (32.7 MB)
file_sizes: 100%|██████████████████████████| 34.3M/34.3M [00:01<00:00, 19.2MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/population_estimates/world_pop/gbr_ppp_2020_constrained.tif
'../../urbangrammar_samba/functional_data/population_estimates/world_pop/gbr_ppp_2020_constrained.tif'
Reproject to OSGB¶
# Open the downloaded WorldPop raster and display the CRS it ships with.
wp = ra.open_rasterio(
    "../../urbangrammar_samba/functional_data/population_estimates/world_pop/gbr_ppp_2020_constrained.tif"
)
wp.rio.crs
CRS.from_epsg(4326)
%time wp_osgb = wp.rio.reproject(pyproj.CRS(27700).to_wkt())
CPU times: user 8.28 s, sys: 896 ms, total: 9.17 s
Wall time: 9.18 s
# Display the CRS of the reprojected raster to confirm it is now EPSG:27700.
wp_osgb.rio.crs
CRS.from_epsg(27700)
# Save the reprojected raster next to the original download.
wp_osgb.rio.to_raster(
    "../../urbangrammar_samba/functional_data/population_estimates/world_pop/gbr_ppp_2020_constrained_osgb.tif"
)
POIs¶
Geolytix retail¶
Geolytix retail POIs: https://drive.google.com/u/0/uc?id=1B8M7m86rQg2sx2TsHhFa2d-x-dZ1DbSy (there is no obvious way to fetch them programmatically, so they were downloaded manually)
# Load the manually downloaded Geolytix retail points and preview two rows.
geolytix = pd.read_csv(
    '../../urbangrammar_samba/functional_data/pois/GEOLYTIX - RetailPoints/geolytix_retailpoints_v17_202008.csv'
)
geolytix.head(n=2)
id | retailer | fascia | store_name | add_one | add_two | town | suburb | postcode | long_wgs | lat_wgs | bng_e | bng_n | pqi | open_date | size_band | geom_p_4326 | geom_p_27700 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1010015805 | The Southern Co-operative | The Co-operative Food PFS | Co-op Lavant Road Chichester | Unit 2 Summersdale Retail Park | Lavant Road | Chichester | Summersdale | PO19 5RD | -0.784194 | 50.859193 | 485668.1622 | 107335.2522 | Rooftop geocoded by Geolytix | NaN | < 3,013 ft2 (280m2) | 0101000020E6100000B52C23D51D18E9BF761C8509FA6D... | 0101000020346C0000849822A690A41D4126241B097434... |
1 | 1010014166 | Marks and Spencer | Marks and Spencer BP | M&S Colchester Eastwood BP | Ipswich Road | NaN | Colchester | Highwoods | CO4 0EU | 0.923960 | 51.906927 | 601229.3812 | 227194.2254 | Rooftop geocoded by Geolytix | NaN | < 3,013 ft2 (280m2) | 0101000020E6100000F09B5AF81391ED3FCF1F012C16F4... | 0101000020346C0000DCFE2FC31A5922418A6885CDD1BB... |
We already have coordinates in OSGB, no need to preprocess.
Listed buildings¶
We have to merge English, Scottish and Welsh data.
The English data were downloaded manually from https://services.historicengland.org.uk/NMRDataDownload/OpenPages/Download.aspx
# Listed buildings for Scotland (zip archive from inspire.hes.scot).
download(
    'https://inspire.hes.scot/AtomService/DATA/lb_scotland.zip',
    '../../urbangrammar_samba/functional_data/pois/listed_buildings/scotland',
    kind='zip',
)
Creating data folder...
Downloading data from https://inspire.hes.scot/AtomService/DATA/lb_scotland.zip (6.1 MB)
file_sizes: 100%|██████████████████████████| 6.41M/6.41M [00:01<00:00, 3.43MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/pois/listed_buildings/scotland
'../../urbangrammar_samba/functional_data/pois/listed_buildings/scotland'
# Listed buildings for Wales (zip archive from lle.gov.wales).
download(
    'http://lle.gov.wales/catalogue/item/ListedBuildings.zip',
    '../../urbangrammar_samba/functional_data/pois/listed_buildings/wales',
    kind='zip',
)
Creating data folder...
Downloading data from http://lle.gov.wales/catalogue/item/ListedBuildings.zip (2.5 MB)
file_sizes: 100%|██████████████████████████| 2.57M/2.57M [00:00<00:00, 13.3MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/pois/listed_buildings/wales
'../../urbangrammar_samba/functional_data/pois/listed_buildings/wales'
Processing¶
# Unpack the manually downloaded English listed-buildings archive.
# The body of the `with` statement must be indented; written flush-left it is
# a syntax error.
with zipfile.ZipFile("../../urbangrammar_samba/functional_data/pois/listed_buildings/Listed Buildings.zip", 'r') as zip_ref:
    zip_ref.extractall("../../urbangrammar_samba/functional_data/pois/listed_buildings/england")
# NOTE: this rebinds `england`, which earlier held the output-area boundaries.
england = gpd.read_file('../../urbangrammar_samba/functional_data/pois/listed_buildings/england/ListedBuildings_23Oct2020.shp')
england.head(2)
ListEntry | Name | Location | Grade | ListDate | AmendDate | LegacyUID | NGR | CaptureSca | Easting | Northing | Hyperlink | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1021466 | 20 AND 20A | Corsley, Wiltshire, BA12 | II | 1987-11-05 | None | 313736 | ST8338544428 | 1:2500 | 383389.03 | 144430.4185 | https://historicengland.org.uk/listing/the-lis... | POINT (383389.030 144430.419) |
1 | 1021467 | TENNIS CORNER FARMHOUSE WITH GRANARY AND STABLE | Dilton Marsh, Wiltshire, BA11 | II | 1987-11-05 | None | 313738 | ST 82839 50851 | 1:2500 | 382839.00 | 150851.3608 | https://historicengland.org.uk/listing/the-lis... | POINT (382839.000 150851.361) |
# Load the Scottish listed buildings and preview two rows.
# NOTE: this rebinds `scotland`, previously used for the population data zones.
scotland = gpd.read_file(
    '../../urbangrammar_samba/functional_data/pois/listed_buildings/scotland/Listed_Buildings.shp'
)
scotland.head(n=2)
ENT_REF | ENT_SEQ | ENT_TITLE | ACCURACY | X | Y | CREATED | COMPILER | UPDATED | PRECISION | ... | AMENDED | LINK | LEGISLATIO | CATEGORY | GROUPCAT | CLASS | LOCAL_AUTH | NAT_PARK | PARBUR | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 330184.0 | 1.0 | Ayton Manse | Location Digitised on OS MasterMap | 392542.0 | 660715.0 | 2003-01-01 | Historic Environment Scotland | None | Within 10m | ... | None | http://portal.historicenvironment.scot/designa... | http://www.legislation.gov.uk/ukpga/1997/9 | C | None | Cultural | Scottish Borders | None | Ayton | POINT (392542.000 660715.000) |
1 | 330186.0 | 2.0 | Railway Bridge, Ayton | Location Digitised on OS MasterMap | 392330.0 | 659774.0 | 2003-01-01 | Historic Environment Scotland | None | Within 10m | ... | None | http://portal.historicenvironment.scot/designa... | http://www.legislation.gov.uk/ukpga/1997/9 | C | None | Cultural | Scottish Borders | None | Ayton | POINT (392330.000 659774.000) |
2 rows × 24 columns
# Load the Welsh listed buildings and preview two rows.
# NOTE: this rebinds `wales`, previously used for the output-area boundaries.
wales = gpd.read_file(
    '../../urbangrammar_samba/functional_data/pois/listed_buildings/wales/Cadw_ListedBuildingsMPoint.shp'
)
wales.head(n=2)
RecordNumb | Name | Name_cy | Designatio | Grade | Location | BroadClass | BroadClas0 | Report | geometry | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Kelsterton Hall | None | 1949-08-08 | II | Set back slightly from the road at the junctio... | Recreational | Hamdden | http://cadwpublic-api.azurewebsites.net/report... | MULTIPOINT (327836.000 370714.000) |
1 | 2 | Plas Bellin | None | 1962-01-21 | II | To W of road, approximately 900m N of crossroa... | Domestic | Domestig | http://cadwpublic-api.azurewebsites.net/report... | MULTIPOINT (326364.000 369400.000) |
# Only the locations are needed, so keep just the geometry column of each
# national layer and stack them in the order England, Scotland, Wales.
# NOTE(review): the previews show POINT geometries for England and Scotland
# but MULTIPOINT for Wales, so the combined layer mixes both geometry types.
listed = pd.concat([layer[['geometry']] for layer in (england, scotland, wales)])
# Write with a fresh 0..n-1 index; `listed` itself keeps the per-layer labels.
listed.reset_index(drop=True).to_parquet(
    "../../urbangrammar_samba/functional_data/pois/listed_buildings/listed_buildings_gb.pq"
)
<ipython-input-44-77ad3d860075>:2: UserWarning: this is an initial implementation of Parquet/Feather file support and associated metadata. This is tracking version 0.1.0 of the metadata specification at https://github.com/geopandas/geo-arrow-spec
This metadata specification does not yet make stability promises. We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.
To further ignore this warning, you can do:
import warnings; warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')
listed.reset_index(drop=True).to_parquet("../../urbangrammar_samba/functional_data/pois/listed_buildings/listed_buildings_gb.pq")
Night lights¶
We need to clip it to the extent of GB (the dataset has global coverage) and reproject it to OSGB.
# Fetch the VIIRS night-lights composite archive and unpack it into the
# employment folder.
nl_url = 'https://data.ngdc.noaa.gov/instruments/remote-sensing/passive/spectrometers-radiometers/imaging/viirs/dnb_composites/v10//201903/vcmcfg/SVDNB_npp_20190301-20190331_75N060W_vcmcfg_v10_c201904071900.tgz'
nl_archive = '../../urbangrammar_samba/functional_data/employment/SVDNB_npp_20190301-20190331_75N060W_vcmcfg_v10_c201904071900.tgz'
# Stream the response so the archive is never held in memory as a whole, and
# give the request a timeout so a stalled connection cannot hang the notebook.
with requests.get(nl_url, stream=True, timeout=60) as response:
    # Fail loudly on an HTTP error instead of saving the error page as a .tgz;
    # the check runs before the target file is opened, so nothing is written
    # on failure.
    response.raise_for_status()
    # The `with` block closes the file; no explicit close() is needed.
    with open(nl_archive, "wb") as down:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            down.write(chunk)
with tarfile.open(nl_archive, 'r') as tar_ref:
    tar_ref.extractall("../../urbangrammar_samba/functional_data/employment")
Clip and reproject¶
# Open the extracted night-lights GeoTIFF and display its CRS.
nl = ra.open_rasterio(
    '../../urbangrammar_samba/functional_data/employment/SVDNB_npp_20190301-20190331_75N060W_vcmcfg_v10_c201904071900.avg_rade9h.tif'
)
nl.rio.crs
CRS.from_epsg(4326)
extent = gpd.read_parquet("../../urbangrammar_samba/spatial_signatures/local_auth_chunks.pq")
extent = extent.to_crs(4326)
%time nl_clipped = nl.rio.clip([mapping(box(*extent.total_bounds))], all_touched=True)
CPU times: user 3.96 s, sys: 6.95 s, total: 10.9 s
Wall time: 29.5 s
%time nl_osgb = nl_clipped.rio.reproject(pyproj.CRS(27700).to_wkt())
CPU times: user 766 ms, sys: 50.2 ms, total: 816 ms
Wall time: 816 ms
# Save the clipped, reprojected raster, then plot it with the colour scale
# limited to the 0-7 range.
nl_osgb.rio.to_raster(
    "../../urbangrammar_samba/functional_data/employment/night_lights_osgb.tif"
)
nl_osgb.plot(vmin=0, vmax=7, figsize=(12, 12))
<matplotlib.collections.QuadMesh at 0x7fa79c138160>
Postcodes¶
Keeping only active postcodes, relevant columns and determining their age.
# NHS Postcode Directory archive, unpacked into the postcode folder.
download(
    'https://www.arcgis.com/sharing/rest/content/items/b6e6715fa1984648b5e690b6a8519e53/data',
    '../../urbangrammar_samba/functional_data/postcode/nhspd',
    kind='zip',
)
Creating data folder...
Downloading data from https://ago-item-storage.s3.us-east-1.amazonaws.com/b6e6715fa1984648b5e690b6a8519e53/NHSPD_AUG_2020_UK_FULL.zip?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEGcaCXVzLWVhc3QtMSJGMEQCIE1aY0Pvl6tQbhtVpUFLoDlgPOfnYQeua3PqJo%2BY4ha7AiBmEHPkSi6dk7cfbOMACbdFZIJkTemkkKYLzVaD5gPFnSq9Awi%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAAaDDYwNDc1ODEwMjY2NSIMqVOVkqTsT8bs3%2FTqKpEDPg1CUnhXrkkNksaeepovynl5NWDCyWb5iMjrMuss6PTKTyCod85tX1OfcgXIhUdARci%2FKYA5fOpfPJGJpuJ%2B1qoljKytKriAyaNU%2FRonRCgB%2FSti5aaQy8i7uK923IlQwx1tbNswcBs2eO6bAYQagoRIsiv%2FynfrVAoot0zgl9nxGomHFYugdHXKoqqHSmAkYf%2FzEZmuehv0hPDealY%2FPUFbopmmGro0XY1ETls52NzkUJKhaqdz9kofSWpRrrOuVi8N3%2Ft9ggvENoVnAhw62OpBKLIoGvKyUa8XVX4qD5uljNRB6N9fpbR2BcxDERzhYDR2zSd0z6XCWjAI6UBBLcZy%2FOd2NMcP5tTaQWeA2gVtIReskJPB2MF60srFTUl07eEFub41lxQQyEqJ05uNg5mp2iwokIr7mR3Nq7GJxrzIH80QgYMEXiqpUlzptwYumC0%2F3JdqsXpNlyr0m4PR5VdcskpyYnsEGpnetaz%2B0G4%2FVhRlB1n4DIFfdMvahSf%2F9h%2FkhDyq7ADtcGie%2Ba3V%2Bhcw3MyF%2FQU67AFvPcSHt9IlZ4h%2B3Ls3iBfbnynIjGG96K8W23RkCzosvVeEzqvBd7oHufO3LrcegBJN0rvK4jwLbzomgGjTfGrHvfUSpPl4gUSAZp1CY2rqbng8blAX5tyGwo7%2BnOSybr33uDiJb9sFd8InpfOeV7Yud%2BB3gIQEdyOnS5XgR7hth3AIJPRsXbCR2yWJrkx%2BFjj4nDsNlVG73RbjNUL%2FfV4nm3lAsT5oUBCbFDO72xiEW9f3%2BgLUiAmwWmapM83nlhYF4ZAx%2Fltl%2BPD5BsvsF8zGUhCFXketa%2Bh3nYgPST1MYpT7ANE68XXJEtA9AQ%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20201103T145506Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAYZTTEKKETFSSP5W2%2F20201103%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=f290dd148c10a89a40b5ef7f3f0d15eda6abc484fac92116edba6a1c46205ac9 (104.8 MB)
file_sizes: 100%|████████████████████████████| 110M/110M [00:04<00:00, 24.2MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/postcode/nhspd
'../../urbangrammar_samba/functional_data/postcode/nhspd'
# Read the headerless NHSPD table. Only the first six columns are used
# afterwards, so parse just those: it saves memory and avoids the DtypeWarning
# raised for mixed-type columns further to the right (7, 10, 11, ...).
# With `header=None` the columns keep their integer labels 0-5.
postcodes = pd.read_csv(
    "../../urbangrammar_samba/functional_data/postcode/nhspd/Data/nhg20aug.csv",
    header=None,
    usecols=range(6),
)
/opt/conda/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3146: DtypeWarning: Columns (7,10,11,17,20,26,35) have mixed types.Specify dtype option on import or set low_memory=False.
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
# Restrict the table to its first six columns.
postcodes = postcodes.iloc[:, :6]
# Rows with no value in column 3 (termination date) are the active postcodes.
existing = postcodes.loc[postcodes[3].isna()]
# Of those, keep the ones that have a value in column 4 (x coordinate).
located = existing.loc[existing[4].notna()]
column_names = {0: 'postcode', 1: 'postcode2', 2: 'introduced', 3: 'terminated', 4: 'x', 5: 'y'}
located = located.rename(columns=column_names)
# Introduction dates are parsed as year+month; age is the number of whole days
# between that date and the moment this cell is run.
located['introduced'] = pd.to_datetime(located['introduced'], format="%Y%m")
located['age'] = (pd.to_datetime('today') - located['introduced']).dt.days
# Save without the second postcode column and the (empty) termination date.
located.drop(columns=['postcode2', 'terminated']).to_parquet(
    '../../urbangrammar_samba/functional_data/postcode/postcodes_gb.pq'
)
Food hygiene rating scheme¶
FHRS https://data.cdrc.ac.uk/dataset/food-hygiene-rating-scheme-fhrs-ratings (requires login)
# Load the manually obtained FHRS location table and display it.
fhrs = pd.read_csv(
    '../../urbangrammar_samba/functional_data/fhrs/Data/fhrs_location_20200528.csv'
)
fhrs
BusinessID | latitude | longitude | bng_east | bng_north | utm29_east | utm29_north | utm30_east | utm30_north | utm31_east | utm31_north | utm_zone | date_created | date_updated | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 51.840311 | -0.634672 | 494159.3436 | 216625.1394 | 1.075836e+06 | 5776434.811 | 662947.2937 | 5745922.713 | 662947.2937 | 5745922.713 | 30.0 | 2012-11-22 | NaN |
1 | 2 | 51.914273 | -1.054004 | 465163.7254 | 224393.1735 | 1.046116e+06 | 5781394.193 | 633842.5939 | 5753292.773 | 633842.5939 | 5753292.773 | 30.0 | 2012-11-22 | NaN |
2 | 3 | 0.000000 | 0.000000 | 0.0000 | 0.0000 | 0.000000e+00 | 0.000 | 0.0000 | 0.000 | 0.0000 | 0.000 | NaN | 2012-11-22 | NaN |
3 | 4 | 51.762521 | -0.744901 | 486714.3979 | 207837.3419 | 1.069245e+06 | 5766934.601 | 655622.2705 | 5737031.704 | 655622.2705 | 5737031.704 | 30.0 | 2012-11-22 | 2018-12-26 |
4 | 5 | 51.795047 | -0.716069 | 488640.4326 | 211489.2210 | 1.070818e+06 | 5770769.654 | 657498.3679 | 5740710.457 | 657498.3679 | 5740710.457 | 30.0 | 2012-11-22 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5122160 | 883771 | 0.000000 | 0.000000 | 0.0000 | 0.0000 | 0.000000e+00 | 0.000 | 0.0000 | 0.000 | 0.0000 | 0.000 | NaN | 2020-05-01 | NaN |
5122161 | 1014862 | 52.633659 | -1.130756 | 458925.5825 | 304342.1474 | 1.032123e+06 | 5860641.674 | 626498.9755 | 5833159.625 | 626498.9755 | 5833159.625 | 30.0 | 2020-04-30 | NaN |
5122162 | 1014863 | 52.646938 | -1.075170 | 462668.4778 | 305866.0147 | 1.035712e+06 | 5862528.654 | 630220.8779 | 5834735.509 | 630220.8779 | 5834735.509 | 30.0 | 2020-04-30 | NaN |
5122163 | 1014864 | 55.761562 | -4.696288 | 230919.6035 | 655316.2701 | 7.699701e+05 | 6187931.219 | 393561.5047 | 6180845.309 | 393561.5047 | 6180845.309 | 30.0 | 2020-04-25 | NaN |
5122164 | 1014865 | 55.639553 | -4.814533 | 222951.0199 | 642038.1479 | 7.633777e+05 | 6173911.049 | 385787.4574 | 6167457.148 | 385787.4574 | 6167457.148 | 30.0 | 2020-04-25 | NaN |
5122165 rows × 14 columns
No need to preprocess at the moment. Contains OSGB coordinates for each point.
Business census¶
https://data.cdrc.ac.uk/dataset/business-census (requires login)
# Character encoding noted for the business census data.
# NOTE(review): not used in any of the visible cells -- presumably meant to be
# passed to the CSV reader; confirm.
encoding = "ISO-8859-1"
Get geometries:
either geocode the addresses (could be expensive)
or link to postcode points
Workplace density¶
Download workplace population data from the Scottish and English censuses, combine them and link them to geometry.
# Scotland's Census additional table WP605SCwz, saved as scotland_industry.csv.
download(
    'http://www.scotlandscensus.gov.uk/documents/additional_tables/WP605SCwz.csv',
    '../../urbangrammar_samba/functional_data/employment/workplace/scotland_industry.csv',
)
Downloading data from https://www.scotlandscensus.gov.uk/documents/additional_tables/WP605SCwz.csv (234 kB)
file_sizes: 100%|████████████████████████████| 239k/239k [00:00<00:00, 2.47MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/scotland_industry.csv
'../../urbangrammar_samba/functional_data/employment/workplace/scotland_industry.csv'
# Nomis bulk CSV (dataset nm_1314_1), saved as england_wales_industry.csv.
# Uses a 60 s timeout rather than the library default.
download(
    'https://www.nomisweb.co.uk/api/v01/dataset/nm_1314_1.bulk.csv?time=latest&measures=20100&geography=TYPE262',
    '../../urbangrammar_samba/functional_data/employment/workplace/england_wales_industry.csv',
    timeout=60,
)
Downloading data from https://www.nomisweb.co.uk/api/v01/dataset/nm_1314_1.bulk.csv?time=latest&measures=20100&geography=TYPE262 (5.1 MB)
file_sizes: 100%|██████████████████████████| 5.35M/5.35M [00:00<00:00, 7.41MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/england_wales_industry.csv
'../../urbangrammar_samba/functional_data/employment/workplace/england_wales_industry.csv'
# Nomis bulk CSV (dataset nm_1300_1), saved as north_west.csv.
download(
    'https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265922TYPE299',
    '../../urbangrammar_samba/functional_data/employment/workplace/north_west.csv',
)
Downloading data from https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265922TYPE299 (781 kB)
file_sizes: 100%|████████████████████████████| 799k/799k [00:00<00:00, 6.92MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/north_west.csv
'../../urbangrammar_samba/functional_data/employment/workplace/north_west.csv'
# Nomis bulk CSV (dataset nm_1300_1), saved as east.csv.
download(
    'https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265926TYPE299',
    '../../urbangrammar_samba/functional_data/employment/workplace/east.csv',
)
Downloading data from https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265926TYPE299 (636 kB)
file_sizes: 100%|████████████████████████████| 651k/651k [00:00<00:00, 6.86MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/east.csv
'../../urbangrammar_samba/functional_data/employment/workplace/east.csv'
# Nomis bulk CSV (dataset nm_1300_1), saved as east_midlands.csv.
download(
    'https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265924TYPE299',
    '../../urbangrammar_samba/functional_data/employment/workplace/east_midlands.csv',
)
Downloading data from https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265924TYPE299 (492 kB)
file_sizes: 100%|████████████████████████████| 504k/504k [00:00<00:00, 6.91MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/east_midlands.csv
'../../urbangrammar_samba/functional_data/employment/workplace/east_midlands.csv'
# Nomis bulk CSV (dataset nm_1300_1), saved as london.csv.
download(
    'https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265927TYPE299',
    '../../urbangrammar_samba/functional_data/employment/workplace/london.csv',
)
Downloading data from https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265927TYPE299 (839 kB)
file_sizes: 100%|████████████████████████████| 859k/859k [00:00<00:00, 6.74MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/london.csv
'../../urbangrammar_samba/functional_data/employment/workplace/london.csv'
# Nomis bulk CSV (dataset nm_1300_1, geography 2013265921TYPE299),
# stored as north_east.csv in the employment/workplace folder.
download(
    'https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265921TYPE299',
    '../../urbangrammar_samba/functional_data/employment/workplace/north_east.csv',
)
Downloading data from https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265921TYPE299 (294 kB)
file_sizes: 100%|████████████████████████████| 301k/301k [00:00<00:00, 5.98MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/north_east.csv
'../../urbangrammar_samba/functional_data/employment/workplace/north_east.csv'
# Nomis bulk CSV (dataset nm_1300_1, geography 2013265928TYPE299),
# stored as south_east.csv in the employment/workplace folder.
# This request passes an explicit 30 s timeout.
download(
    'https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265928TYPE299',
    '../../urbangrammar_samba/functional_data/employment/workplace/south_east.csv',
    timeout=30,
)
Downloading data from https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265928TYPE299 (926 kB)
file_sizes: 100%|████████████████████████████| 948k/948k [00:00<00:00, 7.44MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/south_east.csv
'../../urbangrammar_samba/functional_data/employment/workplace/south_east.csv'
# Nomis bulk CSV (dataset nm_1300_1, geography 2013265929TYPE299),
# stored as south_west.csv in the employment/workplace folder.
download(
    'https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265929TYPE299',
    '../../urbangrammar_samba/functional_data/employment/workplace/south_west.csv',
)
Downloading data from https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265929TYPE299 (591 kB)
file_sizes: 100%|████████████████████████████| 605k/605k [00:00<00:00, 7.05MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/south_west.csv
'../../urbangrammar_samba/functional_data/employment/workplace/south_west.csv'
# Nomis bulk CSV (dataset nm_1300_1, geography 2013265925TYPE299),
# stored as west_midlands.csv in the employment/workplace folder.
download(
    'https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265925TYPE299',
    '../../urbangrammar_samba/functional_data/employment/workplace/west_midlands.csv',
)
Downloading data from https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265925TYPE299 (599 kB)
file_sizes: 100%|████████████████████████████| 614k/614k [00:00<00:00, 6.14MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/west_midlands.csv
'../../urbangrammar_samba/functional_data/employment/workplace/west_midlands.csv'
# Nomis bulk CSV (dataset nm_1300_1, geography 2013265923TYPE299),
# stored as yorkshire.csv in the employment/workplace folder.
download(
    'https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265923TYPE299',
    '../../urbangrammar_samba/functional_data/employment/workplace/yorkshire.csv',
)
Downloading data from https://www.nomisweb.co.uk/api/v01/dataset/nm_1300_1.bulk.csv?time=latest&measures=20100&geography=2013265923TYPE299 (578 kB)
file_sizes: 100%|████████████████████████████| 592k/592k [00:00<00:00, 6.67MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/yorkshire.csv
'../../urbangrammar_samba/functional_data/employment/workplace/yorkshire.csv'
# Download and unzip the nrscotland.gov.uk archive output-area-2011-mhw.zip
# into the scotland_oa folder.
download(
    'https://www.nrscotland.gov.uk/files/geography/output-area-2011-mhw.zip',
    '../../urbangrammar_samba/functional_data/employment/workplace/scotland_oa',
    kind='zip',
)
Creating data folder...
Downloading data from https://www.nrscotland.gov.uk/files/geography/output-area-2011-mhw.zip (31.2 MB)
file_sizes: 100%|██████████████████████████| 32.7M/32.7M [00:00<00:00, 65.3MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/employment/workplace/scotland_oa
'../../urbangrammar_samba/functional_data/employment/workplace/scotland_oa'
# Nomis bulk CSV (dataset nm_155_1, geography TYPE262), stored as
# wp_density_ew.csv. This request passes an explicit 30 s timeout.
download(
    'https://www.nomisweb.co.uk/api/v01/dataset/nm_155_1.bulk.csv?time=latest&measures=20100&geography=TYPE262',
    '../../urbangrammar_samba/functional_data/employment/workplace/wp_density_ew.csv',
    timeout=30,
)
Downloading data from https://www.nomisweb.co.uk/api/v01/dataset/nm_155_1.bulk.csv?time=latest&measures=20100&geography=TYPE262 (2.3 MB)
file_sizes: 100%|██████████████████████████| 2.46M/2.46M [00:00<00:00, 7.66MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/wp_density_ew.csv
'../../urbangrammar_samba/functional_data/employment/workplace/wp_density_ew.csv'
# Download and unzip the nrscotland.gov.uk archive
# workplacezones2011scotland.zip into the wpz_scotland folder.
download(
    'https://www.nrscotland.gov.uk/files//geography/products/workplacezones2011scotland.zip',
    '../../urbangrammar_samba/functional_data/employment/workplace/wpz_scotland',
    kind='zip',
)
Creating data folder...
Downloading data from https://www.nrscotland.gov.uk/files//geography/products/workplacezones2011scotland.zip (14.7 MB)
file_sizes: 100%|██████████████████████████| 15.4M/15.4M [00:00<00:00, 39.3MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/employment/workplace/wpz_scotland
'../../urbangrammar_samba/functional_data/employment/workplace/wpz_scotland'
# Scotland's Census additional table WP102SCca.csv, stored as
# wp_density_scotland.csv.
# Request the https:// address directly: the recorded run shows the data
# being served from https, so the plain-http URL only added a redirect.
download(
    'https://www.scotlandscensus.gov.uk/documents/additional_tables/WP102SCca.csv',
    '../../urbangrammar_samba/functional_data/employment/workplace/wp_density_scotland.csv',
)
Downloading data from https://www.scotlandscensus.gov.uk/documents/additional_tables/WP102SCca.csv (3 kB)
file_sizes: 100%|███████████████████████████| 2.79k/2.79k [00:00<00:00, 184kB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/wp_density_scotland.csv
'../../urbangrammar_samba/functional_data/employment/workplace/wp_density_scotland.csv'
# Scotland's Census additional table WP103SCwz.csv, stored as
# wp_pop_scotland.csv.
# Request the https:// address directly: the recorded run shows the data
# being served from https, so the plain-http URL only added a redirect.
download(
    'https://www.scotlandscensus.gov.uk/documents/additional_tables/WP103SCwz.csv',
    '../../urbangrammar_samba/functional_data/employment/workplace/wp_pop_scotland.csv',
)
Downloading data from https://www.scotlandscensus.gov.uk/documents/additional_tables/WP103SCwz.csv (462 kB)
file_sizes: 100%|████████████████████████████| 473k/473k [00:00<00:00, 4.64MB/s]
Successfully downloaded file to ../../urbangrammar_samba/functional_data/employment/workplace/wp_pop_scotland.csv
'../../urbangrammar_samba/functional_data/employment/workplace/wp_pop_scotland.csv'
# Unpack wz2011ukbgcv2.zip into the employment/workplace folder.
with zipfile.ZipFile(
    "../../urbangrammar_samba/functional_data/employment/workplace/wz2011ukbgcv2.zip",
    'r',
) as zip_ref:
    zip_ref.extractall(
        "../../urbangrammar_samba/functional_data/employment/workplace/"
    )
# Load the workplace-zone shapefile (WZ_2011_UK_BGC_V2.shp) and display it.
wpz_geom = gpd.read_file(
    '../../urbangrammar_samba/functional_data/employment/workplace/WZ_2011_UK_BGC_V2.shp'
)
wpz_geom
WZ11CD | LAD_DCACD | LAD11NM | LAD11NMW | BNG_E | BNG_N | LONG | LAT | geometry | |
---|---|---|---|---|---|---|---|---|---|
0 | E33000001 | E08000020 | Gateshead | None | 426394 | 563614 | -1.58929 | 54.96646 | POLYGON ((426363.000 563750.000, 426477.836 56... |
1 | E33000002 | E08000020 | Gateshead | None | 426186 | 563095 | -1.59258 | 54.96181 | POLYGON ((426497.000 563046.000, 426487.000 56... |
2 | E33000003 | E08000020 | Gateshead | None | 426570 | 563542 | -1.58654 | 54.96580 | POLYGON ((426786.032 563536.860, 426602.875 56... |
3 | E33000004 | E08000020 | Gateshead | None | 426828 | 563212 | -1.58254 | 54.96283 | POLYGON ((427052.405 563318.499, 427011.334 56... |
4 | E33000005 | E08000020 | Gateshead | None | 425707 | 562777 | -1.60009 | 54.95897 | POLYGON ((425818.791 562796.341, 425806.657 56... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
60704 | W35002706 | W06000015 | Cardiff | Caerdydd | 318509 | 175037 | -3.17457 | 51.46841 | POLYGON ((318762.325 174960.625, 318647.719 17... |
60705 | W35002707 | W06000015 | Cardiff | Caerdydd | 318854 | 174831 | -3.16956 | 51.46660 | POLYGON ((319066.795 174842.341, 319078.000 17... |
60706 | W35002708 | W06000015 | Cardiff | Caerdydd | 318930 | 175713 | -3.16867 | 51.47454 | POLYGON ((319154.263 175562.432, 319108.261 17... |
60707 | W35002709 | W06000015 | Cardiff | Caerdydd | 318822 | 175200 | -3.17011 | 51.46992 | POLYGON ((318768.685 175656.549, 319035.000 17... |
60708 | W35002710 | W06000015 | Cardiff | Caerdydd | 317925 | 174229 | -3.18279 | 51.46106 | MULTIPOLYGON (((318537.460 174145.029, 318441.... |
60709 rows × 9 columns
# Load the downloaded wp_density_ew.csv table and display it.
wpz_ew = pd.read_csv(
    "../../urbangrammar_samba/functional_data/employment/workplace/wp_density_ew.csv"
)
wpz_ew
date | geography | geography code | Area/Population Density: All usual residents; measures: Value | Area/Population Density: Area Hectares; measures: Value | Area/Population Density: Density (number of persons per hectare); measures: Value | |
---|---|---|---|---|---|---|
0 | 2011 | E33000439 | E33000439 | 687 | 68.41 | 10.0 |
1 | 2011 | E33000440 | E33000440 | 361 | 50.77 | 7.1 |
2 | 2011 | E33000441 | E33000441 | 462 | 11.28 | 41.0 |
3 | 2011 | E33000454 | E33000454 | 772 | 1147.22 | 0.7 |
4 | 2011 | E33000455 | E33000455 | 3278 | 5.81 | 564.2 |
... | ... | ... | ... | ... | ... | ... |
53573 | 2011 | W35002465 | W35002465 | 239 | 1784.50 | 0.1 |
53574 | 2011 | W35002466 | W35002466 | 1489 | 30.67 | 48.5 |
53575 | 2011 | W35002467 | W35002467 | 2979 | 58.44 | 51.0 |
53576 | 2011 | W35002468 | W35002468 | 217 | 1599.63 | 0.1 |
53577 | 2011 | W35002469 | W35002469 | 354 | 272.54 | 1.3 |
53578 rows × 6 columns
# Build one workplace-population layer: workplace-zone geometry joined with
# the counts from `wpz_ew` and from the wp_pop_scotland.csv table.
ew_value = 'Area/Population Density: All usual residents; measures: Value'
wpz = wpz_geom[['WZ11CD', 'LAD_DCACD', 'geometry']].merge(
    wpz_ew[['geography code', ew_value]],
    left_on='WZ11CD',
    right_on='geography code',
    how='left',
)

# The Scottish table is read with its header on row 5; the zone code sits in
# the first, unnamed column.
scot = pd.read_csv("../../urbangrammar_samba/functional_data/employment/workplace/wp_pop_scotland.csv", header=5)
wpz = wpz.merge(scot[['Unnamed: 0', 'Total']], left_on='WZ11CD', right_on='Unnamed: 0', how='left')

# `Total` may hold comma-formatted strings; strip the separators before
# casting. Unmatched rows become the string 'nan', which casts back to NaN.
wpz.Total = wpz.Total.astype(str).str.replace(',', '', regex=False).astype(float)

# A zone missing from one source contributes 0 from that source.
wpz['count'] = wpz[ew_value].astype(float).fillna(0) + wpz.Total.fillna(0)

# 'geography code' was filled by the left join against `wpz_ew` only, so zones
# matched solely in the Scottish table would be saved without an identifier.
# WZ11CD is the join key and is present for every zone, so use it instead.
wpz['geography code'] = wpz['WZ11CD']

# Drop zones whose code starts with 'N' (presumably Northern Ireland, as the
# output file is named *_gb).
wpz = wpz[~wpz.WZ11CD.str.startswith('N')]
wpz[['geography code', 'count', 'geometry']].to_parquet('../../urbangrammar_samba/functional_data/employment/workplace/workplace_population_gb.pq')
<ipython-input-144-0a3123e8e9c0>:1: UserWarning: this is an initial implementation of Parquet/Feather file support and associated metadata. This is tracking version 0.1.0 of the metadata specification at https://github.com/geopandas/geo-arrow-spec
This metadata specification does not yet make stability promises. We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.
To further ignore this warning, you can do:
import warnings; warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')
wpz[['geography code', 'count', 'geometry']].to_parquet('../../urbangrammar_samba/functional_data/employment/workplace/workplace_population_gb.pq')
# Load scotland_industry.csv, skipping the first four lines of the file.
wpz_ind_s = pd.read_csv(
    '../../urbangrammar_samba/functional_data/employment/workplace/scotland_industry.csv',
    skiprows=4,
)
# Keep the rows labelled 4 through 5378 (label slicing is inclusive) and
# discard every column whose name contains 'Unnamed'.
unnamed_cols = [c for c in wpz_ind_s.columns if 'Unnamed' in c]
wpz_ind_s = wpz_ind_s.loc[4:5378].drop(columns=unnamed_cols)
wpz_ind_s
2011 Workplace Zone | All workplace population aged 16 to 74 | A, B, D, E. Agriculture, energy and water | C. Manufacturing | F. Construction | G, I. Distribution, hotels and restaurants | H, J. Transport and communication | K, L, M, N. Financial, real estate, professional and administrative activities | O,P,Q. Public administration, education and health | R, S, T, U. Other | |
---|---|---|---|---|---|---|---|---|---|---|
4 | S34000001 | 269 | 4 | 9 | 1 | 107 | 2 | 115 | 23 | 8 |
5 | S34000002 | 1,270 | 233 | 55 | 27 | 96 | 22 | 775 | 49 | 13 |
6 | S34000003 | 272 | 0 | 1 | 1 | 98 | 2 | 107 | 50 | 13 |
7 | S34000004 | 206 | 0 | 3 | 0 | 98 | 2 | 19 | 4 | 80 |
8 | S34000005 | 432 | 0 | 5 | 5 | 111 | 12 | 91 | 192 | 16 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5374 | S34005371 | 257 | 2 | 6 | 11 | 39 | 9 | 29 | 125 | 36 |
5375 | S34005372 | 218 | 0 | 8 | 6 | 39 | 5 | 44 | 104 | 12 |
5376 | S34005373 | 1,572 | 14 | 194 | 16 | 111 | 508 | 199 | 510 | 20 |
5377 | S34005374 | 218 | 1 | 118 | 5 | 11 | 12 | 53 | 16 | 2 |
5378 | S34005375 | 348 | 5 | 20 | 27 | 50 | 47 | 65 | 97 | 37 |
5375 rows × 10 columns
# List the column names of the Scottish industry table.
wpz_ind_s.columns
Index(['2011 Workplace Zone', 'All workplace population aged 16 to 74',
'A, B, D, E. Agriculture, energy and water', 'C. Manufacturing',
'F. Construction', 'G, I. Distribution, hotels and restaurants',
'H, J. Transport and communication',
'K, L, M, N. Financial, real estate, professional and administrative activities',
'O,P,Q. Public administration, education and health',
'R, S, T, U. Other'],
dtype='object')
# Load england_wales_industry.csv and list its column names.
wpz_ind_ew = pd.read_csv(
    '../../urbangrammar_samba/functional_data/employment/workplace/england_wales_industry.csv'
)
wpz_ind_ew.columns
Index(['date', 'geography', 'geography code',
'Industry: All categories: Industry; measures: Value',
'Industry: A Agriculture, forestry and fishing; measures: Value',
'Industry: B Mining and quarrying; measures: Value',
'Industry: C Manufacturing; measures: Value',
'Industry: C10-12 Manufacturing: Food, beverages and tobacco; measures: Value',
'Industry: C13-15 Manufacturing: Textiles, wearing apparel and leather and related products; measures: Value',
'Industry: C16,17 Manufacturing: Wood, paper and paper products; measures: Value',
'Industry: C19-22 Manufacturing: Chemicals, chemical products, rubber and plastic; measures: Value',
'Industry: C23-25 Manufacturing: Low tech; measures: Value',
'Industry: C26-30 Manufacturing: High tech; measures: Value',
'Industry: C18, 31, 32 Manufacturing: Other; measures: Value',
'Industry: D Electricity, gas, steam and air conditioning supply; measures: Value',
'Industry: E Water supply, sewerage, waste management and remediation activities; measures: Value',
'Industry: F Construction; measures: Value',
'Industry: G Wholesale and retail trade; repair of motor vehicles and motor cycles; measures: Value',
'Industry: H Transport and storage; measures: Value',
'Industry: I Accommodation and food service activities; measures: Value',
'Industry: J Information and communication; measures: Value',
'Industry: K Financial and insurance activities; measures: Value',
'Industry: L Real estate activities; measures: Value',
'Industry: M Professional, scientific and technical activities; measures: Value',
'Industry: N Administrative and support service activities; measures: Value',
'Industry: O Public administration and defence; compulsory social security; measures: Value',
'Industry: P Education; measures: Value',
'Industry: Q Human health and social work activities; measures: Value',
'Industry: R,S Arts, entertainment and recreation; other service activities; measures: Value',
'Industry: T Activities of households as employers; undifferentiated goods - and services - producing activities of households for own use; measures: Value',
'Industry: U Activities of extraterritorial organisations and bodies; measures: Value'],
dtype='object')
# Collapse the England & Wales industry columns into the same eight broad
# groups used by the Scottish table, stack both tables and attach geometry.

# Broad group name -> section letters it is made of.
industry_groups = {
    'A, B, D, E. Agriculture, energy and water': ('A', 'B', 'D', 'E'),
    'C. Manufacturing': ('C',),
    'F. Construction': ('F',),
    'G, I. Distribution, hotels and restaurants': ('G', 'I'),
    'H, J. Transport and communication': ('H', 'J'),
    'K, L, M, N. Financial, real estate, professional and administrative activities': ('K', 'L', 'M', 'N'),
    'O,P,Q. Public administration, education and health': ('O', 'P', 'Q'),
    'R, S, T, U. Other': ('R', 'S', 'T', 'U'),
}

# Snapshot the source columns (everything after the 'All categories' total)
# before any group column is added to the frame.
source_cols = list(wpz_ind_ew.columns[4:])

for group_name, sections in industry_groups.items():
    # Every source column starts with 'Industry: ', so c[10] is the section
    # letter. Requiring c[11] to be ' ' or ',' keeps section-level columns
    # ('C Manufacturing', 'R,S Arts...') and skips the sub-section breakdowns
    # ('C10-12 ...', 'C13-15 ...'). Those are already included in the section
    # total and would otherwise be counted twice.
    section_cols = [
        c for c in source_cols
        if c[10] in sections and c[11] in (' ', ',')
    ]
    wpz_ind_ew[group_name] = wpz_ind_ew[section_cols].sum(axis=1)

# Bring the Scottish table to the same layout: same key name, no total column.
scot_industry = wpz_ind_s.rename(
    columns={'2011 Workplace Zone': 'geography code'}
).drop(columns='All workplace population aged 16 to 74')

# pd.concat replaces the deprecated DataFrame.append; row order and index
# handling are the same.
wpz = pd.concat(
    [wpz_ind_ew[['geography code'] + list(industry_groups)], scot_industry]
)

wpz_merged = wpz_geom.merge(wpz, left_on='WZ11CD', right_on='geography code', how='left')
# Drop zones whose code starts with 'N' (presumably Northern Ireland, as the
# output file is named *_gb).
wpz_merged = wpz_merged[~wpz_merged.WZ11CD.str.startswith('N')]
wpz_merged = wpz_merged.reset_index(drop=True)[list(wpz.columns) + ['geometry']]
wpz_merged.columns
Index(['geography code', 'A, B, D, E. Agriculture, energy and water',
'C. Manufacturing', 'F. Construction',
'G, I. Distribution, hotels and restaurants',
'H, J. Transport and communication',
'K, L, M, N. Financial, real estate, professional and administrative activities',
'O,P,Q. Public administration, education and health',
'R, S, T, U. Other', 'geometry'],
dtype='object')
# Cast every value column (all but the first and last) to float. Values are
# turned into text first so that thousands separators can be removed.
for c in wpz_merged.columns[1:-1]:
    as_text = wpz_merged[c].astype(str)
    without_commas = as_text.apply(lambda x: x.replace(',', ''))
    wpz_merged[c] = without_commas.astype(float)
wpz_merged
geography code | A, B, D, E. Agriculture, energy and water | C. Manufacturing | F. Construction | G, I. Distribution, hotels and restaurants | H, J. Transport and communication | K, L, M, N. Financial, real estate, professional and administrative activities | O,P,Q. Public administration, education and health | R, S, T, U. Other | geometry | |
---|---|---|---|---|---|---|---|---|---|---|
0 | E33000001 | 32.0 | 64.0 | 172.0 | 115.0 | 37.0 | 213.0 | 389.0 | 4.0 | POLYGON ((426363.000 563750.000, 426477.836 56... |
1 | E33000002 | 0.0 | 10.0 | 3.0 | 113.0 | 50.0 | 33.0 | 121.0 | 5.0 | POLYGON ((426497.000 563046.000, 426487.000 56... |
2 | E33000003 | 5.0 | 370.0 | 81.0 | 184.0 | 114.0 | 155.0 | 39.0 | 15.0 | POLYGON ((426786.032 563536.860, 426602.875 56... |
3 | E33000004 | 21.0 | 298.0 | 50.0 | 47.0 | 18.0 | 48.0 | 13.0 | 8.0 | POLYGON ((427052.405 563318.499, 427011.334 56... |
4 | E33000005 | 2.0 | 84.0 | 12.0 | 61.0 | 106.0 | 85.0 | 346.0 | 16.0 | POLYGON ((425818.791 562796.341, 425806.657 56... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
58948 | W35002706 | 2.0 | 18.0 | 11.0 | 21.0 | 8.0 | 29.0 | 83.0 | 11.0 | POLYGON ((318762.325 174960.625, 318647.719 17... |
58949 | W35002707 | 1.0 | 24.0 | 11.0 | 40.0 | 23.0 | 95.0 | 78.0 | 21.0 | POLYGON ((319066.795 174842.341, 319078.000 17... |
58950 | W35002708 | 6.0 | 14.0 | 16.0 | 26.0 | 67.0 | 134.0 | 156.0 | 28.0 | POLYGON ((319154.263 175562.432, 319108.261 17... |
58951 | W35002709 | 15.0 | 96.0 | 207.0 | 116.0 | 31.0 | 70.0 | 183.0 | 29.0 | POLYGON ((318768.685 175656.549, 319035.000 17... |
58952 | W35002710 | 0.0 | 20.0 | 23.0 | 39.0 | 46.0 | 75.0 | 195.0 | 40.0 | MULTIPOLYGON (((318537.460 174145.029, 318441.... |
58953 rows × 10 columns
# Save the merged industry table with geometry as a parquet file.
wpz_merged.to_parquet(
    '../../urbangrammar_samba/functional_data/employment/workplace/workplace_by_industry_gb.pq'
)
%%time
# Collect OpenStreetMap cinemas and theatres for each of the 103 node chunks.
# The tag filter is the same for every chunk, so it is defined once up front.
tags = {'amenity': ['cinema', 'theatre']}
pois = []
for i in tqdm(range(103), total=103):
    nodes = gpd.read_parquet(
        f'../../urbangrammar_samba/spatial_signatures/morphometrics/nodes/nodes_{i}.pq'
    )
    # The query polygon is the convex hull of all nodes in the chunk,
    # expressed in EPSG:4326.
    poly = nodes.to_crs(4326).unary_union.convex_hull
    pois.append(ox.geometries.geometries_from_polygon(poly, tags))
100%|██████████| 103/103 [15:42<00:00, 9.15s/it]
CPU times: user 3min 59s, sys: 4.76 s, total: 4min 4s
Wall time: 15min 42s
# Stack the per-chunk query results into a single table and display it.
pois_merged = pd.concat(pois)
pois_merged
/opt/conda/lib/python3.8/site-packages/ipykernel/ipkernel.py:287: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
unique_id | osmid | element_type | amenity | brand | brand:wikidata | brand:wikipedia | name | source | geometry | ... | edinburgh_international | edinburgh_fringe | name:gd | ref:planningapp | local_name | surveillance | restaurant | building:roof:orientation | music | roof:orientation | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | node/31532987 | 31532987 | node | cinema | Vue | Q2535134 | en:Vue Cinemas | Vue | local knowledge | POINT (-2.68034 53.74839) | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | node/307745711 | 307745711 | node | cinema | NaN | NaN | NaN | The Palace | NaN | POINT (-2.60005 53.82949) | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | node/662485019 | 662485019 | node | theatre | NaN | NaN | NaN | Charter Theatre | NaN | POINT (-2.69619 53.75984) | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | node/973599598 | 973599598 | node | theatre | NaN | NaN | NaN | Thornton Little Theatre | NaN | POINT (-3.01110 53.87245) | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | node/2078549775 | 2078549775 | node | cinema | NaN | NaN | NaN | The Dukes | survey | POINT (-2.79687 54.04910) | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
48 | way/690683787 | 690683787 | way | cinema | NaN | NaN | NaN | Essoldo (disused) | NaN | POLYGON ((-2.30625 53.44634, -2.30593 53.44619... | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
49 | node/1043270422 | 1043270422 | node | theatre | NaN | NaN | NaN | The Partington Players Theatre | NaN | POINT (-1.94948 53.44373) | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
50 | way/117880925 | 117880925 | way | theatre | NaN | NaN | NaN | Buxton Opera House | Bing | POLYGON ((-1.91742 53.25824, -1.91736 53.25826... | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
51 | way/281805351 | 281805351 | way | theatre | NaN | NaN | NaN | The Playhouse | NaN | POLYGON ((-1.91784 53.32325, -1.91780 53.32305... | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
52 | way/158533178 | 158533178 | way | theatre | NaN | NaN | NaN | NaN | Bing | POLYGON ((-1.89410 52.98496, -1.89413 52.98486... | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4396 rows × 272 columns
# Keep one row per `unique_id`, retain only amenity, name and geometry,
# reproject to EPSG:27700 and save as parquet.
(
    pois_merged
    .drop_duplicates(subset='unique_id')[['amenity', 'name', 'geometry']]
    .to_crs(27700)
    .to_parquet('../../urbangrammar_samba/functional_data/pois/culture_gb.pq')
)
/opt/conda/lib/python3.8/site-packages/ipykernel/ipkernel.py:287: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
<ipython-input-150-b737c023146a>:1: UserWarning: this is an initial implementation of Parquet/Feather file support and associated metadata. This is tracking version 0.1.0 of the metadata specification at https://github.com/geopandas/geo-arrow-spec
This metadata specification does not yet make stability promises. We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.
To further ignore this warning, you can do:
import warnings; warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')
pois_merged.drop_duplicates(subset='unique_id')[['amenity', 'name', 'geometry']].to_parquet('../../urbangrammar_samba/functional_data/pois/culture_gb.pq')
Corine land cover¶
CORINE Land Cover: get the download link from https://land.copernicus.eu/pan-european/corine-land-cover
We need to extract the data, clip it to the extent of GB and reproject it to OSGB.
# Download and unzip the Corine archive into the land_use/corine folder.
download(
    'https://land.copernicus.eu/land-files/afd643e4508e9dd7af7659c1fb1d75017ba6d9f4.zip',
    '../../urbangrammar_samba/functional_data/land_use/corine',
    kind='zip',
)
Creating data folder...
Downloading data from https://land.copernicus.eu/land-files/afd643e4508e9dd7af7659c1fb1d75017ba6d9f4.zip (3.62 GB)
file_sizes: 100%|██████████████████████████| 3.89G/3.89G [01:11<00:00, 54.5MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/land_use/corine
'../../urbangrammar_samba/functional_data/land_use/corine'
# Unpack the nested geoPackage archive into the same corine folder.
with zipfile.ZipFile(
    "../../urbangrammar_samba/functional_data/land_use/corine/u2018_clc2018_v2020_20u1_geoPackage.zip",
    'r',
) as zip_ref:
    zip_ref.extractall(
        "../../urbangrammar_samba/functional_data/land_use/corine"
    )
/opt/conda/lib/python3.8/site-packages/ipykernel/ipkernel.py:287: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
# Load the local-authority chunks and use them as a spatial mask when
# reading the Corine geopackage.
extent = gpd.read_parquet(
    "../../urbangrammar_samba/spatial_signatures/local_auth_chunks.pq"
)
corine_gdf = gpd.read_file(
    "../../urbangrammar_samba/functional_data/land_use/corine/u2018_clc2018_v2020_20u1_geoPackage/DATA/U2018_CLC2018_V2020_20u1.gpkg",
    mask=extent,
)
/opt/conda/lib/python3.8/site-packages/ipykernel/ipkernel.py:287: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
# Reproject the masked Corine polygons to EPSG:27700 and save as parquet.
(
    corine_gdf
    .to_crs(27700)
    .to_parquet("../../urbangrammar_samba/functional_data/land_use/corine/corine_gb.pq")
)
/opt/conda/lib/python3.8/site-packages/ipykernel/ipkernel.py:287: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
<ipython-input-159-970263de7d63>:1: UserWarning: this is an initial implementation of Parquet/Feather file support and associated metadata. This is tracking version 0.1.0 of the metadata specification at https://github.com/geopandas/geo-arrow-spec
This metadata specification does not yet make stability promises. We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.
To further ignore this warning, you can do:
import warnings; warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')
corine_gdf.to_crs(27700).to_parquet("../../urbangrammar_samba/functional_data/land_use/corine/corine_gb.pq")
Land cover classification¶
Land cover classification: get the download link from https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover?tab=form
The dataset has global coverage, so we need to clip it to the extent of GB and reproject it to OSGB.
# Download and unzip the land cover archive into the land_use/lcc folder.
# NOTE(review): the address is a raw IP with a cache path, so it is presumably
# a per-request link that has to be regenerated before re-running. Confirm.
download(
    'http://136.156.133.37/cache-compute-0011/cache/data0/dataset-satellite-land-cover-c20f5b30-2bdb-4f69-a21e-c8f2e696e715.zip',
    '../../urbangrammar_samba/functional_data/land_use/lcc',
    kind='zip',
)
Creating data folder...
Downloading data from http://136.156.133.37/cache-compute-0011/cache/data0/dataset-satellite-land-cover-c20f5b30-2bdb-4f69-a21e-c8f2e696e715.zip (2.17 GB)
file_sizes: 100%|██████████████████████████| 2.33G/2.33G [01:26<00:00, 27.1MB/s]
Extracting zip file...
Successfully downloaded / unzipped to ../../urbangrammar_samba/functional_data/land_use/lcc
'../../urbangrammar_samba/functional_data/land_use/lcc'
# Open the land cover NetCDF file with rioxarray.
lcc = ra.open_rasterio(
    "../../urbangrammar_samba/functional_data/land_use/lcc/C3S-LC-L4-LCCS-Map-300m-P1Y-2018-v2.1.1.nc"
)
/opt/conda/lib/python3.8/site-packages/rasterio/__init__.py:221: NotGeoreferencedWarning: Dataset has no geotransform set. The identity matrix may be returned.
s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
/opt/conda/lib/python3.8/site-packages/rioxarray/_io.py:678: NotGeoreferencedWarning: Dataset has no geotransform set. The identity matrix may be returned.
warnings.warn(str(rio_warning.message), type(rio_warning.message))
/opt/conda/lib/python3.8/site-packages/rasterio/__init__.py:221: NotGeoreferencedWarning: Dataset has no geotransform set. The identity matrix may be returned.
s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
/opt/conda/lib/python3.8/site-packages/rioxarray/_io.py:678: NotGeoreferencedWarning: Dataset has no geotransform set. The identity matrix may be returned.
warnings.warn(str(rio_warning.message), type(rio_warning.message))
/opt/conda/lib/python3.8/site-packages/rasterio/__init__.py:221: NotGeoreferencedWarning: Dataset has no geotransform set. The identity matrix may be returned.
s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
/opt/conda/lib/python3.8/site-packages/rioxarray/_io.py:678: NotGeoreferencedWarning: Dataset has no geotransform set. The identity matrix may be returned.
warnings.warn(str(rio_warning.message), type(rio_warning.message))
# Take the `lccs_class` variable from the first element of the opened file.
lccs = lcc[0].lccs_class
# Show the bounding box of the study extent (presumably used to choose the
# clipping window for the raster).
extent.total_bounds
array([-8.64999583, 49.86463177, 1.76893745, 60.86078737])
# Clip the raster to a fixed window (x from -9 to 2, y from 61 down to 49)
# and declare its CRS as EPSG:4326.
lccs_gb = lccs.sel(x=slice(-9, 2), y=slice(61, 49)).rio.set_crs(4326)
# Reproject to EPSG:27700 (passed as WKT) and write the result as a GeoTIFF.
osgb_wkt = pyproj.CRS(27700).to_wkt()
lccs_osgb = lccs_gb.rio.reproject(osgb_wkt)
lccs_osgb.rio.to_raster(
    "../../urbangrammar_samba/functional_data/land_use/lcc/lccs_osgb.tif"
)