Source code for nycschools.geo

# NYC School Data
# Copyright (C) 2022. Matthew X. Curinga
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU AFFERO GENERAL PUBLIC LICENSE (the "License") as
# published by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the License for more details.
#
# You should have received a copy of the License along with this program.
# If not, see <http://www.gnu.org/licenses/>.
# ==============================================================================
import pandas as pd
import geopandas as gpd
import folium
import os
import os.path

from . import config
urls = config.urls
school_location_file = os.path.join(config.data_dir, "school_locations.geojson")


[docs]def load_zipcodes(): """Load the NYC zip code boundaries as a GeoDataFrame from data_dir. Zip codes are compiled from the NYC Data Portal via the US Post Office""" df = gpd.read_file(os.path.join(config.data_dir,urls["zipcodes"].filename)) return df
[docs]def load_school_locations(): """Returns a GeoDataFrame with the school locations and location meta-data""" try: df = gpd.read_file(school_location_file) return df except Exception as e: # geopandas throws DriveError, but I don't know where to import it to catch it if e.type != "<class 'fiona.errors.DriverError'>": raise e return get_and_save_locations()
[docs]def load_school_geo_points(): """Load only the school location points as a GeoDataFrame""" df = load_school_locations() return df[["dbn", "x", "y", "geometry"]]
[docs]def get_and_save_locations(filename=school_location_file): points = get_points() locations = get_locations() points = points.merge(locations, on="dbn", how="left") df = gpd.GeoDataFrame(points) df.to_file(filename, driver="GeoJSON") return df
[docs]def get_points(geojsonurl=urls["school_geo"].url): """Read the school location points and zipcodes from an Open Data Portal GeoJSON URL""" # this is the API feed for the location points # it's the best place to get zip codes df = gpd.read_file(geojsonurl) df = df.rename(columns={"xcoordinat":"x","ycoordinat":"y",}) df.x = pd.to_numeric(df.x, errors='coerce') df.y = pd.to_numeric(df.y, errors='coerce') df = df[df.x > 0] df["dbn"] = df.ats_code df["district"] = df.adimindist.astype(int) df = df.rename(columns={"geodistric":"geo_district"}) cols = ['dbn', 'zip', 'geo_district', 'district', 'x','y', 'geometry'] df = df[cols] df.zip = df.zip.astype("string") return df
[docs]def get_locations(url=urls["school_locations"].url): """ Read school level data with many location-related columns: school x,y coords, and data about the school locations including NYS BEDS ids, census tract, and police precinct. """ locations = pd.read_csv(url) locations["dbn"] = locations.system_code cols = [ 'dbn', 'administrative_district_code', 'administrative_district_name', 'beds', 'borough_block_lot', 'census_tract', 'community_district', 'community_school_sup_name', 'council_district', 'fax_number', 'fiscal_year', 'geographical_district_code', 'grades_final_text', 'grades_text', 'highschool_network', 'highschool_network_location', 'highschool_network_name', 'latitude', 'location_category_description', 'location_code', 'location_name', 'location_type_description', 'longitude', 'managed_by_name', 'nta', 'nta_name', 'open_date', 'police_precinct', 'primary_building_code', 'principal_name', 'principal_phone_number', 'principal_title', 'state_code', 'status_descriptions'] locations = locations[cols] locations.beds = locations.beds.astype("string") locations.open_date = locations.open_date.astype("datetime64[ns]").dt.year # locations.beds = locations.beds.astype("string") return locations
[docs]def load_districts(url=urls["district_geo"].url): """Get geo shape file for NYC school districts, indexed by district number.""" districts = gpd.read_file(url) # rename the columns districts.columns = ['district', 'area', 'length', 'geometry'] districts.district = pd.to_numeric(districts.district, downcast='integer', errors='coerce') districts = districts.to_crs(epsg=4326) return districts
[docs]def add_labels(ax, df, col, fontsize=14): def label(row): xy=row.geometry.centroid.coords[0] ax.annotate(row[col], xy=xy, ha='center', fontsize=fontsize) df.apply(label, axis=1)