Source code for nycschools.ui

# NYC School Data
# Copyright (C) 2022. Matthew X. Curinga
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU AFFERO GENERAL PUBLIC LICENSE (the "License") as
# published by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the License for more details.
#
# You should have received a copy of the License along with this program.
# If not, see <http://www.gnu.org/licenses/>.
# ==============================================================================
import pandas as pd
from IPython.display import Markdown as md
from decimal import *

import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import networkx as nx
import folium

import math

def ul(t):
    """Render the items of `t` as a Markdown-style bulleted list.

    Every item is formatted onto its own line with a leading "- "; the lines
    are joined with newlines and returned as a single string. An empty
    iterable produces an empty string.
    """
    return "\n".join(f"- {entry}" for entry in t)
def label_shapes(m, df, col, style=None):
    """Add a text label at the centroid of every shape in `df` to the map `m`.

    For each row, the value of `row[col]` is wrapped in a `<div>` carrying the
    inline CSS built from `style`, and placed on `m` as a `folium.Marker`
    with a `folium.DivIcon` located at `row.geometry.centroid`.

    Args:
        m: object the markers are added to via `Marker.add_to(m)`.
        df: table with a `geometry` column whose values expose `.centroid`
            (presumably a GeoDataFrame -- confirm against callers).
        col: name of the column whose value becomes the label text.
        style: optional mapping of CSS property name to value; each pair is
            rendered as `property:value` and joined with ";". Defaults to no
            styling.

    Returns:
        The same `m` that was passed in, with the markers added.
    """
    # `None` replaces the former mutable `{}` default; an empty mapping yields
    # the same empty style string as before.
    css = {} if style is None else style
    style_str = ";".join(f"{k}:{v}" for k, v in css.items())

    def label(row):
        point = row.geometry.centroid
        html = f"""<div style="{style_str}">{row[col]}</div>"""
        # folium expects (lat, lon), i.e. (y, x) of the centroid.
        folium.Marker(
            location=(point.y, point.x),
            icon=folium.DivIcon(html=html),
        ).add_to(m)

    df.apply(label, axis=1)
    return m
def fmt_num(col, n):
    """Format the value `n` for display based on the column name `col`.

    Columns whose name ends in "_pct" are delegated to `pct`. Any other value
    that can be converted to a float is rendered with thousands separators:
    whole numbers without decimals ("1,234"), everything else with two
    decimals ("1,234.50"). Values that cannot be treated as numbers are
    returned unchanged.

    Args:
        col: column name; only its "_pct" suffix is inspected.
        n: the value to format.

    Returns:
        A formatted string, or `n` itself when it is not numeric.
    """
    if col.endswith("_pct"):
        return pct(n)
    try:
        # The original read an undefined `row[col]` here, so every call fell
        # through to the except branch and nothing was ever formatted.
        value = float(n)
        if round(value) == value:
            return f"{int(value):,}"
        return f"{value:,.2f}"
    except (TypeError, ValueError, ArithmeticError):
        # Non-numeric input, NaN (round raises ValueError) or infinity
        # (OverflowError, an ArithmeticError): hand the value back untouched.
        return n
def hexmap(cmap):
    """Wrap the callable `cmap` so that it yields hex color strings.

    The returned function passes its argument to `cmap` and converts the
    result with `mpl.colors.rgb2hex`.
    """
    def to_hex(color):
        rgba = cmap(color)
        return mpl.colors.rgb2hex(rgba)

    return to_hex
def pct(n):
    """Format `n` as a percentage string, or "-" when it is not numeric.

    Whole-number values are shown as they are with a "%" suffix (50 -> "50%").
    Any other value is formatted with the `.1%` format spec, which multiplies
    by 100 (0.256 -> "25.6%").

    NOTE(review): the two branches assume different scales (1 -> "1%" but
    0.5 -> "50.0%"); confirm which scale callers actually pass.

    Args:
        n: the value to format.

    Returns:
        The formatted percentage, or "-" if `n` cannot be converted with
        `int()` / `float()` (e.g. None, NaN, infinity, non-numeric text).
    """
    try:
        whole = int(n)
        if whole == float(n):
            return f"{whole}%"
        return f"{float(n):.1%}"
    except (TypeError, ValueError, ArithmeticError):
        # Only conversion failures mean "no value"; a bare `except` would also
        # swallow KeyboardInterrupt and SystemExit.
        return "-"
def commas(n):
    """Format `n` rounded to 3 decimals with thousands separators.

    Args:
        n: the value to format. Numbers are rounded in their own type (an int
            stays an int, so 1234 -> "1,234"); values that only convert via
            `float()` -- such as numeric strings -- are rounded as floats.

    Returns:
        The formatted string, or "-" when `n` cannot be converted to a float.
    """
    try:
        as_float = float(n)
        try:
            rounded = round(n, 3)
        except TypeError:
            # `n` passed the float() check but does not support round()
            # itself (e.g. the string "1234.5"); the original discarded the
            # converted value and reported "-" for such input.
            rounded = round(as_float, 3)
        return f"{rounded:,}"
    except (TypeError, ValueError, ArithmeticError):
        # Conversion/rounding failures only; a bare `except` would also hide
        # KeyboardInterrupt and SystemExit.
        return "-"
def fmt_table(df, col_map=None, pct_cols=None, num_cols=None):
    """Return a display-formatted copy of `df`; `df` itself is not modified.

    Args:
        df: the table to format; it is copied with `df.copy()` first.
        col_map: optional mapping passed to `rename(columns=...)` after the
            values have been formatted.
        pct_cols: names of columns whose values are run through `pct`.
        num_cols: names of columns whose values are run through `commas`.

    Returns:
        The formatted (and optionally renamed) copy.
    """
    # `None` replaces the former mutable `[]` defaults. An explicit `is None`
    # test (rather than `or`) keeps array-like arguments such as a pandas
    # Index working, since those have no unambiguous truth value.
    pct_cols = () if pct_cols is None else pct_cols
    num_cols = () if num_cols is None else num_cols

    result = df.copy()
    for col in pct_cols:
        result[col] = result[col].apply(pct)
    for col in num_cols:
        result[col] = result[col].apply(commas)
    if col_map:
        result = result.rename(columns=col_map)
    return result
def infinite():
    """Generate the integers 1, 2, 3, ... without end."""
    current = 1
    while True:
        yield current
        current += 1
def counter():
    """Return a zero-argument callable that yields 1, 2, 3, ... on successive calls.

    Each call to `counter()` creates an independent `infinite()` generator
    and hands back its bound `__next__` method.
    """
    return infinite().__next__
# strips the leading zero from a rounded float
def round_f(f, places):
    """Round `f` to `places` decimals and drop the leading zero.

    The fractional part is right-padded with zeros to `places` digits, so
    `round_f(0.5, 2)` gives ".50" and `round_f(-0.456, 2)` gives "-.46".

    Args:
        f: the number to format.
        places: number of decimal places to round and pad to.

    Returns:
        The formatted string. When the rounded value has no decimal point in
        its string form (e.g. an int), `f` is returned unchanged.
    """
    s = str(round(f, places))
    if "." not in s:
        return f
    whole, frac = s.split(".")
    # Strip the zero for negatives too ("-0" -> "-"); previously only positive
    # values lost it, so ".46" and "-0.46" were formatted inconsistently.
    if whole in ("0", "-0"):
        whole = whole[:-1]
    frac = frac.ljust(places, "0")
    return f"{whole}.{frac}"
def fmt_pearson(r):
    """Format a Pearson correlation result as "r(df)={r}, p={p}".

    Intended for the correlation table returned from `pingouin.corr`. Only
    the first row is used. The degrees of freedom are `n - 2`, r is rounded
    to 2 decimals and p to 3 decimals, both via `round_f`.

    Args:
        r: table with the columns "n", "r" and "p-val".

    Returns:
        The formatted summary string.
    """
    # Use positional access: `series[0]` on a label-indexed Series relies on
    # an integer-position fallback that pandas has deprecated.
    dof = r["n"].iloc[0] - 2
    p = round_f(r["p-val"].iloc[0], 3)
    r_val = round_f(r["r"].iloc[0], 2)
    return f"r({dof})={r_val}, p={p}"
def edge_label(p, r):
    """Build an edge label of the form "{p}={r}", with `r` formatted by `round_f(r, 2)`."""
    rounded = round_f(r, 2)
    return f"{p}={rounded}"
def nice_name(n):
    """Turn a column-style name into a display label.

    "dbn" and "beds" are upper-cased as a whole; every other name has its
    underscores replaced by spaces and is title-cased.
    """
    acronyms = ("dbn", "beds")
    return n.upper() if n in acronyms else n.replace("_", " ").title()
def plot_model(model):
    """Draw a fitted model as a network of its parameters via `network_map`.

    Reads the dependent-variable name from `model.model.endog_names` and the
    parameter names, coefficients and p-values (rounded to 3 decimals) from
    `model.params` / `model.pvalues`. The first entry of each is skipped --
    presumably the intercept; confirm against how the models are built.

    Args:
        model: a fitted results object exposing `model.endog_names`, `params`
            and `pvalues` (looks like a statsmodels results object -- TODO
            confirm).
    """
    dv = model.model.endog_names
    params = list(model.params.index.values[1:])
    coefs = list(model.params.values[1:])
    pvalues = list(model.pvalues.round(3).values[1:])
    # Pass the p-values that were just computed; the original sent
    # `pvalues=None`, which made `network_map` replace them with zeros.
    network_map(dv, params, coefs, pvalues=pvalues)
def network_map(dv, params, coefs, pvalues):
    """Prepare node and edge data for a model diagram and hand it to `draw_model`.

    Every parameter becomes a node with an edge pointing at the dependent
    variable `dv`. Edge weights and node sizes are based on the absolute
    coefficient (the largest one gets size 2000, the `dv` node 4000), node
    colors are the signed coefficients (0 for `dv`) mapped through the
    `seismic` colormap, and each edge is labelled by `edge_label`.

    Args:
        dv: name of the dependent variable.
        params: parameter names, one per coefficient.
        coefs: coefficients, in the same order as `params`.
        pvalues: p-values in the same order as `params`; `None` or an empty
            sequence means "unknown" and is replaced by zeros.
    """
    # Normalize to lists so the `+ [...]` concatenations below behave the
    # same for lists, tuples and arrays.
    params = list(params)
    coefs = list(coefs)
    # Explicit test instead of `not pvalues`, which raises for arrays.
    if pvalues is None or len(pvalues) == 0:
        pvalues = [0] * len(coefs)
    else:
        pvalues = list(pvalues)

    cmap = mpl.cm.seismic
    nodes = params + [dv]
    pnodes = [(f"p={p}", param) for p, param in zip(pvalues, params)]

    # all targets point to the dv
    edges = [(param, dv) for param in params]
    colors = coefs + [0]
    weights = [abs(c) for c in coefs]
    weighted_edges = [(param, dv, w) for param, w in zip(params, weights)]

    max_node = 2000
    # `default` and the zero guard avoid a crash when there are no
    # parameters or when every coefficient is exactly 0.
    max_size = max(weights, default=0)
    node_size = [
        math.ceil(max_node * (w / max_size)) if max_size else 0
        for w in weights
    ]
    node_size.append(max_node * 2)

    labels = {n: nice_name(n) for n in nodes}
    edge_labels = {
        edge: edge_label(edge[0], coef) for edge, coef in zip(edges, coefs)
    }

    node_dict = {}
    for i, n in enumerate(nodes):
        node_dict[n] = {
            "node_size": node_size[i],
            "color": colors[i],
            # Only the trailing `dv` node lacks a p-value/weight. The former
            # `len(...) - 1` bound also zeroed out the last parameter.
            "pvalue": pvalues[i] if i < len(pvalues) else 0,
            "weight": weights[i] if i < len(weights) else 0,
            # The original divided each node size by itself: always 1.0, or a
            # ZeroDivisionError for a zero-size node. The constant keeps the
            # value without the crash.
            # NOTE(review): a size relative to `max_node` may have been
            # intended -- confirm.
            "scale": 1.0,
            "label": nice_name(n),
        }

    draw_model(nodes, pnodes, node_size, weighted_edges, labels, edge_labels,
               colors, cmap, node_dict)
def draw_model(nodes, pnodes, node_size, edges, labels, edge_labels, colors,
               cmap, node_dict):
    """Render the model graph with networkx on a 16x9 matplotlib figure.

    The keys of `node_dict` become the graph nodes and `edges` are added as
    weighted edges. Nodes are placed with a circular layout, colored from
    `colors` through `cmap` and sized by `node_size`. Edge labels are drawn
    at the midpoint of each edge; node labels are drawn separately so they
    can sit slightly below their node. The figure is shown with `plt.show()`.

    `nodes` and `pnodes` are accepted but not used by the current drawing
    code (p-value nodes are not rendered).
    """
    graph = nx.DiGraph()
    graph.add_nodes_from(node_dict)
    graph.add_weighted_edges_from(edges)

    fig, ax = plt.subplots(figsize=(16, 9))
    pos = nx.circular_layout(graph)

    nx.draw(
        graph,
        pos=pos,
        ax=ax,
        with_labels=False,
        node_color=colors,
        cmap=cmap,
        node_size=node_size,
        linewidths=2,
        min_source_margin=2,
        min_target_margin=2,
        font_size=14,
    )
    nx.draw_networkx_edge_labels(
        graph,
        pos,
        ax=ax,
        edge_labels=edge_labels,
        label_pos=.5,
        font_size=12,
    )

    # Shift every label position down a little so the text sits below the node.
    label_pos = {node: (x, y - .1) for node, (x, y) in pos.items()}
    nx.draw_networkx_labels(graph, label_pos, labels=labels)

    plt.tight_layout()
    plt.margins(0.05)
    plt.show()