{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "4c9be1ce",
"metadata": {},
"source": [
"Basic Statistics\n",
"================\n",
"\n",
"This notebook shows how to use `pandas` to compute\n",
"basic quantitative descriptions of our data.\n",
"\n",
"Topics in this Notebook:\n",
"\n",
"- dropping columns\n",
"- minimum and maximum ranges\n",
"- averages\n",
"- counts\n",
"- sorting data\n",
"- correlations with `corr()`\n",
"- `describe()`"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "98a332e1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" dbn | \n",
" district | \n",
" boro | \n",
" school_name | \n",
" total_enrollment | \n",
" asian_pct | \n",
" black_pct | \n",
" hispanic_pct | \n",
" white_pct | \n",
" swd_pct | \n",
" ell_pct | \n",
" poverty_pct | \n",
"
\n",
" \n",
" \n",
" \n",
" 4 | \n",
" 01M015 | \n",
" 1 | \n",
" Manhattan | \n",
" P.S. 015 Roberto Clemente | \n",
" 193 | \n",
" 0.135 | \n",
" 0.275 | \n",
" 0.528 | \n",
" 0.057 | \n",
" 0.223 | \n",
" 0.109 | \n",
" 0.819 | \n",
"
\n",
" \n",
" 9 | \n",
" 01M019 | \n",
" 1 | \n",
" Manhattan | \n",
" P.S. 019 Asher Levy | \n",
" 212 | \n",
" 0.061 | \n",
" 0.193 | \n",
" 0.613 | \n",
" 0.080 | \n",
" 0.392 | \n",
" 0.042 | \n",
" 0.712 | \n",
"
\n",
" \n",
" 14 | \n",
" 01M020 | \n",
" 1 | \n",
" Manhattan | \n",
" P.S. 020 Anna Silver | \n",
" 412 | \n",
" 0.248 | \n",
" 0.133 | \n",
" 0.522 | \n",
" 0.073 | \n",
" 0.218 | \n",
" 0.119 | \n",
" 0.709 | \n",
"
\n",
" \n",
" 19 | \n",
" 01M034 | \n",
" 1 | \n",
" Manhattan | \n",
" P.S. 034 Franklin D. Roosevelt | \n",
" 273 | \n",
" 0.026 | \n",
" 0.381 | \n",
" 0.557 | \n",
" 0.029 | \n",
" 0.392 | \n",
" 0.062 | \n",
" 0.960 | \n",
"
\n",
" \n",
" 24 | \n",
" 01M063 | \n",
" 1 | \n",
" Manhattan | \n",
" The STAR Academy - P.S.63 | \n",
" 208 | \n",
" 0.029 | \n",
" 0.192 | \n",
" 0.635 | \n",
" 0.091 | \n",
" 0.279 | \n",
" 0.014 | \n",
" 0.769 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" dbn district boro school_name \\\n",
"4 01M015 1 Manhattan P.S. 015 Roberto Clemente \n",
"9 01M019 1 Manhattan P.S. 019 Asher Levy \n",
"14 01M020 1 Manhattan P.S. 020 Anna Silver \n",
"19 01M034 1 Manhattan P.S. 034 Franklin D. Roosevelt \n",
"24 01M063 1 Manhattan The STAR Academy - P.S.63 \n",
"\n",
" total_enrollment asian_pct black_pct hispanic_pct white_pct swd_pct \\\n",
"4 193 0.135 0.275 0.528 0.057 0.223 \n",
"9 212 0.061 0.193 0.613 0.080 0.392 \n",
"14 412 0.248 0.133 0.522 0.073 0.218 \n",
"19 273 0.026 0.381 0.557 0.029 0.392 \n",
"24 208 0.029 0.192 0.635 0.091 0.279 \n",
"\n",
" ell_pct poverty_pct \n",
"4 0.109 0.819 \n",
"9 0.042 0.712 \n",
"14 0.119 0.709 \n",
"19 0.062 0.960 \n",
"24 0.014 0.769 "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pull in the `schools` module from the nycschools package\n",
"from nycschools import schools\n",
"\n",
"# the subset of columns this notebook works with\n",
"cols = [\n",
"    'dbn', 'district', 'boro', 'school_name',\n",
"    'total_enrollment',\n",
"    'asian_pct', 'black_pct', 'hispanic_pct', 'white_pct',\n",
"    'swd_pct', 'ell_pct', 'poverty_pct',\n",
"]\n",
"\n",
"# load the demographic data, then keep a single year: rows where `ay` is 2020\n",
"all_years = schools.load_school_demographics()\n",
"df = all_years[all_years[\"ay\"] == 2020]\n",
"\n",
"# narrow the `DataFrame` called df down to the selected columns\n",
"df = df[cols]\n",
"\n",
"# preview the first rows\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "dcec0f0c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" dbn | \n",
" district | \n",
" boro | \n",
" school_name | \n",
" total_enrollment | \n",
" asian_pct | \n",
" black_pct | \n",
" hispanic_pct | \n",
" white_pct | \n",
" swd_pct | \n",
" ell_pct | \n",
" poverty_pct | \n",
"
\n",
" \n",
" \n",
" \n",
" 3723 | \n",
" 13K430 | \n",
" 13 | \n",
" Brooklyn | \n",
" Brooklyn Technical High School | \n",
" 5921 | \n",
" 0.599 | \n",
" 0.056 | \n",
" 0.066 | \n",
" 0.237 | \n",
" 0.016 | \n",
" 0.001 | \n",
" 0.590 | \n",
"
\n",
" \n",
" 5310 | \n",
" 20K490 | \n",
" 20 | \n",
" Brooklyn | \n",
" Fort Hamilton High School | \n",
" 4678 | \n",
" 0.290 | \n",
" 0.025 | \n",
" 0.317 | \n",
" 0.347 | \n",
" 0.153 | \n",
" 0.162 | \n",
" 0.706 | \n",
"
\n",
" \n",
" 6686 | \n",
" 26Q430 | \n",
" 26 | \n",
" Queens | \n",
" Francis Lewis High School | \n",
" 4424 | \n",
" 0.577 | \n",
" 0.047 | \n",
" 0.210 | \n",
" 0.150 | \n",
" 0.150 | \n",
" 0.115 | \n",
" 0.690 | \n",
"
\n",
" \n",
" 5802 | \n",
" 22K405 | \n",
" 22 | \n",
" Brooklyn | \n",
" Midwood High School | \n",
" 4109 | \n",
" 0.354 | \n",
" 0.255 | \n",
" 0.133 | \n",
" 0.225 | \n",
" 0.138 | \n",
" 0.036 | \n",
" 0.723 | \n",
"
\n",
" \n",
" 5807 | \n",
" 22K425 | \n",
" 22 | \n",
" Brooklyn | \n",
" James Madison High School | \n",
" 3851 | \n",
" 0.209 | \n",
" 0.128 | \n",
" 0.175 | \n",
" 0.472 | \n",
" 0.154 | \n",
" 0.121 | \n",
" 0.758 | \n",
"
\n",
" \n",
" 7279 | \n",
" 28Q440 | \n",
" 28 | \n",
" Queens | \n",
" Forest Hills High School | \n",
" 3775 | \n",
" 0.247 | \n",
" 0.065 | \n",
" 0.381 | \n",
" 0.272 | \n",
" 0.164 | \n",
" 0.093 | \n",
" 0.702 | \n",
"
\n",
" \n",
" 8199 | \n",
" 31R455 | \n",
" 31 | \n",
" Staten Island | \n",
" Tottenville High School | \n",
" 3726 | \n",
" 0.072 | \n",
" 0.014 | \n",
" 0.132 | \n",
" 0.761 | \n",
" 0.227 | \n",
" 0.023 | \n",
" 0.416 | \n",
"
\n",
" \n",
" 5557 | \n",
" 21K525 | \n",
" 21 | \n",
" Brooklyn | \n",
" Edward R. Murrow High School | \n",
" 3691 | \n",
" 0.274 | \n",
" 0.176 | \n",
" 0.193 | \n",
" 0.308 | \n",
" 0.178 | \n",
" 0.125 | \n",
" 0.670 | \n",
"
\n",
" \n",
" 5300 | \n",
" 20K445 | \n",
" 20 | \n",
" Brooklyn | \n",
" New Utrecht High School | \n",
" 3572 | \n",
" 0.391 | \n",
" 0.030 | \n",
" 0.336 | \n",
" 0.234 | \n",
" 0.165 | \n",
" 0.218 | \n",
" 0.796 | \n",
"
\n",
" \n",
" 6681 | \n",
" 26Q415 | \n",
" 26 | \n",
" Queens | \n",
" Benjamin N. Cardozo High School | \n",
" 3405 | \n",
" 0.423 | \n",
" 0.214 | \n",
" 0.223 | \n",
" 0.120 | \n",
" 0.145 | \n",
" 0.051 | \n",
" 0.668 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" dbn district boro school_name \\\n",
"3723 13K430 13 Brooklyn Brooklyn Technical High School \n",
"5310 20K490 20 Brooklyn Fort Hamilton High School \n",
"6686 26Q430 26 Queens Francis Lewis High School \n",
"5802 22K405 22 Brooklyn Midwood High School \n",
"5807 22K425 22 Brooklyn James Madison High School \n",
"7279 28Q440 28 Queens Forest Hills High School \n",
"8199 31R455 31 Staten Island Tottenville High School \n",
"5557 21K525 21 Brooklyn Edward R. Murrow High School \n",
"5300 20K445 20 Brooklyn New Utrecht High School \n",
"6681 26Q415 26 Queens Benjamin N. Cardozo High School \n",
"\n",
" total_enrollment asian_pct black_pct hispanic_pct white_pct \\\n",
"3723 5921 0.599 0.056 0.066 0.237 \n",
"5310 4678 0.290 0.025 0.317 0.347 \n",
"6686 4424 0.577 0.047 0.210 0.150 \n",
"5802 4109 0.354 0.255 0.133 0.225 \n",
"5807 3851 0.209 0.128 0.175 0.472 \n",
"7279 3775 0.247 0.065 0.381 0.272 \n",
"8199 3726 0.072 0.014 0.132 0.761 \n",
"5557 3691 0.274 0.176 0.193 0.308 \n",
"5300 3572 0.391 0.030 0.336 0.234 \n",
"6681 3405 0.423 0.214 0.223 0.120 \n",
"\n",
" swd_pct ell_pct poverty_pct \n",
"3723 0.016 0.001 0.590 \n",
"5310 0.153 0.162 0.706 \n",
"6686 0.150 0.115 0.690 \n",
"5802 0.138 0.036 0.723 \n",
"5807 0.154 0.121 0.758 \n",
"7279 0.164 0.093 0.702 \n",
"8199 0.227 0.023 0.416 \n",
"5557 0.178 0.125 0.670 \n",
"5300 0.165 0.218 0.796 \n",
"6681 0.145 0.051 0.668 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# order the schools by enrollment, biggest --> smallest\n",
"data = df.sort_values(\"total_enrollment\", ascending=False)\n",
"\n",
"# the first 10 rows of the sorted frame are the 10 largest schools\n",
"data.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2386fe42",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The largest school: 5921\n",
"The smallest school: 7\n",
"Avg (mean) school size: 560.79296875\n",
"Avg (median) school size: 460.5\n",
"Avg (mode, can return multiple values) school size: [479, 714]\n"
]
}
],
"source": [
"# pull out the total_enrollment column; a single column is a Series in pandas\n",
"enrollment = df[\"total_enrollment\"]\n",
"\n",
"# pair each label with the statistic it reports, then print them in order\n",
"size_stats = [\n",
"    (\"The largest school:\", enrollment.max()),\n",
"    (\"The smallest school:\", enrollment.min()),\n",
"    (\"Avg (mean) school size:\", enrollment.mean()),\n",
"    (\"Avg (median) school size:\", enrollment.median()),\n",
"    # mode() can return several values, so it is converted to a list for display\n",
"    (\"Avg (mode, can return multiple values) school size:\", list(enrollment.mode())),\n",
"]\n",
"for label, value in size_stats:\n",
"    print(label, value)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2cf28566",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" district | \n",
" total_enrollment | \n",
" asian_pct | \n",
" black_pct | \n",
" hispanic_pct | \n",
" white_pct | \n",
" swd_pct | \n",
" ell_pct | \n",
" poverty_pct | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 2048.00000 | \n",
" 2048.000000 | \n",
" 2048.000000 | \n",
" 2048.000000 | \n",
" 2048.000000 | \n",
" 2048.000000 | \n",
" 2048.000000 | \n",
" 2048.000000 | \n",
" 2048.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 27.62207 | \n",
" 560.792969 | \n",
" 0.117710 | \n",
" 0.302644 | \n",
" 0.431721 | \n",
" 0.118602 | \n",
" 0.237641 | \n",
" 0.141212 | \n",
" 0.766560 | \n",
"
\n",
" \n",
" std | \n",
" 26.68098 | \n",
" 461.014510 | \n",
" 0.166804 | \n",
" 0.256375 | \n",
" 0.246118 | \n",
" 0.168041 | \n",
" 0.160780 | \n",
" 0.143127 | \n",
" 0.194297 | \n",
"
\n",
" \n",
" min | \n",
" 1.00000 | \n",
" 7.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.015000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.050000 | \n",
"
\n",
" \n",
" 25% | \n",
" 10.00000 | \n",
" 304.000000 | \n",
" 0.015000 | \n",
" 0.081474 | \n",
" 0.206000 | \n",
" 0.016000 | \n",
" 0.163000 | \n",
" 0.049000 | \n",
" 0.711000 | \n",
"
\n",
" \n",
" 50% | \n",
" 19.00000 | \n",
" 460.500000 | \n",
" 0.046000 | \n",
" 0.245000 | \n",
" 0.405000 | \n",
" 0.036000 | \n",
" 0.208000 | \n",
" 0.100000 | \n",
" 0.827000 | \n",
"
\n",
" \n",
" 75% | \n",
" 30.00000 | \n",
" 663.250000 | \n",
" 0.148000 | \n",
" 0.472250 | \n",
" 0.631250 | \n",
" 0.145250 | \n",
" 0.261000 | \n",
" 0.187250 | \n",
" 0.903000 | \n",
"
\n",
" \n",
" max | \n",
" 84.00000 | \n",
" 5921.000000 | \n",
" 0.926000 | \n",
" 0.935000 | \n",
" 1.000000 | \n",
" 0.945000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.960000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" district total_enrollment asian_pct black_pct hispanic_pct \\\n",
"count 2048.00000 2048.000000 2048.000000 2048.000000 2048.000000 \n",
"mean 27.62207 560.792969 0.117710 0.302644 0.431721 \n",
"std 26.68098 461.014510 0.166804 0.256375 0.246118 \n",
"min 1.00000 7.000000 0.000000 0.000000 0.015000 \n",
"25% 10.00000 304.000000 0.015000 0.081474 0.206000 \n",
"50% 19.00000 460.500000 0.046000 0.245000 0.405000 \n",
"75% 30.00000 663.250000 0.148000 0.472250 0.631250 \n",
"max 84.00000 5921.000000 0.926000 0.935000 1.000000 \n",
"\n",
" white_pct swd_pct ell_pct poverty_pct \n",
"count 2048.000000 2048.000000 2048.000000 2048.000000 \n",
"mean 0.118602 0.237641 0.141212 0.766560 \n",
"std 0.168041 0.160780 0.143127 0.194297 \n",
"min 0.000000 0.000000 0.000000 0.050000 \n",
"25% 0.016000 0.163000 0.049000 0.711000 \n",
"50% 0.036000 0.208000 0.100000 0.827000 \n",
"75% 0.145250 0.261000 0.187250 0.903000 \n",
"max 0.945000 1.000000 1.000000 0.960000 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# the built-in describe() method calculates several descriptive statistics for each numeric column\n",
"# in the data frame and returns them as a new dataframe\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ee9e8328",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 2048.000000\n",
"mean 0.237641\n",
"std 0.160780\n",
"min 0.000000\n",
"25% 0.163000\n",
"50% 0.208000\n",
"75% 0.261000\n",
"max 1.000000\n",
"Name: swd_pct, dtype: float64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# describe() also works on a single column (a Series):\n",
"df[\"swd_pct\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "4cdd0f0d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_4218/792112253.py:8: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n",
" data.corr()\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" total_enrollment | \n",
" asian_pct | \n",
" black_pct | \n",
" hispanic_pct | \n",
" white_pct | \n",
" swd_pct | \n",
" ell_pct | \n",
" poverty_pct | \n",
"
\n",
" \n",
" \n",
" \n",
" total_enrollment | \n",
" 1.000000 | \n",
" 0.350881 | \n",
" -0.242553 | \n",
" -0.111551 | \n",
" 0.179056 | \n",
" -0.175962 | \n",
" -0.018582 | \n",
" -0.155614 | \n",
"
\n",
" \n",
" asian_pct | \n",
" 0.350881 | \n",
" 1.000000 | \n",
" -0.452366 | \n",
" -0.370264 | \n",
" 0.210809 | \n",
" -0.210489 | \n",
" 0.138261 | \n",
" -0.287675 | \n",
"
\n",
" \n",
" black_pct | \n",
" -0.242553 | \n",
" -0.452366 | \n",
" 1.000000 | \n",
" -0.416505 | \n",
" -0.450801 | \n",
" 0.138008 | \n",
" -0.362794 | \n",
" 0.299685 | \n",
"
\n",
" \n",
" hispanic_pct | \n",
" -0.111551 | \n",
" -0.370264 | \n",
" -0.416505 | \n",
" 1.000000 | \n",
" -0.396739 | \n",
" 0.070421 | \n",
" 0.441878 | \n",
" 0.490632 | \n",
"
\n",
" \n",
" white_pct | \n",
" 0.179056 | \n",
" 0.210809 | \n",
" -0.450801 | \n",
" -0.396739 | \n",
" 1.000000 | \n",
" -0.083534 | \n",
" -0.177996 | \n",
" -0.795595 | \n",
"
\n",
" \n",
" swd_pct | \n",
" -0.175962 | \n",
" -0.210489 | \n",
" 0.138008 | \n",
" 0.070421 | \n",
" -0.083534 | \n",
" 1.000000 | \n",
" 0.000117 | \n",
" 0.233832 | \n",
"
\n",
" \n",
" ell_pct | \n",
" -0.018582 | \n",
" 0.138261 | \n",
" -0.362794 | \n",
" 0.441878 | \n",
" -0.177996 | \n",
" 0.000117 | \n",
" 1.000000 | \n",
" 0.359285 | \n",
"
\n",
" \n",
" poverty_pct | \n",
" -0.155614 | \n",
" -0.287675 | \n",
" 0.299685 | \n",
" 0.490632 | \n",
" -0.795595 | \n",
" 0.233832 | \n",
" 0.359285 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" total_enrollment asian_pct black_pct hispanic_pct \\\n",
"total_enrollment 1.000000 0.350881 -0.242553 -0.111551 \n",
"asian_pct 0.350881 1.000000 -0.452366 -0.370264 \n",
"black_pct -0.242553 -0.452366 1.000000 -0.416505 \n",
"hispanic_pct -0.111551 -0.370264 -0.416505 1.000000 \n",
"white_pct 0.179056 0.210809 -0.450801 -0.396739 \n",
"swd_pct -0.175962 -0.210489 0.138008 0.070421 \n",
"ell_pct -0.018582 0.138261 -0.362794 0.441878 \n",
"poverty_pct -0.155614 -0.287675 0.299685 0.490632 \n",
"\n",
" white_pct swd_pct ell_pct poverty_pct \n",
"total_enrollment 0.179056 -0.175962 -0.018582 -0.155614 \n",
"asian_pct 0.210809 -0.210489 0.138261 -0.287675 \n",
"black_pct -0.450801 0.138008 -0.362794 0.299685 \n",
"hispanic_pct -0.396739 0.070421 0.441878 0.490632 \n",
"white_pct 1.000000 -0.083534 -0.177996 -0.795595 \n",
"swd_pct -0.083534 1.000000 0.000117 0.233832 \n",
"ell_pct -0.177996 0.000117 1.000000 0.359285 \n",
"poverty_pct -0.795595 0.233832 0.359285 1.000000 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# we can also call the corr() method to show correlations between columns\n",
"# we will take out \"district\" from this data because the district number\n",
"# is categorical -- not the measure of a value\n",
"\n",
"# correlations close to 1 or -1 show high correlations;\n",
"# values closer to zero mean the columns are not closely correlated\n",
"data = df.drop(columns=[\"district\"])\n",
"\n",
"# numeric_only=True leaves the text columns (dbn, boro, school_name) out of the\n",
"# calculation explicitly; relying on the old implicit default triggers a\n",
"# FutureWarning and fails on newer pandas versions\n",
"data.corr(numeric_only=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2b796861",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_4218/143913075.py:3: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n",
" corr = data.corr()\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" total_enrollment | \n",
" asian_pct | \n",
" black_pct | \n",
" hispanic_pct | \n",
" white_pct | \n",
" swd_pct | \n",
" ell_pct | \n",
" poverty_pct | \n",
"
\n",
" \n",
" \n",
" \n",
" total_enrollment | \n",
" 1.000000 | \n",
" 0.350881 | \n",
" -0.242553 | \n",
" -0.111551 | \n",
" 0.179056 | \n",
" -0.175962 | \n",
" -0.018582 | \n",
" -0.155614 | \n",
"
\n",
" \n",
" asian_pct | \n",
" 0.350881 | \n",
" 1.000000 | \n",
" -0.452366 | \n",
" -0.370264 | \n",
" 0.210809 | \n",
" -0.210489 | \n",
" 0.138261 | \n",
" -0.287675 | \n",
"
\n",
" \n",
" black_pct | \n",
" -0.242553 | \n",
" -0.452366 | \n",
" 1.000000 | \n",
" -0.416505 | \n",
" -0.450801 | \n",
" 0.138008 | \n",
" -0.362794 | \n",
" 0.299685 | \n",
"
\n",
" \n",
" hispanic_pct | \n",
" -0.111551 | \n",
" -0.370264 | \n",
" -0.416505 | \n",
" 1.000000 | \n",
" -0.396739 | \n",
" 0.070421 | \n",
" 0.441878 | \n",
" 0.490632 | \n",
"
\n",
" \n",
" white_pct | \n",
" 0.179056 | \n",
" 0.210809 | \n",
" -0.450801 | \n",
" -0.396739 | \n",
" 1.000000 | \n",
" -0.083534 | \n",
" -0.177996 | \n",
" -0.795595 | \n",
"
\n",
" \n",
" swd_pct | \n",
" -0.175962 | \n",
" -0.210489 | \n",
" 0.138008 | \n",
" 0.070421 | \n",
" -0.083534 | \n",
" 1.000000 | \n",
" 0.000117 | \n",
" 0.233832 | \n",
"
\n",
" \n",
" ell_pct | \n",
" -0.018582 | \n",
" 0.138261 | \n",
" -0.362794 | \n",
" 0.441878 | \n",
" -0.177996 | \n",
" 0.000117 | \n",
" 1.000000 | \n",
" 0.359285 | \n",
"
\n",
" \n",
" poverty_pct | \n",
" -0.155614 | \n",
" -0.287675 | \n",
" 0.299685 | \n",
" 0.490632 | \n",
" -0.795595 | \n",
" 0.233832 | \n",
" 0.359285 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# last, we can use styles to make the correlation table easier to read\n",
"# note: you need to run this cell to see the colors -- the notebook gets saved without the styled output\n",
"\n",
"# numeric_only=True keeps the text columns in `data` out of the calculation\n",
"corr = data.corr(numeric_only=True)\n",
"\n",
"# a coolwarm color map shows values on a blue-to-red gradient; pinning vmin/vmax to the\n",
"# full correlation range makes -1 the deepest blue and 1 the deepest red in every column\n",
"# (without them each column is scaled to its own minimum and maximum)\n",
"corr.style.background_gradient(cmap='coolwarm', vmin=-1, vmax=1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "school-data",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"vscode": {
"interpreter": {
"hash": "c853444e20c489e5b96d8e1a4533affead1d94f1ba40ff9ef08cffb9c8ee794e"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}