{ "cells": [ { "attachments": {}, "cell_type": "markdown", "id": "3d5e9efa", "metadata": {}, "source": [ "T Tests\n", "=======" ] }, { "cell_type": "code", "execution_count": 1, "id": "f3b215ee", "metadata": {}, "outputs": [], "source": [ "import pingouin as pg\n", "import scipy\n", "import scipy.stats  # load the stats submodule explicitly; a bare `import scipy` does not guarantee it\n", "from IPython.display import Markdown as md\n", "\n", "from nycschools import schools, exams" ] }, { "cell_type": "code", "execution_count": 2, "id": "41c3b328", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dbngradecategorynumber_testedmean_scale_scorelevel_1_nlevel_1_pctlevel_2_nlevel_2_pctlevel_3_n...missing_race_ethnicity_data_pctswd_nswd_pctell_nell_pctpoverty_npoverty_pcteni_pctclean_namezip
001M0153All Students29301.5517278.00.2758629.00.3103457.0...0.0510.287120.0671520.8540.882roberto clemente10009
101M0154All Students23301.3912966.00.2608708.00.3478268.0...0.0510.287120.0671520.8540.882roberto clemente10009
201M0155All Students17322.0000002.00.1176475.00.2941188.0...0.0510.287120.0671520.8540.882roberto clemente10009
301M015All GradesAll Students69306.53622416.00.23188422.00.31884123.0...0.0510.287120.0671520.8540.882roberto clemente10009
401M0153Not SWD23307.6521614.00.1739139.00.3913046.0...0.0510.287120.0671520.8540.882roberto clemente10009
\n", "

5 rows × 69 columns

\n", "
" ], "text/plain": [ " dbn grade category number_tested mean_scale_score \\\n", "0 01M015 3 All Students 29 301.551727 \n", "1 01M015 4 All Students 23 301.391296 \n", "2 01M015 5 All Students 17 322.000000 \n", "3 01M015 All Grades All Students 69 306.536224 \n", "4 01M015 3 Not SWD 23 307.652161 \n", "\n", " level_1_n level_1_pct level_2_n level_2_pct level_3_n ... \\\n", "0 8.0 0.275862 9.0 0.310345 7.0 ... \n", "1 6.0 0.260870 8.0 0.347826 8.0 ... \n", "2 2.0 0.117647 5.0 0.294118 8.0 ... \n", "3 16.0 0.231884 22.0 0.318841 23.0 ... \n", "4 4.0 0.173913 9.0 0.391304 6.0 ... \n", "\n", " missing_race_ethnicity_data_pct swd_n swd_pct ell_n ell_pct poverty_n \\\n", "0 0.0 51 0.287 12 0.067 152 \n", "1 0.0 51 0.287 12 0.067 152 \n", "2 0.0 51 0.287 12 0.067 152 \n", "3 0.0 51 0.287 12 0.067 152 \n", "4 0.0 51 0.287 12 0.067 152 \n", "\n", " poverty_pct eni_pct clean_name zip \n", "0 0.854 0.882 roberto clemente 10009 \n", "1 0.854 0.882 roberto clemente 10009 \n", "2 0.854 0.882 roberto clemente 10009 \n", "3 0.854 0.882 roberto clemente 10009 \n", "4 0.854 0.882 roberto clemente 10009 \n", "\n", "[5 rows x 69 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# load the school demographics that get attached to each test result below\n", "demo = schools.load_school_demographics()\n", "\n", "# load the math/ELA results, keep only rows that report a mean scale score,\n", "# then inner-join the demographics on the `dbn` and `ay` columns\n", "df = (\n", "    exams.load_math_ela_long()\n", "    .loc[lambda results: results[\"mean_scale_score\"].notnull()]\n", "    .merge(demo, how=\"inner\", on=[\"dbn\", \"ay\"])\n", ")\n", "\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 3, "id": "9cf9383a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mean_scale_scorecharter
mean_scale_score1.0000000.015811
charter0.0158111.000000
\n", "
" ], "text/plain": [ " mean_scale_score charter\n", "mean_scale_score 1.000000 0.015811\n", "charter 0.015811 1.000000" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# let's look at charter schools vs non-charter schools\n", "# (a school counts as a charter school when it is in district 84)\n", "df[\"charter\"] = df.district == 84\n", "\n", "# districts 1-32 plus district 84 (excludes other special districts)\n", "districts = list(range(1, 33)) + [84]\n", "\n", "# keep the \"All Students\" rows in those districts that report a mean scale score\n", "in_scope = (\n", "    df.district.isin(districts)\n", "    & (df.category == \"All Students\")\n", "    & df[\"mean_scale_score\"].notnull()\n", ")\n", "\n", "# take the two columns from the *filtered* rows -- selecting them from all of `df`\n", "# would discard the district, category, and null filters\n", "data = df.loc[in_scope, [\"mean_scale_score\", \"charter\"]]\n", "\n", "# NOTE: outputs stored in this notebook that were computed on unfiltered rows are stale;\n", "# restart the kernel and run all cells to refresh them\n", "\n", "# show the correlation between charter and test score\n", "data.corr()" ] }, { "cell_type": "code", "execution_count": 4, "id": "5f1f9950", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Ttest_indResult(statistic=9.1114087193616, pvalue=8.175234619664718e-20)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# split the data into two groups for the t test\n", "# (`charter` is a boolean column, so it can be used directly as a mask)\n", "charter = data[data.charter]\n", "community = data[~data.charter]\n", "\n", "# run a t-test to see if there is a statistical difference between charter and non-charter test results\n", "t = scipy.stats.ttest_ind(charter.mean_scale_score, community.mean_scale_score)\n", "\n", "# the scipy result includes the t statistic and the p-value:\n", "# t is the score of the test, and p is the probability of seeing a difference\n", "# at least this large if there were really no difference between the two groups\n", "t" ] }, { "cell_type": "code", "execution_count": 5, "id": "64e0c465", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "\n", "**T-Test results** comparing school averages of \n", "Charter School Test Results (`n=5983`) and Community School Test Results (`n=326004`)\n", "students in 3-8th grade student ELA and Math scores.\n", "\n", "- Charter test results: M=517.14, SD=134.96\n", "- Community test results: M=500.42, 
SD=140.75\n", "- T-score: 9.1114, p-val: 8.18e-20\n", "\n", "`n` values report the number of school average test results observed, not the number of test takers. \n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# when reporting the results we care about these variables too\n", "\n", "# sample size: number of school-level results in each group\n", "n_charter = len(charter)\n", "n_community = len(community)\n", "\n", "# mean of the school-level scores in each group\n", "M_charter = charter.mean_scale_score.mean()\n", "M_community = community.mean_scale_score.mean()\n", "\n", "# standard deviation of the school-level scores in each group\n", "sd_charter = charter.mean_scale_score.std()\n", "sd_community = community.mean_scale_score.std()\n", "\n", "\n", "# the p-value is shown in scientific notation: a fixed 4-decimal format\n", "# would print a very small p-value as 0.0000\n", "display(md(f\"\"\"\n", "**T-Test results** comparing school averages of \n", "Charter School Test Results (`n={n_charter}`) and Community School Test Results (`n={n_community}`)\n", "students in 3-8th grade student ELA and Math scores.\n", "\n", "- Charter test results: M={M_charter:.02f}, SD={sd_charter:.02f}\n", "- Community test results: M={M_community:.02f}, SD={sd_community:.02f}\n", "- T-score: {t.statistic:.04f}, p-val: {t.pvalue:.2e}\n", "\n", "`n` values report the number of school average test results observed, not the number of test takers. \n", "\"\"\"))\n" ] }, { "cell_type": "markdown", "id": "67988c29", "metadata": {}, "source": [ "`pingouin` stats wrapper\n", "------------------------\n", "The [`pingouin` library](https://pingouin-stats.org/index.html) has a number of functions that \"wrap\" standard python stats functions to include additional information and nicer formatting out of the box. `ttest` is one of these functions. We can see in the output below that we get the t value, p value (like `scipy.stats`), but we also get degrees of freedom, confidence intervals, and more, without having to calculate these independently for each test." ] }, { "cell_type": "code", "execution_count": 6, "id": "bfe17125", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Tdofalternativep-valCI95%cohen-dBF10power
T-test9.111409331985two-sided8.175235e-20[13.12, 20.32]0.1188711.516e+161.0
\n", "
" ], "text/plain": [ " T dof alternative p-val CI95% cohen-d \\\n", "T-test 9.111409 331985 two-sided 8.175235e-20 [13.12, 20.32] 0.118871 \n", "\n", " BF10 power \n", "T-test 1.516e+16 1.0 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# `pg` (pingouin) is imported with the other dependencies in the first code cell\n", "# correction=False disables the Welch unequal-variance correction, so this is the\n", "# same standard two-sample t-test that scipy.stats.ttest_ind runs by default\n", "pg.ttest(charter.mean_scale_score, community.mean_scale_score, correction=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10.6 ('school-data')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" }, "vscode": { "interpreter": { "hash": "c853444e20c489e5b96d8e1a4533affead1d94f1ba40ff9ef08cffb9c8ee794e" } } }, "nbformat": 4, "nbformat_minor": 5 }