{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "3d5e9efa",
   "metadata": {},
   "source": [
    "T Tests\n",
    "======="
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f3b215ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "import scipy\n",
    "from IPython.display import Markdown as md\n",
    "\n",
    "from nycschools import schools, exams"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "41c3b328",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dbn</th>\n",
       "      <th>grade</th>\n",
       "      <th>category</th>\n",
       "      <th>number_tested</th>\n",
       "      <th>mean_scale_score</th>\n",
       "      <th>level_1_n</th>\n",
       "      <th>level_1_pct</th>\n",
       "      <th>level_2_n</th>\n",
       "      <th>level_2_pct</th>\n",
       "      <th>level_3_n</th>\n",
       "      <th>...</th>\n",
       "      <th>missing_race_ethnicity_data_pct</th>\n",
       "      <th>swd_n</th>\n",
       "      <th>swd_pct</th>\n",
       "      <th>ell_n</th>\n",
       "      <th>ell_pct</th>\n",
       "      <th>poverty_n</th>\n",
       "      <th>poverty_pct</th>\n",
       "      <th>eni_pct</th>\n",
       "      <th>clean_name</th>\n",
       "      <th>zip</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>01M015</td>\n",
       "      <td>3</td>\n",
       "      <td>All Students</td>\n",
       "      <td>29</td>\n",
       "      <td>301.551727</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.275862</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.310345</td>\n",
       "      <td>7.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>51</td>\n",
       "      <td>0.287</td>\n",
       "      <td>12</td>\n",
       "      <td>0.067</td>\n",
       "      <td>152</td>\n",
       "      <td>0.854</td>\n",
       "      <td>0.882</td>\n",
       "      <td>roberto clemente</td>\n",
       "      <td>10009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>01M015</td>\n",
       "      <td>4</td>\n",
       "      <td>All Students</td>\n",
       "      <td>23</td>\n",
       "      <td>301.391296</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.260870</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.347826</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>51</td>\n",
       "      <td>0.287</td>\n",
       "      <td>12</td>\n",
       "      <td>0.067</td>\n",
       "      <td>152</td>\n",
       "      <td>0.854</td>\n",
       "      <td>0.882</td>\n",
       "      <td>roberto clemente</td>\n",
       "      <td>10009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>01M015</td>\n",
       "      <td>5</td>\n",
       "      <td>All Students</td>\n",
       "      <td>17</td>\n",
       "      <td>322.000000</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.117647</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.294118</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>51</td>\n",
       "      <td>0.287</td>\n",
       "      <td>12</td>\n",
       "      <td>0.067</td>\n",
       "      <td>152</td>\n",
       "      <td>0.854</td>\n",
       "      <td>0.882</td>\n",
       "      <td>roberto clemente</td>\n",
       "      <td>10009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>01M015</td>\n",
       "      <td>All Grades</td>\n",
       "      <td>All Students</td>\n",
       "      <td>69</td>\n",
       "      <td>306.536224</td>\n",
       "      <td>16.0</td>\n",
       "      <td>0.231884</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.318841</td>\n",
       "      <td>23.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>51</td>\n",
       "      <td>0.287</td>\n",
       "      <td>12</td>\n",
       "      <td>0.067</td>\n",
       "      <td>152</td>\n",
       "      <td>0.854</td>\n",
       "      <td>0.882</td>\n",
       "      <td>roberto clemente</td>\n",
       "      <td>10009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>01M015</td>\n",
       "      <td>3</td>\n",
       "      <td>Not SWD</td>\n",
       "      <td>23</td>\n",
       "      <td>307.652161</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.173913</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.391304</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>51</td>\n",
       "      <td>0.287</td>\n",
       "      <td>12</td>\n",
       "      <td>0.067</td>\n",
       "      <td>152</td>\n",
       "      <td>0.854</td>\n",
       "      <td>0.882</td>\n",
       "      <td>roberto clemente</td>\n",
       "      <td>10009</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 69 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      dbn       grade      category  number_tested  mean_scale_score  \\\n",
       "0  01M015           3  All Students             29        301.551727   \n",
       "1  01M015           4  All Students             23        301.391296   \n",
       "2  01M015           5  All Students             17        322.000000   \n",
       "3  01M015  All Grades  All Students             69        306.536224   \n",
       "4  01M015           3       Not SWD             23        307.652161   \n",
       "\n",
       "   level_1_n  level_1_pct  level_2_n  level_2_pct  level_3_n  ...  \\\n",
       "0        8.0     0.275862        9.0     0.310345        7.0  ...   \n",
       "1        6.0     0.260870        8.0     0.347826        8.0  ...   \n",
       "2        2.0     0.117647        5.0     0.294118        8.0  ...   \n",
       "3       16.0     0.231884       22.0     0.318841       23.0  ...   \n",
       "4        4.0     0.173913        9.0     0.391304        6.0  ...   \n",
       "\n",
       "   missing_race_ethnicity_data_pct  swd_n  swd_pct  ell_n  ell_pct  poverty_n  \\\n",
       "0                              0.0     51    0.287     12    0.067        152   \n",
       "1                              0.0     51    0.287     12    0.067        152   \n",
       "2                              0.0     51    0.287     12    0.067        152   \n",
       "3                              0.0     51    0.287     12    0.067        152   \n",
       "4                              0.0     51    0.287     12    0.067        152   \n",
       "\n",
       "   poverty_pct  eni_pct        clean_name    zip  \n",
       "0        0.854    0.882  roberto clemente  10009  \n",
       "1        0.854    0.882  roberto clemente  10009  \n",
       "2        0.854    0.882  roberto clemente  10009  \n",
       "3        0.854    0.882  roberto clemente  10009  \n",
       "4        0.854    0.882  roberto clemente  10009  \n",
       "\n",
       "[5 rows x 69 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "demo = schools.load_school_demographics()\n",
    "df = exams.load_math_ela_long()\n",
    "df = df[df[\"mean_scale_score\"].notnull()]\n",
    "df = df.merge(demo, how=\"inner\", on=[\"dbn\", \"ay\"])\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9cf9383a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean_scale_score</th>\n",
       "      <th>charter</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>mean_scale_score</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.015811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>charter</th>\n",
       "      <td>0.015811</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  mean_scale_score   charter\n",
       "mean_scale_score          1.000000  0.015811\n",
       "charter                   0.015811  1.000000"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's look at charter schools vs non-charter schools \n",
    "df[\"charter\"] = df.district == 84\n",
    "# get the schools in the range of 1-32 and district 84 (excludes other special districts)\n",
    "data = df[df.district.isin(list(range(1,33)) + [84])]\n",
    "\n",
    "# just get all students\n",
    "data = data[data.category==\"All Students\"]\n",
    "\n",
    "# remove null data\n",
    "data = data[data[\"mean_scale_score\"].notnull()]\n",
    "\n",
    "# show the correlation between chater and test score\n",
    "data = df[[\"mean_scale_score\", \"charter\"]]\n",
    "data.corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5f1f9950",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ttest_indResult(statistic=9.1114087193616, pvalue=8.175234619664718e-20)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# split the data into two groups for the t test\n",
    "charter = data[data.charter == True]\n",
    "community = data[data.charter == False]\n",
    "# run a t-test to see if there is a statistical difference between charter and non-charter test results\n",
    "t = scipy.stats.ttest_ind(charter.mean_scale_score, community.mean_scale_score)\n",
    "\n",
    "# the scipy results include the t-value and the p-value\n",
    "# t is the score of the test, and p is the probability that the difference is the result of chance\n",
    "t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "64e0c465",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "\n",
       "**T-Test results** comparing school averages of \n",
       "Charter School Test Results (`n=5983`) and Community School Test Results (`n=326004`)\n",
       "students in 3-8th grade student ELA and Math scores.\n",
       "\n",
       "- Charter test results: M=517.14, SD=134.96\n",
       "- Community test results: M=500.42, SD=140.75\n",
       "- T-score: 9.1114, p-val: 0.0000\n",
       "\n",
       "`n` values report the number of school average test results observed, not the number of test takers. \n"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# when reporting the results we care about these variables too\n",
    "\n",
    "# population size\n",
    "n_charter = len(charter)\n",
    "n_community = len(community)\n",
    "\n",
    "# mean average\n",
    "M_charter = charter.mean_scale_score.mean()\n",
    "M_community = community.mean_scale_score.mean()\n",
    "\n",
    "# standard deviation \n",
    "sd_charter = charter.mean_scale_score.std()\n",
    "sd_community = community.mean_scale_score.std()\n",
    "\n",
    "\n",
    "display(md(f\"\"\"\n",
    "**T-Test results** comparing school averages of \n",
    "Charter School Test Results (`n={n_charter}`) and Community School Test Results (`n={n_community}`)\n",
    "students in 3-8th grade student ELA and Math scores.\n",
    "\n",
    "- Charter test results: M={M_charter:.02f}, SD={sd_charter:.02f}\n",
    "- Community test results: M={M_community:.02f}, SD={sd_community:.02f}\n",
    "- T-score: {t.statistic:.04f}, p-val: {t.pvalue:.04f}\n",
    "\n",
    "`n` values report the number of school average test results observed, not the number of test takers. \n",
    "\"\"\"))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "67988c29",
   "metadata": {},
   "source": [
    "`pingouin` stats wrapper\n",
    "------------------------\n",
    "The [`pingouin` library](https://pingouin-stats.org/index.html) has a number of functions that \"wrap\" standard python stats functions to include additional information and nicer formatting out of the box. `ttest` is one of these functions. We can see in the output below that we get the t value, p value (like `scipy.stats`), but we also get degrees of freedom, confidence intervals, and more, without having to calculate these independently for each test."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "bfe17125",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>T</th>\n",
       "      <th>dof</th>\n",
       "      <th>alternative</th>\n",
       "      <th>p-val</th>\n",
       "      <th>CI95%</th>\n",
       "      <th>cohen-d</th>\n",
       "      <th>BF10</th>\n",
       "      <th>power</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>T-test</th>\n",
       "      <td>9.111409</td>\n",
       "      <td>331985</td>\n",
       "      <td>two-sided</td>\n",
       "      <td>8.175235e-20</td>\n",
       "      <td>[13.12, 20.32]</td>\n",
       "      <td>0.118871</td>\n",
       "      <td>1.516e+16</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               T     dof alternative         p-val           CI95%   cohen-d  \\\n",
       "T-test  9.111409  331985   two-sided  8.175235e-20  [13.12, 20.32]  0.118871   \n",
       "\n",
       "             BF10  power  \n",
       "T-test  1.516e+16    1.0  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pingouin as pg\n",
    "pg.ttest(charter.mean_scale_score, community.mean_scale_score, correction=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.6 ('school-data')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "vscode": {
   "interpreter": {
    "hash": "c853444e20c489e5b96d8e1a4533affead1d94f1ba40ff9ef08cffb9c8ee794e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}