1 år sedan · f1d0dbe5b1
--- a/day7/1_Examples.ipynb
+++ b/day7/1_Examples.ipynb
@@ -0,0 +1,297 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 1,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "oEidUj12Pc1s",
			
 
				+    "outputId": "a711cf45-3051-447f-be8a-495e10e7f2fc",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Confidence Interval: (222.1160773511857, 257.8839226488143)\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "import scipy.stats as stats\n",
			
 
				+    "import math\n",
			
 
				+    "\n",
			
 
				+    "# Given values\n",
			
 
				+    "sample_mean = 240\n",
			
 
				+    "sample_std_dev = 25\n",
			
 
				+    "sample_size = 10\n",
			
 
				+    "confidence_level = 0.95\n",
			
 
				+    "\n",
			
 
				+    "# Degrees of freedom\n",
			
 
				+    "df = sample_size - 1\n",
			
 
				+    "\n",
			
 
				+    "# Tail probability for a two-sided interval (α / 2)\n",
			
 
				+    "alpha = (1 - confidence_level) / 2\n",
			
 
				+    "\n",
			
 
				+    "# t-value from the t-distribution table\n",
			
 
				+    "t_value = stats.t.ppf(1 - alpha, df)\n",
			
 
				+    "\n",
			
 
				+    "margin_of_error = t_value * (sample_std_dev / math.sqrt(sample_size))\n",
			
 
				+    "\n",
			
 
				+    "lower_limit = sample_mean - margin_of_error\n",
			
 
				+    "upper_limit = sample_mean + margin_of_error\n",
			
 
				+    "\n",
			
 
				+    "print(f\"Confidence Interval: ({lower_limit}, {upper_limit})\")\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 2,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "6_w0e9bIRjPe",
			
 
				+    "outputId": "c4217e46-18d6-4c89-8f37-ffeae785c76d",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Z-Score : 4.714045207910317\n",
			
 
				+      "Critical Z-Score : 1.6448536269514722\n",
			
 
				+      "Reject Null Hypothesis\n",
			
 
				+      "p-value : 1.2142337364462463e-06\n",
			
 
				+      "Reject Null Hypothesis\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "# Import the necessary libraries\n",
			
 
				+    "import numpy as np\n",
			
 
				+    "import scipy.stats as stats\n",
			
 
				+    "\n",
			
 
				+    "# Given information\n",
			
 
				+    "sample_mean = 110\n",
			
 
				+    "population_mean = 100\n",
			
 
				+    "population_std = 15\n",
			
 
				+    "sample_size = 50\n",
			
 
				+    "alpha = 0.05\n",
			
 
				+    "\n",
			
 
				+    "# compute the z-score\n",
			
 
				+    "z_score = (sample_mean-population_mean)/(population_std/np.sqrt(sample_size))\n",
			
 
				+    "print('Z-Score :',z_score)\n",
			
 
				+    "\n",
			
 
				+    "# Approach 1: Using Critical Z-Score\n",
			
 
				+    "\n",
			
 
				+    "# Critical Z-Score\n",
			
 
				+    "z_critical = stats.norm.ppf(1-alpha)\n",
			
 
				+    "print('Critical Z-Score :',z_critical)\n",
			
 
				+    "\n",
			
 
				+    "# Hypothesis\n",
			
 
				+    "if z_score >  z_critical:\n",
			
 
				+    "    print(\"Reject Null Hypothesis\")\n",
			
 
				+    "else:\n",
			
 
				+    "  print(\"Fail to Reject Null Hypothesis\")\n",
			
 
				+    "\n",
			
 
				+    "# Approach 2: Using P-value\n",
			
 
				+    "\n",
			
 
				+    "# P-Value : Probability of observing a Z-score at least this large (upper tail)\n",
			
 
				+    "p_value = 1-stats.norm.cdf(z_score)\n",
			
 
				+    "\n",
			
 
				+    "print('p-value :',p_value)\n",
			
 
				+    "\n",
			
 
				+    "# smaller p-value indicates stronger evidence against the null hypothesis.\n",
			
 
				+    "\n",
			
 
				+    "# Hypothesis\n",
			
 
				+    "if p_value <  alpha:\n",
			
 
				+    "    print(\"Reject Null Hypothesis\")\n",
			
 
				+    "else:\n",
			
 
				+    "  print(\"Fail to Reject Null Hypothesis\")\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "qvw1L2grR7zT",
			
 
				+    "outputId": "06a8c967-d75f-47a7-9c84-fab208475777"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "Chi2ContingencyResult(statistic=0.8640353908896108, pvalue=0.6491978887380976, dof=2, expected_freq=array([[115. ,  92.5,  42.5],\n",
			
 
				+       "       [115. ,  92.5,  42.5]]))"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 4,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "import scipy.stats as stats\n",
			
 
				+    "data = [[120, 90, 40],\n",
			
 
				+    "        [110, 95, 45]]\n",
			
 
				+    "#perform the Chi-Square Test of Independence\n",
			
 
				+    "stats.chi2_contingency(data)\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "ZXiu_YyOKJTn",
			
 
				+    "outputId": "c76ed879-e1b2-49e3-a922-192111b1895c"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "T-statistic value: -1.298928415375236\n",
			
 
				+      "P-value: 0.2205360614433977\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "import scipy.stats as stats\n",
			
 
				+    "\n",
			
 
				+    "# Define the data\n",
			
 
				+    "data = [300, 315, 320, 311, 314, 309, 300, 308, 305, 303, 305, 301]\n",
			
 
				+    "\n",
			
 
				+    "# Perform the one-sample t-test\n",
			
 
				+    "t_stat, p_value = stats.ttest_1samp(a=data, popmean=310)\n",
			
 
				+    "\n",
			
 
				+    "print(\"T-statistic value:\", t_stat)\n",
			
 
				+    "print(\"P-value:\", p_value)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "j88Ty-VdLp12"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Interpret the Results:\n",
			
 
				+    "If the p-value is less than 0.05, we reject the null hypothesis and conclude that the mean height differs from 310 cm.\n",
			
 
				+    "Otherwise, we fail to reject the null hypothesis."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "cLRVXAOTM_ON"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "## Paired-T test"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "id": "0YB6wBh8NFbS"
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "pre = [88, 82, 84, 93, 75, 78, 84, 87, 95, 91, 83, 89, 77, 68, 91]\n",
			
 
				+    "post = [91, 84, 88, 90, 79, 80, 88, 90, 90, 96, 88, 89, 81, 74, 92]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "5oLo7rYpNYdT"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "H0: The mean pre-test and post-test scores are equal\n",
			
 
				+    "\n",
			
 
				+    "HA: The mean pre-test and post-test scores are not equal"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "ywnT9GbINJ0s"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "ttest_rel() function from Scipy"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "D3AHkkrqNLea",
			
 
				+    "outputId": "9630680a-f2d3-43d4-dbde-ff3361e1b994"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "TtestResult(statistic=-2.9732484231168796, pvalue=0.01007144862643272, df=14)"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 4,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "import scipy.stats as stats\n",
			
 
				+    "\n",
			
 
				+    "#perform the paired samples t-test\n",
			
 
				+    "stats.ttest_rel(pre, post)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "EzDmxtc3NaVV"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Since the p-value (0.0101) is less than 0.05, we reject the null hypothesis. We have sufficient evidence to say that the true mean test score is different for students before and after participating in the study program."
			
 
				+   ]
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "colab": {
			
 
				+   "provenance": []
			
 
				+  },
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python [conda env:ns-user-38_v1]",
			
 
				+   "language": "python",
			
 
				+   "name": "conda-env-ns-user-38_v1-py"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.11.9"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 4
			
 
				+}
			
--- a/day7/2_Case_Study_Hypothesis_Testing_in_IT_Maintenance_Using_Python.ipynb
+++ b/day7/2_Case_Study_Hypothesis_Testing_in_IT_Maintenance_Using_Python.ipynb
@@ -0,0 +1,1078 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "aTtDwHCv0sDg"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Case Study: Hypothesis Testing in IT Maintenance Using Python**\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "2LEXJiyh0v9Q"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Objective:**\n",
			
 
				+    "\n",
			
 
				+    "In this case study, we'll explore how hypothesis testing can be applied in IT maintenance to determine if a new maintenance protocol improves system uptime.\n",
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "We'll create a small dataset and explain the concepts of Z-test, T-test, F-test, ANOVA, and Chi-square test step by step in a very easy way."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "smHP-Bef0043"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Scenario**\n",
			
 
				+    "\n",
			
 
				+    "Imagine you work in an IT department responsible for maintaining servers. Recently, your team introduced a new maintenance protocol, and you want to determine whether this new protocol has significantly improved system uptime.\n",
			
 
				+    "\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "O7jEuWai04i9"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Step 1: Understanding Hypothesis Testing**\n",
			
 
				+    "\n",
			
 
				+    "Hypothesis testing is a statistical method used to make decisions based on data. It helps you determine whether the observed effects in your data are significant or if they happened by chance.\n",
			
 
				+    "\n",
			
 
				+    "Null Hypothesis (H0): There is no significant difference between the old and new protocols (no improvement in uptime).\n",
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "Alternative Hypothesis (H1): The new protocol has significantly improved system uptime."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "EM_uH63x07u3"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Step 2: Creating a Dataset**\n",
			
 
				+    "\n",
			
 
				+    "Let's create a simple dataset in Python."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/",
			
 
				+     "height": 206
			
 
				+    },
			
 
				+    "id": "05F5a-XY0pvg",
			
 
				+    "outputId": "0abe3ff4-89c7-410c-9b6d-a8d2fb12ba97"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "application/vnd.google.colaboratory.intrinsic+json": {
			
 
				+       "summary": "{\n  \"name\": \"data\",\n  \"rows\": 30,\n  \"fields\": [\n    {\n      \"column\": \"Old_Protocol\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 2.2006568334906293,\n        \"min\": 89.89402036833184,\n        \"max\": 99.53950924797522,\n        \"num_unique_values\": 30,\n        \"samples\": [\n          94.62563229994834,\n          95.66734865474854,\n          93.51566995918712\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"New_Protocol\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1.8284770171735507,\n        \"min\": 92.03840706355214,\n        \"max\": 99.90155079046357,\n        \"num_unique_values\": 30,\n        \"samples\": [\n          96.60494379547956,\n          95.12385139677762,\n          93.63873563175518\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
			
 
				+       "type": "dataframe",
			
 
				+       "variable_name": "data"
			
 
				+      },
			
 
				+      "text/html": [
			
 
				+       "\n",
			
 
				+       "  <div id=\"df-1bdd3e32-da99-40a4-8a08-343bcab0d3e9\" class=\"colab-df-container\">\n",
			
 
				+       "    <div>\n",
			
 
				+       "<style scoped>\n",
			
 
				+       "    .dataframe tbody tr th:only-of-type {\n",
			
 
				+       "        vertical-align: middle;\n",
			
 
				+       "    }\n",
			
 
				+       "\n",
			
 
				+       "    .dataframe tbody tr th {\n",
			
 
				+       "        vertical-align: top;\n",
			
 
				+       "    }\n",
			
 
				+       "\n",
			
 
				+       "    .dataframe thead th {\n",
			
 
				+       "        text-align: right;\n",
			
 
				+       "    }\n",
			
 
				+       "</style>\n",
			
 
				+       "<table border=\"1\" class=\"dataframe\">\n",
			
 
				+       "  <thead>\n",
			
 
				+       "    <tr style=\"text-align: right;\">\n",
			
 
				+       "      <th></th>\n",
			
 
				+       "      <th>Old_Protocol</th>\n",
			
 
				+       "      <th>New_Protocol</th>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "  </thead>\n",
			
 
				+       "  <tbody>\n",
			
 
				+       "    <tr>\n",
			
 
				+       "      <th>0</th>\n",
			
 
				+       "      <td>98.528105</td>\n",
			
 
				+       "      <td>96.309895</td>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "    <tr>\n",
			
 
				+       "      <th>1</th>\n",
			
 
				+       "      <td>95.800314</td>\n",
			
 
				+       "      <td>96.756325</td>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "    <tr>\n",
			
 
				+       "      <th>2</th>\n",
			
 
				+       "      <td>96.957476</td>\n",
			
 
				+       "      <td>94.224429</td>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "    <tr>\n",
			
 
				+       "      <th>3</th>\n",
			
 
				+       "      <td>99.481786</td>\n",
			
 
				+       "      <td>92.038407</td>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "    <tr>\n",
			
 
				+       "      <th>4</th>\n",
			
 
				+       "      <td>98.735116</td>\n",
			
 
				+       "      <td>95.304176</td>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "  </tbody>\n",
			
 
				+       "</table>\n",
			
 
				+       "</div>\n",
			
 
				+       "    <div class=\"colab-df-buttons\">\n",
			
 
				+       "\n",
			
 
				+       "  <div class=\"colab-df-container\">\n",
			
 
				+       "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1bdd3e32-da99-40a4-8a08-343bcab0d3e9')\"\n",
			
 
				+       "            title=\"Convert this dataframe to an interactive table.\"\n",
			
 
				+       "            style=\"display:none;\">\n",
			
 
				+       "\n",
			
 
				+       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
			
 
				+       "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
			
 
				+       "  </svg>\n",
			
 
				+       "    </button>\n",
			
 
				+       "\n",
			
 
				+       "  <style>\n",
			
 
				+       "    .colab-df-container {\n",
			
 
				+       "      display:flex;\n",
			
 
				+       "      gap: 12px;\n",
			
 
				+       "    }\n",
			
 
				+       "\n",
			
 
				+       "    .colab-df-convert {\n",
			
 
				+       "      background-color: #E8F0FE;\n",
			
 
				+       "      border: none;\n",
			
 
				+       "      border-radius: 50%;\n",
			
 
				+       "      cursor: pointer;\n",
			
 
				+       "      display: none;\n",
			
 
				+       "      fill: #1967D2;\n",
			
 
				+       "      height: 32px;\n",
			
 
				+       "      padding: 0 0 0 0;\n",
			
 
				+       "      width: 32px;\n",
			
 
				+       "    }\n",
			
 
				+       "\n",
			
 
				+       "    .colab-df-convert:hover {\n",
			
 
				+       "      background-color: #E2EBFA;\n",
			
 
				+       "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
			
 
				+       "      fill: #174EA6;\n",
			
 
				+       "    }\n",
			
 
				+       "\n",
			
 
				+       "    .colab-df-buttons div {\n",
			
 
				+       "      margin-bottom: 4px;\n",
			
 
				+       "    }\n",
			
 
				+       "\n",
			
 
				+       "    [theme=dark] .colab-df-convert {\n",
			
 
				+       "      background-color: #3B4455;\n",
			
 
				+       "      fill: #D2E3FC;\n",
			
 
				+       "    }\n",
			
 
				+       "\n",
			
 
				+       "    [theme=dark] .colab-df-convert:hover {\n",
			
 
				+       "      background-color: #434B5C;\n",
			
 
				+       "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
			
 
				+       "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
			
 
				+       "      fill: #FFFFFF;\n",
			
 
				+       "    }\n",
			
 
				+       "  </style>\n",
			
 
				+       "\n",
			
 
				+       "    <script>\n",
			
 
				+       "      const buttonEl =\n",
			
 
				+       "        document.querySelector('#df-1bdd3e32-da99-40a4-8a08-343bcab0d3e9 button.colab-df-convert');\n",
			
 
				+       "      buttonEl.style.display =\n",
			
 
				+       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
			
 
				+       "\n",
			
 
				+       "      async function convertToInteractive(key) {\n",
			
 
				+       "        const element = document.querySelector('#df-1bdd3e32-da99-40a4-8a08-343bcab0d3e9');\n",
			
 
				+       "        const dataTable =\n",
			
 
				+       "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
			
 
				+       "                                                    [key], {});\n",
			
 
				+       "        if (!dataTable) return;\n",
			
 
				+       "\n",
			
 
				+       "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
			
 
				+       "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
			
 
				+       "          + ' to learn more about interactive tables.';\n",
			
 
				+       "        element.innerHTML = '';\n",
			
 
				+       "        dataTable['output_type'] = 'display_data';\n",
			
 
				+       "        await google.colab.output.renderOutput(dataTable, element);\n",
			
 
				+       "        const docLink = document.createElement('div');\n",
			
 
				+       "        docLink.innerHTML = docLinkHtml;\n",
			
 
				+       "        element.appendChild(docLink);\n",
			
 
				+       "      }\n",
			
 
				+       "    </script>\n",
			
 
				+       "  </div>\n",
			
 
				+       "\n",
			
 
				+       "\n",
			
 
				+       "<div id=\"df-b80e302d-0864-443c-aac8-b9a9bbaf0dd7\">\n",
			
 
				+       "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-b80e302d-0864-443c-aac8-b9a9bbaf0dd7')\"\n",
			
 
				+       "            title=\"Suggest charts\"\n",
			
 
				+       "            style=\"display:none;\">\n",
			
 
				+       "\n",
			
 
				+       "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
			
 
				+       "     width=\"24px\">\n",
			
 
				+       "    <g>\n",
			
 
				+       "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
			
 
				+       "    </g>\n",
			
 
				+       "</svg>\n",
			
 
				+       "  </button>\n",
			
 
				+       "\n",
			
 
				+       "<style>\n",
			
 
				+       "  .colab-df-quickchart {\n",
			
 
				+       "      --bg-color: #E8F0FE;\n",
			
 
				+       "      --fill-color: #1967D2;\n",
			
 
				+       "      --hover-bg-color: #E2EBFA;\n",
			
 
				+       "      --hover-fill-color: #174EA6;\n",
			
 
				+       "      --disabled-fill-color: #AAA;\n",
			
 
				+       "      --disabled-bg-color: #DDD;\n",
			
 
				+       "  }\n",
			
 
				+       "\n",
			
 
				+       "  [theme=dark] .colab-df-quickchart {\n",
			
 
				+       "      --bg-color: #3B4455;\n",
			
 
				+       "      --fill-color: #D2E3FC;\n",
			
 
				+       "      --hover-bg-color: #434B5C;\n",
			
 
				+       "      --hover-fill-color: #FFFFFF;\n",
			
 
				+       "      --disabled-bg-color: #3B4455;\n",
			
 
				+       "      --disabled-fill-color: #666;\n",
			
 
				+       "  }\n",
			
 
				+       "\n",
			
 
				+       "  .colab-df-quickchart {\n",
			
 
				+       "    background-color: var(--bg-color);\n",
			
 
				+       "    border: none;\n",
			
 
				+       "    border-radius: 50%;\n",
			
 
				+       "    cursor: pointer;\n",
			
 
				+       "    display: none;\n",
			
 
				+       "    fill: var(--fill-color);\n",
			
 
				+       "    height: 32px;\n",
			
 
				+       "    padding: 0;\n",
			
 
				+       "    width: 32px;\n",
			
 
				+       "  }\n",
			
 
				+       "\n",
			
 
				+       "  .colab-df-quickchart:hover {\n",
			
 
				+       "    background-color: var(--hover-bg-color);\n",
			
 
				+       "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
			
 
				+       "    fill: var(--button-hover-fill-color);\n",
			
 
				+       "  }\n",
			
 
				+       "\n",
			
 
				+       "  .colab-df-quickchart-complete:disabled,\n",
			
 
				+       "  .colab-df-quickchart-complete:disabled:hover {\n",
			
 
				+       "    background-color: var(--disabled-bg-color);\n",
			
 
				+       "    fill: var(--disabled-fill-color);\n",
			
 
				+       "    box-shadow: none;\n",
			
 
				+       "  }\n",
			
 
				+       "\n",
			
 
				+       "  .colab-df-spinner {\n",
			
 
				+       "    border: 2px solid var(--fill-color);\n",
			
 
				+       "    border-color: transparent;\n",
			
 
				+       "    border-bottom-color: var(--fill-color);\n",
			
 
				+       "    animation:\n",
			
 
				+       "      spin 1s steps(1) infinite;\n",
			
 
				+       "  }\n",
			
 
				+       "\n",
			
 
				+       "  @keyframes spin {\n",
			
 
				+       "    0% {\n",
			
 
				+       "      border-color: transparent;\n",
			
 
				+       "      border-bottom-color: var(--fill-color);\n",
			
 
				+       "      border-left-color: var(--fill-color);\n",
			
 
				+       "    }\n",
			
 
				+       "    20% {\n",
			
 
				+       "      border-color: transparent;\n",
			
 
				+       "      border-left-color: var(--fill-color);\n",
			
 
				+       "      border-top-color: var(--fill-color);\n",
			
 
				+       "    }\n",
			
 
				+       "    30% {\n",
			
 
				+       "      border-color: transparent;\n",
			
 
				+       "      border-left-color: var(--fill-color);\n",
			
 
				+       "      border-top-color: var(--fill-color);\n",
			
 
				+       "      border-right-color: var(--fill-color);\n",
			
 
				+       "    }\n",
			
 
				+       "    40% {\n",
			
 
				+       "      border-color: transparent;\n",
			
 
				+       "      border-right-color: var(--fill-color);\n",
			
 
				+       "      border-top-color: var(--fill-color);\n",
			
 
				+       "    }\n",
			
 
				+       "    60% {\n",
			
 
				+       "      border-color: transparent;\n",
			
 
				+       "      border-right-color: var(--fill-color);\n",
			
 
				+       "    }\n",
			
 
				+       "    80% {\n",
			
 
				+       "      border-color: transparent;\n",
			
 
				+       "      border-right-color: var(--fill-color);\n",
			
 
				+       "      border-bottom-color: var(--fill-color);\n",
			
 
				+       "    }\n",
			
 
				+       "    90% {\n",
			
 
				+       "      border-color: transparent;\n",
			
 
				+       "      border-bottom-color: var(--fill-color);\n",
			
 
				+       "    }\n",
			
 
				+       "  }\n",
			
 
				+       "</style>\n",
			
 
				+       "\n",
			
 
				+       "  <script>\n",
			
 
				+       "    async function quickchart(key) {\n",
			
 
				+       "      const quickchartButtonEl =\n",
			
 
				+       "        document.querySelector('#' + key + ' button');\n",
			
 
				+       "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
			
 
				+       "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
			
 
				+       "      try {\n",
			
 
				+       "        const charts = await google.colab.kernel.invokeFunction(\n",
			
 
				+       "            'suggestCharts', [key], {});\n",
			
 
				+       "      } catch (error) {\n",
			
 
				+       "        console.error('Error during call to suggestCharts:', error);\n",
			
 
				+       "      }\n",
			
 
				+       "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
			
 
				+       "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
			
 
				+       "    }\n",
			
 
				+       "    (() => {\n",
			
 
				+       "      let quickchartButtonEl =\n",
			
 
				+       "        document.querySelector('#df-b80e302d-0864-443c-aac8-b9a9bbaf0dd7 button');\n",
			
 
				+       "      quickchartButtonEl.style.display =\n",
			
 
				+       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
			
 
				+       "    })();\n",
			
 
				+       "  </script>\n",
			
 
				+       "</div>\n",
			
 
				+       "\n",
			
 
				+       "    </div>\n",
			
 
				+       "  </div>\n"
			
 
				+      ],
			
 
				+      "text/plain": [
			
 
				+       "   Old_Protocol  New_Protocol\n",
			
 
				+       "0     98.528105     96.309895\n",
			
 
				+       "1     95.800314     96.756325\n",
			
 
				+       "2     96.957476     94.224429\n",
			
 
				+       "3     99.481786     92.038407\n",
			
 
				+       "4     98.735116     95.304176"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 2,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "import numpy as np\n",
			
 
				+    "import pandas as pd\n",
			
 
				+    "\n",
			
 
				+    "# Create sample data\n",
			
 
				+    "np.random.seed(0)\n",
			
 
				+    "old_protocol_uptime = np.random.normal(loc=95, scale=2, size=30)  # Old protocol\n",
			
 
				+    "new_protocol_uptime = np.random.normal(loc=96, scale=2, size=30)  # New protocol\n",
			
 
				+    "\n",
			
 
				+    "# Combine into a DataFrame\n",
			
 
				+    "data = pd.DataFrame({\n",
			
 
				+    "    'Old_Protocol': old_protocol_uptime,\n",
			
 
				+    "    'New_Protocol': new_protocol_uptime\n",
			
 
				+    "})\n",
			
 
				+    "\n",
			
 
				+    "data.head()\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "irUDTbi41GwD"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "This code creates two sets of uptime data, one for the old protocol and one for the new protocol, with slightly different means."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "Au6VQcjT1JQf"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Step 3: Performing Hypothesis Tests**\n",
			
 
				+    "\n",
			
 
				+    "1. Z-Test\n",
			
 
				+    "\n",
			
 
				+    "A Z-test is used when the sample size is large (n > 30) and the population standard deviation is known.\n",
			
 
				+    "\n",
			
 
				+    "In our case, if we assume we know the population standard deviation, we can perform a Z-test."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "9tv8YzXz1HH9",
			
 
				+    "outputId": "8ad9357b-20b5-4051-dd2d-896324f87fa8"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Z-Statistic: -0.8897019207505747, P-Value: 0.37362596238923207\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from statsmodels.stats.weightstats import ztest\n",
			
 
				+    "\n",
			
 
				+    "# Perform Z-test\n",
			
 
				+    "z_stat, p_value = ztest(data['New_Protocol'], data['Old_Protocol'], value=0, alternative='two-sided', usevar='pooled')\n",
			
 
				+    "print(f\"Z-Statistic: {z_stat}, P-Value: {p_value}\")\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "PmC3Iqs0mC78"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Z-Statistic: Measures how far apart the two sample means are, in units of the standard error of their difference.\n",
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "P-Value: If the p-value is less than 0.05, we reject the null hypothesis. In this case the p-value is about 0.37, so we fail to reject the null hypothesis."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "rnmPWjSJmEA8"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Using Scipy**"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "P-Klkj2ug8xq",
			
 
				+    "outputId": "c3f18aae-d69b-42d2-cfe7-a322dde0ea5f"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Critical Z-Score: 1.959963984540054\n",
			
 
				+      "Fail to reject the null hypothesis.\n",
			
 
				+      "There is not enough evidence to suggest a significant difference between the online and offline classes.\n",
			
 
				+      "P-Value : 0.37362596238923196\n",
			
 
				+      "Fail to reject the null hypothesis.\n",
			
 
				+      "There is not enough evidence to suggest significant difference between the online and offline classes.\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "import numpy as np\n",
			
 
				+    "import scipy.stats as stats\n",
			
 
				+    "\n",
			
 
				+    "# Set the significance level\n",
			
 
				+    "alpha = 0.05\n",
			
 
				+    "\n",
			
 
				+    "# Calculate the mean and standard deviation\n",
			
 
				+    "mean_old = np.mean(data['Old_Protocol'])\n",
			
 
				+    "mean_new = np.mean(data['New_Protocol'])\n",
			
 
				+    "\n",
			
 
				+    "# Calculate the standard deviation\n",
			
 
				+    "s_old =np.std(data['Old_Protocol'], ddof=1)\n",
			
 
				+    "s_new = np.std(data['New_Protocol'], ddof=1)\n",
			
 
				+    "n1 = len(data['Old_Protocol'])\n",
			
 
				+    "n2 = len(data['New_Protocol'])\n",
			
 
				+    "\n",
			
 
				+    "# Calculate the Z-score\n",
			
 
				+    "pop_std = np.sqrt((s_old**2 / n1) + (s_new**2 / n2))\n",
			
 
				+    "z_score = (mean_new - mean_old) / (pop_std)\n",
			
 
				+    "\n",
			
 
				+    "# Calculate the critical value\n",
			
 
				+    "z_critical = stats.norm.ppf(1 - alpha/2)\n",
			
 
				+    "print('Critical Z-Score:',z_critical)\n",
			
 
				+    "\n",
			
 
				+    "# Compare the test statistic with the critical value\n",
			
 
				+    "if np.abs(z_score) > z_critical:\n",
			
 
				+    "    print(\"\"\"Reject the null hypothesis.\n",
			
 
				+    "There is a significant difference between the online and offline classes.\"\"\")\n",
			
 
				+    "else:\n",
			
 
				+    "    print(\"\"\"Fail to reject the null hypothesis.\n",
			
 
				+    "There is not enough evidence to suggest a significant difference between the online and offline classes.\"\"\")\n",
			
 
				+    "\n",
			
 
				+    "# Approach 2: Using P-value\n",
			
 
				+    "\n",
			
 
				+    "# P-Value: two-sided probability, under the null hypothesis, of a Z-score at least as extreme as the observed one\n",
			
 
				+    "p_value = 2 * (1 - stats.norm.cdf(np.abs(z_score)))\n",
			
 
				+    "print('P-Value :',p_value)\n",
			
 
				+    "\n",
			
 
				+    "# Compare the p-value with the significance level\n",
			
 
				+    "if p_value < alpha:\n",
			
 
				+    "    print(\"\"\"Reject the null hypothesis.\n",
			
 
				+    "There is a significant difference between the online and offline classes.\"\"\")\n",
			
 
				+    "else:\n",
			
 
				+    "    print(\"\"\"Fail to reject the null hypothesis.\n",
			
 
				+    "There is not enough evidence to suggest significant difference between the online and offline classes.\"\"\")\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "ebi1okhGlPiC"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Interpretation:\n",
			
 
				+    "\n",
			
 
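				+    "If the p-value is less than the chosen significance level (e.g., 0.05), reject the null hypothesis. Because the test is two-sided, this indicates that uptime under the new protocol differs significantly from uptime under the old protocol; the sign of the test statistic shows the direction of the difference.\n",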
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "If the p-value is greater, you fail to reject the null hypothesis."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "0vvzDSI-1RLg"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **2. T-Test**\n",
			
 
				+    "A T-test is used when the sample size is small, and the population standard deviation is unknown."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "tnmPWWys1Pa5",
			
 
				+    "outputId": "180b32af-9c7d-48a3-d0b8-cb45ff691d75"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "T-Statistic: -0.8897019207505747, P-Value: 0.377301453394316\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from scipy.stats import ttest_ind\n",
			
 
				+    "\n",
			
 
				+    "# Perform T-test\n",
			
 
				+    "t_stat, p_value = ttest_ind(data['New_Protocol'], data['Old_Protocol'])\n",
			
 
				+    "print(f\"T-Statistic: {t_stat}, P-Value: {p_value}\")\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "At2_oWxFlcfD"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "T-Statistic: Computed like the Z-statistic, but compared against the t-distribution, which makes it more suitable for small samples.\n",
			
 
				+    "\n",
			
 
				+    "P-Value: Again, if it's less than 0.05, we reject the null hypothesis."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "JhFeEmaKlUlS"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Second Approach**"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "cFs4bac4hI5d",
			
 
				+    "outputId": "524b77a9-8622-4744-d79e-d59d69f55add"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "T-Statistic: -0.89, P-Value: 0.377\n",
			
 
				+      "Fail to reject the null hypothesis: The new protocol does not significantly improve uptime.\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from scipy.stats import ttest_ind\n",
			
 
				+    "\n",
			
 
				+    "def t_test(data, significance_level=0.05):\n",
			
 
				+    "    t_stat, p_value = ttest_ind(data['New_Protocol'], data['Old_Protocol'])\n",
			
 
				+    "\n",
			
 
				+    "    print(f\"T-Statistic: {t_stat:.2f}, P-Value: {p_value:.3f}\")\n",
			
 
				+    "    if p_value < significance_level:\n",
			
 
				+    "        print(\"Reject the null hypothesis: The new protocol significantly improves uptime.\")\n",
			
 
				+    "    else:\n",
			
 
				+    "        print(\"Fail to reject the null hypothesis: The new protocol does not significantly improve uptime.\")\n",
			
 
				+    "\n",
			
 
				+    "# Run T-test with a custom significance level\n",
			
 
				+    "t_test(data, significance_level=0.05)\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "EdgxvYjIlg66"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Interpretation:\n",
			
 
				+    "\n",
			
 
				+    "Reject the null hypothesis if the p-value is less than the significance level, indicating a significant difference in uptime.\n",
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "Fail to reject the null hypothesis otherwise."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "odU7Km531fsZ"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **3. F-Test**\n",
			
 
				+    "An F-test compares the variances of two samples to see if they are significantly different."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "I_d-TXge1paf"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "F-Statistic: Measures the ratio of variances.\n",
			
 
				+    "\n",
			
 
				+    "P-Value: A small p-value indicates a significant difference in variances."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "kWJ498QFmO5E"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Second Approach**"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "SN-4aZ5GllH3",
			
 
				+    "outputId": "ee6c58e6-f44c-4b43-ea34-bee494fad17e"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "F-Statistic: 0.69, P-Value: 0.838\n",
			
 
				+      "Fail to reject the null hypothesis: The variance in uptime has not changed significantly.\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from scipy.stats import f\n",
			
 
				+    "\n",
			
 
				+    "def f_test(data, significance_level=0.05):\n",
			
 
				+    "    var_old = np.var(data['Old_Protocol'], ddof=1)\n",
			
 
				+    "    var_new = np.var(data['New_Protocol'], ddof=1)\n",
			
 
				+    "\n",
			
 
				+    "    f_stat = var_new / var_old\n",
			
 
				+    "    dfn = len(data['New_Protocol']) - 1  # Degrees of freedom for the numerator\n",
			
 
				+    "    dfd = len(data['Old_Protocol']) - 1  # Degrees of freedom for the denominator\n",
			
 
				+    "\n",
			
 
				+    "    p_value = 1 - f.cdf(f_stat, dfn, dfd)\n",
			
 
				+    "\n",
			
 
				+    "    print(f\"F-Statistic: {f_stat:.2f}, P-Value: {p_value:.3f}\")\n",
			
 
				+    "    if p_value < significance_level:\n",
			
 
				+    "        print(\"Reject the null hypothesis: The variance in uptime has changed significantly.\")\n",
			
 
				+    "    else:\n",
			
 
				+    "        print(\"Fail to reject the null hypothesis: The variance in uptime has not changed significantly.\")\n",
			
 
				+    "\n",
			
 
				+    "# Run F-test with a custom significance level\n",
			
 
				+    "f_test(data, significance_level=0.05)\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "OTwpCCS_loSu"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Interpretation:\n",
			
 
				+    "\n",
			
 
				+    "Reject the null hypothesis if the p-value is less than the significance level, indicating a significant difference in variances.\n",
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "Fail to reject the null hypothesis otherwise."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "_vojMOmk1sUY"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **4. ANOVA Test**\n",
			
 
				+    "ANOVA (Analysis of Variance) is used to compare means of three or more groups.\n",
			
 
				+    "\n",
			
 
				+    "In this case, we don't have three groups, but for illustration:"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "VAICvkfz1WUY",
			
 
				+    "outputId": "c9639887-d23b-4628-a225-cd3be2fa81b2"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "F-Statistic: 0.7915695077871667, P-Value: 0.37730145339434407\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from scipy.stats import f_oneway\n",
			
 
				+    "\n",
			
 
				+    "# Perform one-way ANOVA (F-test on the means) using the two protocol groups\n",
			
 
				+    "f_stat, p_value = f_oneway(data['New_Protocol'], data['Old_Protocol'])\n",
			
 
				+    "print(f\"F-Statistic: {f_stat}, P-Value: {p_value}\")\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "XzhOQl6z1qHb",
			
 
				+    "outputId": "5720f605-ebfa-4145-d21d-a2858a3aa8a1"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "ANOVA F-Statistic: 14.997332898615701, P-Value: 2.5343338721405356e-06\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "# Dummy data for ANOVA (normally we would have more than 2 groups)\n",
			
 
				+    "group1 = np.random.normal(95, 2, 30)\n",
			
 
				+    "group2 = np.random.normal(96, 2, 30)\n",
			
 
				+    "group3 = np.random.normal(97, 2, 30)\n",
			
 
				+    "\n",
			
 
				+    "f_stat, p_value = f_oneway(group1, group2, group3)\n",
			
 
				+    "print(f\"ANOVA F-Statistic: {f_stat}, P-Value: {p_value}\")\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "616O6b7G1wxS"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "ANOVA F-Statistic: Compares the means of multiple groups.\n",
			
 
				+    "\n",
			
 
				+    "P-Value: A small p-value indicates at least one group mean is different."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "vwNdh3RemQjW"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Second Approach**"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "y9HwndZKlrXV",
			
 
				+    "outputId": "4609253f-11f9-4f87-afed-4828138583d8"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "ANOVA F-Statistic: 15.00, P-Value: 0.000\n",
			
 
				+      "Reject the null hypothesis: At least one group's mean uptime is significantly different.\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from scipy.stats import f_oneway\n",
			
 
				+    "\n",
			
 
				+    "def anova_test(*groups, significance_level=0.05):\n",
			
 
				+    "    f_stat, p_value = f_oneway(*groups)\n",
			
 
				+    "\n",
			
 
				+    "    print(f\"ANOVA F-Statistic: {f_stat:.2f}, P-Value: {p_value:.3f}\")\n",
			
 
				+    "    if p_value < significance_level:\n",
			
 
				+    "        print(\"Reject the null hypothesis: At least one group's mean uptime is significantly different.\")\n",
			
 
				+    "    else:\n",
			
 
				+    "        print(\"Fail to reject the null hypothesis: No significant difference in group means.\")\n",
			
 
				+    "\n",
			
 
				+    "# Dummy data for ANOVA\n",
			
 
				+    "protocol1 = np.random.normal(95, 2, 30)\n",
			
 
				+    "protocol2 = np.random.normal(96, 2, 30)\n",
			
 
				+    "protocol3 = np.random.normal(97, 2, 30)\n",
			
 
				+    "\n",
			
 
				+    "# Run ANOVA with a custom significance level\n",
			
 
				+    "anova_test(protocol1, protocol2, protocol3, significance_level=0.05)\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "IpepT0BIlvlI"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Interpretation:\n",
			
 
				+    "\n",
			
 
				+    "Reject the null hypothesis if the p-value is less than the significance level, indicating that at least one group's mean is significantly different.\n",
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "Fail to reject the null hypothesis otherwise."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "7U9bwV8U1zVn"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **5. Chi-Square Test**\n",
			
 
				+    "A Chi-square test is used for categorical data to see if there's a significant association between variables."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "kgyVcC3U1xYO",
			
 
				+    "outputId": "1869bd03-96e4-453c-f3fa-f9d977bf84ef"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Chi-Square Statistic: 0.1751373626373625, P-Value: 0.6755858549092755\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from scipy.stats import chi2_contingency\n",
			
 
				+    "\n",
			
 
				+    "# Example categorical data\n",
			
 
				+    "contingency_table = pd.crosstab(\n",
			
 
				+    "    np.random.choice(['Pass', 'Fail'], size=30),\n",
			
 
				+    "    np.random.choice(['Old', 'New'], size=30)\n",
			
 
				+    ")\n",
			
 
				+    "\n",
			
 
				+    "chi2, p, dof, expected = chi2_contingency(contingency_table)\n",
			
 
				+    "print(f\"Chi-Square Statistic: {chi2}, P-Value: {p}\")\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "d_RvVZLi13vW"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Chi-Square Statistic: Measures the association between categorical variables.\n",
			
 
				+    "\n",
			
 
				+    "P-Value: A small p-value indicates a significant association."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "1lRrJXICmSXt"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Second Approach**"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "JV3LugY8lyur",
			
 
				+    "outputId": "6c348086-dee0-438f-d8d7-297b2976d8f7"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Chi-Square Statistic: 0.18, P-Value: 0.676\n",
			
 
				+      "Fail to reject the null hypothesis: No significant association between the variables.\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from scipy.stats import chi2_contingency\n",
			
 
				+    "\n",
			
 
				+    "def chi_square_test(contingency_table, significance_level=0.05):\n",
			
 
				+    "    chi2, p, dof, expected = chi2_contingency(contingency_table)\n",
			
 
				+    "\n",
			
 
				+    "    print(f\"Chi-Square Statistic: {chi2:.2f}, P-Value: {p:.3f}\")\n",
			
 
				+    "    if p < significance_level:\n",
			
 
				+    "        print(\"Reject the null hypothesis: There is a significant association between the variables.\")\n",
			
 
				+    "    else:\n",
			
 
				+    "        print(\"Fail to reject the null hypothesis: No significant association between the variables.\")\n",
			
 
				+    "\n",
			
 
				+    "# Example contingency table\n",
			
 
				+    "contingency_table = pd.crosstab(\n",
			
 
				+    "    np.random.choice(['Pass', 'Fail'], size=30),\n",
			
 
				+    "    np.random.choice(['Old', 'New'], size=30)\n",
			
 
				+    ")\n",
			
 
				+    "\n",
			
 
				+    "# Run Chi-Square test with a custom significance level\n",
			
 
				+    "chi_square_test(contingency_table, significance_level=0.05)\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "WQrrKE4zl2Xg"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Interpretation:\n",
			
 
				+    "\n",
			
 
				+    "Reject the null hypothesis if the p-value is less than the significance level, indicating a significant association between variables.\n",
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "Fail to reject the null hypothesis otherwise."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "FKnRYfmW16QB"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Step 4: Interpreting the Results**\n",
			
 
				+    "\n",
			
 
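				+    "Z-Test & T-Test: If the p-value is below 0.05, mean uptime differs significantly between the new and old protocols. The tests above are two-sided, so check the sign of the statistic to see whether the new protocol is better or worse.\n",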
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "F-Test: If the p-value is below 0.05, the variance in uptime between protocols is significantly different.\n",
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "ANOVA: Used for comparing more than two groups (illustrative).\n",
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "Chi-Square Test: Used for categorical data to test for associations."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "9ZJaGrad1-yX"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Conclusion**\n",
			
 
				+    "\n",
			
 
				+    "Hypothesis testing helps you make data-driven decisions. In IT maintenance, it can help you determine whether a new protocol has significantly improved system uptime, guiding you to adopt more effective practices.\n",
			
 
				+    "\n",
			
 
				+    "By applying these tests, you can confidently assess whether your new maintenance protocol is making a difference."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "xioO3P0ol65H"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# **Conclusion**\n",
			
 
				+    "By using scipy, you can perform hypothesis tests with a custom significance level to determine whether changes in IT maintenance protocols have significantly impacted system uptime. Each test allows for a different type of comparison:\n",
			
 
				+    "\n",
			
 
				+    "Z-Test and T-Test: Compare means between two groups.\n",
			
 
				+    "\n",
			
 
				+    "F-Test: Compares variances.\n",
			
 
				+    "\n",
			
 
				+    "ANOVA: Compares means across multiple groups.\n",
			
 
				+    "\n",
			
 
				+    "Chi-Square Test: Tests associations between categorical variables."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "id": "RHmvrNvK14Te"
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "colab": {
			
 
				+   "provenance": []
			
 
				+  },
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python [conda env:ns-user-38_v1]",
			
 
				+   "language": "python",
			
 
				+   "name": "conda-env-ns-user-38_v1-py"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.11.9"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 4
			
 
				+}
			
--- a/day7/3_BackOrders_Participant.ipynb
+++ b/day7/3_BackOrders_Participant.ipynb