1 gadu atpakaļ · af22b18c36
--- a/day5/22082024_Stock_data_exercise_StdCopy.ipynb
+++ b/day5/22082024_Stock_data_exercise_StdCopy.ipynb
--- a/day5/22082024_Understanding_probability_distributions_StdCopy.ipynb
+++ b/day5/22082024_Understanding_probability_distributions_StdCopy.ipynb
@@ -0,0 +1,1114 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 1,
			
 
				+   "metadata": {
			
 
				+    "id": "mWD37HLPkS38",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "from scipy.stats import binom"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "WLtWdES-kl19"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Nathan makes 60% of his free-throw attempts. If he shoots 12 free throws, what is the probability that he makes exactly 10?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 3,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "W9p8fM31kffA",
			
 
				+    "outputId": "a5b12f19-e4a3-4d80-817d-621aa1104973",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.063852281856"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 3,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "#calculate binomial probability\n",
			
 
				+    "# k - number of successes\n",
			
 
				+    "# n - number of attempts\n",
			
 
				+    "# p - probability of success\n",
			
 
				+    "binom.pmf(k=10, n=12, p=0.6)\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 4,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "9Z3ha7A1tUyv",
			
 
				+    "outputId": "24868432-8e4e-4b6f-a24e-3ab188ba96a9",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.980408958976"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 4,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "#calculate binomial probability\n",
			
 
				+    "# at most 10 successes\n",
			
 
				+    "binom.cdf(k=10, n=12, p=0.6)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "gg74sMW6k2Yn"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Marty flips a fair coin 5 times. What is the probability that the coin lands on heads 2 times or fewer?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "SAVNpnekkfQD",
			
 
				+    "outputId": "f1ff6a0c-7905-42f3-aa55-fda7126417fc"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.5"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 3,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "#calculate binomial probability\n",
			
 
				+    "binom.cdf(k=2, n=5, p=0.5)\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "xfTmHopClE5a"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "It is known that 70% of individuals support a certain law. If 10 individuals are randomly selected, what is the probability that between 4 and 6 of them support the law?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "-LKngfWwlF1N",
			
 
				+    "outputId": "2a455b37-6151-445f-c21f-9296931c7310"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.33979720320000006"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 19,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "binom.cdf(k=6, n=10, p=0.7) - binom.cdf(k=3, n=10, p=0.7)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "vmYphsNIlV1A"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Medical professionals use the binomial distribution to model the probability that a certain number of patients will experience side effects as a result of taking new medications.\n",
			
 
				+    "For example, suppose it is known that 5% of adults who take a certain medication experience negative side effects. We can use a Binomial Distribution Calculator to find the probability that more than a certain number of patients in a random sample of 100 will experience negative side effects.\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "Yg9C1WTXmL-Z"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X > 5 patients experience side effects)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 5,
			
 
				+   "metadata": {
			
 
				+    "id": "cAoRmGa5l-3C",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.3840008720438586"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 5,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "1 - binom.cdf(k=5, n=100, p=0.05)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "EPG1FcB6mNZU"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X > 10 patients experience side effects)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 6,
			
 
				+   "metadata": {
			
 
				+    "id": "J8Kl6hTnmP13",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.011472410067484673"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 6,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "1 - binom.cdf(k=10, n=100, p=0.05)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "JrYtEMsVmR-e"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X > 15 patients experience side effects)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 7,
			
 
				+   "metadata": {
			
 
				+    "id": "flG6V2OlmVh0",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "3.705407617760059e-05"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 7,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "1 - binom.cdf(k=15, n=100, p=0.05)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "XCav2-iGnGLJ"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "The PMF for the given problem statement"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 11,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "z_nWf6adlWbj",
			
 
				+    "outputId": "dbf059d3-e4af-4257-9ff0-546c3f218da3",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "0.031160680107021173\n",
			
 
				+      "0.08118177185776572\n",
			
 
				+      "0.13957567793089543\n",
			
 
				+      "0.17814264156969548\n",
			
 
				+      "0.18001782727042887\n",
			
 
				+      "0.1500148560586907\n",
			
 
				+      "0.10602553736478909\n",
			
 
				+      "0.06487088799293012\n",
			
 
				+      "0.034901296464032665\n",
			
 
				+      "0.01671588409593141\n",
			
 
				+      "0.007198227601118789\n",
			
 
				+      "0.0028098344583314602\n",
			
 
				+      "0.001001074624830638\n",
			
 
				+      "0.00032741914421152555\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "# print the pmf values P(X = k) for k = 1 to 14, i.e. range(1, 15)\n",
			
 
				+    "for x in range(1,15):\n",
			
 
				+    "    print(binom.pmf(k=x, n=100, p=0.05))"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "X8nLTvaEnTq3"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Banks use the binomial distribution to model the probability that a certain number of credit card transactions are fraudulent. For example, suppose it is known that 2% of all credit card transactions in a certain region are fraudulent. If there are 50 transactions per day in a certain region, find the probability that more than a certain number of fraudulent transactions occur in a given day:"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "XRSrHBWcnnQi"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "N = 50\n",
			
 
				+    "r = ?\n",
			
 
				+    "p = 0.02"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "nhZnYXcrnhhI"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X > 1 fraudulent transaction): more than one fraudulent transaction"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 15,
			
 
				+   "metadata": {
			
 
				+    "id": "55AnJ4AKnfd4",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.26422860553827376"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 15,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "1 - binom.cdf(k=1, n=50, p=0.02)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "RL_mwLxGn5yd"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X > 2 fraudulent transactions)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 16,
			
 
				+   "metadata": {
			
 
				+    "id": "e2boDXO2nMwK",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.078427748350969"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 16,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "1 - binom.cdf(k=2, n=50, p=0.02)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "BFAshzZ7oG8-"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X< 5 fraudulent transactions) less than 5 fraudulent transactions"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 23,
			
 
				+   "metadata": {
			
 
				+    "id": "LzhI7vhkoN8R",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.9967902579739206"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 23,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "binom.cdf(k=4, n=50, p=0.02)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "LrAMWZ01ocC-"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Email companies use the binomial distribution to model the probability that a certain number of spam emails land in an inbox per day. For example, suppose it is known that 4% of all emails are spam. If an account receives 20 emails in a given day, find the probability that a certain number of those emails are spam:"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "g4W1xcfMojTg"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X = 0 spam emails)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 24,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "Bxp6hwuIol0L",
			
 
				+    "outputId": "c15f2e3a-c1d3-4b8b-f11f-d2a77a3aee9d",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.4420024338794074"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 24,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "binom.pmf(k=0, n=20, p=0.04)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "Uq2gLtg9otU-"
			
 
				+   },
			
 
				+   "source": [
			
 
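				+    "Probability of 1 to 3 spam emails, i.e. `P(1 <= X <= 3) = cdf(3) - cdf(0)` (subtracting `cdf(1)` instead drops the X = 1 case and gives only `P(2 <= X <= 3)`)"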
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 25,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "1Hbo-Sk_o2TK",
			
 
				+    "outputId": "24cead1f-c3ad-4ed4-b84a-418e973591db",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.18224926744159597"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 25,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "binom.cdf(k=3, n=20, p=0.04) - binom.cdf(k=1, n=20, p=0.04)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "o3J0A3gupoAw"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Probability of greater than 2 spam emails"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 21,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "1vx2NRGRpsGp",
			
 
				+    "outputId": "1367a811-e272-453b-c584-59b4222bc6bb",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.04386279060114229"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 21,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "1 - binom.cdf(k=2, n=20, p=0.04)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "6PgXbx4UrcAn",
			
 
				+    "outputId": "10ae517b-703f-4b96-edaa-9778f6606c1a"
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "r\tP(r)\n",
			
 
				+      "0\t0.44200243387940763\n",
			
 
				+      "1\t0.368335361566173\n",
			
 
				+      "2\t0.14579941395327686\n",
			
 
				+      "3\t0.03644985348831919\n",
			
 
				+      "4\t0.006454661555223204\n",
			
 
				+      "5\t0.0008606215406964257\n",
			
 
				+      "6\t8.964807715587748e-05\n",
			
 
				+      "7\t7.470673096323137e-06\n",
			
 
				+      "8\t5.058268242302121e-07\n",
			
 
				+      "9\t2.810149023501174e-08\n",
			
 
				+      "10\t1.2879849691047076e-09\n",
			
 
				+      "11\t4.878730943578432e-11\n",
			
 
				+      "12\t1.5246034198682613e-12\n",
			
 
				+      "13\t3.9092395381237447e-14\n",
			
 
				+      "14\t8.1442490377578e-16\n",
			
 
				+      "15\t1.357374839626301e-17\n",
			
 
				+      "16\t1.7674151557634125e-19\n",
			
 
				+      "17\t1.732759956630794e-21\n",
			
 
				+      "18\t1.2033055254380537e-23\n",
			
 
				+      "19\t5.277655813324789e-26\n",
			
 
				+      "20\t1.0995116277760004e-28\n",
			
 
				+      "Mean = 0.8\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "# write a function to generate pmf values for r in the range of 0 to n, i.e. range(0, n+1), and print the mean"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "ziYALXYPsDMB"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Park systems use the binomial distribution to model the probability that rivers overflow several times each year due to excessive rain.\n",
			
 
				+    "For example, suppose it is known that a given river overflows during 5% of all storms. If there are 20 storms in a given year, find the probability that the river overflows a certain number of times:\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "DsqzLdgDsI8T"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X = 0 overflows)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 26,
			
 
				+   "metadata": {
			
 
				+    "id": "411e2BwEsF00",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.3584859224085419"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 26,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "binom.pmf(k=0, n=20, p=0.05)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "oDbHqxjCsXHj"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X = 1 overflow)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 27,
			
 
				+   "metadata": {
			
 
				+    "id": "XmEcPjZcsYNd",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.37735360253530753"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 27,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "binom.pmf(k=1, n=20, p=0.05)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "qH6cOSKesgEW"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X = 2 overflows)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 28,
			
 
				+   "metadata": {
			
 
				+    "id": "GgLPTFh0sisT",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.18867680126765404"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 28,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "binom.pmf(k=2, n=20, p=0.05)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "cEUgXXU5ssU-"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Retail stores use the binomial distribution to model the probability that they receive a certain number of shopping returns each week. For example, suppose it is known that 10% of all orders get returned at a certain store each week. If there are 50 orders that week, find the probability that the store receives more than a certain number of returns that week:"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "i6niaOHSszSh"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X = 5 returns)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 29,
			
 
				+   "metadata": {
			
 
				+    "id": "-tsNTuAasw_v",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.1849246008952154"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 29,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "binom.pmf(k=5, n=50, p=0.1)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "TNkn8yuMuEzD"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X > 10 returns)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 30,
			
 
				+   "metadata": {
			
 
				+    "id": "UUkUyMrvuHiF",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.009354601587329037"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 30,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "1 - binom.cdf(k=10, n=50, p=0.1)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "XlIqhmhCujAt"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "P(X > 15 returns)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 31,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "xG2U0AFBul64",
			
 
				+    "outputId": "491b647f-3541-434c-bc06-50f05fa8a6e9",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "1.7496921685511424e-05"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 31,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "1 - binom.cdf(k=15, n=50, p=0.1)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "i4pljlDtuwZl"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "The height of men, computed from data, is found to have a mean of 175 cm and a standard deviation of 5 cm. What % of men are shorter than 180 cm?\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "iEXmzYd1u9LH"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "180 cm is 1 standard deviation to the right of the mean. 50% of observations are to the left of the mean, and 68/2 = 34% of observations lie between 175 and 180. So 84% (50% + 34%) of men are shorter than 180 cm."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 40,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "Ge--6oyDu-vY",
			
 
				+    "outputId": "642b5719-6dc5-43b0-872c-c9805018216a",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Percentage of men shorter than 180 cm: 84.13\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from scipy.stats import norm\n",
			
 
				+    "mu = 175\n",
			
 
				+    "sigma = 5\n",
			
 
				+    "#Find the z-score\n",
			
 
				+    "z = (180-mu)/sigma\n",
			
 
				+    "#Find the probability (area under the curve until 180 cm)\n",
			
 
				+    "p = norm.cdf(z)\n",
			
 
				+    "print('Percentage of men shorter than 180 cm:', round(p*100,2))\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "1UMv_GMDvHuF"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "What % of men are shorter than 183 cm?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 36,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "iPtd3LRgvKfS",
			
 
				+    "outputId": "5b9643f2-bc03-42da-ddd3-e549f0918b90",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Percentage of men shorter than 183 cm: 94.52\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "#Find the z-score\n",
			
 
				+    "z = (183-mu)/sigma\n",
			
 
				+    "#Find the probability (area under the curve until 183 cm)\n",
			
 
				+    "p = norm.cdf(z)\n",
			
 
				+    "print('Percentage of men shorter than 183 cm:', round(p*100,2))"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "wlRNKKIqvTt0"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "What is the probability of a man’s height being more than 177 cm?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 37,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "24LlsZwevU2u",
			
 
				+    "outputId": "6f728ea8-8d2e-494c-f7b2-cf18248e9b43",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Percentage of men taller than 177 cm: 34.46\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "#Find the z-score\n",
			
 
				+    "z = (177-mu)/sigma\n",
			
 
				+    "#Find the probability (1 - area under the curve until 177 cm)\n",
			
 
				+    "p = 1 - norm.cdf(z)\n",
			
 
				+    "print('Percentage of men taller than 177 cm:', round(p*100,2))\n",
			
 
				+    "\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "S515K2OnvcP1"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "What is the height of a man who is taller than 80% of all the men?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 38,
			
 
				+   "metadata": {
			
 
				+    "colab": {
			
 
				+     "base_uri": "https://localhost:8080/"
			
 
				+    },
			
 
				+    "id": "FZDiu_NWvc-q",
			
 
				+    "outputId": "a5a284ab-bf10-443f-bfe5-4cf207b8fd7f",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "The height for which 80% men are shorter: 179.21\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "#Find the z-score for 80th percentile\n",
			
 
				+    "z = norm.ppf(0.8) #percent point function, gives percentile\n",
			
 
				+    "x = (z*sigma) + mu #find the value using z-score formula\n",
			
 
				+    "\n",
			
 
				+    "print(\"The height for which 80% men are shorter:\", round(x,2))\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "yMN5QHeCvtJM"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "The distribution of retirement age for NFL players is normally distributed with a mean of 33 years old and a standard deviation of about 2 years. What is the probability that a player retires by age 35, i.e. `P(X <= 35)`?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 46,
			
 
				+   "metadata": {
			
 
				+    "id": "8hzglcGgwYhV",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "p of x=35: 0.8413447460685429\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "mu = 33\n",
			
 
				+    "sigma = 2\n",
			
 
				+    "z = (35-mu)/sigma\n",
			
 
				+    "p = norm.cdf(z)\n",
			
 
				+    "\n",
			
 
				+    "print(\"p of x=35:\", p)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "EjkhkzJoxFFj"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "Diastolic blood pressure for men is normally distributed with a mean of about 80 and a standard deviation of 20. What is the blood pressure for which 75% of men have a higher blood pressure?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 48,
			
 
				+   "metadata": {
			
 
				+    "id": "0ZQut1_iyQqu",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "The height for which 80% men are shorter: 93.48979500392163\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "mu = 80\n",
			
 
				+    "sigma = 20\n",
			
 
				+    "z = norm.ppf(0.75)\n",
			
 
				+    "x = (z*sigma) + mu #find the value using z-score formula\n",
			
 
				+    "\n",
			
 
				+    "print(\"The height for which 80% men are shorter:\", x)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "nc1GkEmGzDvN"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "In a call center, the distribution of the number of phone calls answered each day by each of the 12 receptionists is bell-shaped and has a mean of 63 and a standard deviation of 3. Using the empirical rule, what is the approximate percentage of daily phone calls numbering between 60 and 66?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 50,
			
 
				+   "metadata": {
			
 
				+    "id": "U4XuQIAtzFAP",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "mu = 63\n",
			
 
				+    "sigma = 3\n",
			
 
				+    "z1 = (66-mu)/sigma\n",
			
 
				+    "z2 = (60-mu)/sigma"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 51,
			
 
				+   "metadata": {
			
 
				+    "id": "zUGcvNYY1Mr9",
			
 
				+    "tags": []
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "0.6826894921370859"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 51,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "norm.cdf(z1) - norm.cdf(z2)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {
			
 
				+    "id": "-t_1Tqqt4t_c"
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "- One standard deviation (µ ± σ): 68%\n",
			
 
				+    "- Two standard deviations (µ ± 2σ): 95%\n",
			
 
				+    "- Three standard deviations (µ ± 3σ): 99.7%"
			
 
				+   ]
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "colab": {
			
 
				+   "provenance": []
			
 
				+  },
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python [conda env:ns-user-38_v1]",
			
 
				+   "language": "python",
			
 
				+   "name": "conda-env-ns-user-38_v1-py"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.11.9"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 4
			
 
				+}