|
|
@@ -0,0 +1,1114 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 1,
|
|
|
+ "metadata": {
|
|
|
+ "id": "mWD37HLPkS38",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "from scipy.stats import binom"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "WLtWdES-kl19"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "Nathan makes 60% of his free-throw attempts. If he shoots 12 free throws, what is the probability that he makes exactly 10?"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 3,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "W9p8fM31kffA",
|
|
|
+ "outputId": "a5b12f19-e4a3-4d80-817d-621aa1104973",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.063852281856"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 3,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "#calculate binomial probability\n",
|
|
|
+ "# k - number of successes\n",
|
|
|
+ "# n - number of attempts\n",
|
|
|
+ "# p - probability of success on each attempt\n",
|
|
|
+ "binom.pmf(k=10, n=12, p=0.6)\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 4,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "9Z3ha7A1tUyv",
|
|
|
+ "outputId": "24868432-8e4e-4b6f-a24e-3ab188ba96a9",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.980408958976"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 4,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "#calculate binomial probability\n",
|
|
|
+ "# at most 10 successes, P(X <= 10)\n",
|
|
|
+ "binom.cdf(k=10, n=12, p=0.6)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "gg74sMW6k2Yn"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "Marty flips a fair coin 5 times. What is the probability that the coin lands on heads 2 times or fewer?"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "SAVNpnekkfQD",
|
|
|
+ "outputId": "f1ff6a0c-7905-42f3-aa55-fda7126417fc"
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.5"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 3,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "#calculate binomial probability\n",
|
|
|
+ "binom.cdf(k=2, n=5, p=0.5)\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "xfTmHopClE5a"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "It is known that 70% of individuals support a certain law. If 10 individuals are randomly selected, what is the probability that between 4 and 6 of them support the law?"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "-LKngfWwlF1N",
|
|
|
+ "outputId": "2a455b37-6151-445f-c21f-9296931c7310"
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.33979720320000006"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 19,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "binom.cdf(k=6, n=10, p=0.7) - binom.cdf(k=3, n=10, p=0.7)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "vmYphsNIlV1A"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "Medical professionals use the binomial distribution to model the probability that a certain number of patients will experience side effects as a result of taking new medications.\n",
|
|
|
+ "For example, suppose it is known that 5% of adults who take a certain medication experience negative side effects. We can use the binomial distribution (here via `scipy.stats.binom`) to find the probability that more than a certain number of patients in a random sample of 100 will experience negative side effects.\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "Yg9C1WTXmL-Z"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X > 5 patients experience side effects)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 5,
|
|
|
+ "metadata": {
|
|
|
+ "id": "cAoRmGa5l-3C",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.3840008720438586"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 5,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "1 - binom.cdf(k=5, n=100, p=0.05)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "EPG1FcB6mNZU"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X > 10 patients experience side effects)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 6,
|
|
|
+ "metadata": {
|
|
|
+ "id": "J8Kl6hTnmP13",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.011472410067484673"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 6,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "1 - binom.cdf(k=10, n=100, p=0.05)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "JrYtEMsVmR-e"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X > 15 patients experience side effects)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 7,
|
|
|
+ "metadata": {
|
|
|
+ "id": "flG6V2OlmVh0",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "3.705407617760059e-05"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 7,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "1 - binom.cdf(k=15, n=100, p=0.05)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "XCav2-iGnGLJ"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "The PMF for the given problem statement"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 11,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "z_nWf6adlWbj",
|
|
|
+ "outputId": "dbf059d3-e4af-4257-9ff0-546c3f218da3",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "0.031160680107021173\n",
|
|
|
+ "0.08118177185776572\n",
|
|
|
+ "0.13957567793089543\n",
|
|
|
+ "0.17814264156969548\n",
|
|
|
+ "0.18001782727042887\n",
|
|
|
+ "0.1500148560586907\n",
|
|
|
+ "0.10602553736478909\n",
|
|
|
+ "0.06487088799293012\n",
|
|
|
+ "0.034901296464032665\n",
|
|
|
+ "0.01671588409593141\n",
|
|
|
+ "0.007198227601118789\n",
|
|
|
+ "0.0028098344583314602\n",
|
|
|
+ "0.001001074624830638\n",
|
|
|
+ "0.00032741914421152555\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# print the PMF values P(X = k) for k = 1 to 14 (range(1, 15) stops before 15), with n=100 and p=0.05\n",
|
|
|
+ "for x in range(1,15):\n",
|
|
|
+ " print(binom.pmf(k=x, n=100, p=0.05))"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "X8nLTvaEnTq3"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "Banks use the binomial distribution to model the probability that a certain number of credit card transactions are fraudulent. For example, suppose it is known that 2% of all credit card transactions in a certain region are fraudulent. If there are 50 transactions per day in that region, find the probability that more than a certain number of fraudulent transactions occur in a given day:"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "XRSrHBWcnnQi"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "N = 50\n",
|
|
|
+ "r = ?\n",
|
|
|
+ "p = 0.02"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "nhZnYXcrnhhI"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X > 1 fraudulent transaction): more than one fraudulent transaction. (*At least one* would be P(X ≥ 1) = 1 - P(X = 0).)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 15,
|
|
|
+ "metadata": {
|
|
|
+ "id": "55AnJ4AKnfd4",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.26422860553827376"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 15,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "1 - binom.cdf(k=1, n=50, p=0.02)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "RL_mwLxGn5yd"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X > 2 fraudulent transactions)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 16,
|
|
|
+ "metadata": {
|
|
|
+ "id": "e2boDXO2nMwK",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.078427748350969"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 16,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "1 - binom.cdf(k=2, n=50, p=0.02)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "BFAshzZ7oG8-"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X < 5 fraudulent transactions): fewer than 5 fraudulent transactions, i.e. P(X ≤ 4)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 23,
|
|
|
+ "metadata": {
|
|
|
+ "id": "LzhI7vhkoN8R",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.9967902579739206"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 23,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "binom.cdf(k=4, n=50, p=0.02)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "LrAMWZ01ocC-"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "Email companies use the binomial distribution to model the probability that a certain number of spam emails land in an inbox per day. For example, suppose it is known that 4% of all emails are spam. If an account receives 20 emails in a given day, find the probability that a certain number of those emails are spam:"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "g4W1xcfMojTg"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X = 0 spam emails)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 24,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "Bxp6hwuIol0L",
|
|
|
+ "outputId": "c15f2e3a-c1d3-4b8b-f11f-d2a77a3aee9d",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.4420024338794074"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 24,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "binom.pmf(k=0, n=20, p=0.04)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "Uq2gLtg9otU-"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "Probability of 2 to 3 spam emails, P(2 ≤ X ≤ 3) = F(3) - F(1). (For 1 to 3 spam emails inclusive, subtract F(0) instead of F(1).)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 25,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "1Hbo-Sk_o2TK",
|
|
|
+ "outputId": "24cead1f-c3ad-4ed4-b84a-418e973591db",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.18224926744159597"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 25,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "binom.cdf(k=3, n=20, p=0.04) - binom.cdf(k=1, n=20, p=0.04)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "o3J0A3gupoAw"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "Probability of greater than 2 spam emails"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 21,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "1vx2NRGRpsGp",
|
|
|
+ "outputId": "1367a811-e272-453b-c584-59b4222bc6bb",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.04386279060114229"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 21,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "1 - binom.cdf(k=2, n=20, p=0.04)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "6PgXbx4UrcAn",
|
|
|
+ "outputId": "10ae517b-703f-4b96-edaa-9778f6606c1a"
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "r\tP(r)\n",
|
|
|
+ "0\t0.44200243387940763\n",
|
|
|
+ "1\t0.368335361566173\n",
|
|
|
+ "2\t0.14579941395327686\n",
|
|
|
+ "3\t0.03644985348831919\n",
|
|
|
+ "4\t0.006454661555223204\n",
|
|
|
+ "5\t0.0008606215406964257\n",
|
|
|
+ "6\t8.964807715587748e-05\n",
|
|
|
+ "7\t7.470673096323137e-06\n",
|
|
|
+ "8\t5.058268242302121e-07\n",
|
|
|
+ "9\t2.810149023501174e-08\n",
|
|
|
+ "10\t1.2879849691047076e-09\n",
|
|
|
+ "11\t4.878730943578432e-11\n",
|
|
|
+ "12\t1.5246034198682613e-12\n",
|
|
|
+ "13\t3.9092395381237447e-14\n",
|
|
|
+ "14\t8.1442490377578e-16\n",
|
|
|
+ "15\t1.357374839626301e-17\n",
|
|
|
+ "16\t1.7674151557634125e-19\n",
|
|
|
+ "17\t1.732759956630794e-21\n",
|
|
|
+ "18\t1.2033055254380537e-23\n",
|
|
|
+ "19\t5.277655813324789e-26\n",
|
|
|
+ "20\t1.0995116277760004e-28\n",
|
|
|
+ "Mean = 0.8\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# Exercise: write a function that prints r and P(r) for every r from 0 to n (here n=20, p=0.04), followed by the mean n*p"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "ziYALXYPsDMB"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "Park systems use the binomial distribution to model the probability that rivers overflow several times each year due to excessive rain.\n",
|
|
|
+ "For example, suppose it is known that a given river overflows during 5% of all storms. If there are 20 storms in a given year, find the probability that the river overflows a certain number of times:\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "DsqzLdgDsI8T"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X = 0 overflows)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 26,
|
|
|
+ "metadata": {
|
|
|
+ "id": "411e2BwEsF00",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.3584859224085419"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 26,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "binom.pmf(k=0, n=20, p=0.05)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "oDbHqxjCsXHj"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X = 1 overflow)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 27,
|
|
|
+ "metadata": {
|
|
|
+ "id": "XmEcPjZcsYNd",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.37735360253530753"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 27,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "binom.pmf(k=1, n=20, p=0.05)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "qH6cOSKesgEW"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X = 2 overflows)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 28,
|
|
|
+ "metadata": {
|
|
|
+ "id": "GgLPTFh0sisT",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.18867680126765404"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 28,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "binom.pmf(k=2, n=20, p=0.05)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "cEUgXXU5ssU-"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "Retail stores use the binomial distribution to model the probability that they receive a certain number of shopping returns each week. For example, suppose it is known that 10% of all orders get returned at a certain store each week. If there are 50 orders that week, find the probability that the store receives a certain number of returns, or more than a certain number, that week:"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "i6niaOHSszSh"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X = 5 returns)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 29,
|
|
|
+ "metadata": {
|
|
|
+ "id": "-tsNTuAasw_v",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.1849246008952154"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 29,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "binom.pmf(k=5, n=50, p=0.1)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "TNkn8yuMuEzD"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X > 10 returns)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 30,
|
|
|
+ "metadata": {
|
|
|
+ "id": "UUkUyMrvuHiF",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.009354601587329037"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 30,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "1 - binom.cdf(k=10, n=50, p=0.1)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "XlIqhmhCujAt"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "P(X > 15 returns)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 31,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "xG2U0AFBul64",
|
|
|
+ "outputId": "491b647f-3541-434c-bc06-50f05fa8a6e9",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "1.7496921685511424e-05"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 31,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "1 - binom.cdf(k=15, n=50, p=0.1)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "i4pljlDtuwZl"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "The height of men, computed from data, is found to be normally distributed with a mean of 175 cm and a standard deviation of 5 cm. What % of men are shorter than 180 cm?\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "iEXmzYd1u9LH"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "180 cm is 1 standard deviation to the right of the mean. 50% of observations lie to the left of the mean and 68/2 = 34% of observations lie between 175 and 180, so about 84% (50% + 34%) of men are shorter than 180 cm."
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 40,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "Ge--6oyDu-vY",
|
|
|
+ "outputId": "642b5719-6dc5-43b0-872c-c9805018216a",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "Percentage of men shorter than 180 cm: 84.13\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "from scipy.stats import norm\n",
|
|
|
+ "mu = 175\n",
|
|
|
+ "sigma = 5\n",
|
|
|
+ "#Find the z-score\n",
|
|
|
+ "z = (180-mu)/sigma\n",
|
|
|
+ "#Find the probability (area under the curve until 180 cm)\n",
|
|
|
+ "p = norm.cdf(z)\n",
|
|
|
+ "print('Percentage of men shorter than 180 cm:', round(p*100,2))\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "1UMv_GMDvHuF"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "What % of men are shorter than 183 cm?"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 36,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "iPtd3LRgvKfS",
|
|
|
+ "outputId": "5b9643f2-bc03-42da-ddd3-e549f0918b90",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "Percentage of men shorter than 183 cm: 94.52\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "#Find the z-score\n",
|
|
|
+ "z = (183-mu)/sigma\n",
|
|
|
+ "#Find the probability (area under the curve until 183 cm)\n",
|
|
|
+ "p = norm.cdf(z)\n",
|
|
|
+ "print('Percentage of men shorter than 183 cm:', round(p*100,2))"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "wlRNKKIqvTt0"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "What is the probability of a man’s height being more than 177 cm?"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 37,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "24LlsZwevU2u",
|
|
|
+ "outputId": "6f728ea8-8d2e-494c-f7b2-cf18248e9b43",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "Percentage of men taller than 177 cm: 34.46\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "#Find the z-score\n",
|
|
|
+ "z = (177-mu)/sigma\n",
|
|
|
+ "#Find the probability (1 - area under the curve until 177 cm)\n",
|
|
|
+ "p = 1 - norm.cdf(z)\n",
|
|
|
+ "print('Percentage of men taller than 177 cm:', round(p*100,2))\n",
|
|
|
+ "\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "S515K2OnvcP1"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "What is the height of a man who is taller than 80% of all the men?"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 38,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "FZDiu_NWvc-q",
|
|
|
+ "outputId": "a5a284ab-bf10-443f-bfe5-4cf207b8fd7f",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "The height for which 80% men are shorter: 179.21\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "#Find the z-score for 80th percentile\n",
|
|
|
+ "z = norm.ppf(0.8) #percent point function, gives percentile\n",
|
|
|
+ "x = (z*sigma) + mu #find the value using z-score formula\n",
|
|
|
+ "\n",
|
|
|
+ "print(\"The height for which 80% men are shorter:\", round(x,2))\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "yMN5QHeCvtJM"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "The retirement age of NFL players is normally distributed with a mean of 33 years and a standard deviation of about 2 years. What is the probability that a player retires at or before age 35, i.e. P(X ≤ 35)?"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 46,
|
|
|
+ "metadata": {
|
|
|
+ "id": "8hzglcGgwYhV",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "p of x=35: 0.8413447460685429\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "mu = 33\n",
|
|
|
+ "sigma = 2\n",
|
|
|
+ "z = (35-mu)/sigma\n",
|
|
|
+ "p = norm.cdf(z)\n",
|
|
|
+ "\n",
|
|
|
+ "print(\"p of x=35:\", p)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "EjkhkzJoxFFj"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
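+ "The diastolic blood pressure of men is normally distributed with a mean of about 80 and a standard deviation of 20. What is the blood pressure for which 75% of men have a higher bp? Note: that value is the 25th percentile, `norm.ppf(0.25)`; the cell below uses `norm.ppf(0.75)`, which returns the value that 75% of men are below."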
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 48,
|
|
|
+ "metadata": {
|
|
|
+ "id": "0ZQut1_iyQqu",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "The height for which 80% men are shorter: 93.48979500392163\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "mu = 80\n",
|
|
|
+ "sigma = 20\n",
|
|
|
+ "z = norm.ppf(0.75)\n",
|
|
|
+ "x = (z*sigma) + mu #find the value using z-score formula\n",
|
|
|
+ "\n",
|
|
|
+ "print(\"The height for which 80% men are shorter:\", x)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "nc1GkEmGzDvN"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "In a call center, the distribution of the number of phone calls answered each day by each of the 12 receptionists is bell-shaped and has a mean of 63 and a standard deviation of 3. Using the empirical rule, what is the approximate percentage of daily phone calls numbering between 60 and 66?"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 50,
|
|
|
+ "metadata": {
|
|
|
+ "id": "U4XuQIAtzFAP",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "mu = 63\n",
|
|
|
+ "sigma = 3\n",
|
|
|
+ "z1 = (66-mu)/sigma\n",
|
|
|
+ "z2 = (60-mu)/sigma"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 51,
|
|
|
+ "metadata": {
|
|
|
+ "id": "zUGcvNYY1Mr9",
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0.6826894921370859"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 51,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "norm.cdf(z1) - norm.cdf(z2)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "-t_1Tqqt4t_c"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "- One standard deviation (µ ± σ): 68%\n",
|
|
|
+ "- Two standard deviations (µ ± 2σ): 95%\n",
|
|
|
+ "- Three standard deviations (µ ± 3σ): 99.7%"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "provenance": []
|
|
|
+ },
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python [conda env:ns-user-38_v1]",
|
|
|
+ "language": "python",
|
|
|
+ "name": "conda-env-ns-user-38_v1-py"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.11.9"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 4
|
|
|
+}
|