bodicsek před 1 rokem
rodič
revize
84e7ceddd2
1 změnil soubory, kde provedl 616 přidání a 0 odebrání
  1. 616 0
      day3/case-study-1.ipynb

+ 616 - 0
day3/case-study-1.ipynb

@@ -0,0 +1,616 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "423932f7-1217-4d5e-8cee-1b4e0a6855e1",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### OCI Data Science - Useful Tips\n",
+    "<details>\n",
+    "<summary><font size=\"2\">Check for Public Internet Access</font></summary>\n",
+    "\n",
+    "```python\n",
+    "import requests\n",
+    "response = requests.get(\"https://oracle.com\")\n",
+    "assert response.status_code==200, \"Internet connection failed\"\n",
+    "```\n",
+    "</details>\n",
+    "<details>\n",
+    "<summary><font size=\"2\">Helpful Documentation </font></summary>\n",
+    "<ul><li><a href=\"https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm\">Data Science Service Documentation</a></li>\n",
+    "<li><a href=\"https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html\">ADS documentation</a></li>\n",
+    "</ul>\n",
+    "</details>\n",
+    "<details>\n",
+    "<summary><font size=\"2\">Typical Cell Imports and Settings for ADS</font></summary>\n",
+    "\n",
+    "```python\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "%matplotlib inline\n",
+    "\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "\n",
+    "import logging\n",
+    "logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)\n",
+    "\n",
+    "import ads\n",
+    "from ads.dataset.factory import DatasetFactory\n",
+    "from ads.automl.provider import OracleAutoMLProvider\n",
+    "from ads.automl.driver import AutoML\n",
+    "from ads.evaluations.evaluator import ADSEvaluator\n",
+    "from ads.common.data import ADSData\n",
+    "from ads.explanations.explainer import ADSExplainer\n",
+    "from ads.explanations.mlx_global_explainer import MLXGlobalExplainer\n",
+    "from ads.explanations.mlx_local_explainer import MLXLocalExplainer\n",
+    "from ads.catalog.model import ModelCatalog\n",
+    "from ads.common.model_artifact import ModelArtifact\n",
+    "```\n",
+    "</details>\n",
+    "<details>\n",
+    "<summary><font size=\"2\">Useful Environment Variables</font></summary>\n",
+    "\n",
+    "```python\n",
+    "import os\n",
+    "print(os.environ[\"NB_SESSION_COMPARTMENT_OCID\"])\n",
+    "print(os.environ[\"PROJECT_OCID\"])\n",
+    "print(os.environ[\"USER_OCID\"])\n",
+    "print(os.environ[\"TENANCY_OCID\"])\n",
+    "print(os.environ[\"NB_REGION\"])\n",
+    "```\n",
+    "</details>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "c8e038c6-c252-4c41-88ac-5d4d4996e59b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "688aaeb5-6e94-4741-ab02-e6b159325a13",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "mpg = pd.read_csv('~/ns-user-38/day3/predictive_maintenance.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "cc403108-5d3b-449a-b685-00fff0637761",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(10000, 10)"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mpg.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "772a10b9-c4b3-4c73-b6fc-20cd5432e4cd",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>UDI</th>\n",
+       "      <th>Product ID</th>\n",
+       "      <th>Type</th>\n",
+       "      <th>Air temperature [K]</th>\n",
+       "      <th>Process temperature [K]</th>\n",
+       "      <th>Rotational speed [rpm]</th>\n",
+       "      <th>Torque [Nm]</th>\n",
+       "      <th>Tool wear [min]</th>\n",
+       "      <th>Target</th>\n",
+       "      <th>Failure Type</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>M14860</td>\n",
+       "      <td>M</td>\n",
+       "      <td>298.1</td>\n",
+       "      <td>308.6</td>\n",
+       "      <td>1551</td>\n",
+       "      <td>42.8</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>No Failure</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>L47181</td>\n",
+       "      <td>L</td>\n",
+       "      <td>298.2</td>\n",
+       "      <td>308.7</td>\n",
+       "      <td>1408</td>\n",
+       "      <td>46.3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>No Failure</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>L47182</td>\n",
+       "      <td>L</td>\n",
+       "      <td>298.1</td>\n",
+       "      <td>308.5</td>\n",
+       "      <td>1498</td>\n",
+       "      <td>49.4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>No Failure</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>L47183</td>\n",
+       "      <td>L</td>\n",
+       "      <td>298.2</td>\n",
+       "      <td>308.6</td>\n",
+       "      <td>1433</td>\n",
+       "      <td>39.5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0</td>\n",
+       "      <td>No Failure</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>L47184</td>\n",
+       "      <td>L</td>\n",
+       "      <td>298.2</td>\n",
+       "      <td>308.7</td>\n",
+       "      <td>1408</td>\n",
+       "      <td>40.0</td>\n",
+       "      <td>9</td>\n",
+       "      <td>0</td>\n",
+       "      <td>No Failure</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   UDI Product ID Type  Air temperature [K]  Process temperature [K]  \\\n",
+       "0    1     M14860    M                298.1                    308.6   \n",
+       "1    2     L47181    L                298.2                    308.7   \n",
+       "2    3     L47182    L                298.1                    308.5   \n",
+       "3    4     L47183    L                298.2                    308.6   \n",
+       "4    5     L47184    L                298.2                    308.7   \n",
+       "\n",
+       "   Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Target Failure Type  \n",
+       "0                    1551         42.8                0       0   No Failure  \n",
+       "1                    1408         46.3                3       0   No Failure  \n",
+       "2                    1498         49.4                5       0   No Failure  \n",
+       "3                    1433         39.5                7       0   No Failure  \n",
+       "4                    1408         40.0                9       0   No Failure  "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mpg.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "e5698130-0254-46d3-94a5-05b5c32d9e6c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "mpg_update = mpg.drop(['UDI','Product ID','Type','Failure Type','Target'], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "8473e9bf-77b0-473e-b9c2-f19996a3ec08",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Air temperature [K]</th>\n",
+       "      <th>Process temperature [K]</th>\n",
+       "      <th>Rotational speed [rpm]</th>\n",
+       "      <th>Torque [Nm]</th>\n",
+       "      <th>Tool wear [min]</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>10000.000000</td>\n",
+       "      <td>10000.000000</td>\n",
+       "      <td>10000.000000</td>\n",
+       "      <td>10000.000000</td>\n",
+       "      <td>10000.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>300.004930</td>\n",
+       "      <td>310.005560</td>\n",
+       "      <td>1538.776100</td>\n",
+       "      <td>39.986910</td>\n",
+       "      <td>107.951000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>2.000259</td>\n",
+       "      <td>1.483734</td>\n",
+       "      <td>179.284096</td>\n",
+       "      <td>9.968934</td>\n",
+       "      <td>63.654147</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>295.300000</td>\n",
+       "      <td>305.700000</td>\n",
+       "      <td>1168.000000</td>\n",
+       "      <td>3.800000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>298.300000</td>\n",
+       "      <td>308.800000</td>\n",
+       "      <td>1423.000000</td>\n",
+       "      <td>33.200000</td>\n",
+       "      <td>53.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>300.100000</td>\n",
+       "      <td>310.100000</td>\n",
+       "      <td>1503.000000</td>\n",
+       "      <td>40.100000</td>\n",
+       "      <td>108.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>301.500000</td>\n",
+       "      <td>311.100000</td>\n",
+       "      <td>1612.000000</td>\n",
+       "      <td>46.800000</td>\n",
+       "      <td>162.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>304.500000</td>\n",
+       "      <td>313.800000</td>\n",
+       "      <td>2886.000000</td>\n",
+       "      <td>76.600000</td>\n",
+       "      <td>253.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Air temperature [K]  Process temperature [K]  Rotational speed [rpm]  \\\n",
+       "count         10000.000000             10000.000000            10000.000000   \n",
+       "mean            300.004930               310.005560             1538.776100   \n",
+       "std               2.000259                 1.483734              179.284096   \n",
+       "min             295.300000               305.700000             1168.000000   \n",
+       "25%             298.300000               308.800000             1423.000000   \n",
+       "50%             300.100000               310.100000             1503.000000   \n",
+       "75%             301.500000               311.100000             1612.000000   \n",
+       "max             304.500000               313.800000             2886.000000   \n",
+       "\n",
+       "        Torque [Nm]  Tool wear [min]  \n",
+       "count  10000.000000     10000.000000  \n",
+       "mean      39.986910       107.951000  \n",
+       "std        9.968934        63.654147  \n",
+       "min        3.800000         0.000000  \n",
+       "25%       33.200000        53.000000  \n",
+       "50%       40.100000       108.000000  \n",
+       "75%       46.800000       162.000000  \n",
+       "max       76.600000       253.000000  "
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mpg_update.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "82381e8b-4b7a-405e-a930-41b4875f1c35",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Product ID</th>\n",
+       "      <th>Type</th>\n",
+       "      <th>Failure Type</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>10000</td>\n",
+       "      <td>10000</td>\n",
+       "      <td>10000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>unique</th>\n",
+       "      <td>10000</td>\n",
+       "      <td>3</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>top</th>\n",
+       "      <td>M14860</td>\n",
+       "      <td>L</td>\n",
+       "      <td>No Failure</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>freq</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6000</td>\n",
+       "      <td>9652</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Product ID   Type Failure Type\n",
+       "count       10000  10000        10000\n",
+       "unique      10000      3            6\n",
+       "top        M14860      L   No Failure\n",
+       "freq            1   6000         9652"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# categorcial variable summary\n",
+    "\n",
+    "mpg.describe(include=['O'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "c5f61e38-83be-46cb-98da-5bf56b87892d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 10000 entries, 0 to 9999\n",
+      "Data columns (total 10 columns):\n",
+      " #   Column                   Non-Null Count  Dtype  \n",
+      "---  ------                   --------------  -----  \n",
+      " 0   UDI                      10000 non-null  int64  \n",
+      " 1   Product ID               10000 non-null  object \n",
+      " 2   Type                     10000 non-null  object \n",
+      " 3   Air temperature [K]      10000 non-null  float64\n",
+      " 4   Process temperature [K]  10000 non-null  float64\n",
+      " 5   Rotational speed [rpm]   10000 non-null  int64  \n",
+      " 6   Torque [Nm]              10000 non-null  float64\n",
+      " 7   Tool wear [min]          10000 non-null  int64  \n",
+      " 8   Target                   10000 non-null  int64  \n",
+      " 9   Failure Type             10000 non-null  object \n",
+      "dtypes: float64(3), int64(4), object(3)\n",
+      "memory usage: 781.4+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "#printing data types\n",
+    "\n",
+    "mpg.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "91ffc9e8-814b-429f-90ef-1b20d3f34c02",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "UDI                        10000\n",
+       "Product ID                 10000\n",
+       "Type                           3\n",
+       "Air temperature [K]           93\n",
+       "Process temperature [K]       82\n",
+       "Rotational speed [rpm]       941\n",
+       "Torque [Nm]                  577\n",
+       "Tool wear [min]              246\n",
+       "Target                         2\n",
+       "Failure Type                   6\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mpg.nunique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "de56763d-31cc-45d9-887c-d2a0f76956f5",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "UDI                        0\n",
+       "Product ID                 0\n",
+       "Type                       0\n",
+       "Air temperature [K]        0\n",
+       "Process temperature [K]    0\n",
+       "Rotational speed [rpm]     0\n",
+       "Torque [Nm]                0\n",
+       "Tool wear [min]            0\n",
+       "Target                     0\n",
+       "Failure Type               0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#check for missing entries\n",
+    "mpg.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8a15fe03-d630-4108-ba07-97ba6b1ba4f6",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:ns-user-38_v1]",
+   "language": "python",
+   "name": "conda-env-ns-user-38_v1-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}