{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "de01e3e8-b122-4809-b19f-8bb1f71cbecd", "metadata": { "id": "gIWMSTWcu5IG", "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "%matplotlib inline\n", "import warnings\n", "from IPython.display import Image\n", "warnings.filterwarnings('ignore')\n", "import os\n", "import sklearn" ] }, { "cell_type": "code", "execution_count": 2, "id": "96e0e180-7df5-40e3-bd7b-aaad1d22b3b3", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
skunational_invlead_timein_transit_qtyforecast_3_monthforecast_6_monthforecast_9_monthsales_1_monthsales_3_monthsales_6_month...pieces_past_dueperf_6_month_avgperf_12_month_avglocal_bo_qtydeck_riskoe_constraintppap_riskstop_auto_buyrev_stopwent_on_backorder
01888279NullNaN00.00.00.00.00.015.0...0.0-99.00-99.000.0NoNoYesYesNoNo
1187055772.000.00.00.00.00.00.0...0.00.500.280.0YesNoNoYesNoNo
2147548125815.01010.077.0184.046.0132.0256.0...0.00.540.700.0NoNoNoYesNoNo
31758220462.000.00.00.01.02.06.0...0.00.750.900.0YesNoNoYesNoNo
\n", "

4 rows × 23 columns

\n", "
" ], "text/plain": [ " sku national_inv lead_time in_transit_qty forecast_3_month \\\n", "0 1888279 Null NaN 0 0.0 \n", "1 1870557 7 2.0 0 0.0 \n", "2 1475481 258 15.0 10 10.0 \n", "3 1758220 46 2.0 0 0.0 \n", "\n", " forecast_6_month forecast_9_month sales_1_month sales_3_month \\\n", "0 0.0 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 0.0 \n", "2 77.0 184.0 46.0 132.0 \n", "3 0.0 0.0 1.0 2.0 \n", "\n", " sales_6_month ... pieces_past_due perf_6_month_avg perf_12_month_avg \\\n", "0 15.0 ... 0.0 -99.00 -99.00 \n", "1 0.0 ... 0.0 0.50 0.28 \n", "2 256.0 ... 0.0 0.54 0.70 \n", "3 6.0 ... 0.0 0.75 0.90 \n", "\n", " local_bo_qty deck_risk oe_constraint ppap_risk stop_auto_buy rev_stop \\\n", "0 0.0 No No Yes Yes No \n", "1 0.0 Yes No No Yes No \n", "2 0.0 No No No Yes No \n", "3 0.0 Yes No No Yes No \n", "\n", " went_on_backorder \n", "0 No \n", "1 No \n", "2 No \n", "3 No \n", "\n", "[4 rows x 23 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the dataset\n", "orders = pd.read_csv('BackOrders.csv')\n", "\n", "# Strip extra spaces from column names\n", "orders.columns = orders.columns.str.strip()\n", "\n", "# Explore the first few rows of the dataset\n", "orders.head(4)" ] }, { "cell_type": "code", "execution_count": 5, "id": "ccb8b001-64aa-48aa-b958-165ce193a4c9", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(61594, 23)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "orders.shape" ] }, { "cell_type": "code", "execution_count": 6, "id": "c0f7c2b7-47b3-47b4-bfdd-57e53e1f2537", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 61594 entries, 0 to 61593\n", "Data columns (total 23 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 sku 61594 non-null int64 \n", " 1 national_inv 61589 non-null object \n", " 2 lead_time 58186 non-null float64\n", " 3 
in_transit_qty 61589 non-null object \n", " 4 forecast_3_month 61589 non-null float64\n", " 5 forecast_6_month 61589 non-null float64\n", " 6 forecast_9_month 61589 non-null float64\n", " 7 sales_1_month 61589 non-null float64\n", " 8 sales_3_month 61589 non-null float64\n", " 9 sales_6_month 61589 non-null float64\n", " 10 sales_9_month 61589 non-null float64\n", " 11 min_bank 61589 non-null float64\n", " 12 potential_issue 61589 non-null object \n", " 13 pieces_past_due 61589 non-null float64\n", " 14 perf_6_month_avg 61589 non-null float64\n", " 15 perf_12_month_avg 61589 non-null float64\n", " 16 local_bo_qty 61589 non-null float64\n", " 17 deck_risk 61589 non-null object \n", " 18 oe_constraint 61589 non-null object \n", " 19 ppap_risk 61589 non-null object \n", " 20 stop_auto_buy 61589 non-null object \n", " 21 rev_stop 61589 non-null object \n", " 22 went_on_backorder 61589 non-null object \n", "dtypes: float64(13), int64(1), object(9)\n", "memory usage: 10.8+ MB\n" ] } ], "source": [ "orders.info()" ] }, { "cell_type": "code", "execution_count": 11, "id": "31ce8231-29b9-4861-aeac-3c295c726e18", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "lead_time\n", "8.0 25116\n", "2.0 13314\n", "12.0 7007\n", "4.0 4372\n", "9.0 4281\n", "52.0 870\n", "3.0 675\n", "0.0 498\n", "10.0 476\n", "14.0 350\n", "16.0 326\n", "6.0 203\n", "13.0 199\n", "5.0 158\n", "15.0 141\n", "17.0 113\n", "11.0 43\n", "20.0 15\n", "30.0 9\n", "18.0 4\n", "7.0 4\n", "22.0 4\n", "24.0 2\n", "26.0 2\n", "1.0 1\n", "40.0 1\n", "35.0 1\n", "28.0 1\n", "Name: count, dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "orders['lead_time'].value_counts()" ] }, { "cell_type": "code", "execution_count": 9, "id": "3272ff49-3896-45f7-9857-c33f9e390186", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "sku 0\n", "national_inv 5\n", "lead_time 3408\n", "in_transit_qty 6\n", "forecast_3_month 5\n", 
"forecast_6_month 5\n", "forecast_9_month 5\n", "sales_1_month 5\n", "sales_3_month 5\n", "sales_6_month 5\n", "sales_9_month 5\n", "min_bank 5\n", "potential_issue 5\n", "pieces_past_due 5\n", "perf_6_month_avg 5\n", "perf_12_month_avg 5\n", "local_bo_qty 5\n", "deck_risk 5\n", "oe_constraint 5\n", "ppap_risk 5\n", "stop_auto_buy 5\n", "rev_stop 5\n", "went_on_backorder 5\n", "dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Check for missing values, NaNs, Nulls and special symbols like '?'.\n", "# Placeholder tokens are compared case-insensitively and with surrounding\n", "# whitespace stripped, so spellings such as 'Null' or ' NA ' are counted too\n", "# (an exact isin() match on 'NULL' silently skips the 'Null' seen in national_inv).\n", "missing_tokens = {'null', '?', 'na', 'nan', '#'}\n", "\n", "# Only string cells can hold a placeholder token; numeric cells and real NaNs map to False.\n", "is_placeholder = orders.apply(\n", "    lambda col: col.map(lambda v: isinstance(v, str) and v.strip().lower() in missing_tokens)\n", ")\n", "\n", "# Per-column count of cells that are either truly null or a placeholder token.\n", "missing_values_na_null_symbols = (orders.isnull() | is_placeholder).sum()\n", "missing_values_na_null_symbols" ] }, { "cell_type": "code", "execution_count": null, "id": "0193a642-041f-479b-addb-7bcf84b720a4", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.5" } }, "nbformat": 4, "nbformat_minor": 5 }