{
"cells": [
{
"cell_type": "markdown",
"id": "177c8898-d9f3-4114-b646-b0284815da40",
"metadata": {},
"source": [
"# OSP Orders\n",
"\n",
"##### Independent variables (**X**)\n",
"* TRX_SUB_TYPE\n",
"* ORDERED_DATE (month)\n",
"* COUNTRY_CODE\n",
"* SUBSCRIBER_CODE\n",
"* QUANTITY\n",
"* UNIT_LIST_PRICE\n",
"* UNIT_SELLING_PRICE\n",
"* TAX_VALUE\n",
"* TOTAL_DISCOUNT_PCT\n",
"\n",
"##### Dependent variable (**y**)\n",
"\n",
"* LINE_TOTAL_USD"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d3007836-2941-4140-9f60-e4ca0f47d9ad",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4351663f-c2d0-40d3-bd67-63cf0f0dfe8c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ORDER_NUMBER | \n",
" ORDER_STATUS | \n",
" APP_NAME | \n",
" TRX_TYPE | \n",
" TRX_SUB_TYPE | \n",
" POSTAL_CODE | \n",
" PAYMENT_TYPE_CODE | \n",
" TAX_EXEMPT_FLAG | \n",
" CURRENCY_CODE | \n",
" END_USE | \n",
" ... | \n",
" STD_DISCOUNT | \n",
" ADDL_DISCOUNT | \n",
" UNIT_NET_SELLING_PRICE | \n",
" STD_DISCOUNT_PCT | \n",
" ADDL_DISCOUNT_PCT | \n",
" TOTAL_DISCOUNT_PCT | \n",
" PRICE_OVERRIDE_FLAG | \n",
" LINE_PRODUCT_TYPE | \n",
" DATA_CENTER | \n",
" ERROR_FLAG | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 37994600 | \n",
" DOO_SUBMITTED | \n",
" Cloud Renewals | \n",
" CSM | \n",
" CLOUD | \n",
" NaN | \n",
" CC | \n",
" S | \n",
" USD | \n",
" NaN | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 0.0 | \n",
" NaN | \n",
" NaN | \n",
" OPC-GLOBAL | \n",
" N | \n",
"
\n",
" \n",
" | 1 | \n",
" 39591076 | \n",
" DOO_DRAFT | \n",
" Partner Store Application | \n",
" CART | \n",
" CLOUD | \n",
" 94065 | \n",
" CC | \n",
" S | \n",
" USD | \n",
" NaN | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 40.0 | \n",
" NaN | \n",
" 40.0 | \n",
" NaN | \n",
" NaN | \n",
" US001 | \n",
" N | \n",
"
\n",
" \n",
" | 2 | \n",
" 37955812 | \n",
" DOO_DRAFT | \n",
" Partner Store Application | \n",
" CART | \n",
" COMBINED | \n",
" 94002 | \n",
" PO | \n",
" S | \n",
" USD | \n",
" NaN | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 30.0 | \n",
" NaN | \n",
" 30.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" N | \n",
"
\n",
" \n",
" | 3 | \n",
" 37955812 | \n",
" DOO_DRAFT | \n",
" Partner Store Application | \n",
" CART | \n",
" COMBINED | \n",
" 94002 | \n",
" PO | \n",
" S | \n",
" USD | \n",
" NaN | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 30.0 | \n",
" NaN | \n",
" 30.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" N | \n",
"
\n",
" \n",
"
\n",
"
4 rows × 49 columns
\n",
"
"
],
"text/plain": [
" ORDER_NUMBER ORDER_STATUS APP_NAME TRX_TYPE \\\n",
"0 37994600 DOO_SUBMITTED Cloud Renewals CSM \n",
"1 39591076 DOO_DRAFT Partner Store Application CART \n",
"2 37955812 DOO_DRAFT Partner Store Application CART \n",
"3 37955812 DOO_DRAFT Partner Store Application CART \n",
"\n",
" TRX_SUB_TYPE POSTAL_CODE PAYMENT_TYPE_CODE TAX_EXEMPT_FLAG CURRENCY_CODE \\\n",
"0 CLOUD NaN CC S USD \n",
"1 CLOUD 94065 CC S USD \n",
"2 COMBINED 94002 PO S USD \n",
"3 COMBINED 94002 PO S USD \n",
"\n",
" END_USE ... STD_DISCOUNT ADDL_DISCOUNT UNIT_NET_SELLING_PRICE \\\n",
"0 NaN ... NaN NaN NaN \n",
"1 NaN ... NaN NaN NaN \n",
"2 NaN ... NaN NaN NaN \n",
"3 NaN ... NaN NaN NaN \n",
"\n",
" STD_DISCOUNT_PCT ADDL_DISCOUNT_PCT TOTAL_DISCOUNT_PCT PRICE_OVERRIDE_FLAG \\\n",
"0 NaN NaN 0.0 NaN \n",
"1 40.0 NaN 40.0 NaN \n",
"2 30.0 NaN 30.0 NaN \n",
"3 30.0 NaN 30.0 NaN \n",
"\n",
" LINE_PRODUCT_TYPE DATA_CENTER ERROR_FLAG \n",
"0 NaN OPC-GLOBAL N \n",
"1 NaN US001 N \n",
"2 NaN NaN N \n",
"3 NaN NaN N \n",
"\n",
"[4 rows x 49 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the dataset\n",
"orders = pd.read_csv('orders.csv')\n",
"orders.head(4)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "fd0359ad-fb36-45e6-ae74-9ceb92bb7bf7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(4326, 49)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"orders.shape # data points and # columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7b93416c-8812-4a71-849c-105a84caea9a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 4326 entries, 0 to 4325\n",
"Data columns (total 49 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 ORDER_NUMBER 4326 non-null int64 \n",
" 1 ORDER_STATUS 4326 non-null object \n",
" 2 APP_NAME 4326 non-null object \n",
" 3 TRX_TYPE 4326 non-null object \n",
" 4 TRX_SUB_TYPE 4326 non-null object \n",
" 5 POSTAL_CODE 2067 non-null object \n",
" 6 PAYMENT_TYPE_CODE 4246 non-null object \n",
" 7 TAX_EXEMPT_FLAG 4326 non-null object \n",
" 8 CURRENCY_CODE 4326 non-null object \n",
" 9 END_USE 4 non-null object \n",
" 10 CUSTOMER_TYPE 186 non-null object \n",
" 11 CUSTOMER_SIZE 186 non-null float64\n",
" 12 PUBLIC_SECTOR_FLAG 1581 non-null object \n",
" 13 ORDERED_DATE 4326 non-null object \n",
" 14 AUTO_RENEW_FLAG 2421 non-null object \n",
" 15 CART_PRODUCT_TYPE 1808 non-null object \n",
" 16 AUTOBK_FLAG 4243 non-null object \n",
" 17 COUNTRY_CODE 4323 non-null object \n",
" 18 INSTALL_COUNTRY_CODE 1377 non-null object \n",
" 19 INVOICING_FREQUENCY 3231 non-null object \n",
" 20 CLOUD_ORDER_TYPE 9 non-null object \n",
" 21 SALES_CHANNEL_CODE 1891 non-null object \n",
" 22 INVOICING_TYPE 128 non-null object \n",
" 23 SUBSCRIBER_CODE 4326 non-null object \n",
" 24 SALES_PERSON 4326 non-null object \n",
" 25 CREATION_DATE 4326 non-null object \n",
" 26 MONTH_NUM 4326 non-null object \n",
" 27 LINE_TYPE 4326 non-null object \n",
" 28 QUANTITY 4326 non-null int64 \n",
" 29 UNIT_LIST_PRICE 4326 non-null float64\n",
" 30 UNIT_SELLING_PRICE 4326 non-null float64\n",
" 31 TAX_VALUE 1376 non-null float64\n",
" 32 LINE_TOTAL_LOCAL_CURRENCY 4326 non-null float64\n",
" 33 LINE_TOTAL_USD 4326 non-null float64\n",
" 34 LINE_NUMBER 4326 non-null object \n",
" 35 TERM_UOM 3470 non-null object \n",
" 36 ESTIMATED_TAX_AMOUNT 210 non-null float64\n",
" 37 TERM 3470 non-null float64\n",
" 38 PART_NUMBER 4326 non-null object \n",
" 39 STD_DISCOUNT 0 non-null float64\n",
" 40 ADDL_DISCOUNT 0 non-null float64\n",
" 41 UNIT_NET_SELLING_PRICE 187 non-null float64\n",
" 42 STD_DISCOUNT_PCT 1164 non-null float64\n",
" 43 ADDL_DISCOUNT_PCT 27 non-null float64\n",
" 44 TOTAL_DISCOUNT_PCT 4326 non-null float64\n",
" 45 PRICE_OVERRIDE_FLAG 2 non-null object \n",
" 46 LINE_PRODUCT_TYPE 116 non-null object \n",
" 47 DATA_CENTER 2509 non-null object \n",
" 48 ERROR_FLAG 4326 non-null object \n",
"dtypes: float64(14), int64(2), object(33)\n",
"memory usage: 1.6+ MB\n"
]
}
],
"source": [
"orders.info() # data types of each column"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2caf9bce-8137-40bc-8c71-34607b159b7a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ORDER_NUMBER | \n",
" CUSTOMER_SIZE | \n",
" QUANTITY | \n",
" UNIT_LIST_PRICE | \n",
" UNIT_SELLING_PRICE | \n",
" TAX_VALUE | \n",
" LINE_TOTAL_LOCAL_CURRENCY | \n",
" LINE_TOTAL_USD | \n",
" ESTIMATED_TAX_AMOUNT | \n",
" TERM | \n",
" STD_DISCOUNT | \n",
" ADDL_DISCOUNT | \n",
" UNIT_NET_SELLING_PRICE | \n",
" STD_DISCOUNT_PCT | \n",
" ADDL_DISCOUNT_PCT | \n",
" TOTAL_DISCOUNT_PCT | \n",
"
\n",
" \n",
" \n",
" \n",
" | count | \n",
" 4.326000e+03 | \n",
" 186.0 | \n",
" 4326.000000 | \n",
" 4.326000e+03 | \n",
" 4.326000e+03 | \n",
" 1.376000e+03 | \n",
" 4.326000e+03 | \n",
" 4.326000e+03 | \n",
" 210.000000 | \n",
" 3470.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 187.0 | \n",
" 1164.000000 | \n",
" 27.000000 | \n",
" 4326.000000 | \n",
"
\n",
" \n",
" | mean | \n",
" 4.024189e+07 | \n",
" 501.0 | \n",
" 115.626214 | \n",
" 6.346140e+04 | \n",
" 6.109841e+04 | \n",
" 4.754365e+04 | \n",
" 1.951080e+05 | \n",
" 1.818930e+04 | \n",
" 49.084143 | \n",
" 11.164265 | \n",
" NaN | \n",
" NaN | \n",
" 0.0 | \n",
" 22.915808 | \n",
" 23.279975 | \n",
" 6.311271 | \n",
"
\n",
" \n",
" | std | \n",
" 9.895896e+05 | \n",
" 0.0 | \n",
" 2415.216262 | \n",
" 1.674418e+06 | \n",
" 1.671203e+06 | \n",
" 9.055148e+05 | \n",
" 3.937639e+06 | \n",
" 4.206390e+05 | \n",
" 488.650876 | \n",
" 5.009888 | \n",
" NaN | \n",
" NaN | \n",
" 0.0 | \n",
" 9.373484 | \n",
" 24.491179 | \n",
" 11.728768 | \n",
"
\n",
" \n",
" | min | \n",
" 3.781086e+07 | \n",
" 501.0 | \n",
" 0.000000 | \n",
" 0.000000e+00 | \n",
" 0.000000e+00 | \n",
" 0.000000e+00 | \n",
" 0.000000e+00 | \n",
" 0.000000e+00 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
" NaN | \n",
" NaN | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" | 25% | \n",
" 3.964231e+07 | \n",
" 501.0 | \n",
" 1.000000 | \n",
" 7.318800e+00 | \n",
" 7.318800e+00 | \n",
" 0.000000e+00 | \n",
" 2.592000e+01 | \n",
" 2.592000e+01 | \n",
" 0.000000 | \n",
" 12.000000 | \n",
" NaN | \n",
" NaN | \n",
" 0.0 | \n",
" 17.000000 | \n",
" 3.500000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" | 50% | \n",
" 3.971750e+07 | \n",
" 501.0 | \n",
" 1.000000 | \n",
" 7.930800e+01 | \n",
" 7.080370e+01 | \n",
" 0.000000e+00 | \n",
" 2.855958e+02 | \n",
" 2.614900e+02 | \n",
" 0.000000 | \n",
" 12.000000 | \n",
" NaN | \n",
" NaN | \n",
" 0.0 | \n",
" 25.000000 | \n",
" 10.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" | 75% | \n",
" 4.132187e+07 | \n",
" 501.0 | \n",
" 5.000000 | \n",
" 4.960799e+02 | \n",
" 4.579200e+02 | \n",
" 5.310000e+00 | \n",
" 2.181220e+03 | \n",
" 1.909148e+03 | \n",
" 0.000000 | \n",
" 12.000000 | \n",
" NaN | \n",
" NaN | \n",
" 0.0 | \n",
" 30.000000 | \n",
" 35.500000 | \n",
" 8.750000 | \n",
"
\n",
" \n",
" | max | \n",
" 4.134302e+07 | \n",
" 501.0 | \n",
" 150000.000000 | \n",
" 5.484510e+07 | \n",
" 5.484510e+07 | \n",
" 2.125796e+07 | \n",
" 1.421813e+08 | \n",
" 2.550000e+07 | \n",
" 7000.000000 | \n",
" 59.000000 | \n",
" NaN | \n",
" NaN | \n",
" 0.0 | \n",
" 53.000000 | \n",
" 91.559322 | \n",
" 91.559322 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ORDER_NUMBER CUSTOMER_SIZE QUANTITY UNIT_LIST_PRICE \\\n",
"count 4.326000e+03 186.0 4326.000000 4.326000e+03 \n",
"mean 4.024189e+07 501.0 115.626214 6.346140e+04 \n",
"std 9.895896e+05 0.0 2415.216262 1.674418e+06 \n",
"min 3.781086e+07 501.0 0.000000 0.000000e+00 \n",
"25% 3.964231e+07 501.0 1.000000 7.318800e+00 \n",
"50% 3.971750e+07 501.0 1.000000 7.930800e+01 \n",
"75% 4.132187e+07 501.0 5.000000 4.960799e+02 \n",
"max 4.134302e+07 501.0 150000.000000 5.484510e+07 \n",
"\n",
" UNIT_SELLING_PRICE TAX_VALUE LINE_TOTAL_LOCAL_CURRENCY \\\n",
"count 4.326000e+03 1.376000e+03 4.326000e+03 \n",
"mean 6.109841e+04 4.754365e+04 1.951080e+05 \n",
"std 1.671203e+06 9.055148e+05 3.937639e+06 \n",
"min 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"25% 7.318800e+00 0.000000e+00 2.592000e+01 \n",
"50% 7.080370e+01 0.000000e+00 2.855958e+02 \n",
"75% 4.579200e+02 5.310000e+00 2.181220e+03 \n",
"max 5.484510e+07 2.125796e+07 1.421813e+08 \n",
"\n",
" LINE_TOTAL_USD ESTIMATED_TAX_AMOUNT TERM STD_DISCOUNT \\\n",
"count 4.326000e+03 210.000000 3470.000000 0.0 \n",
"mean 1.818930e+04 49.084143 11.164265 NaN \n",
"std 4.206390e+05 488.650876 5.009888 NaN \n",
"min 0.000000e+00 0.000000 1.000000 NaN \n",
"25% 2.592000e+01 0.000000 12.000000 NaN \n",
"50% 2.614900e+02 0.000000 12.000000 NaN \n",
"75% 1.909148e+03 0.000000 12.000000 NaN \n",
"max 2.550000e+07 7000.000000 59.000000 NaN \n",
"\n",
" ADDL_DISCOUNT UNIT_NET_SELLING_PRICE STD_DISCOUNT_PCT \\\n",
"count 0.0 187.0 1164.000000 \n",
"mean NaN 0.0 22.915808 \n",
"std NaN 0.0 9.373484 \n",
"min NaN 0.0 0.000000 \n",
"25% NaN 0.0 17.000000 \n",
"50% NaN 0.0 25.000000 \n",
"75% NaN 0.0 30.000000 \n",
"max NaN 0.0 53.000000 \n",
"\n",
" ADDL_DISCOUNT_PCT TOTAL_DISCOUNT_PCT \n",
"count 27.000000 4326.000000 \n",
"mean 23.279975 6.311271 \n",
"std 24.491179 11.728768 \n",
"min 0.000000 0.000000 \n",
"25% 3.500000 0.000000 \n",
"50% 10.000000 0.000000 \n",
"75% 35.500000 8.750000 \n",
"max 91.559322 91.559322 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"orders.describe()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f636d478-3307-47b9-b363-625d7acf2036",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" LINE_TOTAL_USD | \n",
" TRX_SUB_TYPE | \n",
" MONTH_NUM | \n",
" COUNTRY_CODE | \n",
" SUBSCRIBER_CODE | \n",
" QUANTITY | \n",
" UNIT_LIST_PRICE | \n",
" UNIT_SELLING_PRICE | \n",
" TAX_VALUE | \n",
" TOTAL_DISCOUNT_PCT | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0.00 | \n",
" CLOUD | \n",
" 22-Oct | \n",
" US | \n",
" CSM | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" NaN | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 14040.00 | \n",
" CLOUD | \n",
" 22-Oct | \n",
" US | \n",
" PARTNER_STORE | \n",
" 10 | \n",
" 2340.0 | \n",
" 1404.0 | \n",
" 0.00 | \n",
" 40.0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 7306.68 | \n",
" COMBINED | \n",
" 22-Oct | \n",
" US | \n",
" PARTNER_STORE | \n",
" 1 | \n",
" 9500.0 | \n",
" 6650.0 | \n",
" 656.68 | \n",
" 30.0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 7315.00 | \n",
" COMBINED | \n",
" 22-Oct | \n",
" US | \n",
" PARTNER_STORE | \n",
" 1 | \n",
" 10450.0 | \n",
" 7315.0 | \n",
" 0.00 | \n",
" 30.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" LINE_TOTAL_USD TRX_SUB_TYPE MONTH_NUM COUNTRY_CODE SUBSCRIBER_CODE \\\n",
"0 0.00 CLOUD 22-Oct US CSM \n",
"1 14040.00 CLOUD 22-Oct US PARTNER_STORE \n",
"2 7306.68 COMBINED 22-Oct US PARTNER_STORE \n",
"3 7315.00 COMBINED 22-Oct US PARTNER_STORE \n",
"\n",
" QUANTITY UNIT_LIST_PRICE UNIT_SELLING_PRICE TAX_VALUE \\\n",
"0 1 0.0 0.0 NaN \n",
"1 10 2340.0 1404.0 0.00 \n",
"2 1 9500.0 6650.0 656.68 \n",
"3 1 10450.0 7315.0 0.00 \n",
"\n",
" TOTAL_DISCOUNT_PCT \n",
"0 0.0 \n",
"1 40.0 \n",
"2 30.0 \n",
"3 30.0 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"orders_interest = pd.DataFrame(orders, columns=[\"LINE_TOTAL_USD\", \"TRX_SUB_TYPE\", \"MONTH_NUM\", \"COUNTRY_CODE\", \"SUBSCRIBER_CODE\", \"QUANTITY\", \"UNIT_LIST_PRICE\", \"UNIT_SELLING_PRICE\", \"TAX_VALUE\", \"TOTAL_DISCOUNT_PCT\"])\n",
"orders_interest.head(4)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0e3a6e62-ceac-46f4-ac00-176099fb02fc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 4326 entries, 0 to 4325\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 LINE_TOTAL_USD 4326 non-null float64\n",
" 1 TRX_SUB_TYPE 4326 non-null object \n",
" 2 MONTH_NUM 4326 non-null object \n",
" 3 COUNTRY_CODE 4323 non-null object \n",
" 4 SUBSCRIBER_CODE 4326 non-null object \n",
" 5 QUANTITY 4326 non-null int64 \n",
" 6 UNIT_LIST_PRICE 4326 non-null float64\n",
" 7 UNIT_SELLING_PRICE 4326 non-null float64\n",
" 8 TAX_VALUE 1376 non-null float64\n",
" 9 TOTAL_DISCOUNT_PCT 4326 non-null float64\n",
"dtypes: float64(5), int64(1), object(4)\n",
"memory usage: 338.1+ KB\n"
]
}
],
"source": [
"orders_interest.info()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "356251ff-f1b3-431f-922e-378c0b84063f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LINE_TOTAL_USD 0\n",
"TRX_SUB_TYPE 0\n",
"MONTH_NUM 0\n",
"COUNTRY_CODE 3\n",
"SUBSCRIBER_CODE 0\n",
"QUANTITY 0\n",
"UNIT_LIST_PRICE 0\n",
"UNIT_SELLING_PRICE 0\n",
"TAX_VALUE 2950\n",
"TOTAL_DISCOUNT_PCT 0\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"symbols = ['#', '$', '?', '%', 'NULL', 'NA', 'NaN']\n",
"missing_values_summary = (orders_interest.isnull() | orders_interest.isin(symbols)).sum()\n",
"missing_values_summary"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "7ff5f221-67ea-469e-ad91-04e81ffd0844",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TRX_SUB_TYPE\n",
"CLOUD 2571\n",
"HARDWARE 1063\n",
"SOFTWARE 626\n",
"COMBINED 66\n",
"Name: count, dtype: int64"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"orders_interest['TRX_SUB_TYPE'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "5070859d-3508-48b6-ac83-80b4c86bc686",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MONTH_NUM\n",
"24-Apr 599\n",
"24-Mar 449\n",
"24-Jan 343\n",
"23-Mar 334\n",
"23-Feb 312\n",
"23-Jun 281\n",
"23-Dec 279\n",
"24-Jun 254\n",
"24-Feb 223\n",
"23-Oct 217\n",
"23-May 156\n",
"23-Sep 111\n",
"22-Oct 97\n",
"23-Jan 93\n",
"24-Aug 82\n",
"24-Sep 81\n",
"23-Aug 73\n",
"22-Nov 72\n",
"23-Apr 72\n",
"24-Jul 52\n",
"22-Dec 47\n",
"24-May 44\n",
"23-Jul 33\n",
"23-Nov 22\n",
"Name: count, dtype: int64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"orders_interest['MONTH_NUM'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "fa82cdea-457a-47bb-9c31-9b2bb28f4616",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"COUNTRY_CODE\n",
"US 3336\n",
"CA 161\n",
"GB 122\n",
"DE 105\n",
"FR 95\n",
"HU 88\n",
"JP 57\n",
"ES 34\n",
"IT 33\n",
"MX 32\n",
"IN 31\n",
"AU 23\n",
"MY 22\n",
"CL 21\n",
"TH 18\n",
"CN 14\n",
"DK 13\n",
"SA 10\n",
"NO 8\n",
"IE 7\n",
"KE 6\n",
"DO 5\n",
"AR 5\n",
"GH 5\n",
"AE 5\n",
"AT 4\n",
"CY 4\n",
"JE 4\n",
"KR 4\n",
"MP 3\n",
"NZ 3\n",
"BO 3\n",
"DJ 3\n",
"SV 2\n",
"CH 2\n",
"MA 2\n",
"TT 2\n",
"HK 2\n",
"CR 2\n",
"JO 2\n",
"MV 2\n",
"PA 2\n",
"BG 2\n",
"QA 2\n",
"SG 2\n",
"BS 2\n",
"JM 2\n",
"IL 2\n",
"LK 1\n",
"FI 1\n",
"PR 1\n",
"HR 1\n",
"TW 1\n",
"TN 1\n",
"CZ 1\n",
"CI 1\n",
"SE 1\n",
"Name: count, dtype: int64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"orders_interest['COUNTRY_CODE'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "66ff50f1-16c6-4c6d-9023-a3fbb2d9f119",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"SUBSCRIBER_CODE\n",
"CSM 2238\n",
"PARTNER_STORE 1616\n",
"CUSTOMER_STORE 215\n",
"ACADEMY 182\n",
"FRANCHISE_STORE 60\n",
"EMPLOYEE_STORE 15\n",
"Name: count, dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"orders_interest['SUBSCRIBER_CODE'].value_counts()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}