{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "OzT8H2buCU5x" }, "source": [ "### Data Visualization: \n", "* Univariate Plot\n", "* Bivariate Plot\n", "* Multivariate Plot\n", "* Plot libraries Used: matplotlib, plotly, ggplot, seaborn, altair, plotnine, bokeh" ] }, { "cell_type": "markdown", "metadata": { "id": "_9tQKKaVPPdc" }, "source": [ "#### Salient features of each package: What is unique to these packages?" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "collapsed": true, "id": "9Kk4LdESCU51", "jupyter": { "outputs_hidden": true }, "outputId": "86bbc23d-16aa-42b5-b3e7-c27658ea5b77" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Requirement already satisfied: matplotlib in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (3.9.2)\n", "Requirement already satisfied: seaborn in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (0.13.2)\n", "Requirement already satisfied: plotly in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (5.24.0)\n", "Requirement already satisfied: ggplot in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (0.11.5)\n", "Requirement already satisfied: altair in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (5.4.1)\n", "Requirement already satisfied: geopandas in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (1.0.1)\n", "Requirement already satisfied: bokeh in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (3.5.2)\n", "Requirement already satisfied: plotnine in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (0.13.6)\n", "Requirement already satisfied: contextily in c:\\users\\debrup 
banerjee\\appdata\\roaming\\python\\python311\\site-packages (1.6.1)\n", "Requirement already satisfied: squarify in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (0.4.4)\n", "Requirement already satisfied: dash in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (2.17.1)\n", "Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (1.2.1)\n", "Requirement already satisfied: cycler>=0.10 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (4.53.1)\n", "Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (1.4.5)\n", "Requirement already satisfied: numpy>=1.23 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (1.26.4)\n", "Requirement already satisfied: packaging>=20.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (24.1)\n", "Requirement already satisfied: pillow>=8 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (10.1.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (3.1.2)\n", "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (2.9.0.post0)\n", "Requirement already satisfied: pandas>=1.2 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from seaborn) (2.2.2)\n", "Requirement already satisfied: tenacity>=6.2.0 in 
c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from plotly) (8.5.0)\n", "Requirement already satisfied: brewer2mpl in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from ggplot) (1.4.1)\n", "Requirement already satisfied: patsy>=0.4 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from ggplot) (0.5.6)\n", "Requirement already satisfied: scipy in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from ggplot) (1.11.4)\n", "Requirement already satisfied: six in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from ggplot) (1.16.0)\n", "Requirement already satisfied: statsmodels in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from ggplot) (0.14.2)\n", "Requirement already satisfied: jinja2 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from altair) (3.1.4)\n", "Requirement already satisfied: jsonschema>=3.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from altair) (4.23.0)\n", "Requirement already satisfied: narwhals>=1.5.2 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from altair) (1.5.5)\n", "Requirement already satisfied: typing-extensions>=4.10.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from altair) (4.12.2)\n", "Requirement already satisfied: pyogrio>=0.7.2 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from geopandas) (0.9.0)\n", "Requirement already satisfied: pyproj>=3.3.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from geopandas) (3.6.1)\n", "Requirement already satisfied: shapely>=2.0.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from geopandas) (2.0.6)\n", "Requirement already satisfied: PyYAML>=3.10 in 
c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from bokeh) (6.0.2)\n", "Requirement already satisfied: tornado>=6.2 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from bokeh) (6.4.1)\n", "Requirement already satisfied: xyzservices>=2021.09.1 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from bokeh) (2024.6.0)\n", "Requirement already satisfied: mizani~=0.11.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from plotnine) (0.11.4)\n", "Requirement already satisfied: geopy in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from contextily) (2.4.1)\n", "Requirement already satisfied: mercantile in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from contextily) (1.2.1)\n", "Requirement already satisfied: rasterio in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from contextily) (1.3.10)\n", "Requirement already satisfied: requests in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from contextily) (2.32.3)\n", "Requirement already satisfied: joblib in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from contextily) (1.4.2)\n", "Requirement already satisfied: Flask<3.1,>=1.0.4 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from dash) (3.0.3)\n", "Requirement already satisfied: Werkzeug<3.1 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from dash) (3.0.4)\n", "Requirement already satisfied: dash-html-components==2.0.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from dash) (2.0.0)\n", "Requirement already satisfied: dash-core-components==2.0.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from dash) (2.0.0)\n", "Requirement 
already satisfied: dash-table==5.0.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from dash) (5.0.0)\n", "Requirement already satisfied: importlib-metadata in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from dash) (8.4.0)\n", "Requirement already satisfied: retrying in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from dash) (1.3.4)\n", "Requirement already satisfied: nest-asyncio in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from dash) (1.6.0)\n", "Requirement already satisfied: setuptools in c:\\program files\\python311\\lib\\site-packages (from dash) (65.5.0)\n", "Requirement already satisfied: itsdangerous>=2.1.2 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from Flask<3.1,>=1.0.4->dash) (2.2.0)\n", "Requirement already satisfied: click>=8.1.3 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from Flask<3.1,>=1.0.4->dash) (8.1.7)\n", "Requirement already satisfied: blinker>=1.6.2 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from Flask<3.1,>=1.0.4->dash) (1.8.2)\n", "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from jinja2->altair) (2.1.5)\n", "Requirement already satisfied: attrs>=22.2.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from jsonschema>=3.0->altair) (24.2.0)\n", "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from jsonschema>=3.0->altair) (2023.12.1)\n", "Requirement already satisfied: referencing>=0.28.4 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from jsonschema>=3.0->altair) (0.35.1)\n", "Requirement already satisfied: rpds-py>=0.7.1 in 
c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from jsonschema>=3.0->altair) (0.20.0)\n", "Requirement already satisfied: tzdata in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from mizani~=0.11.0->plotnine) (2024.1)\n", "Requirement already satisfied: pytz>=2020.1 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from pandas>=1.2->seaborn) (2024.1)\n", "Requirement already satisfied: certifi in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from pyogrio>=0.7.2->geopandas) (2024.7.4)\n", "Requirement already satisfied: geographiclib<3,>=1.52 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from geopy->contextily) (2.0)\n", "Requirement already satisfied: zipp>=0.5 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from importlib-metadata->dash) (3.20.1)\n", "Requirement already satisfied: affine in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from rasterio->contextily) (2.4.0)\n", "Requirement already satisfied: cligj>=0.5 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from rasterio->contextily) (0.7.2)\n", "Requirement already satisfied: snuggs>=1.4.1 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from rasterio->contextily) (1.4.7)\n", "Requirement already satisfied: click-plugins in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from rasterio->contextily) (1.1.1)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from requests->contextily) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from requests->contextily) (3.7)\n", "Requirement already 
satisfied: urllib3<3,>=1.21.1 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from requests->contextily) (2.0.7)\n", "Requirement already satisfied: colorama in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from click>=8.1.3->Flask<3.1,>=1.0.4->dash) (0.4.6)\n", "Note: you may need to restart the kernel to use updated packages.\n", "Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.\n", "\n", "Requirement already satisfied: vegafusion>=1.5.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from vegafusion[embed]>=1.5.0) (1.6.9)\n", "Requirement already satisfied: altair>=5.2.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (5.4.1)\n", "Requirement already satisfied: pyarrow>=5 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (17.0.0)\n", "Requirement already satisfied: pandas in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (2.2.2)\n", "Requirement already satisfied: psutil in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (6.0.0)\n", "Requirement already satisfied: protobuf in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (4.25.4)\n", "Requirement already satisfied: vegafusion-python-embed==1.6.9 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from vegafusion[embed]>=1.5.0) (1.6.9)\n", "Requirement already satisfied: vl-convert-python>=0.7.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages 
(from vegafusion[embed]>=1.5.0) (1.6.1)\n", "Requirement already satisfied: jinja2 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from altair>=5.2.0->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (3.1.4)\n", "Requirement already satisfied: jsonschema>=3.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from altair>=5.2.0->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (4.23.0)\n", "Requirement already satisfied: narwhals>=1.5.2 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from altair>=5.2.0->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (1.5.5)\n", "Requirement already satisfied: packaging in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from altair>=5.2.0->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (24.1)\n", "Requirement already satisfied: typing-extensions>=4.10.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from altair>=5.2.0->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (4.12.2)\n", "Requirement already satisfied: numpy>=1.16.6 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from pyarrow>=5->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (1.26.4)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from pandas->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from pandas->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (2024.1)\n", "Requirement already satisfied: tzdata>=2022.7 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from pandas->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (2024.1)\n", "Requirement already satisfied: attrs>=22.2.0 in c:\\users\\debrup 
banerjee\\appdata\\roaming\\python\\python311\\site-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (24.2.0)\n",
 "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (2023.12.1)\n",
 "Requirement already satisfied: referencing>=0.28.4 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (0.35.1)\n",
 "Requirement already satisfied: rpds-py>=0.7.1 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (0.20.0)\n",
 "Requirement already satisfied: six>=1.5 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.8.2->pandas->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (1.16.0)\n",
 "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\debrup banerjee\\appdata\\roaming\\python\\python311\\site-packages (from jinja2->altair>=5.2.0->vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (2.1.5)\n"
 ] } ], "source": [
 "# Install the plotting stack used by this notebook.\n",
 "# Versions are pinned to the ones this cell's recorded output reports as installed,\n",
 "# so a fresh environment resolves to the same package set instead of drifting.\n",
 "# NOTE(review): `ggplot` looks like the legacy package; plotnine (installed below) offers the\n",
 "# grammar-of-graphics API as well - confirm `ggplot` is still needed before keeping it.\n",
 "%pip install matplotlib==3.9.2 seaborn==0.13.2 plotly==5.24.0 ggplot==0.11.5 altair==5.4.1 geopandas==1.0.1 bokeh==3.5.2 plotnine==0.13.6 contextily==1.6.1 squarify==0.4.4 dash==2.17.1\n",
 "%pip install \"vegafusion[embed]==1.6.9\""
 ] },
 { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "uiwSM2bSCU54", "outputId": "7b7edf66-4309-4106-8f38-c835af0d3ae7", "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/datascience/ns-user-38/day8\n" ] } ], "source": [
 "import os\n",
 "# Print the kernel's current working directory.\n",
 "print(os.getcwd())"
 ] },
 { "cell_type": "code", "execution_count": 2, "metadata": { "id": "PBne0mP_CU55", "tags": [] }, "outputs": [], "source": [
 "import pandas as pd\n",
 "# Set the option to display full content of DataFrame columns without truncation\n",
 "pd.set_option('display.max_colwidth', None)"
 ] },
 { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 284 }, "id": "GHVzADVhCU55", "outputId": "025b9d40-fe32-4f3e-fafa-b3be33c23e8c", "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/datascience/ns-user-38/day8/orders_management.csv\n" ] }, { "data": { "text/html": [ "
| \n", " | Row ID | \n", "Order Priority | \n", "Discount | \n", "Unit Price | \n", "Shipping Cost | \n", "Customer ID | \n", "Customer Name | \n", "Ship Mode | \n", "Customer Segment | \n", "Product Category | \n", "... | \n", "Region | \n", "State or Province | \n", "City | \n", "Postal Code | \n", "Order Date | \n", "Ship Date | \n", "Profit | \n", "Quantity ordered new | \n", "Sales | \n", "Order ID | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "18606 | \n", "Not Specified | \n", "0.01 | \n", "2.88 | \n", "0.50 | \n", "2 | \n", "Janice Fletcher | \n", "Regular Air | \n", "Corporate | \n", "Office Supplies | \n", "... | \n", "Central | \n", "Illinois | \n", "Addison | \n", "60101 | \n", "28-05-2012 | \n", "30-05-2012 | \n", "1.32 | \n", "2 | \n", "5.90 | \n", "88525 | \n", "
| 1 | \n", "20847 | \n", "High | \n", "0.01 | \n", "2.84 | \n", "0.93 | \n", "3 | \n", "Bonnie Potter | \n", "Express Air | \n", "Corporate | \n", "Office Supplies | \n", "... | \n", "West | \n", "Washington | \n", "Anacortes | \n", "98221 | \n", "07-07-2010 | \n", "08-07-2010 | \n", "4.56 | \n", "4 | \n", "13.01 | \n", "88522 | \n", "
| 2 | \n", "23086 | \n", "Not Specified | \n", "0.03 | \n", "6.68 | \n", "6.15 | \n", "3 | \n", "Bonnie Potter | \n", "Express Air | \n", "Corporate | \n", "Office Supplies | \n", "... | \n", "West | \n", "Washington | \n", "Anacortes | \n", "98221 | \n", "27-07-2011 | \n", "28-07-2011 | \n", "-47.64 | \n", "7 | \n", "49.92 | \n", "88523 | \n", "
3 rows × 24 columns
\n", "| \n", " | Profit | \n", "Sales | \n", "
|---|---|---|
| Profit | \n", "1.000000 | \n", "0.371583 | \n", "
| Sales | \n", "0.371583 | \n", "1.000000 | \n", "
\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"
\\n\"+\n", " \"\\n\"+\n",
" \"from bokeh.resources import INLINE\\n\"+\n",
" \"output_notebook(resources=INLINE)\\n\"+\n",
" \"\\n\"+\n",
" \"