mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
2307 lines
136 KiB
Plaintext
2307 lines
136 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"# Online retail analysis using Eland\n",
|
||
"\n",
|
||
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elastic/eland/blob/main/docs/sphinx/examples/online_retail_analysis.ipynb)\n",
|
||
"\n",
|
||
"Learn how to analyze some online retail data using Eland."
|
||
],
|
||
"metadata": {
|
||
"id": "2IM_dmhFeRhR"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"## Install and import packages"
|
||
],
|
||
"metadata": {
|
||
"id": "xx09MAYFe3Ep"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:06.764412Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:06.755567Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:07.316950Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:07.316561Z"
|
||
},
|
||
"id": "Z7MEVJy9W_sd",
|
||
"outputId": "91b215ff-9f4f-4b1f-d204-f24bcc38b3f6",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Collecting eland\n",
|
||
" Downloading eland-8.11.1-py3-none-any.whl (157 kB)\n",
|
||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m157.9/157.9 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||
"\u001b[?25hCollecting elasticsearch<9,>=8.3 (from eland)\n",
|
||
" Downloading elasticsearch-8.11.1-py3-none-any.whl (412 kB)\n",
|
||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m412.8/412.8 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||
"\u001b[?25hRequirement already satisfied: pandas<2,>=1.5 in /usr/local/lib/python3.10/dist-packages (from eland) (1.5.3)\n",
|
||
"Requirement already satisfied: matplotlib>=3.6 in /usr/local/lib/python3.10/dist-packages (from eland) (3.7.1)\n",
|
||
"Requirement already satisfied: numpy<2,>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from eland) (1.23.5)\n",
|
||
"Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from eland) (23.2)\n",
|
||
"Collecting elastic-transport<9,>=8 (from elasticsearch<9,>=8.3->eland)\n",
|
||
" Downloading elastic_transport-8.11.0-py3-none-any.whl (59 kB)\n",
|
||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.8/59.8 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||
"\u001b[?25hRequirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.2.0)\n",
|
||
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (0.12.1)\n",
|
||
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (4.47.0)\n",
|
||
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.4.5)\n",
|
||
"Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (9.4.0)\n",
|
||
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (3.1.1)\n",
|
||
"Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (2.8.2)\n",
|
||
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2,>=1.5->eland) (2023.3.post1)\n",
|
||
"Requirement already satisfied: urllib3<3,>=1.26.2 in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch<9,>=8.3->eland) (2.0.7)\n",
|
||
"Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch<9,>=8.3->eland) (2023.11.17)\n",
|
||
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib>=3.6->eland) (1.16.0)\n",
|
||
"Installing collected packages: elastic-transport, elasticsearch, eland\n",
|
||
"Successfully installed eland-8.11.1 elastic-transport-8.11.0 elasticsearch-8.11.1\n",
|
||
"Requirement already satisfied: elasticsearch in /usr/local/lib/python3.10/dist-packages (8.11.1)\n",
|
||
"Requirement already satisfied: elastic-transport<9,>=8 in /usr/local/lib/python3.10/dist-packages (from elasticsearch) (8.11.0)\n",
|
||
"Requirement already satisfied: urllib3<3,>=1.26.2 in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (2.0.7)\n",
|
||
"Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (2023.11.17)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!pip install eland\n",
|
||
"!pip install elasticsearch\n",
|
||
"\n",
|
||
"from elasticsearch import Elasticsearch\n",
|
||
"import eland as ed\n",
|
||
"import getpass\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"\n",
|
||
"# Fix console size for consistent test results\n",
|
||
"from eland.conftest import *"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"## Connect to Elasticsearch\n",
|
||
"\n",
|
||
"First we need to connect to a running Elasticsearch instance.\n",
|
||
"In this example we'll be using Elastic Cloud.\n",
|
||
"Sign up for a [free trial](https://cloud.elastic.co/registration).\n",
|
||
"\n",
|
||
"See [documentation](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html#connect-self-managed-new) if you want to connect to a self-managed cluster."
|
||
],
|
||
"metadata": {
|
||
"id": "SD0Ul-I_Xipy"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# Connect to an Elastic Cloud instance\n",
|
||
"\n",
|
||
"ELASTIC_CLOUD_ID = getpass.getpass(\"Cloud ID:\")\n",
|
||
"ELASTIC_CLOUD_PASSWORD = getpass.getpass(\"`elastic` user password:\")\n",
|
||
"\n",
|
||
"es = Elasticsearch(\n",
|
||
" cloud_id=ELASTIC_CLOUD_ID,\n",
|
||
" basic_auth=(\"elastic\", ELASTIC_CLOUD_PASSWORD)\n",
|
||
")\n",
|
||
"print(es.info())"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "mNrJvFU1Yagd",
|
||
"outputId": "db8829bf-bd59-4d86-8b73-c956d09f373e"
|
||
},
|
||
"execution_count": 4,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Cloud ID:··········\n",
|
||
"`elastic` user password:··········\n",
|
||
"{'name': 'instance-0000000001', 'cluster_name': '69662f53fe844e2d81effcbc7f41e867', 'cluster_uuid': 'GHyCC4NpTAC3SyxZkx65Jw', 'version': {'number': '8.12.0-SNAPSHOT', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '38ddf39a3efc422a702adc83b1bb2cd6fc2edc5b', 'build_date': '2024-01-03T12:58:40.771552945Z', 'build_snapshot': True, 'lucene_version': '9.9.1', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "uvhOwB3dW_sg"
|
||
},
|
||
"source": [
|
||
"## Download test data\n",
|
||
"\n",
|
||
"Let's start by downloading our test data."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"import requests\n",
|
||
"\n",
|
||
"# URL of the raw file on GitHub\n",
|
||
"file_url = \"https://github.com/elastic/eland/raw/main/docs/sphinx/examples/data/online-retail.csv.gz\"\n",
|
||
"\n",
|
||
"# Local path where you want to save the file\n",
|
||
"local_filename = \"online-retail.csv.gz\"\n",
|
||
"\n",
|
||
"# Send a GET request to the file URL\n",
|
||
"response = requests.get(file_url, stream=True)\n",
|
||
"\n",
|
||
"# Open a local file in binary write mode\n",
|
||
"with open(local_filename, 'wb') as file:\n",
|
||
" for chunk in response.iter_content(chunk_size=128):\n",
|
||
" file.write(chunk)\n",
|
||
"\n",
|
||
"print(f\"File downloaded: {local_filename}\")"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "FTCeS2rFZrYh",
|
||
"outputId": "131aeeac-0af5-4746-e639-626b3c80fb69"
|
||
},
|
||
"execution_count": 5,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"File downloaded: online-retail.csv.gz\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"## Create Eland dataframe\n",
|
||
"\n",
|
||
"To get started, let's create an `eland.DataFrame` by reading a csv file. This creates and populates the\n",
|
||
"`online-retail` index in the Elasticsearch cluster we connected to above."
|
||
],
|
||
"metadata": {
|
||
"id": "V-BiYFnvZwTd"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:07.324283Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:07.323764Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:16.241379Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:16.241877Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "3Pq2jtVQW_sh",
|
||
"outputId": "e677a2eb-e59d-4403-f19b-7326ec954a6b"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stderr",
|
||
"text": [
|
||
"/usr/local/lib/python3.10/dist-packages/eland/etl.py:529: FutureWarning: the 'mangle_dupe_cols' keyword is deprecated and will be removed in a future version. Please take steps to stop the use of 'mangle_dupe_cols'\n",
|
||
" reader = pd.read_csv(filepath_or_buffer, **kwargs)\n",
|
||
"/usr/local/lib/python3.10/dist-packages/eland/etl.py:529: FutureWarning: The squeeze argument has been deprecated and will be removed in a future version. Append .squeeze(\"columns\") to the call to squeeze.\n",
|
||
"\n",
|
||
"\n",
|
||
" reader = pd.read_csv(filepath_or_buffer, **kwargs)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"df = ed.csv_to_eland(\"online-retail.csv.gz\",\n",
|
||
" es_client=es,\n",
|
||
" es_dest_index='online-retail',\n",
|
||
" es_if_exists='replace',\n",
|
||
" es_dropna=True,\n",
|
||
" es_refresh=True,\n",
|
||
" compression='gzip',\n",
|
||
" index_col=0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "RT8kO2rqW_sh"
|
||
},
|
||
"source": [
|
||
"Here we see that the `\"_id\"` field was used to index our data frame."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:16.246737Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:16.244084Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:16.250080Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:16.250410Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 35
|
||
},
|
||
"id": "x2um6Xd4W_sh",
|
||
"outputId": "69a5a3b3-95cb-4851-dec5-f54e55d16235"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"'_id'"
|
||
],
|
||
"application/vnd.google.colaboratory.intrinsic+json": {
|
||
"type": "string"
|
||
}
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 7
|
||
}
|
||
],
|
||
"source": [
|
||
"df.index.es_index_field"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "YHFiR_8MW_si"
|
||
},
|
||
"source": [
|
||
"Next, we can check which fields from Elasticsearch are available to our eland data frame. `columns` is available as a parameter when instantiating the data frame, which allows one to choose only a subset of fields from your index to be included in the data frame. Since we didn't set this parameter, we have access to all fields."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:16.254703Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:16.254060Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:16.256567Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:16.256138Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "h_6_2YphW_si",
|
||
"outputId": "1fb83e3b-8827-4f18-db7d-79bb25e7123d"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode',\n",
|
||
" 'UnitPrice'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 8
|
||
}
|
||
],
|
||
"source": [
|
||
"df.columns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "-LmvXDA3W_si"
|
||
},
|
||
"source": [
|
||
"Now, let's see the data types of our fields. Running `df.dtypes`, we can see that Elasticsearch field types are mapped to pandas field types."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:16.261335Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:16.260762Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:16.263024Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:16.263323Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "4KEZ3zsEW_si",
|
||
"outputId": "090674df-7f4d-41bd-b4bf-3ee476945775"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"Country object\n",
|
||
"CustomerID float64\n",
|
||
"Description object\n",
|
||
"InvoiceDate object\n",
|
||
"InvoiceNo object\n",
|
||
"Quantity int64\n",
|
||
"StockCode object\n",
|
||
"UnitPrice float64\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 9
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "4Levcmf0W_si"
|
||
},
|
||
"source": [
|
||
"We also offer a `.es_info()` data frame method that shows all info about the underlying index. It also contains information about operations being passed from data frame methods to Elasticsearch. More on this later."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:16.266245Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:16.265860Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:16.271135Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:16.270816Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "RhxvDBAiW_si",
|
||
"outputId": "db60167f-d973-41d9-bd42-438855cef1a4"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"es_index_pattern: online-retail\n",
|
||
"Index:\n",
|
||
" es_index_field: _id\n",
|
||
" is_source_field: False\n",
|
||
"Mappings:\n",
|
||
" capabilities:\n",
|
||
" es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name\n",
|
||
"Country Country True keyword None object True True False Country\n",
|
||
"CustomerID CustomerID True double None float64 True True False CustomerID\n",
|
||
"Description Description True keyword None object True True False Description\n",
|
||
"InvoiceDate InvoiceDate True keyword None object True True False InvoiceDate\n",
|
||
"InvoiceNo InvoiceNo True keyword None object True True False InvoiceNo\n",
|
||
"Quantity Quantity True long None int64 True True False Quantity\n",
|
||
"StockCode StockCode True keyword None object True True False StockCode\n",
|
||
"UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
|
||
"Operations:\n",
|
||
" tasks: []\n",
|
||
" size: None\n",
|
||
" sort_params: None\n",
|
||
" _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
|
||
" body: {}\n",
|
||
" post_processing: []\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(df.es_info())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "Zlp7YIrPW_sj"
|
||
},
|
||
"source": [
|
||
"## Selecting and Indexing Data\n",
|
||
"\n",
|
||
"Now that we understand how to create a data frame and get access to its underlying attributes, let's see how we can select subsets of our data."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "6dvAlxBTW_sj"
|
||
},
|
||
"source": [
|
||
"### head and tail\n",
|
||
"\n",
|
||
"Much like pandas, Eland data frames offer `.head(n)` and `.tail(n)` methods that return the first and last n rows, respectively."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:16.274779Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:16.274393Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:17.555325Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:17.555642Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 140
|
||
},
|
||
"id": "WaV6ZJ7CW_sj",
|
||
"outputId": "203a8b15-5a08-4808-a675-8a98d1174c1e"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
" Country CustomerID ... StockCode UnitPrice\n",
|
||
"0 United Kingdom 17850.0 ... 85123A 2.55\n",
|
||
"1 United Kingdom 17850.0 ... 71053 3.39\n",
|
||
"\n",
|
||
"[2 rows x 8 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Country</th>\n",
|
||
" <th>CustomerID</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>StockCode</th>\n",
|
||
" <th>UnitPrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>17850.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>85123A</td>\n",
|
||
" <td>2.55</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>17850.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>71053</td>\n",
|
||
" <td>3.39</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>\n",
|
||
"<p>2 rows × 8 columns</p>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 11
|
||
}
|
||
],
|
||
"source": [
|
||
"df.head(2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:17.559534Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:17.559123Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:17.637500Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:17.637125Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "XpwHbrGwW_sj",
|
||
"outputId": "130ba338-abcf-4fe4-cce3-a1840f0fe46b"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"es_index_pattern: online-retail\n",
|
||
"Index:\n",
|
||
" es_index_field: _id\n",
|
||
" is_source_field: False\n",
|
||
"Mappings:\n",
|
||
" capabilities:\n",
|
||
" es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name\n",
|
||
"Country Country True keyword None object True True False Country\n",
|
||
"CustomerID CustomerID True double None float64 True True False CustomerID\n",
|
||
"Description Description True keyword None object True True False Description\n",
|
||
"InvoiceDate InvoiceDate True keyword None object True True False InvoiceDate\n",
|
||
"InvoiceNo InvoiceNo True keyword None object True True False InvoiceNo\n",
|
||
"Quantity Quantity True long None int64 True True False Quantity\n",
|
||
"StockCode StockCode True keyword None object True True False StockCode\n",
|
||
"UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
|
||
"Operations:\n",
|
||
" tasks: [('tail': ('sort_field': '_doc', 'count': 2)), ('head': ('sort_field': '_doc', 'count': 2)), ('tail': ('sort_field': '_doc', 'count': 2))]\n",
|
||
" size: 2\n",
|
||
" sort_params: {'_doc': 'desc'}\n",
|
||
" _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
|
||
" body: {}\n",
|
||
" post_processing: [('sort_index'), ('head': ('count': 2)), ('tail': ('count': 2))]\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(df.tail(2).head(2).tail(2).es_info())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:17.640519Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:17.640139Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:18.647340Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:18.646548Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 140
|
||
},
|
||
"id": "Zf-TFwvXW_sj",
|
||
"outputId": "4300d820-843d-448f-879d-586756c2e620"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
" Country CustomerID ... StockCode UnitPrice\n",
|
||
"14998 United Kingdom 17419.0 ... 21773 1.25\n",
|
||
"14999 United Kingdom 17419.0 ... 22149 2.10\n",
|
||
"\n",
|
||
"[2 rows x 8 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Country</th>\n",
|
||
" <th>CustomerID</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>StockCode</th>\n",
|
||
" <th>UnitPrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>14998</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>17419.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>21773</td>\n",
|
||
" <td>1.25</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14999</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>17419.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>22149</td>\n",
|
||
" <td>2.10</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>\n",
|
||
"<p>2 rows × 8 columns</p>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 13
|
||
}
|
||
],
|
||
"source": [
|
||
"df.tail(2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "AkbC-qckW_sj"
|
||
},
|
||
"source": [
|
||
"### Selecting columns\n",
|
||
"\n",
|
||
"You can also pass a list of columns to select columns from the data frame in a specified order."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:18.654238Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:18.653517Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:19.431749Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:19.431127Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 233
|
||
},
|
||
"id": "Tdhy8cQzW_sk",
|
||
"outputId": "ab4bcc6b-43ab-45b3-8880-545d52410851"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
" Country InvoiceDate\n",
|
||
"0 United Kingdom 2010-12-01 08:26:00\n",
|
||
"1 United Kingdom 2010-12-01 08:26:00\n",
|
||
"2 United Kingdom 2010-12-01 08:26:00\n",
|
||
"3 United Kingdom 2010-12-01 08:26:00\n",
|
||
"4 United Kingdom 2010-12-01 08:26:00\n",
|
||
"\n",
|
||
"[5 rows x 2 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Country</th>\n",
|
||
" <th>InvoiceDate</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>2010-12-01 08:26:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>2010-12-01 08:26:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>2010-12-01 08:26:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>2010-12-01 08:26:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>2010-12-01 08:26:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>\n",
|
||
"<p>5 rows × 2 columns</p>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 14
|
||
}
|
||
],
|
||
"source": [
|
||
"df[['Country', 'InvoiceDate']].head(5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "CYV4WfSHW_sk"
|
||
},
|
||
"source": [
|
||
"### Boolean Indexing\n",
|
||
"\n",
|
||
"We also allow you to filter the data frame using boolean indexing. Under the hood, a boolean index maps to a `term` or `terms` query that is then passed to Elasticsearch to filter the index."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:19.440640Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:19.439831Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:20.066747Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:20.067477Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 250
|
||
},
|
||
"id": "p2SPKSOEW_sk",
|
||
"outputId": "288a036f-fda0-4d22-9fb4-15ba2cb551b3"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"{'term': {'Country': 'Germany'}}\n"
|
||
]
|
||
},
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
" Country CustomerID ... StockCode UnitPrice\n",
|
||
"1109 Germany 12662.0 ... 22809 2.95\n",
|
||
"1110 Germany 12662.0 ... 84347 2.55\n",
|
||
"1111 Germany 12662.0 ... 84945 0.85\n",
|
||
"1112 Germany 12662.0 ... 22242 1.65\n",
|
||
"1113 Germany 12662.0 ... 22244 1.95\n",
|
||
"\n",
|
||
"[5 rows x 8 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Country</th>\n",
|
||
" <th>CustomerID</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>StockCode</th>\n",
|
||
" <th>UnitPrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1109</th>\n",
|
||
" <td>Germany</td>\n",
|
||
" <td>12662.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>22809</td>\n",
|
||
" <td>2.95</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1110</th>\n",
|
||
" <td>Germany</td>\n",
|
||
" <td>12662.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>84347</td>\n",
|
||
" <td>2.55</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1111</th>\n",
|
||
" <td>Germany</td>\n",
|
||
" <td>12662.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>84945</td>\n",
|
||
" <td>0.85</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1112</th>\n",
|
||
" <td>Germany</td>\n",
|
||
" <td>12662.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>22242</td>\n",
|
||
" <td>1.65</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1113</th>\n",
|
||
" <td>Germany</td>\n",
|
||
" <td>12662.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>22244</td>\n",
|
||
" <td>1.95</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>\n",
|
||
"<p>5 rows × 8 columns</p>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 15
|
||
}
|
||
],
|
||
"source": [
|
||
"# the construction of a boolean vector maps directly to an elasticsearch query\n",
|
||
"print(df['Country']=='Germany')\n",
|
||
"df[(df['Country']=='Germany')].head(5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "xV2bkyC3W_sk"
|
||
},
|
||
"source": [
|
||
"We can also filter the data frame using a list of values."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:20.077022Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:20.076412Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:21.233013Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:21.234073Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 250
|
||
},
|
||
"id": "atifgs9hW_sk",
|
||
"outputId": "26468789-6032-428c-f64a-ce36c5a5f426"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"{'terms': {'Country': ['Germany', 'United States']}}\n"
|
||
]
|
||
},
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
" Country CustomerID ... StockCode UnitPrice\n",
|
||
"0 United Kingdom 17850.0 ... 85123A 2.55\n",
|
||
"1 United Kingdom 17850.0 ... 71053 3.39\n",
|
||
"2 United Kingdom 17850.0 ... 84406B 2.75\n",
|
||
"3 United Kingdom 17850.0 ... 84029G 3.39\n",
|
||
"4 United Kingdom 17850.0 ... 84029E 3.39\n",
|
||
"\n",
|
||
"[5 rows x 8 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Country</th>\n",
|
||
" <th>CustomerID</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>StockCode</th>\n",
|
||
" <th>UnitPrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>17850.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>85123A</td>\n",
|
||
" <td>2.55</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>17850.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>71053</td>\n",
|
||
" <td>3.39</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>17850.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>84406B</td>\n",
|
||
" <td>2.75</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>17850.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>84029G</td>\n",
|
||
" <td>3.39</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>17850.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>84029E</td>\n",
|
||
" <td>3.39</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>\n",
|
||
"<p>5 rows × 8 columns</p>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 16
|
||
}
|
||
],
|
||
"source": [
|
||
"print(df['Country'].isin(['Germany', 'United States']))\n",
|
||
"df[df['Country'].isin(['Germany', 'United Kingdom'])].head(5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "oFwNF24EW_sk"
|
||
},
|
||
"source": [
|
||
"We can also combine boolean vectors to further filter the data frame."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:21.245390Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:21.244737Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:22.358701Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:22.355150Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 78
|
||
},
|
||
"id": "WK3xl6JQW_sk",
|
||
"outputId": "7e7d9cb2-79d8-4f2b-bb95-02a316246fa6"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"Empty DataFrame\n",
|
||
"Columns: [Country, CustomerID, Description, InvoiceDate, InvoiceNo, Quantity, StockCode, UnitPrice]\n",
|
||
"Index: []\n",
|
||
"\n",
|
||
"[0 rows x 8 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Country</th>\n",
|
||
" <th>CustomerID</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>StockCode</th>\n",
|
||
" <th>UnitPrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>\n",
|
||
"<p>0 rows × 8 columns</p>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 17
|
||
}
|
||
],
|
||
"source": [
|
||
"df[(df['Country']=='Germany') & (df['Quantity']>90)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "OOZMEOTXW_sk"
|
||
},
|
||
"source": [
|
||
"Using this example, let's see how eland translates this boolean filter to an Elasticsearch `bool` query."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:22.383610Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:22.370577Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:22.390275Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:22.388963Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "zg5CX5A9W_sl",
|
||
"outputId": "929c23f7-be9b-4ea6-c1bc-e934f25e9fb0"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"es_index_pattern: online-retail\n",
|
||
"Index:\n",
|
||
" es_index_field: _id\n",
|
||
" is_source_field: False\n",
|
||
"Mappings:\n",
|
||
" capabilities:\n",
|
||
" es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name\n",
|
||
"Country Country True keyword None object True True False Country\n",
|
||
"CustomerID CustomerID True double None float64 True True False CustomerID\n",
|
||
"Description Description True keyword None object True True False Description\n",
|
||
"InvoiceDate InvoiceDate True keyword None object True True False InvoiceDate\n",
|
||
"InvoiceNo InvoiceNo True keyword None object True True False InvoiceNo\n",
|
||
"Quantity Quantity True long None int64 True True False Quantity\n",
|
||
"StockCode StockCode True keyword None object True True False StockCode\n",
|
||
"UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
|
||
"Operations:\n",
|
||
" tasks: [('boolean_filter': ('boolean_filter': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}))]\n",
|
||
" size: None\n",
|
||
" sort_params: None\n",
|
||
" _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
|
||
" body: {'query': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}}\n",
|
||
" post_processing: []\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(df[(df['Country']=='Germany') & (df['Quantity']>90)].es_info())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "QRTjR8d3W_sl"
|
||
},
|
||
"source": [
|
||
"## Aggregation and Descriptive Statistics\n",
|
||
"\n",
|
||
"Let's begin to ask some questions of our data and use eland to get the answers."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "Pc3OGsQ5W_sl"
|
||
},
|
||
"source": [
|
||
"**How many different countries are there?**"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:22.398231Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:22.397459Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:22.482238Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:22.481338Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "9p8JrleuW_sl",
|
||
"outputId": "9323b109-a59a-4b7c-f6c4-14996666591d"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"16"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 19
|
||
}
|
||
],
|
||
"source": [
|
||
"df['Country'].nunique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "9w_Ge7ESW_sl"
|
||
},
|
||
"source": [
|
||
"**What is the total sum of products ordered?**"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:22.492668Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:22.491590Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:22.580015Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:22.578300Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "MFu01N3LW_sl",
|
||
"outputId": "0e10430a-2a2d-4f4c-be41-01f0bcb9afde"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"111960"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 20
|
||
}
|
||
],
|
||
"source": [
|
||
"df['Quantity'].sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "-Nwc-ybwW_sl"
|
||
},
|
||
"source": [
|
||
"**Show me the sum, mean, min, and max of the `Quantity` and `UnitPrice` fields**"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:22.601432Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:22.600117Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:22.702450Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:22.701499Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 173
|
||
},
|
||
"id": "QhqUeIQbW_sl",
|
||
"outputId": "aeec1b5e-bee4-4cf5-e338-def0d82b0451"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
" Quantity UnitPrice\n",
|
||
"sum 111960.000 61548.490000\n",
|
||
"mean 7.464 4.103233\n",
|
||
"max 2880.000 950.990000\n",
|
||
"min -9360.000 0.000000"
|
||
],
|
||
"text/html": [
|
||
"\n",
|
||
" <div id=\"df-cbfeceb1-2fd7-4a44-acdf-2b6c13dbb9c2\" class=\"colab-df-container\">\n",
|
||
" <div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Quantity</th>\n",
|
||
" <th>UnitPrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>sum</th>\n",
|
||
" <td>111960.000</td>\n",
|
||
" <td>61548.490000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>7.464</td>\n",
|
||
" <td>4.103233</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>2880.000</td>\n",
|
||
" <td>950.990000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>-9360.000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>\n",
|
||
" <div class=\"colab-df-buttons\">\n",
|
||
"\n",
|
||
" <div class=\"colab-df-container\">\n",
|
||
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-cbfeceb1-2fd7-4a44-acdf-2b6c13dbb9c2')\"\n",
|
||
" title=\"Convert this dataframe to an interactive table.\"\n",
|
||
" style=\"display:none;\">\n",
|
||
"\n",
|
||
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
|
||
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
|
||
" </svg>\n",
|
||
" </button>\n",
|
||
"\n",
|
||
" <style>\n",
|
||
" .colab-df-container {\n",
|
||
" display:flex;\n",
|
||
" gap: 12px;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-convert {\n",
|
||
" background-color: #E8F0FE;\n",
|
||
" border: none;\n",
|
||
" border-radius: 50%;\n",
|
||
" cursor: pointer;\n",
|
||
" display: none;\n",
|
||
" fill: #1967D2;\n",
|
||
" height: 32px;\n",
|
||
" padding: 0 0 0 0;\n",
|
||
" width: 32px;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-convert:hover {\n",
|
||
" background-color: #E2EBFA;\n",
|
||
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||
" fill: #174EA6;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-buttons div {\n",
|
||
" margin-bottom: 4px;\n",
|
||
" }\n",
|
||
"\n",
|
||
" [theme=dark] .colab-df-convert {\n",
|
||
" background-color: #3B4455;\n",
|
||
" fill: #D2E3FC;\n",
|
||
" }\n",
|
||
"\n",
|
||
" [theme=dark] .colab-df-convert:hover {\n",
|
||
" background-color: #434B5C;\n",
|
||
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
||
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
||
" fill: #FFFFFF;\n",
|
||
" }\n",
|
||
" </style>\n",
|
||
"\n",
|
||
" <script>\n",
|
||
" const buttonEl =\n",
|
||
" document.querySelector('#df-cbfeceb1-2fd7-4a44-acdf-2b6c13dbb9c2 button.colab-df-convert');\n",
|
||
" buttonEl.style.display =\n",
|
||
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||
"\n",
|
||
" async function convertToInteractive(key) {\n",
|
||
" const element = document.querySelector('#df-cbfeceb1-2fd7-4a44-acdf-2b6c13dbb9c2');\n",
|
||
" const dataTable =\n",
|
||
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
||
" [key], {});\n",
|
||
" if (!dataTable) return;\n",
|
||
"\n",
|
||
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
||
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
||
" + ' to learn more about interactive tables.';\n",
|
||
" element.innerHTML = '';\n",
|
||
" dataTable['output_type'] = 'display_data';\n",
|
||
" await google.colab.output.renderOutput(dataTable, element);\n",
|
||
" const docLink = document.createElement('div');\n",
|
||
" docLink.innerHTML = docLinkHtml;\n",
|
||
" element.appendChild(docLink);\n",
|
||
" }\n",
|
||
" </script>\n",
|
||
" </div>\n",
|
||
"\n",
|
||
"\n",
|
||
"<div id=\"df-a515a67f-153d-431f-a346-9ad3f849d5b9\">\n",
|
||
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a515a67f-153d-431f-a346-9ad3f849d5b9')\"\n",
|
||
" title=\"Suggest charts\"\n",
|
||
" style=\"display:none;\">\n",
|
||
"\n",
|
||
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
||
" width=\"24px\">\n",
|
||
" <g>\n",
|
||
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
|
||
" </g>\n",
|
||
"</svg>\n",
|
||
" </button>\n",
|
||
"\n",
|
||
"<style>\n",
|
||
" .colab-df-quickchart {\n",
|
||
" --bg-color: #E8F0FE;\n",
|
||
" --fill-color: #1967D2;\n",
|
||
" --hover-bg-color: #E2EBFA;\n",
|
||
" --hover-fill-color: #174EA6;\n",
|
||
" --disabled-fill-color: #AAA;\n",
|
||
" --disabled-bg-color: #DDD;\n",
|
||
" }\n",
|
||
"\n",
|
||
" [theme=dark] .colab-df-quickchart {\n",
|
||
" --bg-color: #3B4455;\n",
|
||
" --fill-color: #D2E3FC;\n",
|
||
" --hover-bg-color: #434B5C;\n",
|
||
" --hover-fill-color: #FFFFFF;\n",
|
||
" --disabled-bg-color: #3B4455;\n",
|
||
" --disabled-fill-color: #666;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-quickchart {\n",
|
||
" background-color: var(--bg-color);\n",
|
||
" border: none;\n",
|
||
" border-radius: 50%;\n",
|
||
" cursor: pointer;\n",
|
||
" display: none;\n",
|
||
" fill: var(--fill-color);\n",
|
||
" height: 32px;\n",
|
||
" padding: 0;\n",
|
||
" width: 32px;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-quickchart:hover {\n",
|
||
" background-color: var(--hover-bg-color);\n",
|
||
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||
" fill: var(--button-hover-fill-color);\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-quickchart-complete:disabled,\n",
|
||
" .colab-df-quickchart-complete:disabled:hover {\n",
|
||
" background-color: var(--disabled-bg-color);\n",
|
||
" fill: var(--disabled-fill-color);\n",
|
||
" box-shadow: none;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-spinner {\n",
|
||
" border: 2px solid var(--fill-color);\n",
|
||
" border-color: transparent;\n",
|
||
" border-bottom-color: var(--fill-color);\n",
|
||
" animation:\n",
|
||
" spin 1s steps(1) infinite;\n",
|
||
" }\n",
|
||
"\n",
|
||
" @keyframes spin {\n",
|
||
" 0% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-bottom-color: var(--fill-color);\n",
|
||
" border-left-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 20% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-left-color: var(--fill-color);\n",
|
||
" border-top-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 30% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-left-color: var(--fill-color);\n",
|
||
" border-top-color: var(--fill-color);\n",
|
||
" border-right-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 40% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-right-color: var(--fill-color);\n",
|
||
" border-top-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 60% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-right-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 80% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-right-color: var(--fill-color);\n",
|
||
" border-bottom-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 90% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-bottom-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"\n",
|
||
" <script>\n",
|
||
" async function quickchart(key) {\n",
|
||
" const quickchartButtonEl =\n",
|
||
" document.querySelector('#' + key + ' button');\n",
|
||
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
|
||
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
|
||
" try {\n",
|
||
" const charts = await google.colab.kernel.invokeFunction(\n",
|
||
" 'suggestCharts', [key], {});\n",
|
||
" } catch (error) {\n",
|
||
" console.error('Error during call to suggestCharts:', error);\n",
|
||
" }\n",
|
||
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
|
||
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
|
||
" }\n",
|
||
" (() => {\n",
|
||
" let quickchartButtonEl =\n",
|
||
" document.querySelector('#df-a515a67f-153d-431f-a346-9ad3f849d5b9 button');\n",
|
||
" quickchartButtonEl.style.display =\n",
|
||
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||
" })();\n",
|
||
" </script>\n",
|
||
"</div>\n",
|
||
" </div>\n",
|
||
" </div>\n"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 21
|
||
}
|
||
],
|
||
"source": [
|
||
"df[['Quantity','UnitPrice']].agg(['sum', 'mean', 'max', 'min'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "CklumXmiW_sm"
|
||
},
|
||
"source": [
|
||
"**Give me descriptive statistics for the entire data frame**"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:22.712002Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:22.711114Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:22.982698Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:22.981770Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 297
|
||
},
|
||
"id": "AttDFOcRW_sm",
|
||
"outputId": "a515c9f5-5b04-4943-9c72-8da357fa29ae"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
" CustomerID Quantity UnitPrice\n",
|
||
"count 10729.000000 15000.000000 15000.000000\n",
|
||
"mean 15590.776680 7.464000 4.103233\n",
|
||
"std 1764.189592 85.930116 20.106214\n",
|
||
"min 12347.000000 -9360.000000 0.000000\n",
|
||
"25% 14225.913815 1.000000 1.336010\n",
|
||
"50% 15668.124797 2.423796 2.396465\n",
|
||
"75% 17195.974646 7.403795 4.282239\n",
|
||
"max 18239.000000 2880.000000 950.990000"
|
||
],
|
||
"text/html": [
|
||
"\n",
|
||
" <div id=\"df-cfeaff95-e82b-4d7c-bbde-299c17b9493e\" class=\"colab-df-container\">\n",
|
||
" <div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>CustomerID</th>\n",
|
||
" <th>Quantity</th>\n",
|
||
" <th>UnitPrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>10729.000000</td>\n",
|
||
" <td>15000.000000</td>\n",
|
||
" <td>15000.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>15590.776680</td>\n",
|
||
" <td>7.464000</td>\n",
|
||
" <td>4.103233</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>1764.189592</td>\n",
|
||
" <td>85.930116</td>\n",
|
||
" <td>20.106214</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>12347.000000</td>\n",
|
||
" <td>-9360.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>14225.913815</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>1.336010</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>15668.124797</td>\n",
|
||
" <td>2.423796</td>\n",
|
||
" <td>2.396465</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>17195.974646</td>\n",
|
||
" <td>7.403795</td>\n",
|
||
" <td>4.282239</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>18239.000000</td>\n",
|
||
" <td>2880.000000</td>\n",
|
||
" <td>950.990000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>\n",
|
||
" <div class=\"colab-df-buttons\">\n",
|
||
"\n",
|
||
" <div class=\"colab-df-container\">\n",
|
||
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-cfeaff95-e82b-4d7c-bbde-299c17b9493e')\"\n",
|
||
" title=\"Convert this dataframe to an interactive table.\"\n",
|
||
" style=\"display:none;\">\n",
|
||
"\n",
|
||
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
|
||
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
|
||
" </svg>\n",
|
||
" </button>\n",
|
||
"\n",
|
||
" <style>\n",
|
||
" .colab-df-container {\n",
|
||
" display:flex;\n",
|
||
" gap: 12px;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-convert {\n",
|
||
" background-color: #E8F0FE;\n",
|
||
" border: none;\n",
|
||
" border-radius: 50%;\n",
|
||
" cursor: pointer;\n",
|
||
" display: none;\n",
|
||
" fill: #1967D2;\n",
|
||
" height: 32px;\n",
|
||
" padding: 0 0 0 0;\n",
|
||
" width: 32px;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-convert:hover {\n",
|
||
" background-color: #E2EBFA;\n",
|
||
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||
" fill: #174EA6;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-buttons div {\n",
|
||
" margin-bottom: 4px;\n",
|
||
" }\n",
|
||
"\n",
|
||
" [theme=dark] .colab-df-convert {\n",
|
||
" background-color: #3B4455;\n",
|
||
" fill: #D2E3FC;\n",
|
||
" }\n",
|
||
"\n",
|
||
" [theme=dark] .colab-df-convert:hover {\n",
|
||
" background-color: #434B5C;\n",
|
||
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
||
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
||
" fill: #FFFFFF;\n",
|
||
" }\n",
|
||
" </style>\n",
|
||
"\n",
|
||
" <script>\n",
|
||
" const buttonEl =\n",
|
||
" document.querySelector('#df-cfeaff95-e82b-4d7c-bbde-299c17b9493e button.colab-df-convert');\n",
|
||
" buttonEl.style.display =\n",
|
||
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||
"\n",
|
||
" async function convertToInteractive(key) {\n",
|
||
" const element = document.querySelector('#df-cfeaff95-e82b-4d7c-bbde-299c17b9493e');\n",
|
||
" const dataTable =\n",
|
||
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
||
" [key], {});\n",
|
||
" if (!dataTable) return;\n",
|
||
"\n",
|
||
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
||
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
||
" + ' to learn more about interactive tables.';\n",
|
||
" element.innerHTML = '';\n",
|
||
" dataTable['output_type'] = 'display_data';\n",
|
||
" await google.colab.output.renderOutput(dataTable, element);\n",
|
||
" const docLink = document.createElement('div');\n",
|
||
" docLink.innerHTML = docLinkHtml;\n",
|
||
" element.appendChild(docLink);\n",
|
||
" }\n",
|
||
" </script>\n",
|
||
" </div>\n",
|
||
"\n",
|
||
"\n",
|
||
"<div id=\"df-51e65f9a-bb05-4074-a835-06c70a6b5749\">\n",
|
||
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-51e65f9a-bb05-4074-a835-06c70a6b5749')\"\n",
|
||
" title=\"Suggest charts\"\n",
|
||
" style=\"display:none;\">\n",
|
||
"\n",
|
||
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
||
" width=\"24px\">\n",
|
||
" <g>\n",
|
||
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
|
||
" </g>\n",
|
||
"</svg>\n",
|
||
" </button>\n",
|
||
"\n",
|
||
"<style>\n",
|
||
" .colab-df-quickchart {\n",
|
||
" --bg-color: #E8F0FE;\n",
|
||
" --fill-color: #1967D2;\n",
|
||
" --hover-bg-color: #E2EBFA;\n",
|
||
" --hover-fill-color: #174EA6;\n",
|
||
" --disabled-fill-color: #AAA;\n",
|
||
" --disabled-bg-color: #DDD;\n",
|
||
" }\n",
|
||
"\n",
|
||
" [theme=dark] .colab-df-quickchart {\n",
|
||
" --bg-color: #3B4455;\n",
|
||
" --fill-color: #D2E3FC;\n",
|
||
" --hover-bg-color: #434B5C;\n",
|
||
" --hover-fill-color: #FFFFFF;\n",
|
||
" --disabled-bg-color: #3B4455;\n",
|
||
" --disabled-fill-color: #666;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-quickchart {\n",
|
||
" background-color: var(--bg-color);\n",
|
||
" border: none;\n",
|
||
" border-radius: 50%;\n",
|
||
" cursor: pointer;\n",
|
||
" display: none;\n",
|
||
" fill: var(--fill-color);\n",
|
||
" height: 32px;\n",
|
||
" padding: 0;\n",
|
||
" width: 32px;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-quickchart:hover {\n",
|
||
" background-color: var(--hover-bg-color);\n",
|
||
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||
" fill: var(--button-hover-fill-color);\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-quickchart-complete:disabled,\n",
|
||
" .colab-df-quickchart-complete:disabled:hover {\n",
|
||
" background-color: var(--disabled-bg-color);\n",
|
||
" fill: var(--disabled-fill-color);\n",
|
||
" box-shadow: none;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-spinner {\n",
|
||
" border: 2px solid var(--fill-color);\n",
|
||
" border-color: transparent;\n",
|
||
" border-bottom-color: var(--fill-color);\n",
|
||
" animation:\n",
|
||
" spin 1s steps(1) infinite;\n",
|
||
" }\n",
|
||
"\n",
|
||
" @keyframes spin {\n",
|
||
" 0% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-bottom-color: var(--fill-color);\n",
|
||
" border-left-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 20% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-left-color: var(--fill-color);\n",
|
||
" border-top-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 30% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-left-color: var(--fill-color);\n",
|
||
" border-top-color: var(--fill-color);\n",
|
||
" border-right-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 40% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-right-color: var(--fill-color);\n",
|
||
" border-top-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 60% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-right-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 80% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-right-color: var(--fill-color);\n",
|
||
" border-bottom-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" 90% {\n",
|
||
" border-color: transparent;\n",
|
||
" border-bottom-color: var(--fill-color);\n",
|
||
" }\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"\n",
|
||
" <script>\n",
|
||
" async function quickchart(key) {\n",
|
||
" const quickchartButtonEl =\n",
|
||
" document.querySelector('#' + key + ' button');\n",
|
||
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
|
||
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
|
||
" try {\n",
|
||
" const charts = await google.colab.kernel.invokeFunction(\n",
|
||
" 'suggestCharts', [key], {});\n",
|
||
" } catch (error) {\n",
|
||
" console.error('Error during call to suggestCharts:', error);\n",
|
||
" }\n",
|
||
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
|
||
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
|
||
" }\n",
|
||
" (() => {\n",
|
||
" let quickchartButtonEl =\n",
|
||
" document.querySelector('#df-51e65f9a-bb05-4074-a835-06c70a6b5749 button');\n",
|
||
" quickchartButtonEl.style.display =\n",
|
||
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||
" })();\n",
|
||
" </script>\n",
|
||
"</div>\n",
|
||
" </div>\n",
|
||
" </div>\n"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 22
|
||
}
|
||
],
|
||
"source": [
|
||
"# NBVAL_IGNORE_OUTPUT\n",
|
||
"df.describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "LfPjHhpKW_sn"
|
||
},
|
||
"source": [
|
||
"**Show me a histogram of numeric columns**"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:23.000466Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:22.999571Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:23.576387Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:23.576703Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 391
|
||
},
|
||
"id": "ZJ1VoTG_W_sn",
|
||
"outputId": "a32d2fec-17db-428a-d4a2-71a17faa41fa"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "display_data",
|
||
"data": {
|
||
"text/plain": [
|
||
"<Figure size 1200x400 with 2 Axes>"
|
||
],
|
||
"image/png": "\n"
|
||
},
|
||
"metadata": {}
|
||
}
|
||
],
|
||
"source": [
|
||
"df[(df['Quantity']>-50) &\n",
|
||
" (df['Quantity']<50) &\n",
|
||
" (df['UnitPrice']>0) &\n",
|
||
" (df['UnitPrice']<100)][['Quantity', 'UnitPrice']].hist(figsize=[12,4], bins=30)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:23.584264Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:23.583784Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:24.494000Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:24.493618Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 391
|
||
},
|
||
"id": "JbSWJJKUW_sn",
|
||
"outputId": "94aaf248-35ea-4e1d-fbf8-a1cf60b7778d"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "display_data",
|
||
"data": {
|
||
"text/plain": [
|
||
"<Figure size 1200x400 with 2 Axes>"
|
||
],
|
||
"image/png": "\n"
|
||
},
|
||
"metadata": {}
|
||
}
|
||
],
|
||
"source": [
|
||
"df[(df['Quantity']>-50) &\n",
|
||
" (df['Quantity']<50) &\n",
|
||
" (df['UnitPrice']>0) &\n",
|
||
" (df['UnitPrice']<100)][['Quantity', 'UnitPrice']].hist(figsize=[12,4], bins=30, log=True)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:24.504460Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:24.504086Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:26.468550Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:26.466711Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "5QysMacBW_so",
|
||
"outputId": "e3356521-63c3-404c-e8db-5ed6a0407162"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
" Country CustomerID ... StockCode UnitPrice\n",
|
||
"46 United Kingdom 13748.0 ... 22086 2.55\n",
|
||
"83 United Kingdom 15291.0 ... 21733 2.55\n",
|
||
"96 United Kingdom 14688.0 ... 21212 0.42\n",
|
||
"102 United Kingdom 14688.0 ... 85071B 0.38\n",
|
||
"176 United Kingdom 16029.0 ... 85099C 1.65\n",
|
||
"... ... ... ... ... ...\n",
|
||
"14784 United Kingdom 15061.0 ... 22423 10.95\n",
|
||
"14785 United Kingdom 15061.0 ... 22075 1.45\n",
|
||
"14788 United Kingdom 15061.0 ... 17038 0.07\n",
|
||
"14974 United Kingdom 14739.0 ... 21704 0.72\n",
|
||
"14980 United Kingdom 14739.0 ... 22178 1.06\n",
|
||
"\n",
|
||
"[258 rows x 8 columns]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"filter_quantity_price = df[(df['Quantity'] > 50) & (df['UnitPrice'] < 100)]\n",
|
||
"print(filter_quantity_price)\n",
|
||
"\n",
|
||
"\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "xG1_wb5RW_so"
|
||
},
|
||
"source": [
|
||
"## Arithmetic Operations"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "CxqA7gO-W_so"
|
||
},
|
||
"source": [
|
||
"Numeric values"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:26.483774Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:26.482084Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:26.907406Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:26.906448Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "HWoI0tYcW_so",
|
||
"outputId": "727db684-740b-45bf-9e4e-3887674b3e8c"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"0 6\n",
|
||
"1 6\n",
|
||
"2 8\n",
|
||
"3 6\n",
|
||
"4 6\n",
|
||
"Name: Quantity, dtype: int64"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 34
|
||
}
|
||
],
|
||
"source": [
|
||
"df['Quantity'].head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:26.912916Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:26.910149Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:27.361783Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:27.362723Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "I6Rs0pTAW_sp",
|
||
"outputId": "68aa067b-b70c-4030-8acf-a2b36310fc52"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"0 2.55\n",
|
||
"1 3.39\n",
|
||
"2 2.75\n",
|
||
"3 3.39\n",
|
||
"4 3.39\n",
|
||
"Name: UnitPrice, dtype: float64"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 42
|
||
}
|
||
],
|
||
"source": [
|
||
"df['UnitPrice'].head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:27.383414Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:27.374098Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:27.387546Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:27.388753Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "wbibhbn6W_sp",
|
||
"outputId": "d0b6b88b-6282-4901-fea1-94d2e94c773d"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stderr",
|
||
"text": [
|
||
"/usr/local/lib/python3.10/dist-packages/eland/field_mappings.py:715: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
|
||
" self._mappings_capabilities = self._mappings_capabilities.append(\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"product = df['Quantity'] * df['UnitPrice']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:27.398754Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:27.397557Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:27.818022Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:27.819640Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "2FstW8ZCW_sp",
|
||
"outputId": "b55d687e-6b7d-48a8-8815-e64bc6ebf6dd"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"0 15.30\n",
|
||
"1 20.34\n",
|
||
"2 22.00\n",
|
||
"3 20.34\n",
|
||
"4 20.34\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 44
|
||
}
|
||
],
|
||
"source": [
|
||
"product.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "Pd2zMWGBW_sp"
|
||
},
|
||
"source": [
|
||
"String concatenation"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"execution": {
|
||
"iopub.execute_input": "2021-12-15T20:25:27.837007Z",
|
||
"iopub.status.busy": "2021-12-15T20:25:27.836370Z",
|
||
"iopub.status.idle": "2021-12-15T20:25:29.072872Z",
|
||
"shell.execute_reply": "2021-12-15T20:25:29.074153Z"
|
||
},
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "5MpSfh0fW_sp",
|
||
"outputId": "e0c52ddf-19bc-40af-813a-75c7e808bef6"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stderr",
|
||
"text": [
|
||
"/usr/local/lib/python3.10/dist-packages/eland/field_mappings.py:715: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
|
||
" self._mappings_capabilities = self._mappings_capabilities.append(\n"
|
||
]
|
||
},
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"0 United Kingdom85123A\n",
|
||
"1 United Kingdom71053\n",
|
||
"2 United Kingdom84406B\n",
|
||
"3 United Kingdom84029G\n",
|
||
"4 United Kingdom84029E\n",
|
||
" ... \n",
|
||
"14995 United Kingdom72349B\n",
|
||
"14996 United Kingdom72741\n",
|
||
"14997 United Kingdom22762\n",
|
||
"14998 United Kingdom21773\n",
|
||
"14999 United Kingdom22149\n",
|
||
"Length: 15000, dtype: object"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 45
|
||
}
|
||
],
|
||
"source": [
|
||
"df['Country'] + df['StockCode']"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.6"
|
||
},
|
||
"pycharm": {
|
||
"stem_cell": {
|
||
"cell_type": "raw",
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"source": []
|
||
}
|
||
},
|
||
"colab": {
|
||
"provenance": []
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0
|
||
} |