diff --git a/docs/sphinx/examples/online_retail_analysis.ipynb b/docs/sphinx/examples/online_retail_analysis.ipynb
index f1ec185..b3821c2 100644
--- a/docs/sphinx/examples/online_retail_analysis.ipynb
+++ b/docs/sphinx/examples/online_retail_analysis.ipynb
@@ -1,1635 +1,2307 @@
{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:06.764412Z",
- "iopub.status.busy": "2021-12-15T20:25:06.755567Z",
- "iopub.status.idle": "2021-12-15T20:25:07.316950Z",
- "shell.execute_reply": "2021-12-15T20:25:07.316561Z"
- }
- },
- "outputs": [],
- "source": [
- "import eland as ed\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "# Fix console size for consistent test results\n",
- "from eland.conftest import *"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Online Retail Analysis"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Getting Started\n",
- "\n",
- "To get started, let's create an `eland.DataFrame` by reading a csv file. This creates and populates the \n",
- "`online-retail` index in the local Elasticsearch cluster."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:07.324283Z",
- "iopub.status.busy": "2021-12-15T20:25:07.323764Z",
- "iopub.status.idle": "2021-12-15T20:25:16.241379Z",
- "shell.execute_reply": "2021-12-15T20:25:16.241877Z"
- }
- },
- "outputs": [],
- "source": [
- "df = ed.csv_to_eland(\"data/online-retail.csv.gz\",\n",
- " es_client='http://localhost:9200', \n",
- " es_dest_index='online-retail', \n",
- " es_if_exists='replace', \n",
- " es_dropna=True,\n",
- " es_refresh=True,\n",
- " compression='gzip',\n",
- " index_col=0)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Here we see that the `\"_id\"` field was used to index our data frame. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:16.246737Z",
- "iopub.status.busy": "2021-12-15T20:25:16.244084Z",
- "iopub.status.idle": "2021-12-15T20:25:16.250080Z",
- "shell.execute_reply": "2021-12-15T20:25:16.250410Z"
- }
- },
- "outputs": [
+ "cells": [
{
- "data": {
- "text/plain": [
- "'_id'"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.index.es_index_field"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Next, we can check which field from elasticsearch are available to our eland data frame. `columns` is available as a parameter when instantiating the data frame which allows one to choose only a subset of fields from your index to be included in the data frame. Since we didn't set this parameter, we have access to all fields."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:16.254703Z",
- "iopub.status.busy": "2021-12-15T20:25:16.254060Z",
- "iopub.status.idle": "2021-12-15T20:25:16.256567Z",
- "shell.execute_reply": "2021-12-15T20:25:16.256138Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode',\n",
- " 'UnitPrice'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.columns"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now, let's see the data types of our fields. Running `df.dtypes`, we can see that elasticsearch field types are mapped to pandas field types."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:16.261335Z",
- "iopub.status.busy": "2021-12-15T20:25:16.260762Z",
- "iopub.status.idle": "2021-12-15T20:25:16.263024Z",
- "shell.execute_reply": "2021-12-15T20:25:16.263323Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Country object\n",
- "CustomerID float64\n",
- "Description object\n",
- "InvoiceDate object\n",
- "InvoiceNo object\n",
- "Quantity int64\n",
- "StockCode object\n",
- "UnitPrice float64\n",
- "dtype: object"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.dtypes"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We also offer a `.es_info()` data frame method that shows all info about the underlying index. It also contains information about operations being passed from data frame methods to elasticsearch. More on this later."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:16.266245Z",
- "iopub.status.busy": "2021-12-15T20:25:16.265860Z",
- "iopub.status.idle": "2021-12-15T20:25:16.271135Z",
- "shell.execute_reply": "2021-12-15T20:25:16.270816Z"
- }
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "es_index_pattern: online-retail\n",
- "Index:\n",
- " es_index_field: _id\n",
- " is_source_field: False\n",
- "Mappings:\n",
- " capabilities:\n",
- " es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name\n",
- "Country Country True keyword None object True True False Country\n",
- "CustomerID CustomerID True double None float64 True True False CustomerID\n",
- "Description Description True keyword None object True True False Description\n",
- "InvoiceDate InvoiceDate True keyword None object True True False InvoiceDate\n",
- "InvoiceNo InvoiceNo True keyword None object True True False InvoiceNo\n",
- "Quantity Quantity True long None int64 True True False Quantity\n",
- "StockCode StockCode True keyword None object True True False StockCode\n",
- "UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
- "Operations:\n",
- " tasks: []\n",
- " size: None\n",
- " sort_params: None\n",
- " _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
- " body: {}\n",
- " post_processing: []\n",
- "\n"
- ]
- }
- ],
- "source": [
- "print(df.es_info())"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Selecting and Indexing Data\n",
- "\n",
- "Now that we understand how to create a data frame and get access to it's underlying attributes, let's see how we can select subsets of our data."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### head and tail\n",
- "\n",
- "much like pandas, eland data frames offer `.head(n)` and `.tail(n)` methods that return the first and last n rows, respectively."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:16.274779Z",
- "iopub.status.busy": "2021-12-15T20:25:16.274393Z",
- "iopub.status.idle": "2021-12-15T20:25:17.555325Z",
- "shell.execute_reply": "2021-12-15T20:25:17.555642Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Country | \n",
- " CustomerID | \n",
- " ... | \n",
- " StockCode | \n",
- " UnitPrice | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " United Kingdom | \n",
- " 17850.0 | \n",
- " ... | \n",
- " 85123A | \n",
- " 2.55 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " United Kingdom | \n",
- " 17850.0 | \n",
- " ... | \n",
- " 71053 | \n",
- " 3.39 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "2 rows × 8 columns
"
+ "cell_type": "markdown",
+ "source": [
+ "# Online retail analysis using Eland\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elastic/eland/blob/main/docs/sphinx/examples/online_retail_analysis.ipynb)\n",
+ "\n",
+ "Learn how to analyze some online retail data using Eland."
],
- "text/plain": [
- " Country CustomerID ... StockCode UnitPrice\n",
- "0 United Kingdom 17850.0 ... 85123A 2.55\n",
- "1 United Kingdom 17850.0 ... 71053 3.39\n",
- "\n",
- "[2 rows x 8 columns]"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.head(2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:17.559534Z",
- "iopub.status.busy": "2021-12-15T20:25:17.559123Z",
- "iopub.status.idle": "2021-12-15T20:25:17.637500Z",
- "shell.execute_reply": "2021-12-15T20:25:17.637125Z"
- }
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "es_index_pattern: online-retail\n",
- "Index:\n",
- " es_index_field: _id\n",
- " is_source_field: False\n",
- "Mappings:\n",
- " capabilities:\n",
- " es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name\n",
- "Country Country True keyword None object True True False Country\n",
- "CustomerID CustomerID True double None float64 True True False CustomerID\n",
- "Description Description True keyword None object True True False Description\n",
- "InvoiceDate InvoiceDate True keyword None object True True False InvoiceDate\n",
- "InvoiceNo InvoiceNo True keyword None object True True False InvoiceNo\n",
- "Quantity Quantity True long None int64 True True False Quantity\n",
- "StockCode StockCode True keyword None object True True False StockCode\n",
- "UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
- "Operations:\n",
- " tasks: [('tail': ('sort_field': '_doc', 'count': 2)), ('head': ('sort_field': '_doc', 'count': 2)), ('tail': ('sort_field': '_doc', 'count': 2))]\n",
- " size: 2\n",
- " sort_params: {'_doc': 'desc'}\n",
- " _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
- " body: {}\n",
- " post_processing: [('sort_index'), ('head': ('count': 2)), ('tail': ('count': 2))]\n",
- "\n"
- ]
- }
- ],
- "source": [
- "print(df.tail(2).head(2).tail(2).es_info())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:17.640519Z",
- "iopub.status.busy": "2021-12-15T20:25:17.640139Z",
- "iopub.status.idle": "2021-12-15T20:25:18.647340Z",
- "shell.execute_reply": "2021-12-15T20:25:18.646548Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Country | \n",
- " CustomerID | \n",
- " ... | \n",
- " StockCode | \n",
- " UnitPrice | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 12498 | \n",
- " United Kingdom | \n",
- " 16710.0 | \n",
- " ... | \n",
- " 20975 | \n",
- " 0.65 | \n",
- "
\n",
- " \n",
- " 12499 | \n",
- " United Kingdom | \n",
- " 16710.0 | \n",
- " ... | \n",
- " 22445 | \n",
- " 2.95 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "2 rows × 8 columns
"
- ],
- "text/plain": [
- " Country CustomerID ... StockCode UnitPrice\n",
- "12498 United Kingdom 16710.0 ... 20975 0.65\n",
- "12499 United Kingdom 16710.0 ... 22445 2.95\n",
- "\n",
- "[2 rows x 8 columns]"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.tail(2)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Selecting columns\n",
- "\n",
- "you can also pass a list of columns to select columns from the data frame in a specified order."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:18.654238Z",
- "iopub.status.busy": "2021-12-15T20:25:18.653517Z",
- "iopub.status.idle": "2021-12-15T20:25:19.431749Z",
- "shell.execute_reply": "2021-12-15T20:25:19.431127Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Country | \n",
- " InvoiceDate | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " United Kingdom | \n",
- " 2010-12-01 08:26:00 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " United Kingdom | \n",
- " 2010-12-01 08:26:00 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " United Kingdom | \n",
- " 2010-12-01 08:26:00 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " United Kingdom | \n",
- " 2010-12-01 08:26:00 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " United Kingdom | \n",
- " 2010-12-01 08:26:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "5 rows × 2 columns
"
- ],
- "text/plain": [
- " Country InvoiceDate\n",
- "0 United Kingdom 2010-12-01 08:26:00\n",
- "1 United Kingdom 2010-12-01 08:26:00\n",
- "2 United Kingdom 2010-12-01 08:26:00\n",
- "3 United Kingdom 2010-12-01 08:26:00\n",
- "4 United Kingdom 2010-12-01 08:26:00\n",
- "\n",
- "[5 rows x 2 columns]"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df[['Country', 'InvoiceDate']].head(5)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Boolean Indexing\n",
- "\n",
- "we also allow you to filter the data frame using boolean indexing. Under the hood, a boolean index maps to a `terms` query that is then passed to elasticsearch to filter the index."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:19.440640Z",
- "iopub.status.busy": "2021-12-15T20:25:19.439831Z",
- "iopub.status.idle": "2021-12-15T20:25:20.066747Z",
- "shell.execute_reply": "2021-12-15T20:25:20.067477Z"
- }
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'term': {'Country': 'Germany'}}\n"
- ]
+ "metadata": {
+ "id": "2IM_dmhFeRhR"
+ }
},
{
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Country | \n",
- " CustomerID | \n",
- " ... | \n",
- " StockCode | \n",
- " UnitPrice | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 1109 | \n",
- " Germany | \n",
- " 12662.0 | \n",
- " ... | \n",
- " 22809 | \n",
- " 2.95 | \n",
- "
\n",
- " \n",
- " 1110 | \n",
- " Germany | \n",
- " 12662.0 | \n",
- " ... | \n",
- " 84347 | \n",
- " 2.55 | \n",
- "
\n",
- " \n",
- " 1111 | \n",
- " Germany | \n",
- " 12662.0 | \n",
- " ... | \n",
- " 84945 | \n",
- " 0.85 | \n",
- "
\n",
- " \n",
- " 1112 | \n",
- " Germany | \n",
- " 12662.0 | \n",
- " ... | \n",
- " 22242 | \n",
- " 1.65 | \n",
- "
\n",
- " \n",
- " 1113 | \n",
- " Germany | \n",
- " 12662.0 | \n",
- " ... | \n",
- " 22244 | \n",
- " 1.95 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "5 rows × 8 columns
"
+ "cell_type": "markdown",
+ "source": [
+ "## Install and import packages"
],
- "text/plain": [
- " Country CustomerID ... StockCode UnitPrice\n",
- "1109 Germany 12662.0 ... 22809 2.95\n",
- "1110 Germany 12662.0 ... 84347 2.55\n",
- "1111 Germany 12662.0 ... 84945 0.85\n",
- "1112 Germany 12662.0 ... 22242 1.65\n",
- "1113 Germany 12662.0 ... 22244 1.95\n",
- "\n",
- "[5 rows x 8 columns]"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# the construction of a boolean vector maps directly to an elasticsearch query\n",
- "print(df['Country']=='Germany')\n",
- "df[(df['Country']=='Germany')].head(5)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "we can also filter the data frame using a list of values."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:20.077022Z",
- "iopub.status.busy": "2021-12-15T20:25:20.076412Z",
- "iopub.status.idle": "2021-12-15T20:25:21.233013Z",
- "shell.execute_reply": "2021-12-15T20:25:21.234073Z"
- }
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'terms': {'Country': ['Germany', 'United States']}}\n"
- ]
+ "metadata": {
+ "id": "xx09MAYFe3Ep"
+ }
},
{
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Country | \n",
- " CustomerID | \n",
- " ... | \n",
- " StockCode | \n",
- " UnitPrice | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " United Kingdom | \n",
- " 17850.0 | \n",
- " ... | \n",
- " 85123A | \n",
- " 2.55 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " United Kingdom | \n",
- " 17850.0 | \n",
- " ... | \n",
- " 71053 | \n",
- " 3.39 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " United Kingdom | \n",
- " 17850.0 | \n",
- " ... | \n",
- " 84406B | \n",
- " 2.75 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " United Kingdom | \n",
- " 17850.0 | \n",
- " ... | \n",
- " 84029G | \n",
- " 3.39 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " United Kingdom | \n",
- " 17850.0 | \n",
- " ... | \n",
- " 84029E | \n",
- " 3.39 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "5 rows × 8 columns
"
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:06.764412Z",
+ "iopub.status.busy": "2021-12-15T20:25:06.755567Z",
+ "iopub.status.idle": "2021-12-15T20:25:07.316950Z",
+ "shell.execute_reply": "2021-12-15T20:25:07.316561Z"
+ },
+ "id": "Z7MEVJy9W_sd",
+ "outputId": "91b215ff-9f4f-4b1f-d204-f24bcc38b3f6",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting eland\n",
+ " Downloading eland-8.11.1-py3-none-any.whl (157 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m157.9/157.9 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting elasticsearch<9,>=8.3 (from eland)\n",
+ " Downloading elasticsearch-8.11.1-py3-none-any.whl (412 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m412.8/412.8 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: pandas<2,>=1.5 in /usr/local/lib/python3.10/dist-packages (from eland) (1.5.3)\n",
+ "Requirement already satisfied: matplotlib>=3.6 in /usr/local/lib/python3.10/dist-packages (from eland) (3.7.1)\n",
+ "Requirement already satisfied: numpy<2,>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from eland) (1.23.5)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from eland) (23.2)\n",
+ "Collecting elastic-transport<9,>=8 (from elasticsearch<9,>=8.3->eland)\n",
+ " Downloading elastic_transport-8.11.0-py3-none-any.whl (59 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.8/59.8 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.2.0)\n",
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (0.12.1)\n",
+ "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (4.47.0)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (1.4.5)\n",
+ "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (9.4.0)\n",
+ "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (3.1.1)\n",
+ "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.6->eland) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2,>=1.5->eland) (2023.3.post1)\n",
+ "Requirement already satisfied: urllib3<3,>=1.26.2 in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch<9,>=8.3->eland) (2.0.7)\n",
+ "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch<9,>=8.3->eland) (2023.11.17)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib>=3.6->eland) (1.16.0)\n",
+ "Installing collected packages: elastic-transport, elasticsearch, eland\n",
+ "Successfully installed eland-8.11.1 elastic-transport-8.11.0 elasticsearch-8.11.1\n",
+ "Requirement already satisfied: elasticsearch in /usr/local/lib/python3.10/dist-packages (8.11.1)\n",
+ "Requirement already satisfied: elastic-transport<9,>=8 in /usr/local/lib/python3.10/dist-packages (from elasticsearch) (8.11.0)\n",
+ "Requirement already satisfied: urllib3<3,>=1.26.2 in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (2.0.7)\n",
+ "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8->elasticsearch) (2023.11.17)\n"
+ ]
+ }
],
- "text/plain": [
- " Country CustomerID ... StockCode UnitPrice\n",
- "0 United Kingdom 17850.0 ... 85123A 2.55\n",
- "1 United Kingdom 17850.0 ... 71053 3.39\n",
- "2 United Kingdom 17850.0 ... 84406B 2.75\n",
- "3 United Kingdom 17850.0 ... 84029G 3.39\n",
- "4 United Kingdom 17850.0 ... 84029E 3.39\n",
- "\n",
- "[5 rows x 8 columns]"
+ "source": [
+ "!pip install eland\n",
+ "!pip install elasticsearch\n",
+ "\n",
+ "from elasticsearch import Elasticsearch\n",
+ "import eland as ed\n",
+ "import getpass\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Fix console size for consistent test results\n",
+ "from eland.conftest import *"
]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "print(df['Country'].isin(['Germany', 'United States']))\n",
- "df[df['Country'].isin(['Germany', 'United Kingdom'])].head(5)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can also combine boolean vectors to further filter the data frame."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:21.245390Z",
- "iopub.status.busy": "2021-12-15T20:25:21.244737Z",
- "iopub.status.idle": "2021-12-15T20:25:22.358701Z",
- "shell.execute_reply": "2021-12-15T20:25:22.355150Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Country | \n",
- " CustomerID | \n",
- " ... | \n",
- " StockCode | \n",
- " UnitPrice | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- "
\n",
- "
\n",
- "0 rows × 8 columns
"
- ],
- "text/plain": [
- "Empty DataFrame\n",
- "Columns: [Country, CustomerID, Description, InvoiceDate, InvoiceNo, Quantity, StockCode, UnitPrice]\n",
- "Index: []\n",
- "\n",
- "[0 rows x 8 columns]"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df[(df['Country']=='Germany') & (df['Quantity']>90)]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Using this example, let see how eland translates this boolean filter to an elasticsearch `bool` query."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:22.383610Z",
- "iopub.status.busy": "2021-12-15T20:25:22.370577Z",
- "iopub.status.idle": "2021-12-15T20:25:22.390275Z",
- "shell.execute_reply": "2021-12-15T20:25:22.388963Z"
- }
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "es_index_pattern: online-retail\n",
- "Index:\n",
- " es_index_field: _id\n",
- " is_source_field: False\n",
- "Mappings:\n",
- " capabilities:\n",
- " es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name\n",
- "Country Country True keyword None object True True False Country\n",
- "CustomerID CustomerID True double None float64 True True False CustomerID\n",
- "Description Description True keyword None object True True False Description\n",
- "InvoiceDate InvoiceDate True keyword None object True True False InvoiceDate\n",
- "InvoiceNo InvoiceNo True keyword None object True True False InvoiceNo\n",
- "Quantity Quantity True long None int64 True True False Quantity\n",
- "StockCode StockCode True keyword None object True True False StockCode\n",
- "UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
- "Operations:\n",
- " tasks: [('boolean_filter': ('boolean_filter': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}))]\n",
- " size: None\n",
- " sort_params: None\n",
- " _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
- " body: {'query': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}}\n",
- " post_processing: []\n",
- "\n"
- ]
- }
- ],
- "source": [
- "print(df[(df['Country']=='Germany') & (df['Quantity']>90)].es_info())"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Aggregation and Descriptive Statistics\n",
- "\n",
- "Let's begin to ask some questions of our data and use eland to get the answers."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**How many different countries are there?**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:22.398231Z",
- "iopub.status.busy": "2021-12-15T20:25:22.397459Z",
- "iopub.status.idle": "2021-12-15T20:25:22.482238Z",
- "shell.execute_reply": "2021-12-15T20:25:22.481338Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "16"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df['Country'].nunique()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**What is the total sum of products ordered?**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:22.492668Z",
- "iopub.status.busy": "2021-12-15T20:25:22.491590Z",
- "iopub.status.idle": "2021-12-15T20:25:22.580015Z",
- "shell.execute_reply": "2021-12-15T20:25:22.578300Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "111960"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df['Quantity'].sum()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Show me the sum, mean, min, and max of the qunatity and unit_price fields**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:22.601432Z",
- "iopub.status.busy": "2021-12-15T20:25:22.600117Z",
- "iopub.status.idle": "2021-12-15T20:25:22.702450Z",
- "shell.execute_reply": "2021-12-15T20:25:22.701499Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Quantity | \n",
- " UnitPrice | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " sum | \n",
- " 111960.000 | \n",
- " 61548.490000 | \n",
- "
\n",
- " \n",
- " mean | \n",
- " 7.464 | \n",
- " 4.103233 | \n",
- "
\n",
- " \n",
- " max | \n",
- " 2880.000 | \n",
- " 950.990000 | \n",
- "
\n",
- " \n",
- " min | \n",
- " -9360.000 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Quantity UnitPrice\n",
- "sum 111960.000 61548.490000\n",
- "mean 7.464 4.103233\n",
- "max 2880.000 950.990000\n",
- "min -9360.000 0.000000"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df[['Quantity','UnitPrice']].agg(['sum', 'mean', 'max', 'min'])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Give me descriptive statistics for the entire data frame**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:22.712002Z",
- "iopub.status.busy": "2021-12-15T20:25:22.711114Z",
- "iopub.status.idle": "2021-12-15T20:25:22.982698Z",
- "shell.execute_reply": "2021-12-15T20:25:22.981770Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " CustomerID | \n",
- " Quantity | \n",
- " UnitPrice | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " count | \n",
- " 10729.000000 | \n",
- " 15000.000000 | \n",
- " 15000.000000 | \n",
- "
\n",
- " \n",
- " mean | \n",
- " 15590.776680 | \n",
- " 7.464000 | \n",
- " 4.103233 | \n",
- "
\n",
- " \n",
- " std | \n",
- " 1764.189592 | \n",
- " 85.930116 | \n",
- " 20.106214 | \n",
- "
\n",
- " \n",
- " min | \n",
- " 12347.000000 | \n",
- " -9360.000000 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 25% | \n",
- " 14222.249164 | \n",
- " 1.000000 | \n",
- " 1.250000 | \n",
- "
\n",
- " \n",
- " 50% | \n",
- " 15663.037856 | \n",
- " 2.000000 | \n",
- " 2.510000 | \n",
- "
\n",
- " \n",
- " 75% | \n",
- " 17219.040670 | \n",
- " 6.425347 | \n",
- " 4.210000 | \n",
- "
\n",
- " \n",
- " max | \n",
- " 18239.000000 | \n",
- " 2880.000000 | \n",
- " 950.990000 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " CustomerID Quantity UnitPrice\n",
- "count 10729.000000 15000.000000 15000.000000\n",
- "mean 15590.776680 7.464000 4.103233\n",
- "std 1764.189592 85.930116 20.106214\n",
- "min 12347.000000 -9360.000000 0.000000\n",
- "25% 14222.249164 1.000000 1.250000\n",
- "50% 15663.037856 2.000000 2.510000\n",
- "75% 17219.040670 6.425347 4.210000\n",
- "max 18239.000000 2880.000000 950.990000"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# NBVAL_IGNORE_OUTPUT\n",
- "df.describe()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Show me a histogram of numeric columns**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:23.000466Z",
- "iopub.status.busy": "2021-12-15T20:25:22.999571Z",
- "iopub.status.idle": "2021-12-15T20:25:23.576387Z",
- "shell.execute_reply": "2021-12-15T20:25:23.576703Z"
- }
- },
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "df[(df['Quantity']>-50) & \n",
- " (df['Quantity']<50) & \n",
- " (df['UnitPrice']>0) & \n",
- " (df['UnitPrice']<100)][['Quantity', 'UnitPrice']].hist(figsize=[12,4], bins=30)\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:23.584264Z",
- "iopub.status.busy": "2021-12-15T20:25:23.583784Z",
- "iopub.status.idle": "2021-12-15T20:25:24.494000Z",
- "shell.execute_reply": "2021-12-15T20:25:24.493618Z"
- }
- },
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAs4AAAEICAYAAABPtXIYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAeNUlEQVR4nO3df5Qd91nf8fcndpwEq5FJDIojuciJjKmxaEP22ElpqVwSkLEVAycNdkzBqbFOODWFVoUqQMuPklND40KMDRwRGyVgrLgmTaRYIaGUbQgEcMyP2vGPIhwllnGsGBLBKjSJzNM/7iy5bLTauXvv3bt39v06x8d7Z+6deR7Nndlnn/nOTKoKSZIkSaf2jEkHIEmSJE0DC2dJkiSpBQtnSZIkqQULZ0mSJKkFC2dJkiSpBQtnSZIkqQULZ61JSeaSvGjScUiSIMnPJ/mPq3V50jwLZ62YJNcmuT/Jp5N8PMnPJlm/AuudTfKd/dOqal1VPdrM35vkx8cdhyR1VZJKsmXBtB9J8sttPl9Vr6+q/9x8bluSIydZ1ueapsenkvxOkpe3WZ40ShbOWhFJdgE/AXwfsB54GbAZeF+SZ04wNEnSdHh7Va0DvgT4APCOJFn4piSnrXhkWjMsnDV2SZ4L/Cjw3VX1a1X1uao6DLwGeBHw2oVd34UdhyS7k/xpkr9K8mCSb+6bd22SDyR5U5JPJvlIksuaeW8E/ilwS9OpuKWZXkm2JNkJXAN8fzP/QJLvS/KrC3K4Ocmbx/VvJEldNn9MT7IrydEkTyR5Xd/8vUl+PMmZwHuAFzbH5LkkL+xfVlV9Dngr8ALg+c1nfy7JwSTHgUtP8jvlyiR/lOQvm98l25vp65Pc1sTzeBODhbcWZeGslfCPgWcD7+ifWFVzwEHg61ss40/pFcDr6RXhv5zknL75lwCPAGcDPwncliRV9YPAbwE3NMMzblgQwx7gDuAnm/k7gF8Gtic5CyDJ6cBVwNsGylqS1O8F9I7hG4HrgFuTfHH/G6rqOHAZ8GfNMXldVf1Z/3uSPAu4Fnisqp5qJr8WeCPw9+h1o/vffzG94/f3AWcBXwscbmbvBU4AW4CX0Pt99HeG9kn9LJy1Es4GnqqqEyeZ9wS9026nVFX/var+rKr+pqreDvwJcHHfWz5aVb9QVU/T60ScA2xYTrBV9QTwfuBfNJO2N/Hft5zlSZIA+BzwY81Zx4PAHHDBAJ9/TZJPAY8BLwW+uW/eu6rqt5vfEf9vweeuA26vql9v5j9eVQ8n2QB8I/C9VXW8qo4CP0WvUSKd1OmTDkBrwlPA2UlOP0nxfE4z/5SSfDvw7+iNiwZYR68gn/fx+R+q6tPNsLd1Q8T8VuC7gF8Avg34pSGWJUld9zSw8HqVZ9Irluf9+YLfAZ9msOP0XVX1bYvMe+wUnzuX3tnNhb6sifGJvqHSz1hiWVrj7DhrJXwQ+AzwLf0Tk6yjd0puFjgOfFHf7Bf0ve/L6BWwNwDPr6qzgAeAL7goZBG1jPnvBL4qyUXAFfSGc0iSTu5jfL6xMe884KPLWNZSx+xBP/MY8OJFpn8GOLuqzmr+e25VfeUy1q81wsJZY1dVx+iNS/6ZJNuTPDPJZuAuet3mO4A/Ar4xyfOSvAD43r5FnEnvoPgJgOaCkosGCOFJehchtp7fnOq7G/gV4Per6mMDrE+S1pq3Az+UZFOSZyR5BbCD3nF0UE/Su+hvVLcrvQ14XZKva2LbmOQrmmF57wNuSvLcZt6Lk/yzEa1XHWThrBVRVT8J/ADwJuCvgI/Q6zC/orkY5JeAP6Z3wcb76B2E5z/7IHATvc71k8BW4LcHWP2bgVc3d9y4+STzbwMubO4N+s6+6W9t1uUwDUk6tR8DfofehXmfpHeR9jVV9cCgC6qqh4E7gUeb4/ILl/rMEsv7feB19MYvHwP+N71hGgDfDpwBPNjEfTe9IYTSSaVqOWdEpOE0XeMfA75mtXZzk/x9
4GHgBVX1l5OOR5IkTZYXB2oiquoXk5ygd6u6VVc4J3kGvYsR91k0S5IksOMsfYHmBvxP0ruoZXtVeYW1JEmycJYkSZLa8OJASZIkqYVVMcb57LPPrs2bN086DACOHz/OmWeeOekwxqKruXU1L+hubqs1r/vuu++pqlrySZZankGP9av1ezIq5je9upwbdDu/48eP8/DDDy/7WL8qCufNmzfzoQ99aNJhADA7O8u2bdsmHcZYdDW3ruYF3c1tteaVZDkPa1BLgx7rV+v3ZFTMb3p1OTfodn6zs7Nceumlyz7WO1RDkiRJamEshXOSM5N8KMkV41i+JEmStNJaFc5Jbk9yNMkDC6ZvT/JIkkNJdvfN+g/0HqcsSZIkdULbjvNeYHv/hCSnAbcClwEXAlcnuTDJK+k9uvLoCOOUJK0ynl2UtNa0ujiwqt6fZPOCyRcDh6rqUYAk+4ArgXXAmfSK6b9OcrCq/mbhMpPsBHYCbNiwgdnZ2eXmMFJzc3OrJpZR62puXc0LuptbV/OadkluB64AjlbVRX3TtwNvBk4D3lJVNzazPLsoaU0Z5q4aG4H+J6odAS6pqhsAklwLPHWyohmgqvYAewBmZmZqtVy92fUrSbuYW1fzgu7m1tW8OmAvcAvwtvkJfWcXX0nvOH9vkv30fgc8CDx75cOUpMkY2+3oqmrvuJYtSRq91XZ2setnJsxvenU5N+h2fnNzc0N9fpjC+XHg3L7Xm5pprSXZAezYsmXLEGFIksZoYmcXu35mwvymV5dzg27nN+wfBMMUzvcC5yc5j17BfBXw2kEWUFUHgAMzMzPXDxGHBMDm3feccv7hGy9foUiktaPN2cVhmiRL7dfgvi1p5bS9Hd2dwAeBC5IcSXJdVZ0AbgDeCzwE3FVVHx5k5Ul2JNlz7NixQeOWJK2Moc8uVtWBqtq5fv36kQYmSSut7V01rl5k+kHg4HJXbsdZkla9oc8uSlJX+MhtSRLg2UVJWsrY7qrRhhcHStLq4dlFSTq1iXacHfcmSZKkaeFQDUnSWDlUQ1JXWDhLksbKs4uSumKihbNdCEmSJE0LxzhLksbKJomkrnCohiRprGySSOoKC2dJkiSpBcc4S5IkSS04xlmSNFY2SSR1hUM1JEljZZNEUldYOEuSJEktWDhLkiRJLXhxoCRJktSCFwdKksbKJomkrnCohiRprGySSOoKC2dJkiSphdMnHYAkScPYvPueU84/fOPlKxSJpK6z4yxJkiS14F01JEmSpBa8q4YkaaxskkjqCodqSJLGyiaJpK6wcJYkSZJa8K4aWjOWuvIevPpekiQtzo6zJEmS1IKFsyRJktSChbMkSZLUgoWzJEmS1IIPQJEkSZJa8AEokqSxskkiqSscqiFJGiubJJK6wsJZkiRJasHCWZIkSWrBwlmSJElqwcJZkiRJauH0SQcgTZvNu+9Z8j2Hb7x8BSKRJEkryY6zJEmS1IKFsyRJktSChbMkSZLUgoWzJEmS1MLIC+ck/yDJzye5O8l3jXr5kiRJ0iS0KpyT3J7kaJIHFkzfnuSRJIeS7Aaoqoeq6vXAa4CvGX3IkqRJs0kiaS1q23HeC2zvn5DkNOBW4DLgQuDqJBc2814F3AMcHFmkkqSxskkiSafW6j7OVfX+JJsXTL4YOFRVjwIk2QdcCTxYVfuB/UnuAX7lZMtMshPYCbBhwwZmZ2eXlcCozc3NrZpYRq2ruc3ntWvriaGX1ebfp816RvXv3PVtplVnL3AL8Lb5CX1NklcCR4B7k+yvqgebJsl3Ab80gVhb897rkkZlmAegbAQe63t9BLgkyTbgW4BncYqOc1XtAfYAzMzM1LZt24YIZXRmZ2dZLbGMWldzm8/r2ha/HJdy+JptS76nzXraLKeNrm8zrS6rrUkyNzfHrq1PD5rGskziD7mu/wHZ5fy6nBt0O7+5ubmhPj/yJwdW1Sww2+a9SXYAO7Zs2TLqMCRJozGxJsns7Cw3feD4wAEvx6j+
2B1E1/+A7HJ+Xc4Nup3fsH8QDFM4Pw6c2/d6UzOttao6AByYmZm5fog4JEkrzCaJpLVomML5XuD8JOfRK5ivAl47kqikCWkzFlJaY2ySSFKj7e3o7gQ+CFyQ5EiS66rqBHAD8F7gIeCuqvrwICtPsiPJnmPHjg0atyRpZfxtkyTJGfSaJPsnHJMkTUSrwrmqrq6qc6rqmVW1qapua6YfrKovr6oXV9UbB115VR2oqp3r168f9KOSpBGzSSJJpzbyiwMlSdOpqq5eZPpBhrgvv0M1JHXFyB+5PQi7EJIkSZoWEy2cHaohSd1nk0RSV0y0cJYkdZ9NEkld4VANSZIkqYWJXhzoBSPScNrcd/rwjZevQCTS4nwAiqSucKiGJGmsHKohqSssnCVJkqQWJjpUw9N3kqTVwGFPktrwdnSSpLHyQnBJXeFQDUnSWNkkkdQVFs6SJElSCxbOkiRJUgs+AEWSJElqwYsDJUljZZNEUlc4VEOSNFY2SSR1hYWzJEmS1IKFsyRJktSChbMkSZLUgnfVkCRJklrwrhqSpLGySSKpKxyqIUkaK5skkrrCwlmSJElqwcJZkiRJasHCWZIkSWrBwlmSJElqwcJZkiRJasHCWZIkSWrBB6BIkiRJLfgAFEnSWNkkkdQVDtWQJI2VTRJJXWHhLEmSJLVg4SxJkiS1YOEsSZIktXD6pAOQpEFt3n3Pku85fOPlKxCJJGktseMsSZIktWDhLEmSJLVg4SxJkiS14BhnSZJGxPH3UrdZOEtqxYJAkrTWjaVwTvJNwOXAc4Hbqup941iPJEmStFJaj3FOcnuSo0keWDB9e5JHkhxKshugqt5ZVdcDrwe+dbQhS5ImLck3JfmFJG9P8vWTjkeSVsIgHee9wC3A2+YnJDkNuBV4JXAEuDfJ/qp6sHnLDzXzJUmrXJLbgSuAo1V1Ud/07cCbgdOAt1TVjVX1TuCdSb4YeBPQ+TOLbYYrSeq21oVzVb0/yeYFky8GDlXVowBJ9gFXJnkIuBF4T1X9wcmWl2QnsBNgw4YNzM7ODh79GMzNza2aWEatq7nN57Vr64lJh/K3fuaOdy35nq0b1y/5nqW2WZucR7XNR7muYb+LK5n3GrMXGySStKhhxzhvBB7re30EuAT4buAVwPokW6rq5xd+sKr2AHsAZmZmatu2bUOGMhqzs7OsllhGbZpzO1WnZ9fWp7npA8eZtmtdD1+zbcn3/Mwd72pyW8zSObdZTxvXtrk4sOW6hv0ujjIWfd6oGyTN+5fdJJmbm2PX1qcHymEazP8bdLWZMa/L+XU5N+h2fnNzc0N9fiyVRlXdDNy81PuS7AB2bNmyZRxhSJKGt+wGCQzXJJmdnV3iD8fpNP9H3TQ3M9rocn5dzg26nd+wfxAM+wCUx4Fz+15vaqa1UlUHqmrn+vVLn7KWJK0eVXVzVb20ql6/WNE8L8mOJHuOHTu2UuFJ0lgMWzjfC5yf5LwkZwBXAfuHD0uStEoM1SABmySSumOQ29HdCXwQuCDJkSTXVdUJ4AbgvcBDwF1V9eEBlmkXQpJWNxskktRoXThX1dVVdU5VPbOqNlXVbc30g1X15VX14qp64yArtwshSavHOBokzXJtkkjqhOm6DYEkaWyq6upFph8EDg6x3APAgZmZmeuXuwxJWg2GHeM8FLsQkiRJmhYTLZwdqiFJ3WeTRFJXOFRDmpA2j+/dtXUFApHGzKEakrpiooWzD0CRVoc2RbwkSWudQzUkSWPlUA1JXTHRwlmS1H02SSR1hYWzJEmS1IK3o5MkSZJacIyzJGmsbJJI6gqHakiSxsomiaSu8D7OkiStoPnbP+7aeoJrF7kV5OEbL1/JkCS1ZMdZkiRJasGLAyVJkqQWvDhQkjRWNkkkdYVDNSRJY2WTRFJXWDhLkiRJLXhXDUkrxrsJSJKmmR1nSZIkqQULZ0mSJKmFiQ7VSLID2LFly5ZJhqEJ27zIKXtJ3eCxXlJXTLRwrqoDwIGZmZnr
JxmHJGl8PNYPbqmGgtcCSJPhUA1JkiSpBQtnSZIkqQULZ0mSJKkFC2dJkiSpBQtnSZIkqQULZ0mSJKmFiRbOSXYk2XPs2LFJhiFJkiQtaaKFc1UdqKqd69evn2QYkqQxskkiqSscqiFJGiubJJK6wsJZkiRJamGij9xW9y312FhJkqRpYcdZkiRJasHCWZIkSWrBoRqSJHVQm6Fyh2+8fAUikbrDjrMkSZLUgoWzJEmS1IJDNSRJWqMcziENxo6zJEmS1MLIC+ckL0pyW5K7R71sSZIkaVJaFc5Jbk9yNMkDC6ZvT/JIkkNJdgNU1aNVdd04gpUkrQ42SSStRW07znuB7f0TkpwG3ApcBlwIXJ3kwpFGJ0laMTZJJOnUWhXOVfV+4C8WTL4YONQcPD8L7AOuHHF8kqSVsxebJJK0qFRVuzcmm4F3V9VFzetXA9ur6jub1/8SuAT4YeCNwCuBt1TVf1lkeTuBnQAbNmx46b59+4bLZETm5uZYt27dpMMYi0nkdv/jx8a+jg3PgSf/euyrmYhpy23rxvWnnD//fThVXksto385w8RyMpdeeul9VTUz8Ac75CTH+pcDP1JV39C8fgPA/LE9yd1V9epTLG/Zx/q5uTk+cuzpZWay+g2zf49qPxnVuk7G36fTq8v5zc3NsWPHjmUf60d+O7qq+nPg9S3etwfYAzAzM1Pbtm0bdSjLMjs7y2qJZdQmkdu1LW51NKxdW09w0/3dvLPitOV2+Jptp5w//304VV5LLaN/OcPEotY2Ao/1vT4CXJLk+fSaJC9J8obFmiTDHOtnZ2e56QPHlxv3qjfM/j2q/WRU6zoZf59Ory7nNzs7O9Tnh/mN/Dhwbt/rTc201pLsAHZs2bJliDA0KW3u/ympm9o2ScBjvaTuGOZ2dPcC5yc5L8kZwFXA/kEWUFUHqmrn+vXLOw0kSRq7oZskHusldUXb29HdCXwQuCDJkSTXVdUJ4AbgvcBDwF1V9eFBVp5kR5I9x46NfxysJGlZhm6SSFJXtBqqUVVXLzL9IHBwuSuvqgPAgZmZmeuXuwxJ0mg0TZJtwNlJjgA/XFW3JZlvkpwG3L6cJgkO1ZhaPpZb+rzpuepIkjRWNkkk6dRG/sjtQThUQ5IkSdNiooWzF4xIUvfZJJHUFRMtnCVJ3WeTRFJXWDhLkiRJLUz04kCvtJak7vNYP3o+gEqaDMc4S5LGymO9pK5wqIYkSZLUgoWzJEmS1IJjnKeMT3CSNG081kvqCsc4S5LGymO9pK5wqIYkSZLUgoWzJEmS1IKFsyRJktSCFwdKksbKY71GxQvkNWleHChJGiuP9ZK6wqEakiRJUgsWzpIkSVILFs6SJElSCxbOkiRJUgtTe1eNlbyy1qt4pXba7Ctae7yrhqSu8K4akqSx8lgvqSscqiFJkiS1YOEsSZIktWDhLEmSJLVg4SxJkiS1YOEsSZIktWDhLEmSJLVg4SxJkiS1MLUPQJEkTQeP9d13socf7dp6gmub6Sv5kDAfWqZx8gEokqSx8lgvqSscqiFJkiS1YOEsSZIktWDhLEmSJLVg4SxJkiS1YOEsSZIktWDhLEmSJLVg4SxJkiS1YOEsSZIktWDhLEmSJLVg4SxJkiS1cPqoF5jkTOBngc8Cs1V1x6jXIUmaLI/1ktaiVh3nJLcnOZrkgQXTtyd5JMmhJLubyd8C3F1V1wOvGnG8kqQx8VgvSafWdqjGXmB7/4QkpwG3ApcBFwJXJ7kQ2AQ81rzt6dGEKUlaAXvxWC9Ji0pVtXtjshl4d1Vd1Lx+OfAjVfUNzes3NG89Anyyqt6dZF9VXbXI8nYCOwE2bNjw0n379g0U+P2PH1vyPVs3rh9omQBzc3OsW7duRda1HG1iWcyG58CTf937eRTxDhPLKPXn1TVdze1UebX5bo5rn7z00kvvq6qZgT/YIavpWD83N8dHjnW3Ju/q/j1v0PxGte+vxLo2PAe+9Hkr83t/VAb5txv2GD0qS8W8
3Dpvx44dyz7WDzPGeSOf7zZA7yB6CXAzcEuSy4EDi324qvYAewBmZmZq27ZtA6382t33LPmew9cMtkyA2dlZFsYyrnUtR5tYFrNr6wluur+3yUcR7zCxjFJ/Xl3T1dxOlVeb7+Zq2ifXgIkd62dnZ7npA8eXEfJ06Or+PW/Q/Ea176/EunZtPcFrBqxbJm2Qf7thj9GjslTMy63zhjHyPbaqjgOva/PeJDuAHVu2bBl1GJKkMfJYL2ktGuZ2dI8D5/a93tRMa62qDlTVzvXrp+t0hyStIR7rJakxTOF8L3B+kvOSnAFcBewfTViSpFXCY70kNdreju5O4IPABUmOJLmuqk4ANwDvBR4C7qqqDw+y8iQ7kuw5dmx1XGQmSWuZx3pJOrVWY5yr6upFph8EDi535VV1ADgwMzNz/XKXIUkaDY/1knRqPnJbkiRJamGihbOn7ySp+zzWS+qKiRbOXmktSd3nsV5SV7R+cuBYg0g+AXx00nE0zgaemnQQY9LV3LqaF3Q3t9Wa15dV1ZdMOoiuWsaxfrV+T0bF/KZXl3ODbud3NnDmco/1q6JwXk2SfKirj9ztam5dzQu6m1tX89Jodf17Yn7Tq8u5QbfzGzY3Lw6UJEmSWrBwliRJklqwcP5CeyYdwBh1Nbeu5gXdza2reWm0uv49Mb/p1eXcoNv5DZWbY5wlSZKkFuw4S5IkSS1YOEuSJEktWDgvkGRXkkpydvM6SW5OcijJ/0ny1ZOOcRBJ/muSh5vY/0eSs/rmvaHJ65Ek3zDBMJctyfYm/kNJdk86nuVKcm6S30zyYJIPJ/meZvrzkvx6kj9p/v/Fk451uZKcluQPk7y7eX1ekt9rtt3bk5wx6Ri1enRl34a1sX9Dt/fxJGclubv5ffpQkpd3Zfsl+bfN9/KBJHcmefY0b7sktyc5muSBvmkn3VbLqfEsnPskORf4euBjfZMvA85v/tsJ/NwEQhvGrwMXVdVXAf8XeANAkguBq4CvBLYDP5vktIlFuQxNvLfS20YXAlc3eU2jE8CuqroQeBnwr5tcdgO/UVXnA7/RvJ5W3wM81Pf6J4CfqqotwCeB6yYSlVadju3bsDb2b+j2Pv5m4Neq6iuAf0gvz6nffkk2Av8GmKmqi4DT6NUG07zt9tKra/ottq0GrvEsnP+unwK+H+i/YvJK4G3V87vAWUnOmUh0y1BV76uqE83L3wU2NT9fCeyrqs9U1UeAQ8DFk4hxCBcDh6rq0ar6LLCPXl5Tp6qeqKo/aH7+K3oH5Y308nlr87a3At80kQCHlGQTcDnwluZ1gH8O3N28ZWpz01h0Zt+G7u/f0O19PMl64GuB2wCq6rNV9Sm6s/1OB56T5HTgi4AnmOJtV1XvB/5iweTFttXANZ6FcyPJlcDjVfXHC2ZtBB7re32kmTaN/hXwnubnLuTVhRy+QJLNwEuA3wM2VNUTzayPAxsmFdeQfpreH6V/07x+PvCpvj/qOrHtNDKd3Lehs/s3dHsfPw/4BPCLzVCUtyQ5kw5sv6p6HHgTvTPtTwDHgPvozrabt9i2GvhYs6YK5yT/sxnDs/C/K4EfAP7TpGNcjiXymn/PD9I7XXjH5CLVUpKsA34V+N6q+sv+edW7d+TU3T8yyRXA0aq6b9KxSJPUxf0b1sQ+fjrw1cDPVdVLgOMsGJYxrduvGet7Jb0/Dl4InMkXDnPolGG31ekjjGXVq6pXnGx6kq30vjR/3Du7xCbgD5JcDDwOnNv39k3NtFVjsbzmJbkWuAL4uvr8jbtXfV4tdCGHv5XkmfR+qd5RVe9oJj+Z5JyqeqI5fXR0chEu29cAr0ryjcCzgefSGy94VpLTm67GVG87jVyn9m3o9P4N3d/HjwBHqur3mtd30yucu7D9XgF8pKo+AZDkHfS2Z1e23bzFttXAx5o11XFeTFXdX1VfWlWbq2ozvZ3kq6vq48B+4NubKy9fBhzra/evekm20zt99qqq+nTfrP3AVUmeleQ8egPj
f38SMQ7hXuD85urfM+hd0LB/wjEtSzMe8Dbgoar6b32z9gPf0fz8HcC7Vjq2YVXVG6pqU7NvXQX8r6q6BvhN4NXN26YyN41NZ/Zt6Pb+Dd3fx5ta4LEkFzSTvg54kG5sv48BL0vyRc33dD63Tmy7Pottq4FrPJ8ceBJJDtO7wvSp5ot0C71TF58GXldVH5pkfINIcgh4FvDnzaTfrarXN/N+kN645xP0Th2+5+RLWb2aDsdP07sS+PaqeuNkI1qeJP8E+C3gfj4/RvAH6I2DvAv4+8BHgddU1cKLHqZGkm3Av6+qK5K8iN5FX88D/hD4tqr6zATD0yrSlX0b1s7+Dd3dx5P8I3oXPp4BPAq8jl7zceq3X5IfBb6VXi3wh8B30hvnO5XbLsmdwDbgbOBJ4IeBd3KSbbWcGs/CWZIkSWrBoRqSJElSCxbOkiRJUgsWzpIkSVILFs6SJElSCxbOkiRJUgsWzpIkSVILFs6SJElSC/8fA6+SxudduSkAAAAASUVORK5CYII=",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "df[(df['Quantity']>-50) & \n",
- " (df['Quantity']<50) & \n",
- " (df['UnitPrice']>0) & \n",
- " (df['UnitPrice']<100)][['Quantity', 'UnitPrice']].hist(figsize=[12,4], bins=30, log=True)\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:24.504460Z",
- "iopub.status.busy": "2021-12-15T20:25:24.504086Z",
- "iopub.status.idle": "2021-12-15T20:25:26.468550Z",
- "shell.execute_reply": "2021-12-15T20:25:26.466711Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Country | \n",
- " CustomerID | \n",
- " ... | \n",
- " StockCode | \n",
- " UnitPrice | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 46 | \n",
- " United Kingdom | \n",
- " 13748.0 | \n",
- " ... | \n",
- " 22086 | \n",
- " 2.55 | \n",
- "
\n",
- " \n",
- " 83 | \n",
- " United Kingdom | \n",
- " 15291.0 | \n",
- " ... | \n",
- " 21733 | \n",
- " 2.55 | \n",
- "
\n",
- " \n",
- " 96 | \n",
- " United Kingdom | \n",
- " 14688.0 | \n",
- " ... | \n",
- " 21212 | \n",
- " 0.42 | \n",
- "
\n",
- " \n",
- " 102 | \n",
- " United Kingdom | \n",
- " 14688.0 | \n",
- " ... | \n",
- " 85071B | \n",
- " 0.38 | \n",
- "
\n",
- " \n",
- " 176 | \n",
- " United Kingdom | \n",
- " 16029.0 | \n",
- " ... | \n",
- " 85099C | \n",
- " 1.65 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 11924 | \n",
- " United Kingdom | \n",
- " 14708.0 | \n",
- " ... | \n",
- " 84945 | \n",
- " 0.72 | \n",
- "
\n",
- " \n",
- " 12007 | \n",
- " United Kingdom | \n",
- " 18113.0 | \n",
- " ... | \n",
- " 84946 | \n",
- " 1.06 | \n",
- "
\n",
- " \n",
- " 12015 | \n",
- " United Kingdom | \n",
- " 17596.0 | \n",
- " ... | \n",
- " 17003 | \n",
- " 0.21 | \n",
- "
\n",
- " \n",
- " 12058 | \n",
- " United Kingdom | \n",
- " 17596.0 | \n",
- " ... | \n",
- " 84536A | \n",
- " 0.42 | \n",
- "
\n",
- " \n",
- " 12448 | \n",
- " EIRE | \n",
- " 14911.0 | \n",
- " ... | \n",
- " 84945 | \n",
- " 0.85 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "258 rows × 8 columns
"
- ],
- "text/plain": [
- " Country CustomerID ... StockCode UnitPrice\n",
- "46 United Kingdom 13748.0 ... 22086 2.55\n",
- "83 United Kingdom 15291.0 ... 21733 2.55\n",
- "96 United Kingdom 14688.0 ... 21212 0.42\n",
- "102 United Kingdom 14688.0 ... 85071B 0.38\n",
- "176 United Kingdom 16029.0 ... 85099C 1.65\n",
- "... ... ... ... ... ...\n",
- "11924 United Kingdom 14708.0 ... 84945 0.72\n",
- "12007 United Kingdom 18113.0 ... 84946 1.06\n",
- "12015 United Kingdom 17596.0 ... 17003 0.21\n",
- "12058 United Kingdom 17596.0 ... 84536A 0.42\n",
- "12448 EIRE 14911.0 ... 84945 0.85\n",
- "\n",
- "[258 rows x 8 columns]"
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.query('Quantity>50 & UnitPrice<100')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Arithmetic Operations"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Numeric values"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:26.483774Z",
- "iopub.status.busy": "2021-12-15T20:25:26.482084Z",
- "iopub.status.idle": "2021-12-15T20:25:26.907406Z",
- "shell.execute_reply": "2021-12-15T20:25:26.906448Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 6\n",
- "1 6\n",
- "2 8\n",
- "3 6\n",
- "4 6\n",
- "Name: Quantity, dtype: int64"
- ]
- },
- "execution_count": 22,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df['Quantity'].head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:26.912916Z",
- "iopub.status.busy": "2021-12-15T20:25:26.910149Z",
- "iopub.status.idle": "2021-12-15T20:25:27.361783Z",
- "shell.execute_reply": "2021-12-15T20:25:27.362723Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 2.55\n",
- "1 3.39\n",
- "2 2.75\n",
- "3 3.39\n",
- "4 3.39\n",
- "Name: UnitPrice, dtype: float64"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df['UnitPrice'].head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:27.383414Z",
- "iopub.status.busy": "2021-12-15T20:25:27.374098Z",
- "iopub.status.idle": "2021-12-15T20:25:27.387546Z",
- "shell.execute_reply": "2021-12-15T20:25:27.388753Z"
- }
- },
- "outputs": [],
- "source": [
- "product = df['Quantity'] * df['UnitPrice']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:27.398754Z",
- "iopub.status.busy": "2021-12-15T20:25:27.397557Z",
- "iopub.status.idle": "2021-12-15T20:25:27.818022Z",
- "shell.execute_reply": "2021-12-15T20:25:27.819640Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 15.30\n",
- "1 20.34\n",
- "2 22.00\n",
- "3 20.34\n",
- "4 20.34\n",
- "dtype: float64"
- ]
- },
- "execution_count": 25,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "product.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "String concatenation"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2021-12-15T20:25:27.837007Z",
- "iopub.status.busy": "2021-12-15T20:25:27.836370Z",
- "iopub.status.idle": "2021-12-15T20:25:29.072872Z",
- "shell.execute_reply": "2021-12-15T20:25:29.074153Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 United Kingdom85123A\n",
- "1 United Kingdom71053\n",
- "2 United Kingdom84406B\n",
- "3 United Kingdom84029G\n",
- "4 United Kingdom84029E\n",
- " ... \n",
- "12495 United Kingdom84692\n",
- "12496 United Kingdom22075\n",
- "12497 United Kingdom20979\n",
- "12498 United Kingdom20975\n",
- "12499 United Kingdom22445\n",
- "Length: 15000, dtype: object"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df['Country'] + df['StockCode']"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.6"
- },
- "pycharm": {
- "stem_cell": {
- "cell_type": "raw",
- "metadata": {
- "collapsed": false
},
- "source": []
- }
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Connect to Elasticsearch\n",
+ "\n",
+ "First we need to connect to a running Elasticsearch instance.\n",
+ "In this example we'll be using Elastic Cloud.\n",
+ "Sign up for a [free trial](https://cloud.elastic.co/registration).\n",
+ "\n",
+ "See [documentation](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html#connect-self-managed-new) if you want to connect to a self-managed cluster."
+ ],
+ "metadata": {
+ "id": "SD0Ul-I_Xipy"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Connect to an Elastic Cloud instance\n",
+ "\n",
+ "ELASTIC_CLOUD_ID = getpass.getpass(\"Cloud ID:\")\n",
+ "ELASTIC_CLOUD_PASSWORD = getpass.getpass(\"`elastic` user password:\")\n",
+ "\n",
+ "es = Elasticsearch(\n",
+ " cloud_id=ELASTIC_CLOUD_ID,\n",
+ " basic_auth=(\"elastic\", ELASTIC_CLOUD_PASSWORD)\n",
+ ")\n",
+ "print(es.info())"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "mNrJvFU1Yagd",
+ "outputId": "db8829bf-bd59-4d86-8b73-c956d09f373e"
+ },
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Cloud ID:··········\n",
+ "`elastic` user password:··········\n",
+ "{'name': 'instance-0000000001', 'cluster_name': '69662f53fe844e2d81effcbc7f41e867', 'cluster_uuid': 'GHyCC4NpTAC3SyxZkx65Jw', 'version': {'number': '8.12.0-SNAPSHOT', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '38ddf39a3efc422a702adc83b1bb2cd6fc2edc5b', 'build_date': '2024-01-03T12:58:40.771552945Z', 'build_snapshot': True, 'lucene_version': '9.9.1', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "uvhOwB3dW_sg"
+ },
+ "source": [
+ "## Download test data\n",
+ "\n",
+ "Let's start by downloading our test data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import requests\n",
+ "\n",
+ "# URL of the raw file on GitHub\n",
+ "file_url = \"https://github.com/elastic/eland/raw/main/docs/sphinx/examples/data/online-retail.csv.gz\"\n",
+ "\n",
+ "# Local path where you want to save the file\n",
+ "local_filename = \"online-retail.csv.gz\"\n",
+ "\n",
+ "# Send a GET request to the file URL\n",
+ "response = requests.get(file_url, stream=True)\n",
+ "\n",
+ "# Open a local file in binary write mode\n",
+ "with open(local_filename, 'wb') as file:\n",
+ " for chunk in response.iter_content(chunk_size=128):\n",
+ " file.write(chunk)\n",
+ "\n",
+ "print(f\"File downloaded: {local_filename}\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "FTCeS2rFZrYh",
+ "outputId": "131aeeac-0af5-4746-e639-626b3c80fb69"
+ },
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "File downloaded: online-retail.csv.gz\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Create Eland dataframe\n",
+ "\n",
+ "To get started, let's create an `eland.DataFrame` by reading a CSV file. This creates and populates the\n",
+ "`online-retail` index in the Elasticsearch cluster we connected to above."
+ ],
+ "metadata": {
+ "id": "V-BiYFnvZwTd"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:07.324283Z",
+ "iopub.status.busy": "2021-12-15T20:25:07.323764Z",
+ "iopub.status.idle": "2021-12-15T20:25:16.241379Z",
+ "shell.execute_reply": "2021-12-15T20:25:16.241877Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "3Pq2jtVQW_sh",
+ "outputId": "e677a2eb-e59d-4403-f19b-7326ec954a6b"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/eland/etl.py:529: FutureWarning: the 'mangle_dupe_cols' keyword is deprecated and will be removed in a future version. Please take steps to stop the use of 'mangle_dupe_cols'\n",
+ " reader = pd.read_csv(filepath_or_buffer, **kwargs)\n",
+ "/usr/local/lib/python3.10/dist-packages/eland/etl.py:529: FutureWarning: The squeeze argument has been deprecated and will be removed in a future version. Append .squeeze(\"columns\") to the call to squeeze.\n",
+ "\n",
+ "\n",
+ " reader = pd.read_csv(filepath_or_buffer, **kwargs)\n"
+ ]
+ }
+ ],
+ "source": [
+ "df = ed.csv_to_eland(\"online-retail.csv.gz\",\n",
+ " es_client=es,\n",
+ " es_dest_index='online-retail',\n",
+ " es_if_exists='replace',\n",
+ " es_dropna=True,\n",
+ " es_refresh=True,\n",
+ " compression='gzip',\n",
+ " index_col=0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "RT8kO2rqW_sh"
+ },
+ "source": [
+ "Here we see that the `\"_id\"` field was used to index our data frame."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:16.246737Z",
+ "iopub.status.busy": "2021-12-15T20:25:16.244084Z",
+ "iopub.status.idle": "2021-12-15T20:25:16.250080Z",
+ "shell.execute_reply": "2021-12-15T20:25:16.250410Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "id": "x2um6Xd4W_sh",
+ "outputId": "69a5a3b3-95cb-4851-dec5-f54e55d16235"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "'_id'"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ],
+ "source": [
+ "df.index.es_index_field"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YHFiR_8MW_si"
+ },
+ "source": [
+ "Next, we can check which fields from Elasticsearch are available to our eland data frame. `columns` is available as a parameter when instantiating the data frame, which allows you to choose only a subset of fields from your index to be included in the data frame. Since we didn't set this parameter, we have access to all fields."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:16.254703Z",
+ "iopub.status.busy": "2021-12-15T20:25:16.254060Z",
+ "iopub.status.idle": "2021-12-15T20:25:16.256567Z",
+ "shell.execute_reply": "2021-12-15T20:25:16.256138Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "h_6_2YphW_si",
+ "outputId": "1fb83e3b-8827-4f18-db7d-79bb25e7123d"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode',\n",
+ " 'UnitPrice'],\n",
+ " dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-LmvXDA3W_si"
+ },
+ "source": [
+ "Now, let's see the data types of our fields. Running `df.dtypes`, we can see that Elasticsearch field types are mapped to pandas data types."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:16.261335Z",
+ "iopub.status.busy": "2021-12-15T20:25:16.260762Z",
+ "iopub.status.idle": "2021-12-15T20:25:16.263024Z",
+ "shell.execute_reply": "2021-12-15T20:25:16.263323Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "4KEZ3zsEW_si",
+ "outputId": "090674df-7f4d-41bd-b4bf-3ee476945775"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Country object\n",
+ "CustomerID float64\n",
+ "Description object\n",
+ "InvoiceDate object\n",
+ "InvoiceNo object\n",
+ "Quantity int64\n",
+ "StockCode object\n",
+ "UnitPrice float64\n",
+ "dtype: object"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 9
+ }
+ ],
+ "source": [
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4Levcmf0W_si"
+ },
+ "source": [
+ "We also offer a `.es_info()` data frame method that shows all info about the underlying index. It also contains information about operations being passed from data frame methods to Elasticsearch. More on this later."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:16.266245Z",
+ "iopub.status.busy": "2021-12-15T20:25:16.265860Z",
+ "iopub.status.idle": "2021-12-15T20:25:16.271135Z",
+ "shell.execute_reply": "2021-12-15T20:25:16.270816Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "RhxvDBAiW_si",
+ "outputId": "db60167f-d973-41d9-bd42-438855cef1a4"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "es_index_pattern: online-retail\n",
+ "Index:\n",
+ " es_index_field: _id\n",
+ " is_source_field: False\n",
+ "Mappings:\n",
+ " capabilities:\n",
+ " es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name\n",
+ "Country Country True keyword None object True True False Country\n",
+ "CustomerID CustomerID True double None float64 True True False CustomerID\n",
+ "Description Description True keyword None object True True False Description\n",
+ "InvoiceDate InvoiceDate True keyword None object True True False InvoiceDate\n",
+ "InvoiceNo InvoiceNo True keyword None object True True False InvoiceNo\n",
+ "Quantity Quantity True long None int64 True True False Quantity\n",
+ "StockCode StockCode True keyword None object True True False StockCode\n",
+ "UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
+ "Operations:\n",
+ " tasks: []\n",
+ " size: None\n",
+ " sort_params: None\n",
+ " _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
+ " body: {}\n",
+ " post_processing: []\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(df.es_info())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Zlp7YIrPW_sj"
+ },
+ "source": [
+ "## Selecting and Indexing Data\n",
+ "\n",
+ "Now that we understand how to create a data frame and get access to its underlying attributes, let's see how we can select subsets of our data."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "6dvAlxBTW_sj"
+ },
+ "source": [
+ "### head and tail\n",
+ "\n",
+ "Much like pandas, eland data frames offer `.head(n)` and `.tail(n)` methods that return the first and last n rows, respectively."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:16.274779Z",
+ "iopub.status.busy": "2021-12-15T20:25:16.274393Z",
+ "iopub.status.idle": "2021-12-15T20:25:17.555325Z",
+ "shell.execute_reply": "2021-12-15T20:25:17.555642Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 140
+ },
+ "id": "WaV6ZJ7CW_sj",
+ "outputId": "203a8b15-5a08-4808-a675-8a98d1174c1e"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Country CustomerID ... StockCode UnitPrice\n",
+ "0 United Kingdom 17850.0 ... 85123A 2.55\n",
+ "1 United Kingdom 17850.0 ... 71053 3.39\n",
+ "\n",
+ "[2 rows x 8 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Country | \n",
+ " CustomerID | \n",
+ " ... | \n",
+ " StockCode | \n",
+ " UnitPrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United Kingdom | \n",
+ " 17850.0 | \n",
+ " ... | \n",
+ " 85123A | \n",
+ " 2.55 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United Kingdom | \n",
+ " 17850.0 | \n",
+ " ... | \n",
+ " 71053 | \n",
+ " 3.39 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "2 rows × 8 columns
"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ],
+ "source": [
+ "df.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:17.559534Z",
+ "iopub.status.busy": "2021-12-15T20:25:17.559123Z",
+ "iopub.status.idle": "2021-12-15T20:25:17.637500Z",
+ "shell.execute_reply": "2021-12-15T20:25:17.637125Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "XpwHbrGwW_sj",
+ "outputId": "130ba338-abcf-4fe4-cce3-a1840f0fe46b"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "es_index_pattern: online-retail\n",
+ "Index:\n",
+ " es_index_field: _id\n",
+ " is_source_field: False\n",
+ "Mappings:\n",
+ " capabilities:\n",
+ " es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name\n",
+ "Country Country True keyword None object True True False Country\n",
+ "CustomerID CustomerID True double None float64 True True False CustomerID\n",
+ "Description Description True keyword None object True True False Description\n",
+ "InvoiceDate InvoiceDate True keyword None object True True False InvoiceDate\n",
+ "InvoiceNo InvoiceNo True keyword None object True True False InvoiceNo\n",
+ "Quantity Quantity True long None int64 True True False Quantity\n",
+ "StockCode StockCode True keyword None object True True False StockCode\n",
+ "UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
+ "Operations:\n",
+ " tasks: [('tail': ('sort_field': '_doc', 'count': 2)), ('head': ('sort_field': '_doc', 'count': 2)), ('tail': ('sort_field': '_doc', 'count': 2))]\n",
+ " size: 2\n",
+ " sort_params: {'_doc': 'desc'}\n",
+ " _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
+ " body: {}\n",
+ " post_processing: [('sort_index'), ('head': ('count': 2)), ('tail': ('count': 2))]\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(df.tail(2).head(2).tail(2).es_info())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:17.640519Z",
+ "iopub.status.busy": "2021-12-15T20:25:17.640139Z",
+ "iopub.status.idle": "2021-12-15T20:25:18.647340Z",
+ "shell.execute_reply": "2021-12-15T20:25:18.646548Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 140
+ },
+ "id": "Zf-TFwvXW_sj",
+ "outputId": "4300d820-843d-448f-879d-586756c2e620"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Country CustomerID ... StockCode UnitPrice\n",
+ "14998 United Kingdom 17419.0 ... 21773 1.25\n",
+ "14999 United Kingdom 17419.0 ... 22149 2.10\n",
+ "\n",
+ "[2 rows x 8 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Country | \n",
+ " CustomerID | \n",
+ " ... | \n",
+ " StockCode | \n",
+ " UnitPrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 14998 | \n",
+ " United Kingdom | \n",
+ " 17419.0 | \n",
+ " ... | \n",
+ " 21773 | \n",
+ " 1.25 | \n",
+ "
\n",
+ " \n",
+ " 14999 | \n",
+ " United Kingdom | \n",
+ " 17419.0 | \n",
+ " ... | \n",
+ " 22149 | \n",
+ " 2.10 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "2 rows × 8 columns
"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ],
+ "source": [
+ "df.tail(2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "AkbC-qckW_sj"
+ },
+ "source": [
+ "### Selecting columns\n",
+ "\n",
+ "You can also pass a list of columns to select columns from the data frame in a specified order."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:18.654238Z",
+ "iopub.status.busy": "2021-12-15T20:25:18.653517Z",
+ "iopub.status.idle": "2021-12-15T20:25:19.431749Z",
+ "shell.execute_reply": "2021-12-15T20:25:19.431127Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 233
+ },
+ "id": "Tdhy8cQzW_sk",
+ "outputId": "ab4bcc6b-43ab-45b3-8880-545d52410851"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Country InvoiceDate\n",
+ "0 United Kingdom 2010-12-01 08:26:00\n",
+ "1 United Kingdom 2010-12-01 08:26:00\n",
+ "2 United Kingdom 2010-12-01 08:26:00\n",
+ "3 United Kingdom 2010-12-01 08:26:00\n",
+ "4 United Kingdom 2010-12-01 08:26:00\n",
+ "\n",
+ "[5 rows x 2 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Country | \n",
+ " InvoiceDate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United Kingdom | \n",
+ " 2010-12-01 08:26:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United Kingdom | \n",
+ " 2010-12-01 08:26:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United Kingdom | \n",
+ " 2010-12-01 08:26:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " United Kingdom | \n",
+ " 2010-12-01 08:26:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United Kingdom | \n",
+ " 2010-12-01 08:26:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "5 rows × 2 columns
"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 14
+ }
+ ],
+ "source": [
+ "df[['Country', 'InvoiceDate']].head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "CYV4WfSHW_sk"
+ },
+ "source": [
+ "### Boolean Indexing\n",
+ "\n",
+ "We also allow you to filter the data frame using boolean indexing. Under the hood, a boolean index maps to an Elasticsearch query (for example, a `term` query for an equality comparison) that is then passed to Elasticsearch to filter the index."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:19.440640Z",
+ "iopub.status.busy": "2021-12-15T20:25:19.439831Z",
+ "iopub.status.idle": "2021-12-15T20:25:20.066747Z",
+ "shell.execute_reply": "2021-12-15T20:25:20.067477Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 250
+ },
+ "id": "p2SPKSOEW_sk",
+ "outputId": "288a036f-fda0-4d22-9fb4-15ba2cb551b3"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "{'term': {'Country': 'Germany'}}\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Country CustomerID ... StockCode UnitPrice\n",
+ "1109 Germany 12662.0 ... 22809 2.95\n",
+ "1110 Germany 12662.0 ... 84347 2.55\n",
+ "1111 Germany 12662.0 ... 84945 0.85\n",
+ "1112 Germany 12662.0 ... 22242 1.65\n",
+ "1113 Germany 12662.0 ... 22244 1.95\n",
+ "\n",
+ "[5 rows x 8 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Country | \n",
+ " CustomerID | \n",
+ " ... | \n",
+ " StockCode | \n",
+ " UnitPrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1109 | \n",
+ " Germany | \n",
+ " 12662.0 | \n",
+ " ... | \n",
+ " 22809 | \n",
+ " 2.95 | \n",
+ "
\n",
+ " \n",
+ " 1110 | \n",
+ " Germany | \n",
+ " 12662.0 | \n",
+ " ... | \n",
+ " 84347 | \n",
+ " 2.55 | \n",
+ "
\n",
+ " \n",
+ " 1111 | \n",
+ " Germany | \n",
+ " 12662.0 | \n",
+ " ... | \n",
+ " 84945 | \n",
+ " 0.85 | \n",
+ "
\n",
+ " \n",
+ " 1112 | \n",
+ " Germany | \n",
+ " 12662.0 | \n",
+ " ... | \n",
+ " 22242 | \n",
+ " 1.65 | \n",
+ "
\n",
+ " \n",
+ " 1113 | \n",
+ " Germany | \n",
+ " 12662.0 | \n",
+ " ... | \n",
+ " 22244 | \n",
+ " 1.95 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "5 rows × 8 columns
"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 15
+ }
+ ],
+ "source": [
+ "# the construction of a boolean vector maps directly to an elasticsearch query\n",
+ "print(df['Country']=='Germany')\n",
+ "df[(df['Country']=='Germany')].head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "xV2bkyC3W_sk"
+ },
+ "source": [
+ "We can also filter the data frame using a list of values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:20.077022Z",
+ "iopub.status.busy": "2021-12-15T20:25:20.076412Z",
+ "iopub.status.idle": "2021-12-15T20:25:21.233013Z",
+ "shell.execute_reply": "2021-12-15T20:25:21.234073Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 250
+ },
+ "id": "atifgs9hW_sk",
+ "outputId": "26468789-6032-428c-f64a-ce36c5a5f426"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "{'terms': {'Country': ['Germany', 'United States']}}\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Country CustomerID ... StockCode UnitPrice\n",
+ "0 United Kingdom 17850.0 ... 85123A 2.55\n",
+ "1 United Kingdom 17850.0 ... 71053 3.39\n",
+ "2 United Kingdom 17850.0 ... 84406B 2.75\n",
+ "3 United Kingdom 17850.0 ... 84029G 3.39\n",
+ "4 United Kingdom 17850.0 ... 84029E 3.39\n",
+ "\n",
+ "[5 rows x 8 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Country | \n",
+ " CustomerID | \n",
+ " ... | \n",
+ " StockCode | \n",
+ " UnitPrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United Kingdom | \n",
+ " 17850.0 | \n",
+ " ... | \n",
+ " 85123A | \n",
+ " 2.55 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United Kingdom | \n",
+ " 17850.0 | \n",
+ " ... | \n",
+ " 71053 | \n",
+ " 3.39 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United Kingdom | \n",
+ " 17850.0 | \n",
+ " ... | \n",
+ " 84406B | \n",
+ " 2.75 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " United Kingdom | \n",
+ " 17850.0 | \n",
+ " ... | \n",
+ " 84029G | \n",
+ " 3.39 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United Kingdom | \n",
+ " 17850.0 | \n",
+ " ... | \n",
+ " 84029E | \n",
+ " 3.39 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "5 rows × 8 columns
"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 16
+ }
+ ],
+ "source": [
+ "print(df['Country'].isin(['Germany', 'United States']))\n",
+ "df[df['Country'].isin(['Germany', 'United Kingdom'])].head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oFwNF24EW_sk"
+ },
+ "source": [
+ "We can also combine boolean vectors to further filter the data frame."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:21.245390Z",
+ "iopub.status.busy": "2021-12-15T20:25:21.244737Z",
+ "iopub.status.idle": "2021-12-15T20:25:22.358701Z",
+ "shell.execute_reply": "2021-12-15T20:25:22.355150Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 78
+ },
+ "id": "WK3xl6JQW_sk",
+ "outputId": "7e7d9cb2-79d8-4f2b-bb95-02a316246fa6"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [Country, CustomerID, Description, InvoiceDate, InvoiceNo, Quantity, StockCode, UnitPrice]\n",
+ "Index: []\n",
+ "\n",
+ "[0 rows x 8 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Country | \n",
+ " CustomerID | \n",
+ " ... | \n",
+ " StockCode | \n",
+ " UnitPrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "0 rows × 8 columns
"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 17
+ }
+ ],
+ "source": [
+ "df[(df['Country']=='Germany') & (df['Quantity']>90)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "OOZMEOTXW_sk"
+ },
+ "source": [
+ "Using this example, let's see how eland translates this boolean filter into an Elasticsearch `bool` query."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:22.383610Z",
+ "iopub.status.busy": "2021-12-15T20:25:22.370577Z",
+ "iopub.status.idle": "2021-12-15T20:25:22.390275Z",
+ "shell.execute_reply": "2021-12-15T20:25:22.388963Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "zg5CX5A9W_sl",
+ "outputId": "929c23f7-be9b-4ea6-c1bc-e934f25e9fb0"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "es_index_pattern: online-retail\n",
+ "Index:\n",
+ " es_index_field: _id\n",
+ " is_source_field: False\n",
+ "Mappings:\n",
+ " capabilities:\n",
+ " es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name\n",
+ "Country Country True keyword None object True True False Country\n",
+ "CustomerID CustomerID True double None float64 True True False CustomerID\n",
+ "Description Description True keyword None object True True False Description\n",
+ "InvoiceDate InvoiceDate True keyword None object True True False InvoiceDate\n",
+ "InvoiceNo InvoiceNo True keyword None object True True False InvoiceNo\n",
+ "Quantity Quantity True long None int64 True True False Quantity\n",
+ "StockCode StockCode True keyword None object True True False StockCode\n",
+ "UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
+ "Operations:\n",
+ " tasks: [('boolean_filter': ('boolean_filter': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}))]\n",
+ " size: None\n",
+ " sort_params: None\n",
+ " _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
+ " body: {'query': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}}\n",
+ " post_processing: []\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(df[(df['Country']=='Germany') & (df['Quantity']>90)].es_info())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "QRTjR8d3W_sl"
+ },
+ "source": [
+ "## Aggregation and Descriptive Statistics\n",
+ "\n",
+ "Let's begin to ask some questions of our data and use eland to get the answers."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Pc3OGsQ5W_sl"
+ },
+ "source": [
+ "**How many different countries are there?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:22.398231Z",
+ "iopub.status.busy": "2021-12-15T20:25:22.397459Z",
+ "iopub.status.idle": "2021-12-15T20:25:22.482238Z",
+ "shell.execute_reply": "2021-12-15T20:25:22.481338Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "9p8JrleuW_sl",
+ "outputId": "9323b109-a59a-4b7c-f6c4-14996666591d"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "16"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 19
+ }
+ ],
+ "source": [
+ "df['Country'].nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "9w_Ge7ESW_sl"
+ },
+ "source": [
+ "**What is the total sum of products ordered?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:22.492668Z",
+ "iopub.status.busy": "2021-12-15T20:25:22.491590Z",
+ "iopub.status.idle": "2021-12-15T20:25:22.580015Z",
+ "shell.execute_reply": "2021-12-15T20:25:22.578300Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "MFu01N3LW_sl",
+ "outputId": "0e10430a-2a2d-4f4c-be41-01f0bcb9afde"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "111960"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 20
+ }
+ ],
+ "source": [
+ "df['Quantity'].sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-Nwc-ybwW_sl"
+ },
+ "source": [
+ "**Show me the sum, mean, min, and max of the `Quantity` and `UnitPrice` fields**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:22.601432Z",
+ "iopub.status.busy": "2021-12-15T20:25:22.600117Z",
+ "iopub.status.idle": "2021-12-15T20:25:22.702450Z",
+ "shell.execute_reply": "2021-12-15T20:25:22.701499Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 173
+ },
+ "id": "QhqUeIQbW_sl",
+ "outputId": "aeec1b5e-bee4-4cf5-e338-def0d82b0451"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Quantity UnitPrice\n",
+ "sum 111960.000 61548.490000\n",
+ "mean 7.464 4.103233\n",
+ "max 2880.000 950.990000\n",
+ "min -9360.000 0.000000"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Quantity | \n",
+ " UnitPrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " sum | \n",
+ " 111960.000 | \n",
+ " 61548.490000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 7.464 | \n",
+ " 4.103233 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 2880.000 | \n",
+ " 950.990000 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " -9360.000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 21
+ }
+ ],
+ "source": [
+ "df[['Quantity','UnitPrice']].agg(['sum', 'mean', 'max', 'min'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "CklumXmiW_sm"
+ },
+ "source": [
+ "**Give me descriptive statistics for the entire data frame**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:22.712002Z",
+ "iopub.status.busy": "2021-12-15T20:25:22.711114Z",
+ "iopub.status.idle": "2021-12-15T20:25:22.982698Z",
+ "shell.execute_reply": "2021-12-15T20:25:22.981770Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 297
+ },
+ "id": "AttDFOcRW_sm",
+ "outputId": "a515c9f5-5b04-4943-9c72-8da357fa29ae"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " CustomerID Quantity UnitPrice\n",
+ "count 10729.000000 15000.000000 15000.000000\n",
+ "mean 15590.776680 7.464000 4.103233\n",
+ "std 1764.189592 85.930116 20.106214\n",
+ "min 12347.000000 -9360.000000 0.000000\n",
+ "25% 14225.913815 1.000000 1.336010\n",
+ "50% 15668.124797 2.423796 2.396465\n",
+ "75% 17195.974646 7.403795 4.282239\n",
+ "max 18239.000000 2880.000000 950.990000"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CustomerID | \n",
+ " Quantity | \n",
+ " UnitPrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 10729.000000 | \n",
+ " 15000.000000 | \n",
+ " 15000.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 15590.776680 | \n",
+ " 7.464000 | \n",
+ " 4.103233 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 1764.189592 | \n",
+ " 85.930116 | \n",
+ " 20.106214 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 12347.000000 | \n",
+ " -9360.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 14225.913815 | \n",
+ " 1.000000 | \n",
+ " 1.336010 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 15668.124797 | \n",
+ " 2.423796 | \n",
+ " 2.396465 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 17195.974646 | \n",
+ " 7.403795 | \n",
+ " 4.282239 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 18239.000000 | \n",
+ " 2880.000000 | \n",
+ " 950.990000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 22
+ }
+ ],
+ "source": [
+ "# NBVAL_IGNORE_OUTPUT\n",
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "LfPjHhpKW_sn"
+ },
+ "source": [
+ "**Show me a histogram of numeric columns**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:23.000466Z",
+ "iopub.status.busy": "2021-12-15T20:25:22.999571Z",
+ "iopub.status.idle": "2021-12-15T20:25:23.576387Z",
+ "shell.execute_reply": "2021-12-15T20:25:23.576703Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 391
+ },
+ "id": "ZJ1VoTG_W_sn",
+ "outputId": "a32d2fec-17db-428a-d4a2-71a17faa41fa"
+ },
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "image/png": "\n"
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "df[(df['Quantity']>-50) &\n",
+ " (df['Quantity']<50) &\n",
+ " (df['UnitPrice']>0) &\n",
+ " (df['UnitPrice']<100)][['Quantity', 'UnitPrice']].hist(figsize=[12,4], bins=30)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:23.584264Z",
+ "iopub.status.busy": "2021-12-15T20:25:23.583784Z",
+ "iopub.status.idle": "2021-12-15T20:25:24.494000Z",
+ "shell.execute_reply": "2021-12-15T20:25:24.493618Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 391
+ },
+ "id": "JbSWJJKUW_sn",
+ "outputId": "94aaf248-35ea-4e1d-fbf8-a1cf60b7778d"
+ },
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA+UAAAF2CAYAAAAIrjlOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA8CklEQVR4nO3df3RU1bn/8U8SkolREsBIMBAM2oqmaEKB8KX+ggrEXIo/EEupaEAXYjtp0Vxqg60B6qKgKKXFKfFHFbG0UmyxKlRJYzW1RQjB6NUUKha8VEgQkAwkNcTJ+f7hylxiQpiTzMyeM3m/1nK1c87OmWceJif72WeffWIsy7IEAAAAAADCLtZ0AAAAAAAA9FQU5QAAAAAAGEJRDgAAAACAIRTlAAAAAAAYQlEOAAAAAIAhFOUAAAAAABhCUQ4AAAAAgCEU5QAAAAAAGEJRDgAAAACAIRTlADo0duxYjR071nQYAADAhpkzZyozMzNs70d/Aeg+inIgRN577z3NmDFDAwcOlMvlUnp6umbMmKGamhrTofnV1NRo4cKF2rt372nb7t+/XwsXLlR1dXXI4wIAINotXLhQMTExOnToUIf7hw0bFpRit7GxUQsXLtRrr712yhha/0tKSlJWVpZ+/OMfy+v1dvu9AQSml+kAgGj0hz/8QdOnT1e/fv10++23a8iQIdq7d69+9atf6bnnntO6det03XXXmQ5TNTU1WrRokcaOHdtuVH3z5s1tXu/fv1+LFi1SZmamcnJywhckAAAI2OOPP66Wlhb/68bGRi1atEiSTlnkr1q1SmeddZaOHz+uzZs3a/HixXr11Vf1t7/9TTExMZ2+3xf7CwDsoygHguyDDz7QLbfcovPPP18VFRU655xz/Pvmzp2rK664QjNmzNA777yjIUOGGIy0cwkJCaZDAAAANsXHx9v+malTpyo1NVWSdOedd+rGG2/UH/7wB7355psaM2ZMhz/T2NiopKQk+gtAEDB9HQiyZcuWqbGxUY899libglySUlNT9eijj+r48eNatmyZpFPf+9U6pexkTz31lL7+9a+rf//+crlcysrK0qpVq9r9bGZmpr7xjW/ojTfeUG5urhITE3X++edrzZo1/jarV6/WTTfdJEkaN26cf+pa6/S2k+8Re+211zRq1ChJ0qxZs/xtV69erQULFig+Pl4ff/xxuzjuuOMO9enTR59++mlgyQMAAO289tpriomJ0e9+9zstXrxYgwYNUmJioq6++mrt3r27TduT+xV79+7190UWLVrk//u9cOHCTt/v61//uiRpz549kj7vEwwbNkxVVVW68sorlZSUpHvvvde/74tX4D/99FMtXLhQF154oRITE3XuuedqypQp+uCDD/xtWlpatGLFCn3lK19RYmKi0tLSNGfOHH3yySddTRPgWBTlQJC9+OKLyszM1BVXXNHh/iuvvFKZmZl68cUXbR971apVOu+883Tvvffq4YcfVkZGhr773e/K4/G0a7t7925NnTpVEyZM0MMPP6y+fftq5syZeu+99/xxfP/735ck3XvvvXrmmWf0zDPP6OKLL253rIsvvlg/+clPJH1eaLe2vfLKK3XLLbfos88+07p169r8zIkTJ/Tcc8/pxhtvVGJiou3PCgAA2lq6dKk2bNigefPmaf78+XrzzTd18803n7L9Oeec4x+8v+GGG/x/v6dMmdLp+7QWz2effbZ/2+HDh5Wfn6+cnBytWLFC48aN6/BnfT6fvvGNb2jRokUaMWKEHn74Yc2dO1f19fV69913/e3mzJmjH/zgB7rsssv085//XLNmzdLatWuVl5en5ubmgHMCRAOmrwNBVF9fr/3795/2fvFLL71UL7zwgo4dO2br+K+//rrOOOMM/+vCwkJdc801Wr58udxud5u2u3btUkVFhX9w4Jvf/KYyMjL01FNP6aGHHtL555+vK664Qr/4xS80YcKETheTSUtLU35+vkpKSjRm
zBjNmDGjzf4xY8bo17/+tQoLC/3bNm7cqE8++US33HKLrc8IAAA69umnn6q6uto/Zbxv376aO3eu3n33XQ0bNqxd+zPPPFNTp07Vd77zHV166aXt/n63OnLkiCT57yn/5S9/qbS0tDYXGGpra1VaWqo5c+Z0GuOaNWtUXl6u5cuX6+677/ZvLy4ulmVZkqQ33nhDTzzxhNauXatvf/vb/jbjxo3TNddco/Xr17fZDkQ7rpQDQdRaZPfu3bvTdq377RblJxfk9fX1OnTokK666ir961//Un19fZu2WVlZbf6YnnPOORo6dKj+9a9/2XrPQNx6663aunVrm2lpa9euVUZGhq666qqgvx8AAD3RrFmz2tzD3fp3vrt/24cOHapzzjlHQ4YM0Zw5c/SlL31JGzduVFJSkr+Ny+XSrFmzTnus3//+90pNTdX3vve9dvtab8tbv369UlJSNGHCBB06dMj/34gRI3TWWWfpL3/5S7c+D+A0XCkHgijQYvvYsWOKiYnxL6oSqL/97W9asGCBtmzZosbGxjb76uvrlZKS4n89ePDgdj/ft2/fkNyrNW3aNN11111au3atSkpKVF9fr5deekl33333aVdtBQAAHfvi39Av/m3v27evJHX7b/vvf/97JScnKz4+XoMGDdIFF1zQrs3AgQMDWtTtgw8+0NChQ9Wr16nLjPfff1/19fXq379/h/sPHjwYePBAFKAoB4IoJSVF6enpeueddzpt984772jQoEFKSEg4ZdHq8/navP7ggw909dVX66KLLtLy5cuVkZGhhIQEbdq0ST/72c/aPP5EkuLi4jo8buvUsWDq27evvvGNb/iL8ueee05NTU2nnCYHAEBP17reyn/+858O9zc2NrZbkyVUf9uvvPLK014oOHm2Xne1tLSof//+Wrt2bYf7v7hQLhDtKMqBIJs8ebIeffRRvfHGG7r88svb7f/rX/+qvXv3qqioSNLnBe3Ro0fbtfvwww/bvH7xxRfV1NSkF154oc1IeXemeNm5in26trfeequuu+46VVZWau3atRo+fLi+8pWvdDk2AACi2XnnnSfp8zVgMjIy2uxrbGzUvn37NHHixG6/T7hnrF1wwQXaunWrmpubT/l4tgsuuEB//vOfddlllwW12AecinvKgSCbN2+ekpKSNGfOHB0+fLjNviNHjujOO+9UcnKyf1G0Cy64QPX19W2urh84cEAbNmxo87Oto+Mnj4bX19frqaee6nKsZ555piR1OChgt21+fr5SU1P1wAMP6PXXX+cqOQAAnbj66quVkJCgVatWtZvt9thjj+mzzz5Tfn5+t9+n9b7wQP7WB8ONN96oQ4cO6ZFHHmm3r7UP881vflM+n0/3339/uzafffZZ2GIFIgVXyoEg+9KXvqQ1a9Zo+vTpuuSSS3T77bdryJAh2rt3r371q1/pk08+0bPPPqshQ4ZIkr71rW/phz/8oW644QZ9//vfV2Njo1atWqULL7xQO3bs8B934sSJSkhI0OTJkzVnzhwdP35cjz/+uPr3768DBw50KdacnBzFxcXpgQceUH19vVwul/856F90wQUXqE+fPiotLVXv3r115plnavTo0f7PER8fr29961t65JFHFBcXp+nTp3cpJgAAeoL+/furpKREP/7xj3XllVfq2muvVVJSkv7+97/rt7/9rSZOnKjJkyd3+33OOOMMZWVlad26dbrwwgvVr18/DRs2rMPV2oPh1ltv1Zo1a1RUVKRt27bpiiuuUENDg/785z/ru9/9rq677jpdddVVmjNnjpYsWaLq6mpNnDhR8fHxev/997V+/Xr9/Oc/19SpU0MSHxCJuFIOhMCNN96oHTt2aNy4cXriiSc0e/Zs3X///Tpy5Ii2b9+ua6+91t/27LPP1oYNG5SUlKR77rlHTz/9tJYsWdLuD/HQoUP13HPPKSYmRvPmzVNpaanuuOMOzZ07t8txDhgwQKWlpTp48KBuv/12TZ8+XTU1NR22
jY+P19NPP624uDjdeeedmj59ul5//fU2bW699VZJn4/+n3vuuV2OCwCAnuBHP/qRfv3rX8vn8+knP/mJ5s2bp7feekuLFi3SCy+8oNjY4HTVn3jiCQ0cOFB33323pk+frueeey4ox+1IXFycNm3apB/96EfaunWr7rrrLi1fvlzJycm65JJL/O1KS0v12GOP6eDBg7r33ns1f/58vfrqq5oxY4Yuu+yykMUHRKIYKxSrPgFoZ82aNZo5c6ZmzJihNWvWmA4nJN5++23l5ORozZo1PJ8cAAAACADT14EwufXWW3XgwAEVFxdr0KBB+ulPf2o6pKB7/PHHddZZZ2nKlCmmQwEAAAAcgSvlALrtxRdfVE1Nje677z4VFhZq+fLlpkMCAAAAHIGiHEC3ZWZmqq6uTnl5eXrmmWfUu3dv0yEBAAAAjkBRDgAAAACAIay+DgAAAACAIRTlAAAAAAAY4rjV11taWrR//3717t1bMTExpsMBADiUZVk6duyY0tPTg/YsYEQe+g0AgGAIZb/BcUX5/v37lZGRYToMAECU2LdvnwYNGmQ6DIQI/QYAQDCFot/guKK8dVXnffv2KTk52XA0XdPc3KzNmzdr4sSJio+PNx1OxCNfgSNX9pAve6ItX16vVxkZGTwtIMoFq98Qbd//cCBn9pEz+8iZfeSsa44cOaIhQ4aEpN/guKK8depZcnKyo4vypKQkJScn84sQAPIVOHJlD/myJ1rzxZTm6BasfkO0fv9DiZzZR87sI2f2kbOuaW5ulhSafoOxm+gaGxt13nnnad68eaZCAAAAAADAKGNF+eLFi/X//t//M/X2AAAAAAAYZ6Qof//997Vz507l5+ebeHsAAOAwzLADAEQr20V5RUWFJk+erPT0dMXExOj5559v18bj8SgzM1OJiYkaPXq0tm3b1mb/vHnztGTJki4HDQAAehZm2AEAopXtoryhoUHZ2dnyeDwd7l+3bp2Kioq0YMEC7dixQ9nZ2crLy9PBgwclSX/84x914YUX6sILL+xe5AAAoEdghh0AIJrZXn09Pz+/0z+Ky5cv1+zZszVr1ixJUmlpqTZu3Kgnn3xSxcXFevPNN/Xss89q/fr1On78uJqbm5WcnKySkpIOj9fU1KSmpib/a6/XK+nz1e9aV8Bzmta4nRp/uJGvwJEre8iXPdGWr2j5HJGuoqJCy5YtU1VVlQ4cOKANGzbo+uuvb9PG4/Fo2bJlqq2tVXZ2tlauXKnc3Fz//nnz5mnZsmX6+9//HuboAQAIvaA+Eu3EiROqqqrS/Pnz/dtiY2M1fvx4bdmyRZK0ZMkS/9T11atX69133z1lQd7aftGiRe22b968WUlJScEMP+zKyspMh+Ao5Ctw5Moe8mVPtOSrsbHRdAg9QusMu9tuu01Tpkxpt791hl1paalGjx6tFStWKC8vT7t27VL//v3bzLCjKAcARKOgFuWHDh2Sz+dTWlpam+1paWnauXNnl445f/58FRUV+V97vV5lZGRo4sSJjn5OeVlZmSZMmMCzAQNAvgJHruwhX/ZEW75aZ14htKJlhl20zRQJB3JmHzmzj5zZR866JpT5CmpRbtfMmTNP28blcsnlcrXbHh8f7/hOYTR8hnAiX4EjV/aQL3uiJV/R8Bmczokz7KJlpkg4kTP7yJl95Mw+cmZPKGfYBbUoT01NVVxcnOrq6tpsr6ur04ABA7p1bI/HI4/HI5/P163jAACAyOCkGXbRNlMkHMiZfeTMPnJmHznrmsOHD4fs2EEtyhMSEjRixAiVl5f7F3FpaWlReXm5CgsLu3Vst9stt9str9erlJSUIEQLAACcJBJm2EXLTJFwImf2kTP7yJl95MyeUObKdlF+/Phx7d692/96z549qq6uVr9+/TR48GAVFRWpoKBAI0eOVG5urlasWKGGhgb/vWIAwiOzeKPtn9m7dFIIIgGAjoVyhl2oDFv4ipp8MQG15ZwKAAiE7eeUb9++XcOHD9fw4cMl
SUVFRRo+fLj//q5p06bpoYceUklJiXJyclRdXa2XX3653dQ0AADQs508w65V6wy7MWPGdOvYHo9HWVlZGjVqVHfDBAAgpGxfKR87dqwsy+q0TWFhYbenq38R95QDAOA8pmbYcdsbAMApjK6+bgd/XAEAcJ7t27dr3Lhx/teti7AVFBRo9erVmjZtmj7++GOVlJSotrZWOTk5zLADAPQojinKAQCA8zDDDgCAztm+pxwAACDSud1u1dTUqLKy0nQoAAB0yjFFOQu2AAAAAACijWOKcka8AQAAAADRxjFFOQAAQKCYYQcAcAqKcgAAEHWYYQcAcAqKcgAAAAAADHFMUc40NAAAAABAtHFMUc40NAAAAABAtHFMUQ4AABAoZtgBAJyCohwAAEQdZtgBAJyCohwAAAAAAEMcU5QzDQ0AAAAAEG0cU5QzDQ0AAAAAEG0cU5QDAAAAABBtKMoBAEDU4bY3AIBTUJQDAICow21vAACnoCgHAAAAAMAQinIAAAAAAAxxTFHOvWEAAAAAgGjjmKKce8MAAAAAANHGMUU5AABAoJhhBwBwCopyAAAQdZhhBwBwCopyAAAAAAAMoSgHAAAAAMCQXqYDAAAAiEaZxRtt/8zepZNCEAkAIJJxpRwAAAAAAEMoygEAAAAAMMQxRTmPNgEAAAAARBvHFOU82gQAAAAAEG0cU5QDAAAEihl2AACnoCgHAABRhxl2AACnoCgHAAAAAMAQinIAAAAAAAzpZToAAJEjs3ijrfZ7l04KUSQAAABAz8CVcgAAAAAADKEoBwAAAADAEIpyAAAAAAAMoSgHAAAAAMAQinIAAAAAAAyhKAcAAAAAwBDHFOUej0dZWVkaNWqU6VAAAAAAAAgKxxTlbrdbNTU1qqysNB0KAAAAAABB4ZiiHAAAIFDMsAMAOAVFOQAAiDrMsAMAOAVFOQAAAAAAhlCUAwAAAABgCEU5AAAAAACGUJQDAAAAAGAIRTkAAAAAAIZQlAMAAAAAYEgv0wEA6Dkyizfaar936aQQRQIAAABEBq6UAwAAAABgCEU5AAAAAACGUJQDAAAAAGAIRTkAAAAAAIaEvSg/evSoRo4cqZycHA0bNkyPP/54uEMAAAAAACAihH319d69e6uiokJJSUlqaGjQsGHDNGXKFJ199tnhDgUAAAAAAKPCfqU8Li5OSUlJkqSmpiZZliXLssIdBgAAcABm2AEAop3toryiokKTJ09Wenq6YmJi9Pzzz7dr4/F4lJmZqcTERI0ePVrbtm1rs//o0aPKzs7WoEGD9IMf/ECpqald/gAAACB6tc6wq66u1tatW/XTn/5Uhw8fNh0WAABBY7sob2hoUHZ2tjweT4f7161bp6KiIi1YsEA7duxQdna28vLydPDgQX+bPn366O2339aePXv0m9/8RnV1dV3/BAAAIGoxww4AEO1s31Oen5+v/Pz8U+5fvny5Zs+erVmzZkmSSktLtXHjRj355JMqLi5u0zYtLU3Z2dn661//qqlTp3Z4vKamJjU1Nflfe71eSVJzc7Oam5vthh8RWuN2avzhRr4Cd3KuXHGh77Ta/TexG1Oo/835btkTbfmKls8R6SoqKrRs2TJVVVXpwIED2rBhg66//vo2bTwej5YtW6ba2lplZ2dr5cqVys3N9e8/evSorrrqKr3//vtatmwZM+wAAFElqAu9nThxQlVVVZo/f75/W2xsrMaPH68tW7ZIkurq6pSUlKTevXurvr5eFRUV+s53vnPKYy5ZskSLFi1qt33z5s3+kXOnKisrMx2Co5CvwJWVlenB3NO3665NmzbZam83JrvH7yq+W/ZES74aGxtNh9AjtM6wu+222zRlypR2+1tn2JWWlmr06NFasWKF8vLytGvXLvXv31/S/82wq6ur05QpUzR16lSlpaWF+6OERWbxRlvt9y6dFKJIAADhEtSi/NChQ/L5fO3+UKalpWnnzp2SpA8//FB33HGHf/rZ9773PV1yySWnPOb8
+fNVVFTkf+31epWRkaGJEycqOTk5mOGHTXNzs8rKyjRhwgTFx8ebDifika/AnZyr4YtfDfn7vbswz1b7YQtfCenx7eK7ZU+05at15hVCK1pm2LX+rCs2sqbOR/KMj2ibXRMO5Mw+cmYfOeuaUOYr7I9Ey83NVXV1dcDtXS6XXC5Xu+3x8fGO7xRGw2cIJ/IVuPj4eDX5YsLyPnbYjSlc/958t+yJlnxFw2dwOifOsLt/ZEu3jxFM4ZpR1B3RMrsmnMiZfeTMPnJmTyhn2AW1KE9NTVVcXFy7hdvq6uo0YMCAYL4VAABwOCfNsGudKXLf9lg1tYR+0DNQoZ5R1B3RNrsmHMiZfeTMPnLWNaF88kdQi/KEhASNGDFC5eXl/kVcWlpaVF5ersLCwm4d2+PxyOPxyOfzBSFSAADgBJE2w66pJSYsM5EC5YQOdbTMrgkncmYfObOPnNkTylzZLsqPHz+u3bt3+1/v2bNH1dXV6tevnwYPHqyioiIVFBRo5MiRys3N1YoVK9TQ0OC/V6yr3G633G63vF6vUlJSunUsAABgXihn2DGYDwBwCttF+fbt2zVu3Dj/69YpYgUFBVq9erWmTZumjz/+WCUlJaqtrVVOTo5efvnlqF0lFejJ7K4SDAAnC+UMOwbzAQBOYbsoHzt2rCyr85VHCwsLu/3H9IsY8QYAwHlMzbADAMApwr76elcx4g0AgPMwww4AgM45pigHAADOwww7AAA6F2s6AAAAgGBzu92qqalRZWWl6VAAAOiUY4pyj8ejrKwsjRo1ynQoAAAAAAAEhWOKcka8AQAAAADRxjFFOQAAQKCYYQcAcAqKcgAAEHWYYQcAcAqKcgAAAAAADHFMUc40NAAAAABAtHHMc8rdbrfcbre8Xq9SUlJMhwOgh8os3mir/d6lk0IUCQAAAKKBY66UAwAABIoZdgAAp6AoBwAAUYeF3gAATkFRDgAAAACAIRTlAAAAAAAY4piF3jwejzwej3w+n+lQAAAAIgKLTwKA8znmSjn3hgEAgECx0BsAwCkcU5QDAAAEisF8AIBTUJQDAAAAAGAIRTkAAAAAAIZQlAMAAAAAYAhFOQAAAAAAhjimKGcVVQAAAABAtHFMUc4qqgAAIFAM5gMAnMIxRTkAAECgGMwHADgFRTkAAAAAAIZQlAMAAAAAYAhFOQAAAAAAhlCUAwAAAABgCEU5AAAAAACGUJQDAAAAAGCIY4pynjcKAAAAAIg2jinKed4oAAAIFIP5AACncExRDgAAECgG8wEATkFRDgAAAACAIRTlAAAAAAAYQlEOAAAAAIAhFOUAAAAAABhCUQ4AAAAAgCEU5QAAAAAAGEJRDgAAAACAIRTlAAAAAAAYQlEOAAAAAIAhjinKPR6PsrKyNGrUKNOhAAAAAAAQFI4pyt1ut2pqalRZWWk6FAAAEOEYzAcAOIVjinIAAIBAMZgPAHAKinIAAAAAAAyhKAcAAAAAwBCKcgAAAAAADKEoBwAAAADAEIpyAAAAAAAMoSgHAAAAAMCQXqYDAAAgs3ijrfZ7l04KUSQAAADhxZVyAAAAAAAMoSgHAAAAAMAQinIAAAAAAAyhKAcAAAAAwBCKcgAAAAAADAn76uv79u3TLbfcooMHD6pXr1667777dNNNN4U7DACIGqxcDiBU7J5fJM4xAGBX2IvyXr16acWKFcrJyVFtba1GjBih//qv/9KZZ54Z7lAAAAAAADAq7NPXzz33XOXk5EiSBgwYoNTUVB05ciTcYQAAAAfYt2+fxo4dq6ysLF166aVav3696ZAAAAgq20V5RUWFJk+erPT0dMXExOj5559v18bj8SgzM1OJiYkaPXq0tm3b1uGxqqqq5PP5lJGRYTtwAAAQ/Vpn2NXU1Gjz5s2666671NDQYDosAACCxnZR3tDQoOzsbHk8ng73r1u3TkVFRVqwYIF27Nih7Oxs5eXl6eDBg23aHTlyRLfeeqsee+yxrkUOAACiHjPsAADRzvY9
5fn5+crPzz/l/uXLl2v27NmaNWuWJKm0tFQbN27Uk08+qeLiYklSU1OTrr/+ehUXF+trX/taF0MHAACRrqKiQsuWLVNVVZUOHDigDRs26Prrr2/TxuPxaNmyZaqtrVV2drZWrlyp3Nzcdsdihl33dWXhNgBAaAV1obcTJ06oqqpK8+fP92+LjY3V+PHjtWXLFkmSZVmaOXOmvv71r+uWW2457TGbmprU1NTkf+31eiVJzc3Nam5uDmb4YdMat1PjDzfyFbiTc+WKswxH032h/jfvynfLbl7D8b0NV0yh/F00kVfOKeHROsPutttu05QpU9rtb51hV1paqtGjR2vFihXKy8vTrl271L9/f3+71hl2jz/+eDjDBwAg5IJalB86dEg+n09paWlttqelpWnnzp2SpL/97W9at26dLr30Uv/96M8884wuueSSDo+5ZMkSLVq0qN32zZs3KykpKZjhh11ZWZnpEByFfAWurKxMD7a/yOQ4mzZtCsv72Plu2c1rOD5DuGMKxe+iibw2NjZ2+xg4vXDPsAvVYH7rz7pinT/gGWpfHMBjACxw5Mw+cmYfOeuaUOYr7I9Eu/zyy9XS0hJw+/nz56uoqMj/2uv1KiMjQxMnTlRycnIoQgy55uZmlZWVacKECYqPjzcdTsQjX58btvCV07ZxxVq6f2SL7tseq6aWmDBEFXneXZgXcNvW71Yo82Unnq4K5Ltxsq7GFMrfxXB9hpO1FmswJxQz7EI9mH//yMD7MD3VFwfNGFS3j5zZR87sI2f2hHIwP6hFeWpqquLi4lRXV9dme11dnQYMGNClY7pcLrlcrnbb4+PjHV+gRcNnCKeenq8mX+BFY1NLjK320aQr35FQ5isc31m7sXc3plD8Lob7MwTrGOieUMywC9VgfjgG8aJF66AZg+r2kTP7yJl95KxrDh8+HLJjB7UoT0hI0IgRI1ReXu5fxKWlpUXl5eUqLCzs1rE9Ho88Ho98Pl8QIgUAAE5gd4ZdqAfze/KgZ6C+mOeePqjeFeTMPnJmHzmzJ5S5sl2UHz9+XLt37/a/3rNnj6qrq9WvXz8NHjxYRUVFKigo0MiRI5Wbm6sVK1aooaHBf69YV7ndbrndbnm9XqWkpHTrWAAAwLxQzLBrxWA+AMApbD+nfPv27Ro+fLiGDx8uSSoqKtLw4cNVUlIiSZo2bZoeeughlZSUKCcnR9XV1Xr55ZfbTU0DAAA928kz7Fq1zrAbM2ZMt47tdrtVU1OjysrK7oYJAEBI2b5SPnbsWFlW5yuPFhYWdnu6OgAAcD5TM+wAAHCKsK++3lVMQwMAwHm2b9+ucePG+V+3LsJWUFCg1atXa9q0afr4449VUlKi2tpa5eTkMMMOANCjOKYo555yAACcx9QMOwbzAQBOYfuecgAAgEjHPeUAAKegKAcAAAAAwBDHFOUej0dZWVkaNWqU6VAAAAAAAAgKxxTlTEMDAACBYjAfAOAUjlnoDQACkVm8MeC2rjhLD+aGMBgAxrBALADAKRxzpRwAAAAAgGjjmCvlPNoEQE9h52o/AAAAnM0xV8q5pxwAAASKe8oBAE7hmKIcAAAgUAzmAwCcgqIcAAAAAABDKMoBAAAAADCEohwAAAAAAEMcU5SzYAsAAAAAINo4pihnwRYAABAoBvMBAE7hmKIcAAAgUAzmAwCcgqIcAAAAAABDepkOAAAAANEjs3ijJMkVZ+nBXGnYwlfU5Is5Zfu9SyeFKzQAiEhcKQcAAAAAwBCKcgAAAAAADHFMUc4qqgAAAACAaOOYopxVVAEAQKAYzAcAOIVjinIAAIBAMZgPAHAKinIAAAAAAAyhKAcAAAAAwBCKcgAAAAAADOllOgAAQGTLLN4oSXLFWXowVxq28BU1+WI6/Zm9SyeFIzQAAADH40o5AAAAAACGUJQDAAAAAGCIY6avezweeTwe+Xw+06EAQdE6JRgAAABAz+WYK+U8bxQAAATK4/EoKytLo0aNMh0KAACdcsyV
cgAAgEC53W653W55vV6lpKSYDgedsDtzjIUkAUQbx1wpBwAAAAAg2lCUAwAAAABgCEU5AAAAAACGUJQDAAAAAGAIRTkAAAAAAIZQlAMAAAAAYAhFOQAAAAAAhlCUAwAAAABgCEU5AAAAAACGOKYo93g8ysrK0qhRo0yHAgAAAABAUDimKHe73aqpqVFlZaXpUAAAQIRjMB8A4BSOKcoBAAACxWA+AMApKMoBAAAAADCEohwAAAAAAEMoygEAAAAAMKSX6QCAaJBZvNF0CAAAAAAciCvlAAAAAAAYQlEOAAAAAIAhFOUAAAAAABjCPeUAAACIWl1Z92Xv0kkhiAQAOsaVcgAAAAAADKEoBwAAAADAEIpyAAAAAAAM4Z5yAAAA4CR270PnHnQA3cGVcgAAAAAADDFSlN9www3q27evpk6dauLtAQCAg9BvAABEMyNF+dy5c7VmzRoTbw0AAByGfgMAIJoZKcrHjh2r3r17m3hrAADgMPQbAADRzHZRXlFRocmTJys9PV0xMTF6/vnn27XxeDzKzMxUYmKiRo8erW3btgUjVgAA4DD0GwAA6JztoryhoUHZ2dnyeDwd7l+3bp2Kioq0YMEC7dixQ9nZ2crLy9PBgwe7HSwAAHAW+g0AAHTO9iPR8vPzlZ+ff8r9y5cv1+zZszVr1ixJUmlpqTZu3Kgnn3xSxcXFtgNsampSU1OT/7XX65UkNTc3q7m52fbxIkFr3E6NP9yckC9XnGU6BEmSK9Zq87/oXDjy1ZXvbai/T3Zjao3HTr66+h6BCsb5IJLPKdEk3P0GAACcJqjPKT9x4oSqqqo0f/58/7bY2FiNHz9eW7Zs6dIxlyxZokWLFrXbvnnzZiUlJXU51khQVlZmOgRHieR8PZhrOoK27h/ZYjoERwllvjZt2mT7Z0L9fbIb0xfjCSRf3X2PYB+/I42Njd0+BronFP2GUA3mt/4sg56BC9XAZ6gH/boiWIN8TrgQEWnImX3krGtCma+gFuWHDh2Sz+dTWlpam+1paWnauXOn//X48eP19ttvq6GhQYMGDdL69es1ZsyYDo85f/58FRUV+V97vV5lZGRo4sSJSk5ODmb4YdPc3KyysjJNmDBB8fHxpsOJeE7I17CFr5gOQdLnHZ/7R7bovu2xamqJMR1OxOup+Xp3YZ6t9q3fbzv56up7BMru8TvSWqzBnFD0G0I9mM+gp33BzlmoB/26IhgDhSeL5AsRkYqc2UfO7AnlYH5Qi/JA/fnPfw64rcvlksvlarc9Pj4+Ygu0QEXDZwinSM5Xky+yCrqmlpiIiymS9bR82f09+mJuAslXd9/jdIJxLojU8wnas9NvCNVgfusAcU8bxOuOUA18hnrQryuCMVAoOeNCRKQhZ/aRs645fPhwyI4d1KI8NTVVcXFxqqura7O9rq5OAwYM6NaxPR6PPB6PfD5ft44DBCKzeKPpEAAg6oWi3xDqwfyeNogXDMHOWagH/boi2IVNJF+IiFTkzD5yZk8ocxXU55QnJCRoxIgRKi8v929raWlReXn5KaeZBcrtdqumpkaVlZXdDRMAAESAUPYbPB6PsrKyNGrUqO6GCQBASNm+Un78+HHt3r3b/3rPnj2qrq5Wv379NHjwYBUVFamgoEAjR45Ubm6uVqxYoYaGBv+qqgAAoOcw1W9wu91yu93yer1KSUnp7scAACBkbBfl27dv17hx4/yvW+/bKigo0OrVqzVt2jR9/PHHKikpUW1trXJycvTyyy+3W8QFAABEP/oNAAB0znZRPnbsWFlW54+WKCwsVGFhYZeD6gj3lAMA4Dz0G9AT2F2LZu/SSSGKBIATBfWe8lDinnIAABAo+g0AAKdwTFEOAAAAAEC0oSgHAAAAAMAQxxTlPNoEAAAEin4DAMApHFOUc28YAAAIFP0GAIBTOKYoBwAAAAAg2lCUAwAAAABgCEU5AAAAAACG9DIdQKA8Ho88Ho98Pp/pUAAAQISj3xC9Mos3mg4BAILK
MVfKWbAFAAAEin4DAMApHFOUAwAAAAAQbSjKAQAAAAAwhKIcAAAAAABDWOgNQWd3AZa9SyeFKBIAAAAAiGyOuVLOgi0AACBQHo9HWVlZGjVqlOlQAADolGOKcgAAgEAxmA8AcAqKcgAAAAAADKEoBwAAAADAEIpyAAAAAAAMoSgHAAAAAMAQxxTlrKIKAAAAAIg2jnlOudvtltvtltfrVUpKiulwAABABPN4PPJ4PPL5fKZDARwhs3ijrfZ7l04KUSRAz+OYK+UAAACB4pFoAACnoCgHAAAAAMAQinIAAAAAAAyhKAcAAAAAwBCKcgAAAAAADKEoBwAAAADAEIpyAAAAAAAMccxzykPxvFG7z2OUQv9MRp4RCSDUunLuAwAAQGg45ko5zxsFAACB8ng8ysrK0qhRo0yHAgBApxxTlAMAAASKwXwAgFNQlAMAAAAAYAhFOQAAAAAAhlCUAwAAAABgCEU5AAAAAACGUJQDAAAAAGAIRTkAAAAAAIZQlAMAAAAAYAhFOQAAAAAAhlCUAwAAAABgCEU5AAAAAACG9DIdQKA8Ho88Ho98Pp/pUAAAQISj34BIllm8scPtrjhLD+ZKwxa+oiZfjH/73qWTwhVawE71GU4lEj8DECkcc6Xc7XarpqZGlZWVpkMBAAARjn4DAMApHFOUAwAAAAAQbSjKAQAAAAAwhKIcAAAAAABDKMoBAAAAADCEohwAAAAAAEMoygEAAAAAMISiHAAAAAAAQyjKAQAAAAAwhKIcAAAAAABDKMoBAAAAADCEohwAAAAAAEMoygEAAAAAMISiHAAAAAAAQ4wU5S+99JKGDh2qL3/5y3riiSdMhAAAAByCfgMAIJr1CvcbfvbZZyoqKtJf/vIXpaSkaMSIEbrhhht09tlnhzsUAAAQ4eg3AACiXdivlG/btk1f+cpXNHDgQJ111lnKz8/X5s2bwx0GAABwAPoNAIBoZ7sor6io0OTJk5Wenq6YmBg9//zz7dp4PB5lZmYqMTFRo0eP1rZt2/z79u/fr4EDB/pfDxw4UB999FHXogcAABGNfgMAAJ2zXZQ3NDQoOztbHo+nw/3r1q1TUVGRFixYoB07dig7O1t5eXk6ePBgt4MFAADOQr8BAIDO2b6nPD8/X/n5+afcv3z5cs2ePVuzZs2SJJWWlmrjxo168sknVVxcrPT09DYj3B999JFyc3NPebympiY1NTX5X3u9XklSc3Ozmpub7YbfhivOsv0z3X3Pk4/R0bHsxhSMeIIt2J+hs3yFSle+G5HAFWu1+V90jnzZYydfdn9fTZz7IvH8GY2ipd/Q+rOcLwLHOda+U+UsHOerUPd9QvUZTPQTnY6cdU0o8xVjWVaXfwNjYmK0YcMGXX/99ZKkEydOKCkpSc8995x/myQVFBTo6NGj+uMf/6jPPvtMF198sV577TX/gi1///vfT7lgy8KFC7Vo0aJ223/zm98oKSmpq6EDAHq4xsZGffvb31Z9fb2Sk5NNh9Mj0G8AADhVKPsNQV19/dChQ/L5fEpLS2uzPS0tTTt37vz8DXv10sMPP6xx48appaVF99xzT6crqM6fP19FRUX+116vVxkZGZo4cWK3kzFs4Su2f+bdhXndek/p81GWsrIyTZgwQfHx8d2KKRjxBFtX8toZV6yl+0e26L7tsWpqiZEU+s8d7M8QLh3lCqdGvuyxky+7v6Mmzn2tV1BhjpP6Da1/uzlfBI5zrH3BzFmoz8N2deW8HUhMJ+esquSaroTmaF35d7P7PYu0esNUDXf48OFuH+NUwv5INEm69tprde211wbU1uVyyeVytdseHx/frqC1q8ln/2TX3ff84rG+eDy7MQUznmDpSl4DOm5LjP/Yof7cofoM4XJyrnB65MueQPJl93fUxLkvEs+f6Fik9BskzhddQc7sC0bOQn0etqsrv392YmpqiemR5/Xu/LsF+j2LtLyaquFCmYegPhItNTVVcXFxqqura7O9rq5OAwYMCOZb
AQAAh6PfAABAkIvyhIQEjRgxQuXl5f5tLS0tKi8v15gxY7p1bI/Ho6ysLI0aNaq7YQIAgAhAvwEAgC5MXz9+/Lh2797tf71nzx5VV1erX79+Gjx4sIqKilRQUKCRI0cqNzdXK1asUENDg39V1a5yu91yu93yer1KSUnp1rEAAEB40G8AAKBztovy7du3a9y4cf7XrYupFBQUaPXq1Zo2bZo+/vhjlZSUqLa2Vjk5OXr55ZfbLeICAACiH/0GAAA6Z7soHzt2rE73FLXCwkIVFhZ2OaiOeDweeTwe+Xy+oB4XAACEDv0GAAA6F9R7ykPJ7XarpqZGlZWVpkMBAAARjn4DAMApHFOUAwAAAAAQbSjKAQAAAAAwxDFFOY82AQAAgaLfAABwCscU5dwbBgAAAkW/AQDgFLZXXzetdQVXr9fb7WO1NDXa/plgvG9zc7MaGxvl9XoVHx/frZiCEU+wdSWvnfHFWWps9MnXFKcWX4yk0H/uYH+GcOkoVzg18mWPnXzZ/R01ce5rPcbpVgaHswWr39D6t5vzReA4x9oXzJyF+jxsV1d+BwOJ6eScRWK/ONS68u9m93sWaXk1VcMdO3ZMUmj6DTGWw3oj//73v5WRkWE6DABAlNi3b58GDRpkOgyECP0GAEAwffDBBzr//PODekzHFeUtLS3av3+/evfurZgYZ466er1eZWRkaN++fUpOTjYdTsQjX4EjV/aQL3uiLV+WZenYsWNKT09XbKxj7uaCTcHqN0Tb9z8cyJl95Mw+cmYfOeua+vp6DR48WJ988on69OkT1GM7bvp6bGxs1FzRSE5O5hfBBvIVOHJlD/myJ5rylZKSYjoEhFiw+w3R9P0PF3JmHzmzj5zZR866JhQD+VwaAAAAAADAEIpyAAAAAAAMoSg3wOVyacGCBXK5XKZDcQTyFThyZQ/5sod8oSfj+28fObOPnNlHzuwjZ10Tyrw5bqE3AAAAAACiBVfKAQAAAAAwhKIcAAAAAABDKMoBAAAAADCEohwAAAAAAEMoyg1pampSTk6OYmJiVF1d3WbfO++8oyuuuEKJiYnKyMjQgw8+aCZIg/bu3avbb79dQ4YM0RlnnKELLrhACxYs0IkTJ9q0I1dteTweZWZmKjExUaNHj9a2bdtMh2TckiVLNGrUKPXu3Vv9+/fX9ddfr127drVp8+mnn8rtduvss8/WWWedpRtvvFF1dXWGIo4sS5cuVUxMjO666y7/NvKFnoZz66lxju0+zrOB+eijjzRjxgydffbZOuOMM3TJJZdo+/bt/v2WZamkpETnnnuuzjjjDI0fP17vv/++wYjN8/l8uu+++9r0p++//36dvM53T89bRUWFJk+erPT0dMXExOj5559vsz+Q/Bw5ckQ333yzkpOT1adPH91+++06fvy4rTgoyg255557lJ6e3m671+vVxIkTdd5556mqqkrLli3TwoUL9dhjjxmI0pydO3eqpaVFjz76qN577z397Gc/U2lpqe69915/G3LV1rp161RUVKQFCxZox44dys7OVl5eng4ePGg6NKNef/11ud1uvfnmmyorK1Nzc7MmTpyohoYGf5u7775bL774otavX6/XX39d+/fv15QpUwxGHRkqKyv16KOP6tJLL22znXyhJ+Hc2jnOsd3DeTYwn3zyiS677DLFx8frT3/6k2pqavTwww+rb9++/jYPPvigfvGLX6i0tFRbt27VmWeeqby8PH366acGIzfrgQce0KpVq/TII4/oH//4hx544AE9+OCDWrlypb9NT89bQ0ODsrOz5fF4OtwfSH5uvvlmvffeeyorK9NLL72kiooK3XHHHfYCsRB2mzZtsi666CLrvffesyRZb731ln/fL3/5S6tv375WU1OTf9sPf/hDa+jQoQYijSwPPvigNWTIEP9rctVWbm6u5Xa7/a99Pp+Vnp5uLVmyxGBUkefgwYOWJOv111+3LMuyjh49asXHx1vr16/3t/nHP/5hSbK2bNli
Kkzjjh07Zn35y1+2ysrKrKuuusqaO3euZVnkCz0P51Z7OMcGjvNs4H74wx9al19++Sn3t7S0WAMGDLCWLVvm33b06FHL5XJZv/3tb8MRYkSaNGmSddttt7XZNmXKFOvmm2+2LIu8fZEka8OGDf7XgeSnpqbGkmRVVlb62/zpT3+yYmJirI8++ijg9+ZKeZjV1dVp9uzZeuaZZ5SUlNRu/5YtW3TllVcqISHBvy0vL0+7du3SJ598Es5QI059fb369evnf02u/s+JEydUVVWl8ePH+7fFxsZq/Pjx2rJli8HIIk99fb0k+b9LVVVVam5ubpO7iy66SIMHD+7RuXO73Zo0aVKbvEjkCz0L51b7OMcGjvNs4F544QWNHDlSN910k/r376/hw4fr8ccf9+/fs2ePamtr2+QsJSVFo0eP7rE5k6Svfe1rKi8v1z//+U9J0ttvv6033nhD+fn5ksjb6QSSny1btqhPnz4aOXKkv8348eMVGxurrVu3BvxevYIXNk7HsizNnDlTd955p0aOHKm9e/e2a1NbW6shQ4a02ZaWlubfd/I0nZ5k9+7dWrlypR566CH/NnL1fw4dOiSfz+f//K3S0tK0c+dOQ1FFnpaWFt1111267LLLNGzYMEmff1cSEhLUp0+fNm3T0tJUW1trIErznn32We3YsUOVlZXt9pEv9CScW+3hHBs4zrP2/Otf/9KqVatUVFSke++9V5WVlfr+97+vhIQEFRQU+PPS0e9qT82ZJBUXF8vr9eqiiy5SXFycfD6fFi9erJtvvlmSyNtpBJKf2tpa9e/fv83+Xr16qV+/frZyyJXyICguLlZMTEyn/+3cuVMrV67UsWPHNH/+fNMhGxNork720Ucf6ZprrtFNN92k2bNnG4oc0cDtduvdd9/Vs88+azqUiLVv3z7NnTtXa9euVWJioulwADgI59jAcJ61r6WlRV/96lf105/+VMOHD9cdd9yh2bNnq7S01HRoEe13v/ud1q5dq9/85jfasWOHnn76aT300EN6+umnTYeGL+BKeRD893//t2bOnNlpm/PPP1+vvvqqtmzZIpfL1WbfyJEjdfPNN+vpp5/WgAED2q2u2fp6wIABQY3bhEBz1Wr//v0aN26cvva1r7VbwC3ac2VHamqq4uLiOsxHT8vFqRQWFvoX3xg0aJB/+4ABA3TixAkdPXq0zVWJnpq7qqoqHTx4UF/96lf923w+nyoqKvTII4/olVdeIV/oMTi3Bo5zbOA4z9p37rnnKisrq822iy++WL///e8l/V+/r66uTueee66/TV1dnXJycsIWZ6T5wQ9+oOLiYn3rW9+SJF1yySX68MMPtWTJEhUUFJC30wgkPwMGDGi38Odnn32mI0eO2Pp95Up5EJxzzjm66KKLOv0vISFBv/jFL/T222+rurpa1dXV2rRpk6TPV3ZdvHixJGnMmDGqqKhQc3Oz//hlZWUaOnRoVEzHDjRX0udXyMeOHasRI0boqaeeUmxs269rtOfKjoSEBI0YMULl5eX+bS0tLSovL9eYMWMMRmaeZVkqLCzUhg0b9Oqrr7a75WHEiBGKj49vk7tdu3bpf//3f3tk7q6++mr9z//8j/88VV1d7R84bP3/5As9BefW0+Mcax/nWfsuu+yydo/a++c//6nzzjtPkjRkyBANGDCgTc68Xq+2bt3aY3MmSY2Nje36z3FxcWppaZFE3k4nkPyMGTNGR48eVVVVlb/Nq6++qpaWFo0ePTrwN+vmInXohj179rRbff3o0aNWWlqadcstt1jvvvuu9eyzz1pJSUnWo48+ai5QA/79739bX/rSl6yrr77a+ve//20dOHDA/18rctXWs88+a7lcLmv16tVWTU2Ndccdd1h9+vSxamtrTYdm1He+8x0rJSXFeu2119p8jxobG/1t7rzzTmvw4MHWq6++am3fvt0aM2aMNWbMGINRR5aTVwW2LPKFnoVza+c4xwYH59nObdu2zerVq5e1ePFi
6/3337fWrl1rJSUlWb/+9a/9bZYuXWr16dPH+uMf/2i988471nXXXWcNGTLE+s9//mMwcrMKCgqsgQMHWi+99JK1Z88e6w9/+IOVmppq3XPPPf42PT1vx44ds9566y3rrbfesiRZy5cvt9566y3rww8/tCwrsPxcc8011vDhw62tW7dab7zxhvXlL3/Zmj59uq04KMoN6qgotyzLevvtt63LL7/ccrlc1sCBA62lS5eaCdCgp556ypLU4X8nI1dtrVy50ho8eLCVkJBg5ebmWm+++abpkIw71ffoqaee8rf5z3/+Y333u9+1+vbtayUlJVk33HBDmwGgnu6LnUXyhZ6Gc+upcY4NDs6zp/fiiy9aw4YNs1wul3XRRRdZjz32WJv9LS0t1n333WelpaVZLpfLuvrqq61du3YZijYyeL1ea+7cudbgwYOtxMRE6/zzz7d+9KMftXmccE/P21/+8pcOz2EFBQWWZQWWn8OHD1vTp0+3zjrrLCs5OdmaNWuWdezYMVtxxFiWZdm8kg8AAAAAAIKAe8oBAAAAADCEohwAAAAAAEMoygEAAAAAMISiHAAAAAAAQyjKAQAAAAAwhKIcAAAAAABDKMoBAAAAADCEohwAAAAAAEMoygEAAAAAMISiHAAAAAAAQyjKAQAAAAAwhKIcAAAAAABD/j8nbAaz0E9J8AAAAABJRU5ErkJggg==\n"
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "df[(df['Quantity']>-50) &\n",
+ " (df['Quantity']<50) &\n",
+ " (df['UnitPrice']>0) &\n",
+ " (df['UnitPrice']<100)][['Quantity', 'UnitPrice']].hist(figsize=[12,4], bins=30, log=True)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:24.504460Z",
+ "iopub.status.busy": "2021-12-15T20:25:24.504086Z",
+ "iopub.status.idle": "2021-12-15T20:25:26.468550Z",
+ "shell.execute_reply": "2021-12-15T20:25:26.466711Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "5QysMacBW_so",
+ "outputId": "e3356521-63c3-404c-e8db-5ed6a0407162"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Country CustomerID ... StockCode UnitPrice\n",
+ "46 United Kingdom 13748.0 ... 22086 2.55\n",
+ "83 United Kingdom 15291.0 ... 21733 2.55\n",
+ "96 United Kingdom 14688.0 ... 21212 0.42\n",
+ "102 United Kingdom 14688.0 ... 85071B 0.38\n",
+ "176 United Kingdom 16029.0 ... 85099C 1.65\n",
+ "... ... ... ... ... ...\n",
+ "14784 United Kingdom 15061.0 ... 22423 10.95\n",
+ "14785 United Kingdom 15061.0 ... 22075 1.45\n",
+ "14788 United Kingdom 15061.0 ... 17038 0.07\n",
+ "14974 United Kingdom 14739.0 ... 21704 0.72\n",
+ "14980 United Kingdom 14739.0 ... 22178 1.06\n",
+ "\n",
+ "[258 rows x 8 columns]\n"
+ ]
+ }
+ ],
+ "source": [
+ "filter_quantity_price = df[(df['Quantity'] > 50) & (df['UnitPrice'] < 100)]\n",
+ "print(filter_quantity_price)\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "xG1_wb5RW_so"
+ },
+ "source": [
+ "## Arithmetic Operations"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "CxqA7gO-W_so"
+ },
+ "source": [
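+ "Numeric values: arithmetic operators are applied element-wise to numeric columns. Below, `Quantity` is multiplied by `UnitPrice` to get the value of each row."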
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:26.483774Z",
+ "iopub.status.busy": "2021-12-15T20:25:26.482084Z",
+ "iopub.status.idle": "2021-12-15T20:25:26.907406Z",
+ "shell.execute_reply": "2021-12-15T20:25:26.906448Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "HWoI0tYcW_so",
+ "outputId": "727db684-740b-45bf-9e4e-3887674b3e8c"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 6\n",
+ "1 6\n",
+ "2 8\n",
+ "3 6\n",
+ "4 6\n",
+ "Name: Quantity, dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 34
+ }
+ ],
+ "source": [
+ "df['Quantity'].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:26.912916Z",
+ "iopub.status.busy": "2021-12-15T20:25:26.910149Z",
+ "iopub.status.idle": "2021-12-15T20:25:27.361783Z",
+ "shell.execute_reply": "2021-12-15T20:25:27.362723Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "I6Rs0pTAW_sp",
+ "outputId": "68aa067b-b70c-4030-8acf-a2b36310fc52"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 2.55\n",
+ "1 3.39\n",
+ "2 2.75\n",
+ "3 3.39\n",
+ "4 3.39\n",
+ "Name: UnitPrice, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 42
+ }
+ ],
+ "source": [
+ "df['UnitPrice'].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:27.383414Z",
+ "iopub.status.busy": "2021-12-15T20:25:27.374098Z",
+ "iopub.status.idle": "2021-12-15T20:25:27.387546Z",
+ "shell.execute_reply": "2021-12-15T20:25:27.388753Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "wbibhbn6W_sp",
+ "outputId": "d0b6b88b-6282-4901-fea1-94d2e94c773d"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/eland/field_mappings.py:715: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+ " self._mappings_capabilities = self._mappings_capabilities.append(\n"
+ ]
+ }
+ ],
+ "source": [
+ "product = df['Quantity'] * df['UnitPrice']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:27.398754Z",
+ "iopub.status.busy": "2021-12-15T20:25:27.397557Z",
+ "iopub.status.idle": "2021-12-15T20:25:27.818022Z",
+ "shell.execute_reply": "2021-12-15T20:25:27.819640Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "2FstW8ZCW_sp",
+ "outputId": "b55d687e-6b7d-48a8-8815-e64bc6ebf6dd"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 15.30\n",
+ "1 20.34\n",
+ "2 22.00\n",
+ "3 20.34\n",
+ "4 20.34\n",
+ "dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 44
+ }
+ ],
+ "source": [
+ "product.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Pd2zMWGBW_sp"
+ },
+ "source": [
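+ "String concatenation: the `+` operator joins string columns element-wise. Below, each row's `Country` is followed directly by its `StockCode`, with no separator."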
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2021-12-15T20:25:27.837007Z",
+ "iopub.status.busy": "2021-12-15T20:25:27.836370Z",
+ "iopub.status.idle": "2021-12-15T20:25:29.072872Z",
+ "shell.execute_reply": "2021-12-15T20:25:29.074153Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "5MpSfh0fW_sp",
+ "outputId": "e0c52ddf-19bc-40af-813a-75c7e808bef6"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/eland/field_mappings.py:715: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
+ " self._mappings_capabilities = self._mappings_capabilities.append(\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 United Kingdom85123A\n",
+ "1 United Kingdom71053\n",
+ "2 United Kingdom84406B\n",
+ "3 United Kingdom84029G\n",
+ "4 United Kingdom84029E\n",
+ " ... \n",
+ "14995 United Kingdom72349B\n",
+ "14996 United Kingdom72741\n",
+ "14997 United Kingdom22762\n",
+ "14998 United Kingdom21773\n",
+ "14999 United Kingdom22149\n",
+ "Length: 15000, dtype: object"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 45
+ }
+ ],
+ "source": [
+ "df['Country'] + df['StockCode']"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.6"
+ },
+ "pycharm": {
+ "stem_cell": {
+ "cell_type": "raw",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": []
+ }
+ },
+ "colab": {
+ "provenance": []
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file