mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
326 lines
9.0 KiB
Plaintext
326 lines
9.0 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import eland as ed\n",
|
||
"from elasticsearch import Elasticsearch"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"False"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"es = Elasticsearch()\n",
|
||
"ed_df = ed.DataFrame('localhost', 'flights', columns = [\"AvgTicketPrice\", \"Cancelled\", \"dayOfWeek\", \"timestamp\", \"DestCountry\"])\n",
|
||
"es.indices.exists(index=\"churn\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2020-10-28 22:01:46.397163: read 10000 rows\n",
|
||
"2020-10-28 22:01:47.100938: read 13059 rows\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# NBVAL_IGNORE_OUTPUT\n",
|
||
"pd_df = ed.eland_to_pandas(ed_df, show_progress=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"pandas.core.frame.DataFrame"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"type(pd_df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "3e3e6e7371be43aabd4f9a2bb62ed737",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"HBox(children=(HTML(value='Progress'), FloatProgress(value=0.0, max=2.0), HTML(value='')))"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>account length</th>\n",
|
||
" <th>area code</th>\n",
|
||
" <th>churn</th>\n",
|
||
" <th>customer service calls</th>\n",
|
||
" <th>international plan</th>\n",
|
||
" <th>number vmail messages</th>\n",
|
||
" <th>phone number</th>\n",
|
||
" <th>state</th>\n",
|
||
" <th>total day calls</th>\n",
|
||
" <th>total day charge</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>total eve calls</th>\n",
|
||
" <th>total eve charge</th>\n",
|
||
" <th>total eve minutes</th>\n",
|
||
" <th>total intl calls</th>\n",
|
||
" <th>total intl charge</th>\n",
|
||
" <th>total intl minutes</th>\n",
|
||
" <th>total night calls</th>\n",
|
||
" <th>total night charge</th>\n",
|
||
" <th>total night minutes</th>\n",
|
||
" <th>voice mail plan</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>128</td>\n",
|
||
" <td>415</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>no</td>\n",
|
||
" <td>25</td>\n",
|
||
" <td>382-4657</td>\n",
|
||
" <td>KS</td>\n",
|
||
" <td>110</td>\n",
|
||
" <td>45.07</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>99</td>\n",
|
||
" <td>16.78</td>\n",
|
||
" <td>197.4</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2.7</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>91</td>\n",
|
||
" <td>11.01</td>\n",
|
||
" <td>244.7</td>\n",
|
||
" <td>yes</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>107</td>\n",
|
||
" <td>415</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>no</td>\n",
|
||
" <td>26</td>\n",
|
||
" <td>371-7191</td>\n",
|
||
" <td>OH</td>\n",
|
||
" <td>123</td>\n",
|
||
" <td>27.47</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>103</td>\n",
|
||
" <td>16.62</td>\n",
|
||
" <td>195.5</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3.7</td>\n",
|
||
" <td>13.7</td>\n",
|
||
" <td>103</td>\n",
|
||
" <td>11.45</td>\n",
|
||
" <td>254.4</td>\n",
|
||
" <td>yes</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>\n",
|
||
"<p>2 rows × 21 columns</p>"
|
||
],
|
||
"text/plain": [
|
||
" account length area code churn customer service calls \\\n",
|
||
"0 128 415 0 1 \n",
|
||
"1 107 415 0 1 \n",
|
||
"\n",
|
||
" international plan number vmail messages phone number state \\\n",
|
||
"0 no 25 382-4657 KS \n",
|
||
"1 no 26 371-7191 OH \n",
|
||
"\n",
|
||
" total day calls total day charge ... total eve calls total eve charge \\\n",
|
||
"0 110 45.07 ... 99 16.78 \n",
|
||
"1 123 27.47 ... 103 16.62 \n",
|
||
"\n",
|
||
" total eve minutes total intl calls total intl charge total intl minutes \\\n",
|
||
"0 197.4 3 2.7 10.0 \n",
|
||
"1 195.5 3 3.7 13.7 \n",
|
||
"\n",
|
||
" total night calls total night charge total night minutes voice mail plan \n",
|
||
"0 91 11.01 244.7 yes \n",
|
||
"1 103 11.45 254.4 yes \n",
|
||
"\n",
|
||
"[2 rows x 21 columns]"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# NBVAL_IGNORE_OUTPUT\n",
|
||
"ed.csv_to_eland(\"./test_churn.csv\", es_client='localhost', es_dest_index='churn', es_refresh=True, index_col=0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'took': 0,\n",
|
||
" 'timed_out': False,\n",
|
||
" '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},\n",
|
||
" 'hits': {'total': {'value': 2, 'relation': 'eq'},\n",
|
||
" 'max_score': 1.0,\n",
|
||
" 'hits': [{'_index': 'churn',\n",
|
||
" '_type': '_doc',\n",
|
||
" '_id': '0',\n",
|
||
" '_score': 1.0,\n",
|
||
" '_source': {'state': 'KS',\n",
|
||
" 'account length': 128,\n",
|
||
" 'area code': 415,\n",
|
||
" 'phone number': '382-4657',\n",
|
||
" 'international plan': 'no',\n",
|
||
" 'voice mail plan': 'yes',\n",
|
||
" 'number vmail messages': 25,\n",
|
||
" 'total day minutes': 265.1,\n",
|
||
" 'total day calls': 110,\n",
|
||
" 'total day charge': 45.07,\n",
|
||
" 'total eve minutes': 197.4,\n",
|
||
" 'total eve calls': 99,\n",
|
||
" 'total eve charge': 16.78,\n",
|
||
" 'total night minutes': 244.7,\n",
|
||
" 'total night calls': 91,\n",
|
||
" 'total night charge': 11.01,\n",
|
||
" 'total intl minutes': 10.0,\n",
|
||
" 'total intl calls': 3,\n",
|
||
" 'total intl charge': 2.7,\n",
|
||
" 'customer service calls': 1,\n",
|
||
" 'churn': 0}}]}}"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# NBVAL_IGNORE_OUTPUT\n",
|
||
"es.search(index=\"churn\", size=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'acknowledged': True}"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"es.indices.delete(index='churn', ignore=[400, 404])"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.5"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|