{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import eland as ed\n", "from elasticsearch import Elasticsearch" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "HeadApiResponse(False)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "# One endpoint literal feeds both the low-level client and the eland frame.\n", "es_url = 'http://localhost:9200'\n", "flight_columns = [\"AvgTicketPrice\", \"Cancelled\", \"dayOfWeek\", \"timestamp\", \"DestCountry\"]\n", "\n", "es = Elasticsearch(es_url)\n", "ed_df = ed.DataFrame(es_url, 'flights', columns=flight_columns)\n", "\n", "# Last expression of the cell: its value is what gets displayed.\n", "es.indices.exists(index=\"churn\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2024-05-21 09:07:17.882569: read 10000 rows\n", "2024-05-21 09:07:18.375305: read 13059 rows\n" ] } ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "# Convert the eland frame built above; show_progress=True prints progress lines while reading.\n", "pd_df = ed.eland_to_pandas(ed_df, show_progress=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.frame.DataFrame" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show the concrete type of the converted object.\n", "type(pd_df)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/codespace/.python/current/lib/python3.10/site-packages/eland/etl.py:529: FutureWarning: the 'mangle_dupe_cols' keyword is deprecated and will be removed in a future version. Please take steps to stop the use of 'mangle_dupe_cols'\n", " reader = pd.read_csv(filepath_or_buffer, **kwargs)\n", "/home/codespace/.python/current/lib/python3.10/site-packages/eland/etl.py:529: FutureWarning: The squeeze argument has been deprecated and will be removed in a future version. 
Append .squeeze(\"columns\") to the call to squeeze.\n", "\n", "\n", " reader = pd.read_csv(filepath_or_buffer, **kwargs)\n" ] }, { "data": { "text/html": [ "
\n", " | account length | \n", "area code | \n", "churn | \n", "customer service calls | \n", "international plan | \n", "number vmail messages | \n", "phone number | \n", "state | \n", "total day calls | \n", "total day charge | \n", "... | \n", "total eve calls | \n", "total eve charge | \n", "total eve minutes | \n", "total intl calls | \n", "total intl charge | \n", "total intl minutes | \n", "total night calls | \n", "total night charge | \n", "total night minutes | \n", "voice mail plan | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "128 | \n", "415 | \n", "0 | \n", "1 | \n", "no | \n", "25 | \n", "382-4657 | \n", "KS | \n", "110 | \n", "45.07 | \n", "... | \n", "99 | \n", "16.78 | \n", "197.4 | \n", "3 | \n", "2.7 | \n", "10.0 | \n", "91 | \n", "11.01 | \n", "244.7 | \n", "yes | \n", "
1 | \n", "107 | \n", "415 | \n", "0 | \n", "1 | \n", "no | \n", "26 | \n", "371-7191 | \n", "OH | \n", "123 | \n", "27.47 | \n", "... | \n", "103 | \n", "16.62 | \n", "195.5 | \n", "3 | \n", "3.7 | \n", "13.7 | \n", "103 | \n", "11.45 | \n", "254.4 | \n", "yes | \n", "
2 rows × 21 columns
" ], "text/plain": [ " account length area code churn customer service calls \\\n", "0 128 415 0 1 \n", "1 107 415 0 1 \n", "\n", " international plan number vmail messages phone number state \\\n", "0 no 25 382-4657 KS \n", "1 no 26 371-7191 OH \n", "\n", " total day calls total day charge ... total eve calls total eve charge \\\n", "0 110 45.07 ... 99 16.78 \n", "1 123 27.47 ... 103 16.62 \n", "\n", " total eve minutes total intl calls total intl charge total intl minutes \\\n", "0 197.4 3 2.7 10.0 \n", "1 195.5 3 3.7 13.7 \n", "\n", " total night calls total night charge total night minutes voice mail plan \n", "0 91 11.01 244.7 yes \n", "1 103 11.45 254.4 yes \n", "\n", "[2 rows x 21 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "ed.csv_to_eland(\"./test_churn.csv\", es_client='http://localhost:9200', es_dest_index='churn', es_refresh=True, index_col=0)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ObjectApiResponse({'took': 0, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': 1.0, 'hits': [{'_index': 'churn', '_id': '0', '_score': 1.0, '_source': {'state': 'KS', 'account length': 128, 'area code': 415, 'phone number': '382-4657', 'international plan': 'no', 'voice mail plan': 'yes', 'number vmail messages': 25, 'total day minutes': 265.1, 'total day calls': 110, 'total day charge': 45.07, 'total eve minutes': 197.4, 'total eve calls': 99, 'total eve charge': 16.78, 'total night minutes': 244.7, 'total night calls': 91, 'total night charge': 11.01, 'total intl minutes': 10.0, 'total intl calls': 3, 'total intl charge': 2.7, 'customer service calls': 1, 'churn': 0}}]}})" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "es.search(index=\"churn\", 
size=1)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ObjectApiResponse({'acknowledged': True})" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "# Cleanup: remove the 'churn' index using a client configured with ignore_status=[400, 404].\n", "tolerant_es = es.options(ignore_status=[400, 404])\n", "tolerant_es.indices.delete(index='churn')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 4 }