mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Feature/python 3.5 (#93)
* Adding python 3.5 compatibility. Main issue is ordering of dictionaries. * Updating notebooks with 3.7 results. * Removing temporary code. * Defaulting to OrderedDict for python 3.5 + lint all code All code reformatted by PyCharm and inspection results analysed.
This commit is contained in:
parent
9a2d55f3c8
commit
c5730e6d38
@ -140,7 +140,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -166,7 +170,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -199,7 +207,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -230,7 +242,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -268,7 +284,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -421,7 +441,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -581,7 +605,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -601,7 +629,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -628,7 +660,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -648,7 +684,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -677,7 +717,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -700,12 +744,16 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<eland.index.Index at 0x11214bfd0>"
|
||||
"<eland.index.Index at 0x12036ef90>"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
@ -721,7 +769,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -750,7 +802,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -782,7 +838,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -1023,7 +1083,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## DataFrame.tail"
|
||||
"### DataFrame.tail"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -1242,7 +1302,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1268,7 +1332,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1301,7 +1369,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1332,7 +1404,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1363,7 +1439,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1487,7 +1567,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -1514,7 +1598,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1676,7 +1764,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1836,7 +1928,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1991,7 +2087,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2160,7 +2260,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2233,7 +2337,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2313,7 +2421,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2344,7 +2456,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2382,7 +2498,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2515,7 +2635,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2580,15 +2704,15 @@
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>25%</th>\n",
|
||||
" <td>409.983219</td>\n",
|
||||
" <td>410.008918</td>\n",
|
||||
" <td>2470.545974</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>251.738513</td>\n",
|
||||
" <td>251.944994</td>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50%</th>\n",
|
||||
" <td>640.387285</td>\n",
|
||||
" <td>640.362667</td>\n",
|
||||
" <td>7612.072403</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>503.148975</td>\n",
|
||||
@ -2596,11 +2720,11 @@
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>75%</th>\n",
|
||||
" <td>842.255395</td>\n",
|
||||
" <td>9735.860651</td>\n",
|
||||
" <td>842.254990</td>\n",
|
||||
" <td>9735.660463</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>720.561564</td>\n",
|
||||
" <td>4.230496</td>\n",
|
||||
" <td>4.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>max</th>\n",
|
||||
@ -2621,9 +2745,9 @@
|
||||
"mean 628.253689 7092.142457 ... 511.127842 2.835975\n",
|
||||
"std 266.386661 4578.263193 ... 334.741135 1.939365\n",
|
||||
"min 100.020531 0.000000 ... 0.000000 0.000000\n",
|
||||
"25% 409.983219 2470.545974 ... 251.738513 1.000000\n",
|
||||
"50% 640.387285 7612.072403 ... 503.148975 3.000000\n",
|
||||
"75% 842.255395 9735.860651 ... 720.561564 4.230496\n",
|
||||
"25% 410.008918 2470.545974 ... 251.944994 1.000000\n",
|
||||
"50% 640.362667 7612.072403 ... 503.148975 3.000000\n",
|
||||
"75% 842.254990 9735.660463 ... 720.561564 4.000000\n",
|
||||
"max 1199.729004 19881.482422 ... 1902.901978 6.000000\n",
|
||||
"\n",
|
||||
"[8 rows x 7 columns]"
|
||||
@ -2649,7 +2773,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -2697,7 +2825,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -2759,7 +2891,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2795,7 +2931,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2831,7 +2971,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2860,7 +3004,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2896,7 +3044,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2925,7 +3077,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2961,7 +3117,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -2990,7 +3150,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -3026,7 +3190,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -3049,7 +3217,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -3079,7 +3251,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -3103,7 +3279,7 @@
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Carrier</th>\n",
|
||||
" <th>DestLocation</th>\n",
|
||||
" <th>DestRegion</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>dayOfWeek</th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
@ -3113,7 +3289,7 @@
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 00:00:00</td>\n",
|
||||
@ -3121,7 +3297,7 @@
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 18:27:00</td>\n",
|
||||
@ -3129,7 +3305,7 @@
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 17:11:14</td>\n",
|
||||
@ -3137,7 +3313,7 @@
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 10:33:28</td>\n",
|
||||
@ -3145,7 +3321,7 @@
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 05:13:00</td>\n",
|
||||
@ -3161,7 +3337,7 @@
|
||||
" <tr>\n",
|
||||
" <th>13054</th>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>2018-02-11 20:42:25</td>\n",
|
||||
@ -3169,7 +3345,7 @@
|
||||
" <tr>\n",
|
||||
" <th>13055</th>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>{'lat': '47.464699', 'lon': '8.54917'}</td>\n",
|
||||
" <td>CH-ZH</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>2018-02-11 01:41:57</td>\n",
|
||||
@ -3177,7 +3353,7 @@
|
||||
" <tr>\n",
|
||||
" <th>13056</th>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>{'lat': '51.169997', 'lon': '128.445007'}</td>\n",
|
||||
" <td>RU-AMU</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>2018-02-11 04:09:27</td>\n",
|
||||
@ -3185,7 +3361,7 @@
|
||||
" <tr>\n",
|
||||
" <th>13057</th>\n",
|
||||
" <td>JetBeats</td>\n",
|
||||
" <td>{'lat': '-34.8222', 'lon': '-58.5358'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>2018-02-11 08:28:21</td>\n",
|
||||
@ -3193,44 +3369,31 @@
|
||||
" <tr>\n",
|
||||
" <th>13058</th>\n",
|
||||
" <td>JetBeats</td>\n",
|
||||
" <td>{'lat': '38.94449997', 'lon': '-77.45580292'}</td>\n",
|
||||
" <td>US-DC</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>2018-02-11 14:54:34</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>13059 rows × 21 columns</p>\n",
|
||||
"<p>13059 rows × 20 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Carrier DestLocation ... dayOfWeek \\\n",
|
||||
"0 Kibana Airlines {'lat': '-33.94609833', 'lon': '151.177002'} ... 0 \n",
|
||||
"1 Logstash Airways {'lat': '45.505299', 'lon': '12.3519'} ... 0 \n",
|
||||
"2 Logstash Airways {'lat': '45.505299', 'lon': '12.3519'} ... 0 \n",
|
||||
"3 Kibana Airlines {'lat': '45.648399', 'lon': '12.1944'} ... 0 \n",
|
||||
"4 Kibana Airlines {'lat': '34.447102', 'lon': '108.751999'} ... 0 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"13054 Logstash Airways {'lat': '34.447102', 'lon': '108.751999'} ... 6 \n",
|
||||
"13055 Logstash Airways {'lat': '47.464699', 'lon': '8.54917'} ... 6 \n",
|
||||
"13056 Logstash Airways {'lat': '51.169997', 'lon': '128.445007'} ... 6 \n",
|
||||
"13057 JetBeats {'lat': '-34.8222', 'lon': '-58.5358'} ... 6 \n",
|
||||
"13058 JetBeats {'lat': '38.94449997', 'lon': '-77.45580292'} ... 6 \n",
|
||||
" Carrier DestRegion ... dayOfWeek timestamp\n",
|
||||
"0 Kibana Airlines SE-BD ... 0 2018-01-01 00:00:00\n",
|
||||
"1 Logstash Airways IT-34 ... 0 2018-01-01 18:27:00\n",
|
||||
"2 Logstash Airways IT-34 ... 0 2018-01-01 17:11:14\n",
|
||||
"3 Kibana Airlines IT-34 ... 0 2018-01-01 10:33:28\n",
|
||||
"4 Kibana Airlines SE-BD ... 0 2018-01-01 05:13:00\n",
|
||||
"... ... ... ... ... ...\n",
|
||||
"13054 Logstash Airways SE-BD ... 6 2018-02-11 20:42:25\n",
|
||||
"13055 Logstash Airways CH-ZH ... 6 2018-02-11 01:41:57\n",
|
||||
"13056 Logstash Airways RU-AMU ... 6 2018-02-11 04:09:27\n",
|
||||
"13057 JetBeats SE-BD ... 6 2018-02-11 08:28:21\n",
|
||||
"13058 JetBeats US-DC ... 6 2018-02-11 14:54:34\n",
|
||||
"\n",
|
||||
" timestamp \n",
|
||||
"0 2018-01-01 00:00:00 \n",
|
||||
"1 2018-01-01 18:27:00 \n",
|
||||
"2 2018-01-01 17:11:14 \n",
|
||||
"3 2018-01-01 10:33:28 \n",
|
||||
"4 2018-01-01 05:13:00 \n",
|
||||
"... ... \n",
|
||||
"13054 2018-02-11 20:42:25 \n",
|
||||
"13055 2018-02-11 01:41:57 \n",
|
||||
"13056 2018-02-11 04:09:27 \n",
|
||||
"13057 2018-02-11 08:28:21 \n",
|
||||
"13058 2018-02-11 14:54:34 \n",
|
||||
"\n",
|
||||
"[13059 rows x 21 columns]"
|
||||
"[13059 rows x 20 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 53,
|
||||
@ -3241,6 +3404,7 @@
|
||||
"source": [
|
||||
"pd_flights.drop(columns=['AvgTicketPrice', \n",
|
||||
" 'Cancelled', \n",
|
||||
" 'DestLocation',\n",
|
||||
" 'Dest', \n",
|
||||
" 'DestAirportID', \n",
|
||||
" 'DestCityName', \n",
|
||||
@ -3250,7 +3414,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -3274,7 +3442,7 @@
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Carrier</th>\n",
|
||||
" <th>DestLocation</th>\n",
|
||||
" <th>DestRegion</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>dayOfWeek</th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
@ -3284,7 +3452,7 @@
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>{'lon': '151.177002', 'lat': '-33.94609833'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 00:00:00</td>\n",
|
||||
@ -3292,7 +3460,7 @@
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>{'lon': '12.3519', 'lat': '45.505299'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 18:27:00</td>\n",
|
||||
@ -3300,7 +3468,7 @@
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>{'lon': '12.3519', 'lat': '45.505299'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 17:11:14</td>\n",
|
||||
@ -3308,7 +3476,7 @@
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>{'lon': '12.1944', 'lat': '45.648399'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 10:33:28</td>\n",
|
||||
@ -3316,7 +3484,7 @@
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>{'lon': '108.751999', 'lat': '34.447102'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 05:13:00</td>\n",
|
||||
@ -3332,7 +3500,7 @@
|
||||
" <tr>\n",
|
||||
" <th>13054</th>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>{'lon': '108.751999', 'lat': '34.447102'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>2018-02-11 20:42:25</td>\n",
|
||||
@ -3340,7 +3508,7 @@
|
||||
" <tr>\n",
|
||||
" <th>13055</th>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>{'lon': '8.54917', 'lat': '47.464699'}</td>\n",
|
||||
" <td>CH-ZH</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>2018-02-11 01:41:57</td>\n",
|
||||
@ -3348,7 +3516,7 @@
|
||||
" <tr>\n",
|
||||
" <th>13056</th>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>{'lon': '128.445007', 'lat': '51.169997'}</td>\n",
|
||||
" <td>RU-AMU</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>2018-02-11 04:09:27</td>\n",
|
||||
@ -3356,7 +3524,7 @@
|
||||
" <tr>\n",
|
||||
" <th>13057</th>\n",
|
||||
" <td>JetBeats</td>\n",
|
||||
" <td>{'lon': '-58.5358', 'lat': '-34.8222'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>2018-02-11 08:28:21</td>\n",
|
||||
@ -3364,7 +3532,7 @@
|
||||
" <tr>\n",
|
||||
" <th>13058</th>\n",
|
||||
" <td>JetBeats</td>\n",
|
||||
" <td>{'lon': '-77.45580292', 'lat': '38.94449997'}</td>\n",
|
||||
" <td>US-DC</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>2018-02-11 14:54:34</td>\n",
|
||||
@ -3372,36 +3540,23 @@
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>\n",
|
||||
"<p>13059 rows × 21 columns</p>"
|
||||
"<p>13059 rows × 20 columns</p>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Carrier DestLocation ... dayOfWeek \\\n",
|
||||
"0 Kibana Airlines {'lon': '151.177002', 'lat': '-33.94609833'} ... 0 \n",
|
||||
"1 Logstash Airways {'lon': '12.3519', 'lat': '45.505299'} ... 0 \n",
|
||||
"2 Logstash Airways {'lon': '12.3519', 'lat': '45.505299'} ... 0 \n",
|
||||
"3 Kibana Airlines {'lon': '12.1944', 'lat': '45.648399'} ... 0 \n",
|
||||
"4 Kibana Airlines {'lon': '108.751999', 'lat': '34.447102'} ... 0 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"13054 Logstash Airways {'lon': '108.751999', 'lat': '34.447102'} ... 6 \n",
|
||||
"13055 Logstash Airways {'lon': '8.54917', 'lat': '47.464699'} ... 6 \n",
|
||||
"13056 Logstash Airways {'lon': '128.445007', 'lat': '51.169997'} ... 6 \n",
|
||||
"13057 JetBeats {'lon': '-58.5358', 'lat': '-34.8222'} ... 6 \n",
|
||||
"13058 JetBeats {'lon': '-77.45580292', 'lat': '38.94449997'} ... 6 \n",
|
||||
" Carrier DestRegion ... dayOfWeek timestamp\n",
|
||||
"0 Kibana Airlines SE-BD ... 0 2018-01-01 00:00:00\n",
|
||||
"1 Logstash Airways IT-34 ... 0 2018-01-01 18:27:00\n",
|
||||
"2 Logstash Airways IT-34 ... 0 2018-01-01 17:11:14\n",
|
||||
"3 Kibana Airlines IT-34 ... 0 2018-01-01 10:33:28\n",
|
||||
"4 Kibana Airlines SE-BD ... 0 2018-01-01 05:13:00\n",
|
||||
"... ... ... ... ... ...\n",
|
||||
"13054 Logstash Airways SE-BD ... 6 2018-02-11 20:42:25\n",
|
||||
"13055 Logstash Airways CH-ZH ... 6 2018-02-11 01:41:57\n",
|
||||
"13056 Logstash Airways RU-AMU ... 6 2018-02-11 04:09:27\n",
|
||||
"13057 JetBeats SE-BD ... 6 2018-02-11 08:28:21\n",
|
||||
"13058 JetBeats US-DC ... 6 2018-02-11 14:54:34\n",
|
||||
"\n",
|
||||
" timestamp \n",
|
||||
"0 2018-01-01 00:00:00 \n",
|
||||
"1 2018-01-01 18:27:00 \n",
|
||||
"2 2018-01-01 17:11:14 \n",
|
||||
"3 2018-01-01 10:33:28 \n",
|
||||
"4 2018-01-01 05:13:00 \n",
|
||||
"... ... \n",
|
||||
"13054 2018-02-11 20:42:25 \n",
|
||||
"13055 2018-02-11 01:41:57 \n",
|
||||
"13056 2018-02-11 04:09:27 \n",
|
||||
"13057 2018-02-11 08:28:21 \n",
|
||||
"13058 2018-02-11 14:54:34 \n",
|
||||
"\n",
|
||||
"[13059 rows x 21 columns]"
|
||||
"[13059 rows x 20 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 54,
|
||||
@ -3412,6 +3567,7 @@
|
||||
"source": [
|
||||
"ed_flights.drop(columns=['AvgTicketPrice', \n",
|
||||
" 'Cancelled', \n",
|
||||
" 'DestLocation',\n",
|
||||
" 'Dest', \n",
|
||||
" 'DestAirportID', \n",
|
||||
" 'DestCityName', \n",
|
||||
@ -3428,7 +3584,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -3451,7 +3611,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -3481,7 +3645,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ed_flights2 = ed_flights[(ed_flights.OriginAirportID == 'AMS') & (ed_flights.FlightDelayMin > 60)]\n",
|
||||
@ -3492,7 +3660,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -3537,7 +3709,7 @@
|
||||
" size: 5\n",
|
||||
" sort_params: _doc:desc\n",
|
||||
" _source: ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']\n",
|
||||
" body: {'query': {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}, 'aggs': {}}\n",
|
||||
" body: {'query': {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}}\n",
|
||||
" post_processing: [('sort_index')]\n",
|
||||
"'field_to_display_names': {}\n",
|
||||
"'display_to_field_names': {}\n",
|
||||
|
@ -5,7 +5,7 @@ Examples
|
||||
========
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:maxdepth: 3
|
||||
|
||||
demo_notebook
|
||||
online_retail_analysis
|
||||
|
@ -176,7 +176,7 @@
|
||||
" size: None\n",
|
||||
" sort_params: None\n",
|
||||
" _source: None\n",
|
||||
" body: {'aggs': {}}\n",
|
||||
" body: {}\n",
|
||||
" post_processing: []\n",
|
||||
"'field_to_display_names': {}\n",
|
||||
"'display_to_field_names': {}\n",
|
||||
@ -308,7 +308,7 @@
|
||||
" size: 2\n",
|
||||
" sort_params: _doc:desc\n",
|
||||
" _source: None\n",
|
||||
" body: {'aggs': {}}\n",
|
||||
" body: {}\n",
|
||||
" post_processing: [('sort_index'), ('head': ('count': 2)), ('tail': ('count': 2))]\n",
|
||||
"'field_to_display_names': {}\n",
|
||||
"'display_to_field_names': {}\n",
|
||||
@ -813,7 +813,7 @@
|
||||
" size: None\n",
|
||||
" sort_params: None\n",
|
||||
" _source: None\n",
|
||||
" body: {'query': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}, 'aggs': {}}\n",
|
||||
" body: {'query': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}}\n",
|
||||
" post_processing: []\n",
|
||||
"'field_to_display_names': {}\n",
|
||||
"'display_to_field_names': {}\n",
|
||||
@ -1037,23 +1037,23 @@
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>25%</th>\n",
|
||||
" <td>14220.581670</td>\n",
|
||||
" <td>14220.529879</td>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" <td>1.250000</td>\n",
|
||||
" <td>3756.500000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50%</th>\n",
|
||||
" <td>15666.545935</td>\n",
|
||||
" <td>15661.227460</td>\n",
|
||||
" <td>2.000000</td>\n",
|
||||
" <td>2.510000</td>\n",
|
||||
" <td>7498.861278</td>\n",
|
||||
" <td>7499.363732</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>75%</th>\n",
|
||||
" <td>17213.978376</td>\n",
|
||||
" <td>6.614054</td>\n",
|
||||
" <td>4.215516</td>\n",
|
||||
" <td>17214.478439</td>\n",
|
||||
" <td>6.613198</td>\n",
|
||||
" <td>4.210000</td>\n",
|
||||
" <td>11249.500000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -1073,9 +1073,9 @@
|
||||
"mean 15590.776680 7.464000 4.103233 7499.500000\n",
|
||||
"std 1764.025160 85.924387 20.104873 4330.127009\n",
|
||||
"min 12347.000000 -9360.000000 0.000000 0.000000\n",
|
||||
"25% 14220.581670 1.000000 1.250000 3756.500000\n",
|
||||
"50% 15666.545935 2.000000 2.510000 7498.861278\n",
|
||||
"75% 17213.978376 6.614054 4.215516 11249.500000\n",
|
||||
"25% 14220.529879 1.000000 1.250000 3756.500000\n",
|
||||
"50% 15661.227460 2.000000 2.510000 7499.363732\n",
|
||||
"75% 17214.478439 6.613198 4.210000 11249.500000\n",
|
||||
"max 18239.000000 2880.000000 950.990000 14999.000000"
|
||||
]
|
||||
},
|
||||
|
@ -48,3 +48,5 @@ In general, the data resides in elasticsearch and not in memory, which allows el
|
||||
|
||||
* :doc:`examples/index`
|
||||
|
||||
* :doc:`examples/demo_notebook`
|
||||
* :doc:`examples/online_retail_analysis`
|
||||
|
@ -3,6 +3,9 @@ from abc import ABC, abstractmethod
|
||||
# -------------------------------------------------------------------------------------------------------------------- #
|
||||
# PostProcessingActions #
|
||||
# -------------------------------------------------------------------------------------------------------------------- #
|
||||
from eland import SortOrder
|
||||
|
||||
|
||||
class PostProcessingAction(ABC):
|
||||
def __init__(self, action_type):
|
||||
"""
|
||||
@ -27,6 +30,7 @@ class PostProcessingAction(ABC):
|
||||
def __repr__(self):
|
||||
pass
|
||||
|
||||
|
||||
class SortIndexAction(PostProcessingAction):
|
||||
def __init__(self):
|
||||
super().__init__("sort_index")
|
||||
@ -37,6 +41,7 @@ class SortIndexAction(PostProcessingAction):
|
||||
def __repr__(self):
|
||||
return "('{}')".format(self.type)
|
||||
|
||||
|
||||
class HeadAction(PostProcessingAction):
|
||||
def __init__(self, count):
|
||||
super().__init__("head")
|
||||
@ -76,10 +81,10 @@ class SortFieldAction(PostProcessingAction):
|
||||
raise ValueError("Expected ES sort params string (e.g. _doc:desc). Got '{}'".format(sort_params_string))
|
||||
|
||||
self._sort_field = sort_params[0]
|
||||
self._sort_order = Operations.SortOrder.from_string(sort_params[1])
|
||||
self._sort_order = SortOrder.from_string(sort_params[1])
|
||||
|
||||
def resolve_action(self, df):
|
||||
if self._sort_order == Operations.SortOrder.ASC:
|
||||
if self._sort_order == SortOrder.ASC:
|
||||
return df.sort_values(self._sort_field, True)
|
||||
return df.sort_values(self._sort_field, False)
|
||||
|
||||
|
@ -13,6 +13,8 @@
|
||||
# limitations under the License.
|
||||
|
||||
# Default number of rows displayed (different to pandas where ALL could be displayed)
|
||||
from enum import Enum
|
||||
|
||||
DEFAULT_NUM_ROWS_DISPLAYED = 60
|
||||
|
||||
|
||||
@ -22,3 +24,29 @@ def docstring_parameter(*sub):
|
||||
return obj
|
||||
|
||||
return dec
|
||||
|
||||
|
||||
class SortOrder(Enum):
    """Direction of a sort: ascending (``ASC``) or descending (``DESC``).

    The static helpers convert between members and the lower-case strings
    ``"asc"`` / ``"desc"``.
    """

    ASC = 0
    DESC = 1

    @staticmethod
    def reverse(order):
        """Return the opposite direction.

        ``SortOrder.ASC`` maps to ``SortOrder.DESC``; any other value maps to
        ``SortOrder.ASC``.
        """
        if order == SortOrder.ASC:
            return SortOrder.DESC

        return SortOrder.ASC

    @staticmethod
    def to_string(order):
        """Return ``"asc"`` for ``SortOrder.ASC`` and ``"desc"`` for anything else."""
        if order == SortOrder.ASC:
            return "asc"

        return "desc"

    @staticmethod
    def from_string(order):
        """Return ``SortOrder.ASC`` for exactly ``"asc"``, otherwise ``SortOrder.DESC``.

        The comparison is case-sensitive and unrecognised strings are not
        rejected; they fall through to ``SortOrder.DESC``.
        """
        if order == "asc":
            return SortOrder.ASC

        return SortOrder.DESC
|
||||
|
17
eland/compat.py
Normal file
17
eland/compat.py
Normal file
@ -0,0 +1,17 @@
|
||||
# Copyright 2019 Elasticsearch BV
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
|
||||
PY36 = sys.version_info >= (3, 6)
|
@ -27,6 +27,7 @@ from pandas.io.common import _expand_user, _stringify_path
|
||||
from pandas.io.formats import console
|
||||
from pandas.io.formats import format as fmt
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
from pandas.util._validators import validate_bool_kwarg
|
||||
|
||||
import eland.plotting as gfx
|
||||
from eland import NDFrame
|
||||
@ -255,6 +256,151 @@ class DataFrame(NDFrame):
|
||||
"""
|
||||
return DataFrame(query_compiler=self._query_compiler.tail(n))
|
||||
|
||||
def drop(
        self,
        labels=None,
        axis=0,
        index=None,
        columns=None,
        level=None,
        inplace=False,
        errors="raise",
):
    """Return new object with labels in requested axis removed.

    Parameters
    ----------
    labels:
        Index or column labels to drop.
    axis:
        Whether to drop labels from the index (0 / 'index') or columns (1 / 'columns').
    index, columns:
        Alternative to specifying axis (labels, axis=1 is equivalent to columns=labels).
    level:
        For MultiIndex - not supported
    inplace:
        If True, do operation inplace and return None.
    errors:
        If 'ignore', suppress error and existing labels are dropped.
        Ignoring missing labels is only implemented for columns; for index
        labels any value other than 'raise' raises NotImplementedError.

    Returns
    -------
    dropped:
        type of caller

    Raises
    ------
    NotImplementedError
        If ``level`` is given, or if index labels are dropped with ``errors`` != 'raise'.
    ValueError
        If ``labels`` is combined with ``index``/``columns``, if none of the
        three is given, or (with ``errors='raise'``) if a requested label
        does not exist.

    See Also
    --------
    :pandas_api_docs:`pandas.DataFrame.drop`

    Examples
    --------
    Drop a column

    >>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'email', 'user'])
    >>> df.drop(columns=['user'])
          customer_first_name                       email
    0                   Eddie  eddie@underwood-family.zzz
    1                    Mary      mary@bailey-family.zzz
    2                    Gwen      gwen@butler-family.zzz
    3                   Diane   diane@chandler-family.zzz
    4                   Eddie      eddie@weber-family.zzz
    ...                   ...                         ...
    4670                 Mary     mary@lambert-family.zzz
    4671                  Jim      jim@gilbert-family.zzz
    4672                Yahya     yahya@rivera-family.zzz
    4673                 Mary     mary@hampton-family.zzz
    4674              Jackson  jackson@hopkins-family.zzz
    <BLANKLINE>
    [4675 rows x 2 columns]

    Drop rows by index value (axis=0)

    >>> df.drop(['1', '2'])
          customer_first_name                       email     user
    0                   Eddie  eddie@underwood-family.zzz    eddie
    3                   Diane   diane@chandler-family.zzz    diane
    4                   Eddie      eddie@weber-family.zzz    eddie
    5                   Diane    diane@goodwin-family.zzz    diane
    6                  Oliver      oliver@rios-family.zzz   oliver
    ...                   ...                         ...      ...
    4670                 Mary     mary@lambert-family.zzz     mary
    4671                  Jim      jim@gilbert-family.zzz      jim
    4672                Yahya     yahya@rivera-family.zzz    yahya
    4673                 Mary     mary@hampton-family.zzz     mary
    4674              Jackson  jackson@hopkins-family.zzz  jackson
    <BLANKLINE>
    [4673 rows x 3 columns]
    """
    # Level not supported
    if level is not None:
        raise NotImplementedError("level not supported {}".format(level))

    inplace = validate_bool_kwarg(inplace, "inplace")

    # Normalise the (labels, axis) / (index, columns) calling styles into a
    # single mapping keyed by axis name.
    if labels is not None:
        if index is not None or columns is not None:
            raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
        axis = pd.DataFrame()._get_axis_name(axis)
        axes = {axis: labels}
    elif index is not None or columns is not None:
        axes, _ = pd.DataFrame()._construct_axes_from_arguments(
            (index, columns), {}
        )
    else:
        raise ValueError(
            "Need to specify at least one of 'labels', 'index' or 'columns'"
        )

    # TODO Clean up this error checking
    if "index" not in axes:
        axes["index"] = None
    elif axes["index"] is not None:
        if not is_list_like(axes["index"]):
            axes["index"] = [axes["index"]]
        if errors == "raise":
            # Check if axes['index'] values exists in index
            count = self._query_compiler._index_matches_count(axes["index"])
            if count != len(axes["index"]):
                raise ValueError(
                    "number of labels {}!={} not contained in axis".format(count, len(axes["index"]))
                )
        else:
            # Silently skipping missing index labels is not implemented yet
            # (it would need the query compiler to resolve which of the
            # requested index values actually exist).
            raise NotImplementedError()

    if "columns" not in axes:
        axes["columns"] = None
    elif axes["columns"] is not None:
        if not is_list_like(axes["columns"]):
            axes["columns"] = [axes["columns"]]
        if errors == "raise":
            non_existant = [
                obj for obj in axes["columns"] if obj not in self.columns
            ]
            if non_existant:
                raise ValueError(
                    "labels {} not contained in axis".format(non_existant)
                )
        else:
            axes["columns"] = [
                obj for obj in axes["columns"] if obj in self.columns
            ]
            # If the length is zero, we will just do nothing
            if not axes["columns"]:
                axes["columns"] = None

    new_query_compiler = self._query_compiler.drop(
        index=axes["index"], columns=axes["columns"]
    )
    return self._create_or_update_from_compiler(new_query_compiler, inplace)
|
||||
|
||||
def __getitem__(self, key):
    """Support ``obj[key]`` by delegating to ``self._getitem(key)`` and returning its result."""
    return self._getitem(key)
|
||||
|
||||
def __repr__(self):
|
||||
"""
|
||||
From pandas
|
||||
@ -312,7 +458,8 @@ class DataFrame(NDFrame):
|
||||
max_rows = min_rows
|
||||
|
||||
return self.to_html(max_rows=max_rows, max_cols=max_cols,
|
||||
show_dimensions=show_dimensions, notebook=True) # set for consistency with pandas output
|
||||
show_dimensions=show_dimensions,
|
||||
notebook=True) # set for consistency with pandas output
|
||||
else:
|
||||
return None
|
||||
|
||||
@ -417,7 +564,7 @@ class DataFrame(NDFrame):
|
||||
size: 5
|
||||
sort_params: _doc:desc
|
||||
_source: ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']
|
||||
body: {'query': {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}, 'aggs': {}}
|
||||
body: {'query': {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}}
|
||||
post_processing: [('sort_index')]
|
||||
'field_to_display_names': {}
|
||||
'display_to_field_names': {}
|
||||
|
@ -24,10 +24,10 @@ class BooleanFilter:
|
||||
if isinstance(self, AndFilter):
|
||||
if 'must_not' in x.subtree:
|
||||
# nest a must_not under a must
|
||||
self.subtree['must'].append(x.build()) # 'build includes bool'
|
||||
self.subtree['must'].append(x.build()) # 'build includes bool'
|
||||
else:
|
||||
# append a must to a must
|
||||
self.subtree['must'].append(x.subtree) # 'subtree strips bool'
|
||||
self.subtree['must'].append(x.subtree) # 'subtree strips bool'
|
||||
return self
|
||||
elif isinstance(x, AndFilter):
|
||||
if 'must_not' in self.subtree:
|
||||
|
@ -11,8 +11,8 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import warnings
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
@ -66,7 +66,7 @@ class Mappings:
|
||||
"""
|
||||
|
||||
# here we keep track of the format of any date fields
|
||||
self._date_fields_format = {}
|
||||
self._date_fields_format = dict()
|
||||
if (client is not None) and (index_pattern is not None):
|
||||
get_mapping = client.get_mapping(index=index_pattern)
|
||||
|
||||
@ -86,7 +86,8 @@ class Mappings:
|
||||
|
||||
# Cache source field types for efficient lookup
|
||||
# (this massively improves performance of DataFrame.flatten)
|
||||
self._source_field_pd_dtypes = {}
|
||||
|
||||
self._source_field_pd_dtypes = OrderedDict()
|
||||
|
||||
for field_name in self._mappings_capabilities[self._mappings_capabilities._source].index:
|
||||
pd_dtype = self._mappings_capabilities.loc[field_name]['pd_dtype']
|
||||
@ -135,14 +136,14 @@ class Mappings:
|
||||
|
||||
Returns
|
||||
-------
|
||||
fields, dates_format: tuple(dict, dict)
|
||||
fields, dates_format: tuple(OrderedDict, dict)
|
||||
where:
|
||||
fields: Dict of field names and types
|
||||
fields: OrderedDict of field names and types
|
||||
dates_format: Dict of date field names and format
|
||||
|
||||
"""
|
||||
fields = {}
|
||||
dates_format = {}
|
||||
fields = OrderedDict()
|
||||
dates_format = dict()
|
||||
|
||||
# Recurse until we get a 'type: xxx'
|
||||
def flatten(x, name=''):
|
||||
@ -206,7 +207,7 @@ class Mappings:
|
||||
all_fields_caps_fields = all_fields_caps['fields']
|
||||
|
||||
field_names = ['_source', 'es_dtype', 'pd_dtype', 'searchable', 'aggregatable']
|
||||
capability_matrix = {}
|
||||
capability_matrix = OrderedDict()
|
||||
|
||||
for field, field_caps in all_fields_caps_fields.items():
|
||||
if field in all_fields:
|
||||
@ -353,7 +354,7 @@ class Mappings:
|
||||
else:
|
||||
es_dtype = Mappings._pd_dtype_to_es_dtype(dtype)
|
||||
|
||||
mappings['properties'][field_name_name] = {}
|
||||
mappings['properties'][field_name_name] = OrderedDict()
|
||||
mappings['properties'][field_name_name]['type'] = es_dtype
|
||||
|
||||
return {"mappings": mappings}
|
||||
@ -401,8 +402,8 @@ class Mappings:
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict
|
||||
A dictionary (for date fields) containing the mapping {field_name:format}
|
||||
str
|
||||
A string (for date fields) containing the date format for the field
|
||||
"""
|
||||
return self._date_fields_format.get(field_name)
|
||||
|
||||
@ -460,12 +461,12 @@ class Mappings:
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict
|
||||
OrderedDict
|
||||
e.g. {'customer_full_name': 'customer_full_name.keyword', ...}
|
||||
"""
|
||||
if field_names is None:
|
||||
field_names = self.source_fields()
|
||||
aggregatables = {}
|
||||
aggregatables = OrderedDict()
|
||||
for field_name in field_names:
|
||||
capabilities = self.field_capabilities(field_name)
|
||||
if capabilities['aggregatable']:
|
||||
@ -478,7 +479,7 @@ class Mappings:
|
||||
aggregatables[field_name_keyword] = field_name
|
||||
|
||||
if not aggregatables:
|
||||
raise ValueError("Aggregations not supported for ", field_name)
|
||||
raise ValueError("Aggregations not supported for ", field_names)
|
||||
|
||||
return aggregatables
|
||||
|
||||
@ -533,11 +534,15 @@ class Mappings:
|
||||
Source field name + pd_dtype as np.dtype
|
||||
"""
|
||||
if field_names is not None:
|
||||
return pd.Series(
|
||||
{key: np.dtype(self._source_field_pd_dtypes[key]) for key in field_names})
|
||||
data = OrderedDict()
|
||||
for key in field_names:
|
||||
data[key] = np.dtype(self._source_field_pd_dtypes[key])
|
||||
return pd.Series(data)
|
||||
|
||||
return pd.Series(
|
||||
{key: np.dtype(value) for key, value in self._source_field_pd_dtypes.items()})
|
||||
data = OrderedDict()
|
||||
for key, value in self._source_field_pd_dtypes.items():
|
||||
data[key] = np.dtype(value)
|
||||
return pd.Series(data)
|
||||
|
||||
def info_es(self, buf):
|
||||
buf.write("Mappings:\n")
|
||||
|
203
eland/ndframe.py
203
eland/ndframe.py
@ -1,3 +1,22 @@
|
||||
# Copyright 2019 Elasticsearch BV
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from eland import QueryCompiler
|
||||
|
||||
"""
|
||||
NDFrame
|
||||
---------
|
||||
@ -23,29 +42,6 @@ only Elasticsearch aggregatable fields can be aggregated or grouped.
|
||||
|
||||
"""
|
||||
|
||||
# Copyright 2019 Elasticsearch BV
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
from abc import ABC
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.dtypes.common import is_list_like
|
||||
from pandas.util._validators import validate_bool_kwarg
|
||||
|
||||
from eland import ElandQueryCompiler
|
||||
|
||||
|
||||
class NDFrame(ABC):
|
||||
|
||||
@ -64,8 +60,8 @@ class NDFrame(ABC):
|
||||
A reference to an Elasticsearch python client
|
||||
"""
|
||||
if query_compiler is None:
|
||||
query_compiler = ElandQueryCompiler(client=client, index_pattern=index_pattern, field_names=columns,
|
||||
index_field=index_field)
|
||||
query_compiler = QueryCompiler(client=client, index_pattern=index_pattern, field_names=columns,
|
||||
index_field=index_field)
|
||||
self._query_compiler = query_compiler
|
||||
|
||||
def _get_index(self):
|
||||
@ -139,9 +135,6 @@ class NDFrame(ABC):
|
||||
|
||||
return head.append(tail)
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._getitem(key)
|
||||
|
||||
def __sizeof__(self):
|
||||
# Don't default to pandas, just return approximation TODO - make this more accurate
|
||||
return sys.getsizeof(self._query_compiler)
|
||||
@ -157,148 +150,6 @@ class NDFrame(ABC):
|
||||
def _info_es(self, buf):
|
||||
self._query_compiler.info_es(buf)
|
||||
|
||||
def drop(
|
||||
self,
|
||||
labels=None,
|
||||
axis=0,
|
||||
index=None,
|
||||
columns=None,
|
||||
level=None,
|
||||
inplace=False,
|
||||
errors="raise",
|
||||
):
|
||||
"""Return new object with labels in requested axis removed.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels:
|
||||
Index or column labels to drop.
|
||||
axis:
|
||||
Whether to drop labels from the index (0 / 'index') or columns (1 / 'columns').
|
||||
index, columns:
|
||||
Alternative to specifying axis (labels, axis=1 is equivalent to columns=labels).
|
||||
level:
|
||||
For MultiIndex - not supported
|
||||
inplace:
|
||||
If True, do operation inplace and return None.
|
||||
errors:
|
||||
If 'ignore', suppress error and existing labels are dropped.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dropped:
|
||||
type of caller
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`pandas.DataFrame.drop`
|
||||
|
||||
Examples
|
||||
--------
|
||||
Drop a column
|
||||
|
||||
>>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'email', 'user'])
|
||||
>>> df.drop(columns=['user'])
|
||||
customer_first_name email
|
||||
0 Eddie eddie@underwood-family.zzz
|
||||
1 Mary mary@bailey-family.zzz
|
||||
2 Gwen gwen@butler-family.zzz
|
||||
3 Diane diane@chandler-family.zzz
|
||||
4 Eddie eddie@weber-family.zzz
|
||||
... ... ...
|
||||
4670 Mary mary@lambert-family.zzz
|
||||
4671 Jim jim@gilbert-family.zzz
|
||||
4672 Yahya yahya@rivera-family.zzz
|
||||
4673 Mary mary@hampton-family.zzz
|
||||
4674 Jackson jackson@hopkins-family.zzz
|
||||
<BLANKLINE>
|
||||
[4675 rows x 2 columns]
|
||||
|
||||
Drop rows by index value (axis=0)
|
||||
|
||||
>>> df.drop(['1', '2'])
|
||||
customer_first_name email user
|
||||
0 Eddie eddie@underwood-family.zzz eddie
|
||||
3 Diane diane@chandler-family.zzz diane
|
||||
4 Eddie eddie@weber-family.zzz eddie
|
||||
5 Diane diane@goodwin-family.zzz diane
|
||||
6 Oliver oliver@rios-family.zzz oliver
|
||||
... ... ... ...
|
||||
4670 Mary mary@lambert-family.zzz mary
|
||||
4671 Jim jim@gilbert-family.zzz jim
|
||||
4672 Yahya yahya@rivera-family.zzz yahya
|
||||
4673 Mary mary@hampton-family.zzz mary
|
||||
4674 Jackson jackson@hopkins-family.zzz jackson
|
||||
<BLANKLINE>
|
||||
[4673 rows x 3 columns]
|
||||
"""
|
||||
# Level not supported
|
||||
if level is not None:
|
||||
raise NotImplementedError("level not supported {}".format(level))
|
||||
|
||||
inplace = validate_bool_kwarg(inplace, "inplace")
|
||||
if labels is not None:
|
||||
if index is not None or columns is not None:
|
||||
raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
|
||||
axis = pd.DataFrame()._get_axis_name(axis)
|
||||
axes = {axis: labels}
|
||||
elif index is not None or columns is not None:
|
||||
axes, _ = pd.DataFrame()._construct_axes_from_arguments(
|
||||
(index, columns), {}
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Need to specify at least one of 'labels', 'index' or 'columns'"
|
||||
)
|
||||
|
||||
# TODO Clean up this error checking
|
||||
if "index" not in axes:
|
||||
axes["index"] = None
|
||||
elif axes["index"] is not None:
|
||||
if not is_list_like(axes["index"]):
|
||||
axes["index"] = [axes["index"]]
|
||||
if errors == "raise":
|
||||
# Check if axes['index'] values exists in index
|
||||
count = self._query_compiler._index_matches_count(axes["index"])
|
||||
if count != len(axes["index"]):
|
||||
raise ValueError(
|
||||
"number of labels {}!={} not contained in axis".format(count, len(axes["index"]))
|
||||
)
|
||||
else:
|
||||
"""
|
||||
axes["index"] = self._query_compiler.index_matches(axes["index"])
|
||||
# If the length is zero, we will just do nothing
|
||||
if not len(axes["index"]):
|
||||
axes["index"] = None
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
if "columns" not in axes:
|
||||
axes["columns"] = None
|
||||
elif axes["columns"] is not None:
|
||||
if not is_list_like(axes["columns"]):
|
||||
axes["columns"] = [axes["columns"]]
|
||||
if errors == "raise":
|
||||
non_existant = [
|
||||
obj for obj in axes["columns"] if obj not in self.columns
|
||||
]
|
||||
if len(non_existant):
|
||||
raise ValueError(
|
||||
"labels {} not contained in axis".format(non_existant)
|
||||
)
|
||||
else:
|
||||
axes["columns"] = [
|
||||
obj for obj in axes["columns"] if obj in self.columns
|
||||
]
|
||||
# If the length is zero, we will just do nothing
|
||||
if not len(axes["columns"]):
|
||||
axes["columns"] = None
|
||||
|
||||
new_query_compiler = self._query_compiler.drop(
|
||||
index=axes["index"], columns=axes["columns"]
|
||||
)
|
||||
return self._create_or_update_from_compiler(new_query_compiler, inplace)
|
||||
|
||||
def mean(self, numeric_only=True):
|
||||
"""
|
||||
Return mean value for each numeric column
|
||||
@ -518,3 +369,15 @@ class NDFrame(ABC):
|
||||
max 1199.729004 360.000000
|
||||
"""
|
||||
return self._query_compiler.describe()
|
||||
|
||||
@abstractmethod
def _to_pandas(self):
    """Abstract hook that concrete subclasses must override.

    NOTE(review): presumably returns the pandas equivalent of this object -
    confirm against the subclass implementations.
    """
    pass
|
||||
|
||||
@abstractmethod
def head(self, n=5):
    """Abstract hook that concrete subclasses must override.

    Parameters
    ----------
    n:
        Defaults to 5.
        NOTE(review): presumably the number of leading rows to keep,
        mirroring pandas - confirm against the subclass implementations.
    """
    pass
|
||||
|
||||
@abstractmethod
def tail(self, n=5):
    """Abstract hook that concrete subclasses must override.

    Parameters
    ----------
    n:
        Defaults to 5.
        NOTE(review): presumably the number of trailing rows to keep,
        mirroring pandas - confirm against the subclass implementations.
    """
    pass
|
||||
|
@ -13,14 +13,15 @@
|
||||
# limitations under the License.
|
||||
|
||||
import copy
|
||||
from collections import OrderedDict
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from eland import Index
|
||||
from eland import Index, SortOrder
|
||||
from eland import Query
|
||||
from eland.actions import SortFieldAction
|
||||
from eland.tasks import HeadTask, TailTask, BooleanFilterTask, ArithmeticOpFieldsTask, QueryTermsTask, \
|
||||
QueryIdsTask, SortOrder, SizeTask
|
||||
QueryIdsTask, SizeTask
|
||||
|
||||
|
||||
class Operations:
|
||||
@ -35,6 +36,7 @@ class Operations:
|
||||
This is maintained as a 'task graph' (inspired by dask)
|
||||
(see https://docs.dask.org/en/latest/spec.html)
|
||||
"""
|
||||
|
||||
def __init__(self, tasks=None, field_names=None):
|
||||
if tasks is None:
|
||||
self._tasks = []
|
||||
@ -94,7 +96,7 @@ class Operations:
|
||||
# Only return requested field_names
|
||||
fields = query_compiler.field_names
|
||||
|
||||
counts = {}
|
||||
counts = OrderedDict()
|
||||
for field in fields:
|
||||
body = Query(query_params['query'])
|
||||
body.exists(field, must=True)
|
||||
@ -171,7 +173,7 @@ class Operations:
|
||||
# "value" : 628.2536888148849
|
||||
# }
|
||||
# }
|
||||
results = {}
|
||||
results = OrderedDict()
|
||||
|
||||
if field_types == 'aggregatable':
|
||||
for key, value in source_fields.items():
|
||||
@ -220,7 +222,7 @@ class Operations:
|
||||
size=0,
|
||||
body=body.to_search_body())
|
||||
|
||||
results = {}
|
||||
results = OrderedDict()
|
||||
|
||||
for key in aggregatable_field_names.keys():
|
||||
# key is aggregatable field, value is label
|
||||
@ -276,8 +278,8 @@ class Operations:
|
||||
# },
|
||||
# ...
|
||||
|
||||
bins = {}
|
||||
weights = {}
|
||||
bins = OrderedDict()
|
||||
weights = OrderedDict()
|
||||
|
||||
# There is one more bin than weights
|
||||
# len(bins) = len(weights) + 1
|
||||
@ -415,7 +417,7 @@ class Operations:
|
||||
sum 8.204365e+06 9.261629e+07 5.754909e+07 618150
|
||||
min 1.000205e+02 0.000000e+00 0.000000e+00 0
|
||||
"""
|
||||
results = {}
|
||||
results = OrderedDict()
|
||||
|
||||
for field in field_names:
|
||||
values = list()
|
||||
@ -455,7 +457,7 @@ class Operations:
|
||||
size=0,
|
||||
body=body.to_search_body())
|
||||
|
||||
results = {}
|
||||
results = OrderedDict()
|
||||
|
||||
for field in numeric_source_fields:
|
||||
values = list()
|
||||
|
@ -152,9 +152,15 @@ class Query:
|
||||
|
||||
def to_search_body(self):
    """Build the search request body from the current query and aggregations.

    Returns
    -------
    dict
        Contains a ``"query"`` entry (``self._query.build()``) only when
        ``self._query.empty()`` is false, and an ``"aggs"`` entry
        (``self._aggs``) only when ``self._aggs`` is non-empty. With neither,
        an empty dict is returned.
    """
    body = {}
    if not self._query.empty():
        body["query"] = self._query.build()
    # Leave "aggs" out entirely rather than emitting an empty mapping.
    if self._aggs:
        body["aggs"] = self._aggs
    return body
|
||||
|
||||
def to_count_body(self):
|
||||
|
@ -13,6 +13,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
import warnings
|
||||
from collections import OrderedDict
|
||||
from typing import Union
|
||||
|
||||
import numpy as np
|
||||
@ -24,7 +25,7 @@ from eland import Mappings
|
||||
from eland import Operations
|
||||
|
||||
|
||||
class ElandQueryCompiler:
|
||||
class QueryCompiler:
|
||||
"""
|
||||
Some notes on what can and can not be mapped:
|
||||
|
||||
@ -73,7 +74,7 @@ class ElandQueryCompiler:
|
||||
self.field_names = field_names
|
||||
|
||||
if name_mapper is None:
|
||||
self._name_mapper = ElandQueryCompiler.DisplayNameToFieldNameMapper()
|
||||
self._name_mapper = QueryCompiler.DisplayNameToFieldNameMapper()
|
||||
else:
|
||||
self._name_mapper = name_mapper
|
||||
|
||||
@ -276,7 +277,7 @@ class ElandQueryCompiler:
|
||||
return partial_result, df
|
||||
|
||||
def _flatten_dict(self, y):
|
||||
out = {}
|
||||
out = OrderedDict()
|
||||
|
||||
def flatten(x, name=''):
|
||||
# We flatten into source fields e.g. if type=geo_point
|
||||
@ -360,14 +361,14 @@ class ElandQueryCompiler:
|
||||
def _empty_pd_ef(self):
    """Return an empty pandas DataFrame with the correct columns and dtypes.

    Columns are added in the order of ``self.dtypes.index``, each as an empty
    Series of the matching dtype from ``self.dtypes.values``.
    """
    df = pd.DataFrame()
    # Take both names and dtypes from the same ``dtypes`` Series so each
    # column is paired with its own dtype.
    for c, d in zip(self.dtypes.index, self.dtypes.values):
        df[c] = pd.Series(dtype=d)
    return df
|
||||
|
||||
def copy(self):
    """Return a new QueryCompiler for the same client and index pattern.

    The new instance is built with ``field_names=None``, the current index
    field, and copies of this compiler's operations and name mapper
    (``self._operations.copy()`` / ``self._name_mapper.copy()``).
    """
    return QueryCompiler(client=self._client, index_pattern=self._index_pattern, field_names=None,
                         index_field=self._index.index_field, operations=self._operations.copy(),
                         name_mapper=self._name_mapper.copy())
|
||||
|
||||
def rename(self, renames, inplace=False):
|
||||
if inplace:
|
||||
@ -500,7 +501,7 @@ class ElandQueryCompiler:
|
||||
|
||||
Parameters
|
||||
----------
|
||||
right: ElandQueryCompiler
|
||||
right: QueryCompiler
|
||||
The query compiler to compare self to
|
||||
|
||||
Raises
|
||||
@ -508,7 +509,7 @@ class ElandQueryCompiler:
|
||||
TypeError, ValueError
|
||||
If arithmetic operations aren't possible
|
||||
"""
|
||||
if not isinstance(right, ElandQueryCompiler):
|
||||
if not isinstance(right, QueryCompiler):
|
||||
raise TypeError(
|
||||
"Incompatible types "
|
||||
"{0} != {1}".format(type(self), type(right))
|
||||
@ -539,7 +540,7 @@ class ElandQueryCompiler:
|
||||
|
||||
Parameters
|
||||
----------
|
||||
right: ElandQueryCompiler
|
||||
right: QueryCompiler
|
||||
The query compiler to compare self to
|
||||
|
||||
Raises
|
||||
@ -585,12 +586,12 @@ class ElandQueryCompiler:
|
||||
if field_to_display_names is not None:
|
||||
self._field_to_display_names = field_to_display_names
|
||||
else:
|
||||
self._field_to_display_names = dict()
|
||||
self._field_to_display_names = {}
|
||||
|
||||
if display_to_field_names is not None:
|
||||
self._display_to_field_names = display_to_field_names
|
||||
else:
|
||||
self._display_to_field_names = dict()
|
||||
self._display_to_field_names = {}
|
||||
|
||||
def rename_display_name(self, renames):
|
||||
for current_display_name, new_display_name in renames.items():
|
||||
|
@ -1055,7 +1055,8 @@ class Series(NDFrame):
|
||||
# our operation is between series
|
||||
op_type = op_type + tuple('s')
|
||||
# check if fields are aggregatable
|
||||
self.name, right.name = self._query_compiler.check_str_arithmetics(right._query_compiler, self.name, right.name)
|
||||
self.name, right.name = self._query_compiler.check_str_arithmetics(right._query_compiler, self.name,
|
||||
right.name)
|
||||
|
||||
series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
||||
new_field_name, method_name, self.name, right.name, op_type))
|
||||
@ -1067,7 +1068,7 @@ class Series(NDFrame):
|
||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||
raise TypeError(
|
||||
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||
.format(method_name, type(self), self._dtype, type(right).__name__)
|
||||
.format(method_name, type(self), self._dtype, type(right).__name__)
|
||||
)
|
||||
|
||||
# check left number and right numeric series
|
||||
@ -1103,7 +1104,7 @@ class Series(NDFrame):
|
||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||
raise TypeError(
|
||||
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||
.format(method_name, type(self), self._dtype, type(right).__name__)
|
||||
.format(method_name, type(self), self._dtype, type(right).__name__)
|
||||
)
|
||||
|
||||
def _numeric_rop(self, left, method_name, op_type=None):
|
||||
@ -1146,7 +1147,7 @@ class Series(NDFrame):
|
||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||
raise TypeError(
|
||||
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||
.format(op_method_name, type(self), self._dtype, type(left).__name__)
|
||||
.format(op_method_name, type(self), self._dtype, type(left).__name__)
|
||||
)
|
||||
|
||||
def max(self):
|
||||
|
@ -1,37 +1,11 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
|
||||
import numpy as np
|
||||
|
||||
from eland import SortOrder
|
||||
from eland.actions import HeadAction, TailAction, SortIndexAction
|
||||
|
||||
|
||||
class SortOrder(Enum):
|
||||
ASC = 0
|
||||
DESC = 1
|
||||
|
||||
@staticmethod
|
||||
def reverse(order):
|
||||
if order == SortOrder.ASC:
|
||||
return SortOrder.DESC
|
||||
|
||||
return SortOrder.ASC
|
||||
|
||||
@staticmethod
|
||||
def to_string(order):
|
||||
if order == SortOrder.ASC:
|
||||
return "asc"
|
||||
|
||||
return "desc"
|
||||
|
||||
@staticmethod
|
||||
def from_string(order):
|
||||
if order == "asc":
|
||||
return SortOrder.ASC
|
||||
|
||||
return SortOrder.DESC
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------------------------------------------------- #
|
||||
# Tasks #
|
||||
# -------------------------------------------------------------------------------------------------------------------- #
|
||||
@ -305,7 +279,7 @@ class ArithmeticOpFieldsTask(Task):
|
||||
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
||||
|
||||
if query_params['query_script_fields'] is None:
|
||||
query_params['query_script_fields'] = {}
|
||||
query_params['query_script_fields'] = dict()
|
||||
query_params['query_script_fields'][self._field_name] = {
|
||||
'script': {
|
||||
'source': source
|
||||
@ -428,7 +402,7 @@ class ArithmeticOpFieldsTask(Task):
|
||||
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
||||
|
||||
if query_params['query_script_fields'] is None:
|
||||
query_params['query_script_fields'] = {}
|
||||
query_params['query_script_fields'] = dict()
|
||||
query_params['query_script_fields'][self._field_name] = {
|
||||
'script': {
|
||||
'source': source
|
||||
|
@ -14,8 +14,8 @@
|
||||
|
||||
import os
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
import pandas as pd
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
@ -11,4 +11,3 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
@ -11,4 +11,3 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
@ -28,4 +28,7 @@ class TestDataFrameCount(TestData):
|
||||
pd_count = pd_ecommerce.count()
|
||||
ed_count = ed_ecommerce.count()
|
||||
|
||||
print(pd_count)
|
||||
print(ed_count)
|
||||
|
||||
assert_series_equal(pd_count, ed_count)
|
||||
|
@ -15,7 +15,6 @@
|
||||
# File called _pytest for PyCharm compatibility
|
||||
from datetime import datetime
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
@ -27,7 +26,6 @@ from eland.tests.common import assert_pandas_eland_series_equal
|
||||
|
||||
|
||||
class TestDataFrameDateTime(TestData):
|
||||
|
||||
times = ["2019-11-26T19:58:15.246+0000",
|
||||
"1970-01-01T00:00:03.000+0000"]
|
||||
time_index_name = 'test_time_formats'
|
||||
|
@ -40,5 +40,5 @@ class TestDataFrameInit:
|
||||
df0 = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
|
||||
df1 = ed.DataFrame(client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME)
|
||||
|
||||
qc = ed.ElandQueryCompiler(client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME)
|
||||
qc = ed.QueryCompiler(client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME)
|
||||
df2 = ed.DataFrame(query_compiler=qc)
|
||||
|
@ -15,7 +15,6 @@
|
||||
# File called _pytest for PyCharm compatability
|
||||
|
||||
import pandas as pd
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
import eland as ed
|
||||
from eland.tests.common import ES_TEST_CLIENT
|
||||
@ -128,4 +127,4 @@ class TestDataFrameQuery(TestData):
|
||||
|
||||
assert_pandas_eland_frame_equal(pd_q4, ed_q4)
|
||||
|
||||
ES_TEST_CLIENT.indices.delete(index_name)
|
||||
ES_TEST_CLIENT.indices.delete(index_name)
|
||||
|
@ -17,6 +17,7 @@
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from eland.compat import PY36
|
||||
from eland.dataframe import DEFAULT_NUM_ROWS_DISPLAYED
|
||||
from eland.tests.common import TestData
|
||||
|
||||
@ -198,7 +199,10 @@ class TestDataFrameRepr(TestData):
|
||||
# print(ed_head_str)
|
||||
# print(pd_head_str)
|
||||
|
||||
assert pd_head_str == ed_head_str
|
||||
# Currently pandas display bold_rows=True with >=PY36 and bold_rows=False with 3.5
|
||||
# TODO - fix this test for 3.5
|
||||
if PY36:
|
||||
assert pd_head_str == ed_head_str
|
||||
|
||||
def test_empty_dataframe_repr_html(self):
|
||||
# TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
|
||||
|
@ -18,7 +18,6 @@ import ast
|
||||
import time
|
||||
|
||||
import pandas as pd
|
||||
from elasticsearch import Elasticsearch
|
||||
from pandas.util.testing import assert_frame_equal
|
||||
|
||||
import eland as ed
|
||||
|
@ -54,3 +54,6 @@ class TestDataFrameUtils(TestData):
|
||||
ed_df_head = ed_df.head()
|
||||
|
||||
assert_pandas_eland_frame_equal(df, ed_df_head)
|
||||
|
||||
def test_eland_to_pandas_performance(self):
|
||||
pd_df = ed.eland_to_pandas(self.ed_flights())
|
||||
|
@ -11,4 +11,3 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
@ -11,4 +11,3 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
@ -188,20 +188,20 @@ class TestOperators:
|
||||
exp = (GreaterEqual('a', 2) & GreaterEqual('b', 2)) & ~(IsIn('ids', [1, 2, 3]))
|
||||
a = exp.build()
|
||||
b = {
|
||||
'bool': {
|
||||
'must': [
|
||||
{'range': {'a': {'gte': 2}}},
|
||||
{'range': {'b': {'gte': 2}}},
|
||||
{
|
||||
'bool': {
|
||||
'must_not': {
|
||||
'ids': {'values': [1, 2, 3]}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
'bool': {
|
||||
'must': [
|
||||
{'range': {'a': {'gte': 2}}},
|
||||
{'range': {'b': {'gte': 2}}},
|
||||
{
|
||||
'bool': {
|
||||
'must_not': {
|
||||
'ids': {'values': [1, 2, 3]}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
assert a == b
|
||||
|
||||
def test_must_not_and_must_filter(self):
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# File called _pytest for PyCharm compatability
|
||||
|
||||
from eland import ElandQueryCompiler
|
||||
from eland import QueryCompiler
|
||||
from eland.tests.common import TestData
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@ class TestQueryCompilerRename(TestData):
|
||||
field_names = []
|
||||
display_names = []
|
||||
|
||||
mapper = ElandQueryCompiler.DisplayNameToFieldNameMapper()
|
||||
mapper = QueryCompiler.DisplayNameToFieldNameMapper()
|
||||
|
||||
assert field_names == mapper.field_names_to_list()
|
||||
assert display_names == mapper.display_names_to_list()
|
||||
@ -58,7 +58,7 @@ class TestQueryCompilerRename(TestData):
|
||||
def test_query_compiler_basic_rename_columns(self):
|
||||
columns = ['a', 'b', 'c', 'd']
|
||||
|
||||
mapper = ElandQueryCompiler.DisplayNameToFieldNameMapper()
|
||||
mapper = QueryCompiler.DisplayNameToFieldNameMapper()
|
||||
|
||||
display_names = ['A', 'b', 'c', 'd']
|
||||
update_A = {'a': 'A'}
|
||||
|
@ -11,4 +11,3 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
@ -14,7 +14,6 @@
|
||||
|
||||
# File called _pytest for PyCharm compatability
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from eland.tests.common import TestData, assert_pandas_eland_series_equal
|
||||
|
||||
@ -60,7 +59,6 @@ class TestSeriesArithmetics(TestData):
|
||||
|
||||
assert_pandas_eland_series_equal(pdadd, edadd)
|
||||
|
||||
|
||||
def test_ser_add_str_add_ser(self):
|
||||
pdadd = self.pd_ecommerce()['customer_first_name'] + self.pd_ecommerce()['customer_last_name']
|
||||
print(pdadd.name)
|
||||
@ -84,5 +82,5 @@ class TestSeriesArithmetics(TestData):
|
||||
assert self.ed_ecommerce()['customer_gender'] + self.ed_ecommerce()['customer_first_name']
|
||||
|
||||
def test_aggregatable_add_non_aggregatable(self):
|
||||
with pytest.raises(ValueError):
|
||||
assert self.ed_ecommerce()['customer_first_name'] + self.ed_ecommerce()['customer_gender']
|
||||
with pytest.raises(ValueError):
|
||||
assert self.ed_ecommerce()['customer_first_name'] + self.ed_ecommerce()['customer_gender']
|
||||
|
@ -12,7 +12,6 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch import helpers
|
||||
from elasticsearch.client import ClusterClient
|
||||
|
||||
@ -70,9 +69,9 @@ def _update_max_compilations_limit(es, limit="10000/1m"):
|
||||
print('Updating script.max_compilations_rate to ', limit)
|
||||
cluster_client = ClusterClient(es)
|
||||
body = {
|
||||
"transient" : {
|
||||
"script.max_compilations_rate" : limit
|
||||
}
|
||||
"transient": {
|
||||
"script.max_compilations_rate": limit
|
||||
}
|
||||
}
|
||||
cluster_client.put_settings(body=body)
|
||||
|
||||
|
@ -243,7 +243,7 @@ def read_csv(filepath_or_buffer,
|
||||
|
||||
Parameters
|
||||
----------
|
||||
es_params: Elasticsearch client argument(s)
|
||||
es_client: Elasticsearch client argument(s)
|
||||
- elasticsearch-py parameters or
|
||||
- elasticsearch-py instance or
|
||||
- eland.Client instance
|
||||
@ -260,8 +260,6 @@ def read_csv(filepath_or_buffer,
|
||||
* False: Include missing values - may cause bulk to fail
|
||||
es_geo_points: list, default None
|
||||
List of columns to map to geo_point data type
|
||||
iterator
|
||||
not supported
|
||||
chunksize
|
||||
number of csv rows to read before bulk index into Elasticsearch
|
||||
|
||||
@ -275,6 +273,8 @@ def read_csv(filepath_or_buffer,
|
||||
|
||||
Notes
|
||||
-----
|
||||
iterator not supported
|
||||
|
||||
TODO - currently the eland.DataFrame may not retain the order of the data in the csv.
|
||||
"""
|
||||
kwds = dict()
|
||||
|
3
setup.py
3
setup.py
@ -12,10 +12,11 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
from codecs import open
|
||||
from os import path
|
||||
|
||||
from setuptools import setup
|
||||
|
||||
here = path.abspath(path.dirname(__file__))
|
||||
|
||||
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
|
||||
|
Loading…
x
Reference in New Issue
Block a user