mirror of
https://github.com/elastic/eland.git
synced 2025-07-24 00:00:39 +08:00
Feature/python 3.5 (#93)
* Adding python 3.5 compatibility. Main issue is ordering of dictionaries. * Updating notebooks with 3.7 results. * Removing tempoorary code. * Defaulting to OrderedDict for python 3.5 + lint all code All code reformated by PyCharm and inspection results analysed.
This commit is contained in:
parent
9a2d55f3c8
commit
c5730e6d38
@ -140,7 +140,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -166,7 +170,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 7,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -199,7 +207,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": 8,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -230,7 +242,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": 9,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -268,7 +284,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 10,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -421,7 +441,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": 11,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -581,7 +605,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 12,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -601,7 +629,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -628,7 +660,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 14,
|
"execution_count": 14,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -648,7 +684,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 15,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -677,7 +717,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": 16,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -700,12 +744,16 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 17,
|
"execution_count": 17,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<eland.index.Index at 0x11214bfd0>"
|
"<eland.index.Index at 0x12036ef90>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 17,
|
"execution_count": 17,
|
||||||
@ -721,7 +769,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 18,
|
"execution_count": 18,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -750,7 +802,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 19,
|
"execution_count": 19,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -782,7 +838,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 20,
|
"execution_count": 20,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
@ -1023,7 +1083,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## DataFrame.tail"
|
"### DataFrame.tail"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -1242,7 +1302,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 25,
|
"execution_count": 25,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -1268,7 +1332,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 26,
|
"execution_count": 26,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -1301,7 +1369,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 27,
|
"execution_count": 27,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -1332,7 +1404,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 28,
|
"execution_count": 28,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -1363,7 +1439,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 29,
|
"execution_count": 29,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -1487,7 +1567,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 30,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
@ -1514,7 +1598,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 31,
|
"execution_count": 31,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -1676,7 +1764,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 32,
|
"execution_count": 32,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -1836,7 +1928,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 33,
|
"execution_count": 33,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -1991,7 +2087,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 34,
|
"execution_count": 34,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2160,7 +2260,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 35,
|
"execution_count": 35,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2233,7 +2337,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 36,
|
"execution_count": 36,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2313,7 +2421,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 37,
|
"execution_count": 37,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2344,7 +2456,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 38,
|
"execution_count": 38,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2382,7 +2498,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 39,
|
"execution_count": 39,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2515,7 +2635,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 40,
|
"execution_count": 40,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2580,15 +2704,15 @@
|
|||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>25%</th>\n",
|
" <th>25%</th>\n",
|
||||||
" <td>409.983219</td>\n",
|
" <td>410.008918</td>\n",
|
||||||
" <td>2470.545974</td>\n",
|
" <td>2470.545974</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>251.738513</td>\n",
|
" <td>251.944994</td>\n",
|
||||||
" <td>1.000000</td>\n",
|
" <td>1.000000</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>50%</th>\n",
|
" <th>50%</th>\n",
|
||||||
" <td>640.387285</td>\n",
|
" <td>640.362667</td>\n",
|
||||||
" <td>7612.072403</td>\n",
|
" <td>7612.072403</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>503.148975</td>\n",
|
" <td>503.148975</td>\n",
|
||||||
@ -2596,11 +2720,11 @@
|
|||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>75%</th>\n",
|
" <th>75%</th>\n",
|
||||||
" <td>842.255395</td>\n",
|
" <td>842.254990</td>\n",
|
||||||
" <td>9735.860651</td>\n",
|
" <td>9735.660463</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>720.561564</td>\n",
|
" <td>720.561564</td>\n",
|
||||||
" <td>4.230496</td>\n",
|
" <td>4.000000</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>max</th>\n",
|
" <th>max</th>\n",
|
||||||
@ -2621,9 +2745,9 @@
|
|||||||
"mean 628.253689 7092.142457 ... 511.127842 2.835975\n",
|
"mean 628.253689 7092.142457 ... 511.127842 2.835975\n",
|
||||||
"std 266.386661 4578.263193 ... 334.741135 1.939365\n",
|
"std 266.386661 4578.263193 ... 334.741135 1.939365\n",
|
||||||
"min 100.020531 0.000000 ... 0.000000 0.000000\n",
|
"min 100.020531 0.000000 ... 0.000000 0.000000\n",
|
||||||
"25% 409.983219 2470.545974 ... 251.738513 1.000000\n",
|
"25% 410.008918 2470.545974 ... 251.944994 1.000000\n",
|
||||||
"50% 640.387285 7612.072403 ... 503.148975 3.000000\n",
|
"50% 640.362667 7612.072403 ... 503.148975 3.000000\n",
|
||||||
"75% 842.255395 9735.860651 ... 720.561564 4.230496\n",
|
"75% 842.254990 9735.660463 ... 720.561564 4.000000\n",
|
||||||
"max 1199.729004 19881.482422 ... 1902.901978 6.000000\n",
|
"max 1199.729004 19881.482422 ... 1902.901978 6.000000\n",
|
||||||
"\n",
|
"\n",
|
||||||
"[8 rows x 7 columns]"
|
"[8 rows x 7 columns]"
|
||||||
@ -2649,7 +2773,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 41,
|
"execution_count": 41,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
@ -2697,7 +2825,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 42,
|
"execution_count": 42,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
@ -2759,7 +2891,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 43,
|
"execution_count": 43,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2795,7 +2931,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 44,
|
"execution_count": 44,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2831,7 +2971,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 45,
|
"execution_count": 45,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2860,7 +3004,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 46,
|
"execution_count": 46,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2896,7 +3044,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 47,
|
"execution_count": 47,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2925,7 +3077,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 48,
|
"execution_count": 48,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2961,7 +3117,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 49,
|
"execution_count": 49,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -2990,7 +3150,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 50,
|
"execution_count": 50,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -3026,7 +3190,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 51,
|
"execution_count": 51,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -3049,7 +3217,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 52,
|
"execution_count": 52,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -3079,7 +3251,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 53,
|
"execution_count": 53,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -3103,7 +3279,7 @@
|
|||||||
" <tr style=\"text-align: right;\">\n",
|
" <tr style=\"text-align: right;\">\n",
|
||||||
" <th></th>\n",
|
" <th></th>\n",
|
||||||
" <th>Carrier</th>\n",
|
" <th>Carrier</th>\n",
|
||||||
" <th>DestLocation</th>\n",
|
" <th>DestRegion</th>\n",
|
||||||
" <th>...</th>\n",
|
" <th>...</th>\n",
|
||||||
" <th>dayOfWeek</th>\n",
|
" <th>dayOfWeek</th>\n",
|
||||||
" <th>timestamp</th>\n",
|
" <th>timestamp</th>\n",
|
||||||
@ -3113,7 +3289,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>0</th>\n",
|
" <th>0</th>\n",
|
||||||
" <td>Kibana Airlines</td>\n",
|
" <td>Kibana Airlines</td>\n",
|
||||||
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
|
" <td>SE-BD</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>0</td>\n",
|
" <td>0</td>\n",
|
||||||
" <td>2018-01-01 00:00:00</td>\n",
|
" <td>2018-01-01 00:00:00</td>\n",
|
||||||
@ -3121,7 +3297,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>1</th>\n",
|
" <th>1</th>\n",
|
||||||
" <td>Logstash Airways</td>\n",
|
" <td>Logstash Airways</td>\n",
|
||||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
" <td>IT-34</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>0</td>\n",
|
" <td>0</td>\n",
|
||||||
" <td>2018-01-01 18:27:00</td>\n",
|
" <td>2018-01-01 18:27:00</td>\n",
|
||||||
@ -3129,7 +3305,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>2</th>\n",
|
" <th>2</th>\n",
|
||||||
" <td>Logstash Airways</td>\n",
|
" <td>Logstash Airways</td>\n",
|
||||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
" <td>IT-34</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>0</td>\n",
|
" <td>0</td>\n",
|
||||||
" <td>2018-01-01 17:11:14</td>\n",
|
" <td>2018-01-01 17:11:14</td>\n",
|
||||||
@ -3137,7 +3313,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>3</th>\n",
|
" <th>3</th>\n",
|
||||||
" <td>Kibana Airlines</td>\n",
|
" <td>Kibana Airlines</td>\n",
|
||||||
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
|
" <td>IT-34</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>0</td>\n",
|
" <td>0</td>\n",
|
||||||
" <td>2018-01-01 10:33:28</td>\n",
|
" <td>2018-01-01 10:33:28</td>\n",
|
||||||
@ -3145,7 +3321,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>4</th>\n",
|
" <th>4</th>\n",
|
||||||
" <td>Kibana Airlines</td>\n",
|
" <td>Kibana Airlines</td>\n",
|
||||||
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
|
" <td>SE-BD</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>0</td>\n",
|
" <td>0</td>\n",
|
||||||
" <td>2018-01-01 05:13:00</td>\n",
|
" <td>2018-01-01 05:13:00</td>\n",
|
||||||
@ -3161,7 +3337,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>13054</th>\n",
|
" <th>13054</th>\n",
|
||||||
" <td>Logstash Airways</td>\n",
|
" <td>Logstash Airways</td>\n",
|
||||||
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
|
" <td>SE-BD</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>6</td>\n",
|
||||||
" <td>2018-02-11 20:42:25</td>\n",
|
" <td>2018-02-11 20:42:25</td>\n",
|
||||||
@ -3169,7 +3345,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>13055</th>\n",
|
" <th>13055</th>\n",
|
||||||
" <td>Logstash Airways</td>\n",
|
" <td>Logstash Airways</td>\n",
|
||||||
" <td>{'lat': '47.464699', 'lon': '8.54917'}</td>\n",
|
" <td>CH-ZH</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>6</td>\n",
|
||||||
" <td>2018-02-11 01:41:57</td>\n",
|
" <td>2018-02-11 01:41:57</td>\n",
|
||||||
@ -3177,7 +3353,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>13056</th>\n",
|
" <th>13056</th>\n",
|
||||||
" <td>Logstash Airways</td>\n",
|
" <td>Logstash Airways</td>\n",
|
||||||
" <td>{'lat': '51.169997', 'lon': '128.445007'}</td>\n",
|
" <td>RU-AMU</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>6</td>\n",
|
||||||
" <td>2018-02-11 04:09:27</td>\n",
|
" <td>2018-02-11 04:09:27</td>\n",
|
||||||
@ -3185,7 +3361,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>13057</th>\n",
|
" <th>13057</th>\n",
|
||||||
" <td>JetBeats</td>\n",
|
" <td>JetBeats</td>\n",
|
||||||
" <td>{'lat': '-34.8222', 'lon': '-58.5358'}</td>\n",
|
" <td>SE-BD</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>6</td>\n",
|
||||||
" <td>2018-02-11 08:28:21</td>\n",
|
" <td>2018-02-11 08:28:21</td>\n",
|
||||||
@ -3193,44 +3369,31 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>13058</th>\n",
|
" <th>13058</th>\n",
|
||||||
" <td>JetBeats</td>\n",
|
" <td>JetBeats</td>\n",
|
||||||
" <td>{'lat': '38.94449997', 'lon': '-77.45580292'}</td>\n",
|
" <td>US-DC</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>6</td>\n",
|
||||||
" <td>2018-02-11 14:54:34</td>\n",
|
" <td>2018-02-11 14:54:34</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" </tbody>\n",
|
" </tbody>\n",
|
||||||
"</table>\n",
|
"</table>\n",
|
||||||
"<p>13059 rows × 21 columns</p>\n",
|
"<p>13059 rows × 20 columns</p>\n",
|
||||||
"</div>"
|
"</div>"
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
" Carrier DestLocation ... dayOfWeek \\\n",
|
" Carrier DestRegion ... dayOfWeek timestamp\n",
|
||||||
"0 Kibana Airlines {'lat': '-33.94609833', 'lon': '151.177002'} ... 0 \n",
|
"0 Kibana Airlines SE-BD ... 0 2018-01-01 00:00:00\n",
|
||||||
"1 Logstash Airways {'lat': '45.505299', 'lon': '12.3519'} ... 0 \n",
|
"1 Logstash Airways IT-34 ... 0 2018-01-01 18:27:00\n",
|
||||||
"2 Logstash Airways {'lat': '45.505299', 'lon': '12.3519'} ... 0 \n",
|
"2 Logstash Airways IT-34 ... 0 2018-01-01 17:11:14\n",
|
||||||
"3 Kibana Airlines {'lat': '45.648399', 'lon': '12.1944'} ... 0 \n",
|
"3 Kibana Airlines IT-34 ... 0 2018-01-01 10:33:28\n",
|
||||||
"4 Kibana Airlines {'lat': '34.447102', 'lon': '108.751999'} ... 0 \n",
|
"4 Kibana Airlines SE-BD ... 0 2018-01-01 05:13:00\n",
|
||||||
"... ... ... ... ... \n",
|
"... ... ... ... ... ...\n",
|
||||||
"13054 Logstash Airways {'lat': '34.447102', 'lon': '108.751999'} ... 6 \n",
|
"13054 Logstash Airways SE-BD ... 6 2018-02-11 20:42:25\n",
|
||||||
"13055 Logstash Airways {'lat': '47.464699', 'lon': '8.54917'} ... 6 \n",
|
"13055 Logstash Airways CH-ZH ... 6 2018-02-11 01:41:57\n",
|
||||||
"13056 Logstash Airways {'lat': '51.169997', 'lon': '128.445007'} ... 6 \n",
|
"13056 Logstash Airways RU-AMU ... 6 2018-02-11 04:09:27\n",
|
||||||
"13057 JetBeats {'lat': '-34.8222', 'lon': '-58.5358'} ... 6 \n",
|
"13057 JetBeats SE-BD ... 6 2018-02-11 08:28:21\n",
|
||||||
"13058 JetBeats {'lat': '38.94449997', 'lon': '-77.45580292'} ... 6 \n",
|
"13058 JetBeats US-DC ... 6 2018-02-11 14:54:34\n",
|
||||||
"\n",
|
"\n",
|
||||||
" timestamp \n",
|
"[13059 rows x 20 columns]"
|
||||||
"0 2018-01-01 00:00:00 \n",
|
|
||||||
"1 2018-01-01 18:27:00 \n",
|
|
||||||
"2 2018-01-01 17:11:14 \n",
|
|
||||||
"3 2018-01-01 10:33:28 \n",
|
|
||||||
"4 2018-01-01 05:13:00 \n",
|
|
||||||
"... ... \n",
|
|
||||||
"13054 2018-02-11 20:42:25 \n",
|
|
||||||
"13055 2018-02-11 01:41:57 \n",
|
|
||||||
"13056 2018-02-11 04:09:27 \n",
|
|
||||||
"13057 2018-02-11 08:28:21 \n",
|
|
||||||
"13058 2018-02-11 14:54:34 \n",
|
|
||||||
"\n",
|
|
||||||
"[13059 rows x 21 columns]"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 53,
|
"execution_count": 53,
|
||||||
@ -3241,6 +3404,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"pd_flights.drop(columns=['AvgTicketPrice', \n",
|
"pd_flights.drop(columns=['AvgTicketPrice', \n",
|
||||||
" 'Cancelled', \n",
|
" 'Cancelled', \n",
|
||||||
|
" 'DestLocation',\n",
|
||||||
" 'Dest', \n",
|
" 'Dest', \n",
|
||||||
" 'DestAirportID', \n",
|
" 'DestAirportID', \n",
|
||||||
" 'DestCityName', \n",
|
" 'DestCityName', \n",
|
||||||
@ -3250,7 +3414,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 54,
|
"execution_count": 54,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -3274,7 +3442,7 @@
|
|||||||
" <tr style=\"text-align: right;\">\n",
|
" <tr style=\"text-align: right;\">\n",
|
||||||
" <th></th>\n",
|
" <th></th>\n",
|
||||||
" <th>Carrier</th>\n",
|
" <th>Carrier</th>\n",
|
||||||
" <th>DestLocation</th>\n",
|
" <th>DestRegion</th>\n",
|
||||||
" <th>...</th>\n",
|
" <th>...</th>\n",
|
||||||
" <th>dayOfWeek</th>\n",
|
" <th>dayOfWeek</th>\n",
|
||||||
" <th>timestamp</th>\n",
|
" <th>timestamp</th>\n",
|
||||||
@ -3284,7 +3452,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>0</th>\n",
|
" <th>0</th>\n",
|
||||||
" <td>Kibana Airlines</td>\n",
|
" <td>Kibana Airlines</td>\n",
|
||||||
" <td>{'lon': '151.177002', 'lat': '-33.94609833'}</td>\n",
|
" <td>SE-BD</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>0</td>\n",
|
" <td>0</td>\n",
|
||||||
" <td>2018-01-01 00:00:00</td>\n",
|
" <td>2018-01-01 00:00:00</td>\n",
|
||||||
@ -3292,7 +3460,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>1</th>\n",
|
" <th>1</th>\n",
|
||||||
" <td>Logstash Airways</td>\n",
|
" <td>Logstash Airways</td>\n",
|
||||||
" <td>{'lon': '12.3519', 'lat': '45.505299'}</td>\n",
|
" <td>IT-34</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>0</td>\n",
|
" <td>0</td>\n",
|
||||||
" <td>2018-01-01 18:27:00</td>\n",
|
" <td>2018-01-01 18:27:00</td>\n",
|
||||||
@ -3300,7 +3468,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>2</th>\n",
|
" <th>2</th>\n",
|
||||||
" <td>Logstash Airways</td>\n",
|
" <td>Logstash Airways</td>\n",
|
||||||
" <td>{'lon': '12.3519', 'lat': '45.505299'}</td>\n",
|
" <td>IT-34</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>0</td>\n",
|
" <td>0</td>\n",
|
||||||
" <td>2018-01-01 17:11:14</td>\n",
|
" <td>2018-01-01 17:11:14</td>\n",
|
||||||
@ -3308,7 +3476,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>3</th>\n",
|
" <th>3</th>\n",
|
||||||
" <td>Kibana Airlines</td>\n",
|
" <td>Kibana Airlines</td>\n",
|
||||||
" <td>{'lon': '12.1944', 'lat': '45.648399'}</td>\n",
|
" <td>IT-34</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>0</td>\n",
|
" <td>0</td>\n",
|
||||||
" <td>2018-01-01 10:33:28</td>\n",
|
" <td>2018-01-01 10:33:28</td>\n",
|
||||||
@ -3316,7 +3484,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>4</th>\n",
|
" <th>4</th>\n",
|
||||||
" <td>Kibana Airlines</td>\n",
|
" <td>Kibana Airlines</td>\n",
|
||||||
" <td>{'lon': '108.751999', 'lat': '34.447102'}</td>\n",
|
" <td>SE-BD</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>0</td>\n",
|
" <td>0</td>\n",
|
||||||
" <td>2018-01-01 05:13:00</td>\n",
|
" <td>2018-01-01 05:13:00</td>\n",
|
||||||
@ -3332,7 +3500,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>13054</th>\n",
|
" <th>13054</th>\n",
|
||||||
" <td>Logstash Airways</td>\n",
|
" <td>Logstash Airways</td>\n",
|
||||||
" <td>{'lon': '108.751999', 'lat': '34.447102'}</td>\n",
|
" <td>SE-BD</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>6</td>\n",
|
||||||
" <td>2018-02-11 20:42:25</td>\n",
|
" <td>2018-02-11 20:42:25</td>\n",
|
||||||
@ -3340,7 +3508,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>13055</th>\n",
|
" <th>13055</th>\n",
|
||||||
" <td>Logstash Airways</td>\n",
|
" <td>Logstash Airways</td>\n",
|
||||||
" <td>{'lon': '8.54917', 'lat': '47.464699'}</td>\n",
|
" <td>CH-ZH</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>6</td>\n",
|
||||||
" <td>2018-02-11 01:41:57</td>\n",
|
" <td>2018-02-11 01:41:57</td>\n",
|
||||||
@ -3348,7 +3516,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>13056</th>\n",
|
" <th>13056</th>\n",
|
||||||
" <td>Logstash Airways</td>\n",
|
" <td>Logstash Airways</td>\n",
|
||||||
" <td>{'lon': '128.445007', 'lat': '51.169997'}</td>\n",
|
" <td>RU-AMU</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>6</td>\n",
|
||||||
" <td>2018-02-11 04:09:27</td>\n",
|
" <td>2018-02-11 04:09:27</td>\n",
|
||||||
@ -3356,7 +3524,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>13057</th>\n",
|
" <th>13057</th>\n",
|
||||||
" <td>JetBeats</td>\n",
|
" <td>JetBeats</td>\n",
|
||||||
" <td>{'lon': '-58.5358', 'lat': '-34.8222'}</td>\n",
|
" <td>SE-BD</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>6</td>\n",
|
||||||
" <td>2018-02-11 08:28:21</td>\n",
|
" <td>2018-02-11 08:28:21</td>\n",
|
||||||
@ -3364,7 +3532,7 @@
|
|||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>13058</th>\n",
|
" <th>13058</th>\n",
|
||||||
" <td>JetBeats</td>\n",
|
" <td>JetBeats</td>\n",
|
||||||
" <td>{'lon': '-77.45580292', 'lat': '38.94449997'}</td>\n",
|
" <td>US-DC</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>6</td>\n",
|
||||||
" <td>2018-02-11 14:54:34</td>\n",
|
" <td>2018-02-11 14:54:34</td>\n",
|
||||||
@ -3372,36 +3540,23 @@
|
|||||||
" </tbody>\n",
|
" </tbody>\n",
|
||||||
"</table>\n",
|
"</table>\n",
|
||||||
"</div>\n",
|
"</div>\n",
|
||||||
"<p>13059 rows × 21 columns</p>"
|
"<p>13059 rows × 20 columns</p>"
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
" Carrier DestLocation ... dayOfWeek \\\n",
|
" Carrier DestRegion ... dayOfWeek timestamp\n",
|
||||||
"0 Kibana Airlines {'lon': '151.177002', 'lat': '-33.94609833'} ... 0 \n",
|
"0 Kibana Airlines SE-BD ... 0 2018-01-01 00:00:00\n",
|
||||||
"1 Logstash Airways {'lon': '12.3519', 'lat': '45.505299'} ... 0 \n",
|
"1 Logstash Airways IT-34 ... 0 2018-01-01 18:27:00\n",
|
||||||
"2 Logstash Airways {'lon': '12.3519', 'lat': '45.505299'} ... 0 \n",
|
"2 Logstash Airways IT-34 ... 0 2018-01-01 17:11:14\n",
|
||||||
"3 Kibana Airlines {'lon': '12.1944', 'lat': '45.648399'} ... 0 \n",
|
"3 Kibana Airlines IT-34 ... 0 2018-01-01 10:33:28\n",
|
||||||
"4 Kibana Airlines {'lon': '108.751999', 'lat': '34.447102'} ... 0 \n",
|
"4 Kibana Airlines SE-BD ... 0 2018-01-01 05:13:00\n",
|
||||||
"... ... ... ... ... \n",
|
"... ... ... ... ... ...\n",
|
||||||
"13054 Logstash Airways {'lon': '108.751999', 'lat': '34.447102'} ... 6 \n",
|
"13054 Logstash Airways SE-BD ... 6 2018-02-11 20:42:25\n",
|
||||||
"13055 Logstash Airways {'lon': '8.54917', 'lat': '47.464699'} ... 6 \n",
|
"13055 Logstash Airways CH-ZH ... 6 2018-02-11 01:41:57\n",
|
||||||
"13056 Logstash Airways {'lon': '128.445007', 'lat': '51.169997'} ... 6 \n",
|
"13056 Logstash Airways RU-AMU ... 6 2018-02-11 04:09:27\n",
|
||||||
"13057 JetBeats {'lon': '-58.5358', 'lat': '-34.8222'} ... 6 \n",
|
"13057 JetBeats SE-BD ... 6 2018-02-11 08:28:21\n",
|
||||||
"13058 JetBeats {'lon': '-77.45580292', 'lat': '38.94449997'} ... 6 \n",
|
"13058 JetBeats US-DC ... 6 2018-02-11 14:54:34\n",
|
||||||
"\n",
|
"\n",
|
||||||
" timestamp \n",
|
"[13059 rows x 20 columns]"
|
||||||
"0 2018-01-01 00:00:00 \n",
|
|
||||||
"1 2018-01-01 18:27:00 \n",
|
|
||||||
"2 2018-01-01 17:11:14 \n",
|
|
||||||
"3 2018-01-01 10:33:28 \n",
|
|
||||||
"4 2018-01-01 05:13:00 \n",
|
|
||||||
"... ... \n",
|
|
||||||
"13054 2018-02-11 20:42:25 \n",
|
|
||||||
"13055 2018-02-11 01:41:57 \n",
|
|
||||||
"13056 2018-02-11 04:09:27 \n",
|
|
||||||
"13057 2018-02-11 08:28:21 \n",
|
|
||||||
"13058 2018-02-11 14:54:34 \n",
|
|
||||||
"\n",
|
|
||||||
"[13059 rows x 21 columns]"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 54,
|
"execution_count": 54,
|
||||||
@ -3412,6 +3567,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"ed_flights.drop(columns=['AvgTicketPrice', \n",
|
"ed_flights.drop(columns=['AvgTicketPrice', \n",
|
||||||
" 'Cancelled', \n",
|
" 'Cancelled', \n",
|
||||||
|
" 'DestLocation',\n",
|
||||||
" 'Dest', \n",
|
" 'Dest', \n",
|
||||||
" 'DestAirportID', \n",
|
" 'DestAirportID', \n",
|
||||||
" 'DestCityName', \n",
|
" 'DestCityName', \n",
|
||||||
@ -3428,7 +3584,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 55,
|
"execution_count": 55,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -3451,7 +3611,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 56,
|
"execution_count": 56,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
@ -3481,7 +3645,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 57,
|
"execution_count": 57,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"ed_flights2 = ed_flights[(ed_flights.OriginAirportID == 'AMS') & (ed_flights.FlightDelayMin > 60)]\n",
|
"ed_flights2 = ed_flights[(ed_flights.OriginAirportID == 'AMS') & (ed_flights.FlightDelayMin > 60)]\n",
|
||||||
@ -3492,7 +3660,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 58,
|
"execution_count": 58,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"is_executing": false
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
@ -3537,7 +3709,7 @@
|
|||||||
" size: 5\n",
|
" size: 5\n",
|
||||||
" sort_params: _doc:desc\n",
|
" sort_params: _doc:desc\n",
|
||||||
" _source: ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']\n",
|
" _source: ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']\n",
|
||||||
" body: {'query': {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}, 'aggs': {}}\n",
|
" body: {'query': {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}}\n",
|
||||||
" post_processing: [('sort_index')]\n",
|
" post_processing: [('sort_index')]\n",
|
||||||
"'field_to_display_names': {}\n",
|
"'field_to_display_names': {}\n",
|
||||||
"'display_to_field_names': {}\n",
|
"'display_to_field_names': {}\n",
|
||||||
|
@ -5,7 +5,7 @@ Examples
|
|||||||
========
|
========
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 3
|
||||||
|
|
||||||
demo_notebook
|
demo_notebook
|
||||||
online_retail_analysis
|
online_retail_analysis
|
||||||
|
@ -176,7 +176,7 @@
|
|||||||
" size: None\n",
|
" size: None\n",
|
||||||
" sort_params: None\n",
|
" sort_params: None\n",
|
||||||
" _source: None\n",
|
" _source: None\n",
|
||||||
" body: {'aggs': {}}\n",
|
" body: {}\n",
|
||||||
" post_processing: []\n",
|
" post_processing: []\n",
|
||||||
"'field_to_display_names': {}\n",
|
"'field_to_display_names': {}\n",
|
||||||
"'display_to_field_names': {}\n",
|
"'display_to_field_names': {}\n",
|
||||||
@ -308,7 +308,7 @@
|
|||||||
" size: 2\n",
|
" size: 2\n",
|
||||||
" sort_params: _doc:desc\n",
|
" sort_params: _doc:desc\n",
|
||||||
" _source: None\n",
|
" _source: None\n",
|
||||||
" body: {'aggs': {}}\n",
|
" body: {}\n",
|
||||||
" post_processing: [('sort_index'), ('head': ('count': 2)), ('tail': ('count': 2))]\n",
|
" post_processing: [('sort_index'), ('head': ('count': 2)), ('tail': ('count': 2))]\n",
|
||||||
"'field_to_display_names': {}\n",
|
"'field_to_display_names': {}\n",
|
||||||
"'display_to_field_names': {}\n",
|
"'display_to_field_names': {}\n",
|
||||||
@ -813,7 +813,7 @@
|
|||||||
" size: None\n",
|
" size: None\n",
|
||||||
" sort_params: None\n",
|
" sort_params: None\n",
|
||||||
" _source: None\n",
|
" _source: None\n",
|
||||||
" body: {'query': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}, 'aggs': {}}\n",
|
" body: {'query': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}}\n",
|
||||||
" post_processing: []\n",
|
" post_processing: []\n",
|
||||||
"'field_to_display_names': {}\n",
|
"'field_to_display_names': {}\n",
|
||||||
"'display_to_field_names': {}\n",
|
"'display_to_field_names': {}\n",
|
||||||
@ -1037,23 +1037,23 @@
|
|||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>25%</th>\n",
|
" <th>25%</th>\n",
|
||||||
" <td>14220.581670</td>\n",
|
" <td>14220.529879</td>\n",
|
||||||
" <td>1.000000</td>\n",
|
" <td>1.000000</td>\n",
|
||||||
" <td>1.250000</td>\n",
|
" <td>1.250000</td>\n",
|
||||||
" <td>3756.500000</td>\n",
|
" <td>3756.500000</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>50%</th>\n",
|
" <th>50%</th>\n",
|
||||||
" <td>15666.545935</td>\n",
|
" <td>15661.227460</td>\n",
|
||||||
" <td>2.000000</td>\n",
|
" <td>2.000000</td>\n",
|
||||||
" <td>2.510000</td>\n",
|
" <td>2.510000</td>\n",
|
||||||
" <td>7498.861278</td>\n",
|
" <td>7499.363732</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>75%</th>\n",
|
" <th>75%</th>\n",
|
||||||
" <td>17213.978376</td>\n",
|
" <td>17214.478439</td>\n",
|
||||||
" <td>6.614054</td>\n",
|
" <td>6.613198</td>\n",
|
||||||
" <td>4.215516</td>\n",
|
" <td>4.210000</td>\n",
|
||||||
" <td>11249.500000</td>\n",
|
" <td>11249.500000</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
@ -1073,9 +1073,9 @@
|
|||||||
"mean 15590.776680 7.464000 4.103233 7499.500000\n",
|
"mean 15590.776680 7.464000 4.103233 7499.500000\n",
|
||||||
"std 1764.025160 85.924387 20.104873 4330.127009\n",
|
"std 1764.025160 85.924387 20.104873 4330.127009\n",
|
||||||
"min 12347.000000 -9360.000000 0.000000 0.000000\n",
|
"min 12347.000000 -9360.000000 0.000000 0.000000\n",
|
||||||
"25% 14220.581670 1.000000 1.250000 3756.500000\n",
|
"25% 14220.529879 1.000000 1.250000 3756.500000\n",
|
||||||
"50% 15666.545935 2.000000 2.510000 7498.861278\n",
|
"50% 15661.227460 2.000000 2.510000 7499.363732\n",
|
||||||
"75% 17213.978376 6.614054 4.215516 11249.500000\n",
|
"75% 17214.478439 6.613198 4.210000 11249.500000\n",
|
||||||
"max 18239.000000 2880.000000 950.990000 14999.000000"
|
"max 18239.000000 2880.000000 950.990000 14999.000000"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -48,3 +48,5 @@ In general, the data resides in elasticsearch and not in memory, which allows el
|
|||||||
|
|
||||||
* :doc:`examples/index`
|
* :doc:`examples/index`
|
||||||
|
|
||||||
|
* :doc:`examples/demo_notebook`
|
||||||
|
* :doc:`examples/online_retail_analysis`
|
||||||
|
@ -3,6 +3,9 @@ from abc import ABC, abstractmethod
|
|||||||
# -------------------------------------------------------------------------------------------------------------------- #
|
# -------------------------------------------------------------------------------------------------------------------- #
|
||||||
# PostProcessingActions #
|
# PostProcessingActions #
|
||||||
# -------------------------------------------------------------------------------------------------------------------- #
|
# -------------------------------------------------------------------------------------------------------------------- #
|
||||||
|
from eland import SortOrder
|
||||||
|
|
||||||
|
|
||||||
class PostProcessingAction(ABC):
|
class PostProcessingAction(ABC):
|
||||||
def __init__(self, action_type):
|
def __init__(self, action_type):
|
||||||
"""
|
"""
|
||||||
@ -27,6 +30,7 @@ class PostProcessingAction(ABC):
|
|||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class SortIndexAction(PostProcessingAction):
|
class SortIndexAction(PostProcessingAction):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__("sort_index")
|
super().__init__("sort_index")
|
||||||
@ -37,6 +41,7 @@ class SortIndexAction(PostProcessingAction):
|
|||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "('{}')".format(self.type)
|
return "('{}')".format(self.type)
|
||||||
|
|
||||||
|
|
||||||
class HeadAction(PostProcessingAction):
|
class HeadAction(PostProcessingAction):
|
||||||
def __init__(self, count):
|
def __init__(self, count):
|
||||||
super().__init__("head")
|
super().__init__("head")
|
||||||
@ -76,10 +81,10 @@ class SortFieldAction(PostProcessingAction):
|
|||||||
raise ValueError("Expected ES sort params string (e.g. _doc:desc). Got '{}'".format(sort_params_string))
|
raise ValueError("Expected ES sort params string (e.g. _doc:desc). Got '{}'".format(sort_params_string))
|
||||||
|
|
||||||
self._sort_field = sort_params[0]
|
self._sort_field = sort_params[0]
|
||||||
self._sort_order = Operations.SortOrder.from_string(sort_params[1])
|
self._sort_order = SortOrder.from_string(sort_params[1])
|
||||||
|
|
||||||
def resolve_action(self, df):
|
def resolve_action(self, df):
|
||||||
if self._sort_order == Operations.SortOrder.ASC:
|
if self._sort_order == SortOrder.ASC:
|
||||||
return df.sort_values(self._sort_field, True)
|
return df.sort_values(self._sort_field, True)
|
||||||
return df.sort_values(self._sort_field, False)
|
return df.sort_values(self._sort_field, False)
|
||||||
|
|
||||||
|
@ -13,6 +13,8 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
# Default number of rows displayed (different to pandas where ALL could be displayed)
|
# Default number of rows displayed (different to pandas where ALL could be displayed)
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
DEFAULT_NUM_ROWS_DISPLAYED = 60
|
DEFAULT_NUM_ROWS_DISPLAYED = 60
|
||||||
|
|
||||||
|
|
||||||
@ -22,3 +24,29 @@ def docstring_parameter(*sub):
|
|||||||
return obj
|
return obj
|
||||||
|
|
||||||
return dec
|
return dec
|
||||||
|
|
||||||
|
|
||||||
|
class SortOrder(Enum):
|
||||||
|
ASC = 0
|
||||||
|
DESC = 1
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def reverse(order):
|
||||||
|
if order == SortOrder.ASC:
|
||||||
|
return SortOrder.DESC
|
||||||
|
|
||||||
|
return SortOrder.ASC
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def to_string(order):
|
||||||
|
if order == SortOrder.ASC:
|
||||||
|
return "asc"
|
||||||
|
|
||||||
|
return "desc"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def from_string(order):
|
||||||
|
if order == "asc":
|
||||||
|
return SortOrder.ASC
|
||||||
|
|
||||||
|
return SortOrder.DESC
|
||||||
|
17
eland/compat.py
Normal file
17
eland/compat.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
PY36 = sys.version_info >= (3, 6)
|
@ -27,6 +27,7 @@ from pandas.io.common import _expand_user, _stringify_path
|
|||||||
from pandas.io.formats import console
|
from pandas.io.formats import console
|
||||||
from pandas.io.formats import format as fmt
|
from pandas.io.formats import format as fmt
|
||||||
from pandas.io.formats.printing import pprint_thing
|
from pandas.io.formats.printing import pprint_thing
|
||||||
|
from pandas.util._validators import validate_bool_kwarg
|
||||||
|
|
||||||
import eland.plotting as gfx
|
import eland.plotting as gfx
|
||||||
from eland import NDFrame
|
from eland import NDFrame
|
||||||
@ -255,6 +256,151 @@ class DataFrame(NDFrame):
|
|||||||
"""
|
"""
|
||||||
return DataFrame(query_compiler=self._query_compiler.tail(n))
|
return DataFrame(query_compiler=self._query_compiler.tail(n))
|
||||||
|
|
||||||
|
def drop(
|
||||||
|
self,
|
||||||
|
labels=None,
|
||||||
|
axis=0,
|
||||||
|
index=None,
|
||||||
|
columns=None,
|
||||||
|
level=None,
|
||||||
|
inplace=False,
|
||||||
|
errors="raise",
|
||||||
|
):
|
||||||
|
"""Return new object with labels in requested axis removed.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
labels:
|
||||||
|
Index or column labels to drop.
|
||||||
|
axis:
|
||||||
|
Whether to drop labels from the index (0 / 'index') or columns (1 / 'columns').
|
||||||
|
index, columns:
|
||||||
|
Alternative to specifying axis (labels, axis=1 is equivalent to columns=labels).
|
||||||
|
level:
|
||||||
|
For MultiIndex - not supported
|
||||||
|
inplace:
|
||||||
|
If True, do operation inplace and return None.
|
||||||
|
errors:
|
||||||
|
If 'ignore', suppress error and existing labels are dropped.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
dropped:
|
||||||
|
type of caller
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.DataFrame.drop`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
Drop a column
|
||||||
|
|
||||||
|
>>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'email', 'user'])
|
||||||
|
>>> df.drop(columns=['user'])
|
||||||
|
customer_first_name email
|
||||||
|
0 Eddie eddie@underwood-family.zzz
|
||||||
|
1 Mary mary@bailey-family.zzz
|
||||||
|
2 Gwen gwen@butler-family.zzz
|
||||||
|
3 Diane diane@chandler-family.zzz
|
||||||
|
4 Eddie eddie@weber-family.zzz
|
||||||
|
... ... ...
|
||||||
|
4670 Mary mary@lambert-family.zzz
|
||||||
|
4671 Jim jim@gilbert-family.zzz
|
||||||
|
4672 Yahya yahya@rivera-family.zzz
|
||||||
|
4673 Mary mary@hampton-family.zzz
|
||||||
|
4674 Jackson jackson@hopkins-family.zzz
|
||||||
|
<BLANKLINE>
|
||||||
|
[4675 rows x 2 columns]
|
||||||
|
|
||||||
|
Drop rows by index value (axis=0)
|
||||||
|
|
||||||
|
>>> df.drop(['1', '2'])
|
||||||
|
customer_first_name email user
|
||||||
|
0 Eddie eddie@underwood-family.zzz eddie
|
||||||
|
3 Diane diane@chandler-family.zzz diane
|
||||||
|
4 Eddie eddie@weber-family.zzz eddie
|
||||||
|
5 Diane diane@goodwin-family.zzz diane
|
||||||
|
6 Oliver oliver@rios-family.zzz oliver
|
||||||
|
... ... ... ...
|
||||||
|
4670 Mary mary@lambert-family.zzz mary
|
||||||
|
4671 Jim jim@gilbert-family.zzz jim
|
||||||
|
4672 Yahya yahya@rivera-family.zzz yahya
|
||||||
|
4673 Mary mary@hampton-family.zzz mary
|
||||||
|
4674 Jackson jackson@hopkins-family.zzz jackson
|
||||||
|
<BLANKLINE>
|
||||||
|
[4673 rows x 3 columns]
|
||||||
|
"""
|
||||||
|
# Level not supported
|
||||||
|
if level is not None:
|
||||||
|
raise NotImplementedError("level not supported {}".format(level))
|
||||||
|
|
||||||
|
inplace = validate_bool_kwarg(inplace, "inplace")
|
||||||
|
if labels is not None:
|
||||||
|
if index is not None or columns is not None:
|
||||||
|
raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
|
||||||
|
axis = pd.DataFrame()._get_axis_name(axis)
|
||||||
|
axes = {axis: labels}
|
||||||
|
elif index is not None or columns is not None:
|
||||||
|
axes, _ = pd.DataFrame()._construct_axes_from_arguments(
|
||||||
|
(index, columns), {}
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
"Need to specify at least one of 'labels', 'index' or 'columns'"
|
||||||
|
)
|
||||||
|
|
||||||
|
# TODO Clean up this error checking
|
||||||
|
if "index" not in axes:
|
||||||
|
axes["index"] = None
|
||||||
|
elif axes["index"] is not None:
|
||||||
|
if not is_list_like(axes["index"]):
|
||||||
|
axes["index"] = [axes["index"]]
|
||||||
|
if errors == "raise":
|
||||||
|
# Check if axes['index'] values exists in index
|
||||||
|
count = self._query_compiler._index_matches_count(axes["index"])
|
||||||
|
if count != len(axes["index"]):
|
||||||
|
raise ValueError(
|
||||||
|
"number of labels {}!={} not contained in axis".format(count, len(axes["index"]))
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
"""
|
||||||
|
axes["index"] = self._query_compiler.index_matches(axes["index"])
|
||||||
|
# If the length is zero, we will just do nothing
|
||||||
|
if not len(axes["index"]):
|
||||||
|
axes["index"] = None
|
||||||
|
"""
|
||||||
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
if "columns" not in axes:
|
||||||
|
axes["columns"] = None
|
||||||
|
elif axes["columns"] is not None:
|
||||||
|
if not is_list_like(axes["columns"]):
|
||||||
|
axes["columns"] = [axes["columns"]]
|
||||||
|
if errors == "raise":
|
||||||
|
non_existant = [
|
||||||
|
obj for obj in axes["columns"] if obj not in self.columns
|
||||||
|
]
|
||||||
|
if len(non_existant):
|
||||||
|
raise ValueError(
|
||||||
|
"labels {} not contained in axis".format(non_existant)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
axes["columns"] = [
|
||||||
|
obj for obj in axes["columns"] if obj in self.columns
|
||||||
|
]
|
||||||
|
# If the length is zero, we will just do nothing
|
||||||
|
if not len(axes["columns"]):
|
||||||
|
axes["columns"] = None
|
||||||
|
|
||||||
|
new_query_compiler = self._query_compiler.drop(
|
||||||
|
index=axes["index"], columns=axes["columns"]
|
||||||
|
)
|
||||||
|
return self._create_or_update_from_compiler(new_query_compiler, inplace)
|
||||||
|
|
||||||
|
def __getitem__(self, key):
|
||||||
|
return self._getitem(key)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
"""
|
"""
|
||||||
From pandas
|
From pandas
|
||||||
@ -312,7 +458,8 @@ class DataFrame(NDFrame):
|
|||||||
max_rows = min_rows
|
max_rows = min_rows
|
||||||
|
|
||||||
return self.to_html(max_rows=max_rows, max_cols=max_cols,
|
return self.to_html(max_rows=max_rows, max_cols=max_cols,
|
||||||
show_dimensions=show_dimensions, notebook=True) # set for consistency with pandas output
|
show_dimensions=show_dimensions,
|
||||||
|
notebook=True) # set for consistency with pandas output
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -417,7 +564,7 @@ class DataFrame(NDFrame):
|
|||||||
size: 5
|
size: 5
|
||||||
sort_params: _doc:desc
|
sort_params: _doc:desc
|
||||||
_source: ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']
|
_source: ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']
|
||||||
body: {'query': {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}, 'aggs': {}}
|
body: {'query': {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}}
|
||||||
post_processing: [('sort_index')]
|
post_processing: [('sort_index')]
|
||||||
'field_to_display_names': {}
|
'field_to_display_names': {}
|
||||||
'display_to_field_names': {}
|
'display_to_field_names': {}
|
||||||
|
@ -24,10 +24,10 @@ class BooleanFilter:
|
|||||||
if isinstance(self, AndFilter):
|
if isinstance(self, AndFilter):
|
||||||
if 'must_not' in x.subtree:
|
if 'must_not' in x.subtree:
|
||||||
# nest a must_not under a must
|
# nest a must_not under a must
|
||||||
self.subtree['must'].append(x.build()) # 'build includes bool'
|
self.subtree['must'].append(x.build()) # 'build includes bool'
|
||||||
else:
|
else:
|
||||||
# append a must to a must
|
# append a must to a must
|
||||||
self.subtree['must'].append(x.subtree) # 'subtree strips bool'
|
self.subtree['must'].append(x.subtree) # 'subtree strips bool'
|
||||||
return self
|
return self
|
||||||
elif isinstance(x, AndFilter):
|
elif isinstance(x, AndFilter):
|
||||||
if 'must_not' in self.subtree:
|
if 'must_not' in self.subtree:
|
||||||
|
@ -11,8 +11,8 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import warnings
|
import warnings
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@ -66,7 +66,7 @@ class Mappings:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# here we keep track of the format of any date fields
|
# here we keep track of the format of any date fields
|
||||||
self._date_fields_format = {}
|
self._date_fields_format = dict()
|
||||||
if (client is not None) and (index_pattern is not None):
|
if (client is not None) and (index_pattern is not None):
|
||||||
get_mapping = client.get_mapping(index=index_pattern)
|
get_mapping = client.get_mapping(index=index_pattern)
|
||||||
|
|
||||||
@ -86,7 +86,8 @@ class Mappings:
|
|||||||
|
|
||||||
# Cache source field types for efficient lookup
|
# Cache source field types for efficient lookup
|
||||||
# (this massively improves performance of DataFrame.flatten)
|
# (this massively improves performance of DataFrame.flatten)
|
||||||
self._source_field_pd_dtypes = {}
|
|
||||||
|
self._source_field_pd_dtypes = OrderedDict()
|
||||||
|
|
||||||
for field_name in self._mappings_capabilities[self._mappings_capabilities._source].index:
|
for field_name in self._mappings_capabilities[self._mappings_capabilities._source].index:
|
||||||
pd_dtype = self._mappings_capabilities.loc[field_name]['pd_dtype']
|
pd_dtype = self._mappings_capabilities.loc[field_name]['pd_dtype']
|
||||||
@ -135,14 +136,14 @@ class Mappings:
|
|||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
fields, dates_format: tuple(dict, dict)
|
fields, dates_format: tuple(OrderedDict, dict)
|
||||||
where:
|
where:
|
||||||
fields: Dict of field names and types
|
fields: OrderedDict of field names and types
|
||||||
dates_format: Dict of date field names and format
|
dates_format: Dict of date field names and format
|
||||||
|
|
||||||
"""
|
"""
|
||||||
fields = {}
|
fields = OrderedDict()
|
||||||
dates_format = {}
|
dates_format = dict()
|
||||||
|
|
||||||
# Recurse until we get a 'type: xxx'
|
# Recurse until we get a 'type: xxx'
|
||||||
def flatten(x, name=''):
|
def flatten(x, name=''):
|
||||||
@ -206,7 +207,7 @@ class Mappings:
|
|||||||
all_fields_caps_fields = all_fields_caps['fields']
|
all_fields_caps_fields = all_fields_caps['fields']
|
||||||
|
|
||||||
field_names = ['_source', 'es_dtype', 'pd_dtype', 'searchable', 'aggregatable']
|
field_names = ['_source', 'es_dtype', 'pd_dtype', 'searchable', 'aggregatable']
|
||||||
capability_matrix = {}
|
capability_matrix = OrderedDict()
|
||||||
|
|
||||||
for field, field_caps in all_fields_caps_fields.items():
|
for field, field_caps in all_fields_caps_fields.items():
|
||||||
if field in all_fields:
|
if field in all_fields:
|
||||||
@ -353,7 +354,7 @@ class Mappings:
|
|||||||
else:
|
else:
|
||||||
es_dtype = Mappings._pd_dtype_to_es_dtype(dtype)
|
es_dtype = Mappings._pd_dtype_to_es_dtype(dtype)
|
||||||
|
|
||||||
mappings['properties'][field_name_name] = {}
|
mappings['properties'][field_name_name] = OrderedDict()
|
||||||
mappings['properties'][field_name_name]['type'] = es_dtype
|
mappings['properties'][field_name_name]['type'] = es_dtype
|
||||||
|
|
||||||
return {"mappings": mappings}
|
return {"mappings": mappings}
|
||||||
@ -401,8 +402,8 @@ class Mappings:
|
|||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
dict
|
str
|
||||||
A dictionary (for date fields) containing the mapping {field_name:format}
|
A string (for date fields) containing the date format for the field
|
||||||
"""
|
"""
|
||||||
return self._date_fields_format.get(field_name)
|
return self._date_fields_format.get(field_name)
|
||||||
|
|
||||||
@ -460,12 +461,12 @@ class Mappings:
|
|||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
dict
|
OrderedDict
|
||||||
e.g. {'customer_full_name': 'customer_full_name.keyword', ...}
|
e.g. {'customer_full_name': 'customer_full_name.keyword', ...}
|
||||||
"""
|
"""
|
||||||
if field_names is None:
|
if field_names is None:
|
||||||
field_names = self.source_fields()
|
field_names = self.source_fields()
|
||||||
aggregatables = {}
|
aggregatables = OrderedDict()
|
||||||
for field_name in field_names:
|
for field_name in field_names:
|
||||||
capabilities = self.field_capabilities(field_name)
|
capabilities = self.field_capabilities(field_name)
|
||||||
if capabilities['aggregatable']:
|
if capabilities['aggregatable']:
|
||||||
@ -478,7 +479,7 @@ class Mappings:
|
|||||||
aggregatables[field_name_keyword] = field_name
|
aggregatables[field_name_keyword] = field_name
|
||||||
|
|
||||||
if not aggregatables:
|
if not aggregatables:
|
||||||
raise ValueError("Aggregations not supported for ", field_name)
|
raise ValueError("Aggregations not supported for ", field_names)
|
||||||
|
|
||||||
return aggregatables
|
return aggregatables
|
||||||
|
|
||||||
@ -533,11 +534,15 @@ class Mappings:
|
|||||||
Source field name + pd_dtype as np.dtype
|
Source field name + pd_dtype as np.dtype
|
||||||
"""
|
"""
|
||||||
if field_names is not None:
|
if field_names is not None:
|
||||||
return pd.Series(
|
data = OrderedDict()
|
||||||
{key: np.dtype(self._source_field_pd_dtypes[key]) for key in field_names})
|
for key in field_names:
|
||||||
|
data[key] = np.dtype(self._source_field_pd_dtypes[key])
|
||||||
|
return pd.Series(data)
|
||||||
|
|
||||||
return pd.Series(
|
data = OrderedDict()
|
||||||
{key: np.dtype(value) for key, value in self._source_field_pd_dtypes.items()})
|
for key, value in self._source_field_pd_dtypes.items():
|
||||||
|
data[key] = np.dtype(value)
|
||||||
|
return pd.Series(data)
|
||||||
|
|
||||||
def info_es(self, buf):
|
def info_es(self, buf):
|
||||||
buf.write("Mappings:\n")
|
buf.write("Mappings:\n")
|
||||||
|
203
eland/ndframe.py
203
eland/ndframe.py
@ -1,3 +1,22 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
from eland import QueryCompiler
|
||||||
|
|
||||||
"""
|
"""
|
||||||
NDFrame
|
NDFrame
|
||||||
---------
|
---------
|
||||||
@ -23,29 +42,6 @@ only Elasticsearch aggregatable fields can be aggregated or grouped.
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Copyright 2019 Elasticsearch BV
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import sys
|
|
||||||
from abc import ABC
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
from pandas.core.dtypes.common import is_list_like
|
|
||||||
from pandas.util._validators import validate_bool_kwarg
|
|
||||||
|
|
||||||
from eland import ElandQueryCompiler
|
|
||||||
|
|
||||||
|
|
||||||
class NDFrame(ABC):
|
class NDFrame(ABC):
|
||||||
|
|
||||||
@ -64,8 +60,8 @@ class NDFrame(ABC):
|
|||||||
A reference to an Elasticsearch Python client
|
A reference to an Elasticsearch Python client
|
||||||
"""
|
"""
|
||||||
if query_compiler is None:
|
if query_compiler is None:
|
||||||
query_compiler = ElandQueryCompiler(client=client, index_pattern=index_pattern, field_names=columns,
|
query_compiler = QueryCompiler(client=client, index_pattern=index_pattern, field_names=columns,
|
||||||
index_field=index_field)
|
index_field=index_field)
|
||||||
self._query_compiler = query_compiler
|
self._query_compiler = query_compiler
|
||||||
|
|
||||||
def _get_index(self):
|
def _get_index(self):
|
||||||
@ -139,9 +135,6 @@ class NDFrame(ABC):
|
|||||||
|
|
||||||
return head.append(tail)
|
return head.append(tail)
|
||||||
|
|
||||||
def __getitem__(self, key):
|
|
||||||
return self._getitem(key)
|
|
||||||
|
|
||||||
def __sizeof__(self):
|
def __sizeof__(self):
|
||||||
# Don't default to pandas, just return approximation TODO - make this more accurate
|
# Don't default to pandas, just return approximation TODO - make this more accurate
|
||||||
return sys.getsizeof(self._query_compiler)
|
return sys.getsizeof(self._query_compiler)
|
||||||
@ -157,148 +150,6 @@ class NDFrame(ABC):
|
|||||||
def _info_es(self, buf):
|
def _info_es(self, buf):
|
||||||
self._query_compiler.info_es(buf)
|
self._query_compiler.info_es(buf)
|
||||||
|
|
||||||
def drop(
|
|
||||||
self,
|
|
||||||
labels=None,
|
|
||||||
axis=0,
|
|
||||||
index=None,
|
|
||||||
columns=None,
|
|
||||||
level=None,
|
|
||||||
inplace=False,
|
|
||||||
errors="raise",
|
|
||||||
):
|
|
||||||
"""Return new object with labels in requested axis removed.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
labels:
|
|
||||||
Index or column labels to drop.
|
|
||||||
axis:
|
|
||||||
Whether to drop labels from the index (0 / 'index') or columns (1 / 'columns').
|
|
||||||
index, columns:
|
|
||||||
Alternative to specifying axis (labels, axis=1 is equivalent to columns=labels).
|
|
||||||
level:
|
|
||||||
For MultiIndex - not supported
|
|
||||||
inplace:
|
|
||||||
If True, do operation inplace and return None.
|
|
||||||
errors:
|
|
||||||
If 'ignore', suppress error and existing labels are dropped.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
dropped:
|
|
||||||
type of caller
|
|
||||||
|
|
||||||
See Also
|
|
||||||
--------
|
|
||||||
:pandas_api_docs:`pandas.DataFrame.drop`
|
|
||||||
|
|
||||||
Examples
|
|
||||||
--------
|
|
||||||
Drop a column
|
|
||||||
|
|
||||||
>>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'email', 'user'])
|
|
||||||
>>> df.drop(columns=['user'])
|
|
||||||
customer_first_name email
|
|
||||||
0 Eddie eddie@underwood-family.zzz
|
|
||||||
1 Mary mary@bailey-family.zzz
|
|
||||||
2 Gwen gwen@butler-family.zzz
|
|
||||||
3 Diane diane@chandler-family.zzz
|
|
||||||
4 Eddie eddie@weber-family.zzz
|
|
||||||
... ... ...
|
|
||||||
4670 Mary mary@lambert-family.zzz
|
|
||||||
4671 Jim jim@gilbert-family.zzz
|
|
||||||
4672 Yahya yahya@rivera-family.zzz
|
|
||||||
4673 Mary mary@hampton-family.zzz
|
|
||||||
4674 Jackson jackson@hopkins-family.zzz
|
|
||||||
<BLANKLINE>
|
|
||||||
[4675 rows x 2 columns]
|
|
||||||
|
|
||||||
Drop rows by index value (axis=0)
|
|
||||||
|
|
||||||
>>> df.drop(['1', '2'])
|
|
||||||
customer_first_name email user
|
|
||||||
0 Eddie eddie@underwood-family.zzz eddie
|
|
||||||
3 Diane diane@chandler-family.zzz diane
|
|
||||||
4 Eddie eddie@weber-family.zzz eddie
|
|
||||||
5 Diane diane@goodwin-family.zzz diane
|
|
||||||
6 Oliver oliver@rios-family.zzz oliver
|
|
||||||
... ... ... ...
|
|
||||||
4670 Mary mary@lambert-family.zzz mary
|
|
||||||
4671 Jim jim@gilbert-family.zzz jim
|
|
||||||
4672 Yahya yahya@rivera-family.zzz yahya
|
|
||||||
4673 Mary mary@hampton-family.zzz mary
|
|
||||||
4674 Jackson jackson@hopkins-family.zzz jackson
|
|
||||||
<BLANKLINE>
|
|
||||||
[4673 rows x 3 columns]
|
|
||||||
"""
|
|
||||||
# Level not supported
|
|
||||||
if level is not None:
|
|
||||||
raise NotImplementedError("level not supported {}".format(level))
|
|
||||||
|
|
||||||
inplace = validate_bool_kwarg(inplace, "inplace")
|
|
||||||
if labels is not None:
|
|
||||||
if index is not None or columns is not None:
|
|
||||||
raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
|
|
||||||
axis = pd.DataFrame()._get_axis_name(axis)
|
|
||||||
axes = {axis: labels}
|
|
||||||
elif index is not None or columns is not None:
|
|
||||||
axes, _ = pd.DataFrame()._construct_axes_from_arguments(
|
|
||||||
(index, columns), {}
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise ValueError(
|
|
||||||
"Need to specify at least one of 'labels', 'index' or 'columns'"
|
|
||||||
)
|
|
||||||
|
|
||||||
# TODO Clean up this error checking
|
|
||||||
if "index" not in axes:
|
|
||||||
axes["index"] = None
|
|
||||||
elif axes["index"] is not None:
|
|
||||||
if not is_list_like(axes["index"]):
|
|
||||||
axes["index"] = [axes["index"]]
|
|
||||||
if errors == "raise":
|
|
||||||
# Check if axes['index'] values exists in index
|
|
||||||
count = self._query_compiler._index_matches_count(axes["index"])
|
|
||||||
if count != len(axes["index"]):
|
|
||||||
raise ValueError(
|
|
||||||
"number of labels {}!={} not contained in axis".format(count, len(axes["index"]))
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
"""
|
|
||||||
axes["index"] = self._query_compiler.index_matches(axes["index"])
|
|
||||||
# If the length is zero, we will just do nothing
|
|
||||||
if not len(axes["index"]):
|
|
||||||
axes["index"] = None
|
|
||||||
"""
|
|
||||||
raise NotImplementedError()
|
|
||||||
|
|
||||||
if "columns" not in axes:
|
|
||||||
axes["columns"] = None
|
|
||||||
elif axes["columns"] is not None:
|
|
||||||
if not is_list_like(axes["columns"]):
|
|
||||||
axes["columns"] = [axes["columns"]]
|
|
||||||
if errors == "raise":
|
|
||||||
non_existant = [
|
|
||||||
obj for obj in axes["columns"] if obj not in self.columns
|
|
||||||
]
|
|
||||||
if len(non_existant):
|
|
||||||
raise ValueError(
|
|
||||||
"labels {} not contained in axis".format(non_existant)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
axes["columns"] = [
|
|
||||||
obj for obj in axes["columns"] if obj in self.columns
|
|
||||||
]
|
|
||||||
# If the length is zero, we will just do nothing
|
|
||||||
if not len(axes["columns"]):
|
|
||||||
axes["columns"] = None
|
|
||||||
|
|
||||||
new_query_compiler = self._query_compiler.drop(
|
|
||||||
index=axes["index"], columns=axes["columns"]
|
|
||||||
)
|
|
||||||
return self._create_or_update_from_compiler(new_query_compiler, inplace)
|
|
||||||
|
|
||||||
def mean(self, numeric_only=True):
|
def mean(self, numeric_only=True):
|
||||||
"""
|
"""
|
||||||
Return mean value for each numeric column
|
Return mean value for each numeric column
|
||||||
@ -518,3 +369,15 @@ class NDFrame(ABC):
|
|||||||
max 1199.729004 360.000000
|
max 1199.729004 360.000000
|
||||||
"""
|
"""
|
||||||
return self._query_compiler.describe()
|
return self._query_compiler.describe()
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _to_pandas(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def head(self, n=5):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def tail(self, n=5):
|
||||||
|
pass
|
||||||
|
@ -13,14 +13,15 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from eland import Index
|
from eland import Index, SortOrder
|
||||||
from eland import Query
|
from eland import Query
|
||||||
from eland.actions import SortFieldAction
|
from eland.actions import SortFieldAction
|
||||||
from eland.tasks import HeadTask, TailTask, BooleanFilterTask, ArithmeticOpFieldsTask, QueryTermsTask, \
|
from eland.tasks import HeadTask, TailTask, BooleanFilterTask, ArithmeticOpFieldsTask, QueryTermsTask, \
|
||||||
QueryIdsTask, SortOrder, SizeTask
|
QueryIdsTask, SizeTask
|
||||||
|
|
||||||
|
|
||||||
class Operations:
|
class Operations:
|
||||||
@ -35,6 +36,7 @@ class Operations:
|
|||||||
This is maintained as a 'task graph' (inspired by dask)
|
This is maintained as a 'task graph' (inspired by dask)
|
||||||
(see https://docs.dask.org/en/latest/spec.html)
|
(see https://docs.dask.org/en/latest/spec.html)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, tasks=None, field_names=None):
|
def __init__(self, tasks=None, field_names=None):
|
||||||
if tasks is None:
|
if tasks is None:
|
||||||
self._tasks = []
|
self._tasks = []
|
||||||
@ -94,7 +96,7 @@ class Operations:
|
|||||||
# Only return requested field_names
|
# Only return requested field_names
|
||||||
fields = query_compiler.field_names
|
fields = query_compiler.field_names
|
||||||
|
|
||||||
counts = {}
|
counts = OrderedDict()
|
||||||
for field in fields:
|
for field in fields:
|
||||||
body = Query(query_params['query'])
|
body = Query(query_params['query'])
|
||||||
body.exists(field, must=True)
|
body.exists(field, must=True)
|
||||||
@ -171,7 +173,7 @@ class Operations:
|
|||||||
# "value" : 628.2536888148849
|
# "value" : 628.2536888148849
|
||||||
# }
|
# }
|
||||||
# }
|
# }
|
||||||
results = {}
|
results = OrderedDict()
|
||||||
|
|
||||||
if field_types == 'aggregatable':
|
if field_types == 'aggregatable':
|
||||||
for key, value in source_fields.items():
|
for key, value in source_fields.items():
|
||||||
@ -220,7 +222,7 @@ class Operations:
|
|||||||
size=0,
|
size=0,
|
||||||
body=body.to_search_body())
|
body=body.to_search_body())
|
||||||
|
|
||||||
results = {}
|
results = OrderedDict()
|
||||||
|
|
||||||
for key in aggregatable_field_names.keys():
|
for key in aggregatable_field_names.keys():
|
||||||
# key is aggregatable field, value is label
|
# key is aggregatable field, value is label
|
||||||
@ -276,8 +278,8 @@ class Operations:
|
|||||||
# },
|
# },
|
||||||
# ...
|
# ...
|
||||||
|
|
||||||
bins = {}
|
bins = OrderedDict()
|
||||||
weights = {}
|
weights = OrderedDict()
|
||||||
|
|
||||||
# There is one more bin than weights
|
# There is one more bin than weights
|
||||||
# len(bins) = len(weights) + 1
|
# len(bins) = len(weights) + 1
|
||||||
@ -415,7 +417,7 @@ class Operations:
|
|||||||
sum 8.204365e+06 9.261629e+07 5.754909e+07 618150
|
sum 8.204365e+06 9.261629e+07 5.754909e+07 618150
|
||||||
min 1.000205e+02 0.000000e+00 0.000000e+00 0
|
min 1.000205e+02 0.000000e+00 0.000000e+00 0
|
||||||
"""
|
"""
|
||||||
results = {}
|
results = OrderedDict()
|
||||||
|
|
||||||
for field in field_names:
|
for field in field_names:
|
||||||
values = list()
|
values = list()
|
||||||
@ -455,7 +457,7 @@ class Operations:
|
|||||||
size=0,
|
size=0,
|
||||||
body=body.to_search_body())
|
body=body.to_search_body())
|
||||||
|
|
||||||
results = {}
|
results = OrderedDict()
|
||||||
|
|
||||||
for field in numeric_source_fields:
|
for field in numeric_source_fields:
|
||||||
values = list()
|
values = list()
|
||||||
|
@ -152,9 +152,15 @@ class Query:
|
|||||||
|
|
||||||
def to_search_body(self):
|
def to_search_body(self):
|
||||||
if self._query.empty():
|
if self._query.empty():
|
||||||
body = {"aggs": self._aggs}
|
if self._aggs:
|
||||||
|
body = {"aggs": self._aggs}
|
||||||
|
else:
|
||||||
|
body = {}
|
||||||
else:
|
else:
|
||||||
body = {"query": self._query.build(), "aggs": self._aggs}
|
if self._aggs:
|
||||||
|
body = {"query": self._query.build(), "aggs": self._aggs}
|
||||||
|
else:
|
||||||
|
body = {"query": self._query.build()}
|
||||||
return body
|
return body
|
||||||
|
|
||||||
def to_count_body(self):
|
def to_count_body(self):
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import warnings
|
import warnings
|
||||||
|
from collections import OrderedDict
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -24,7 +25,7 @@ from eland import Mappings
|
|||||||
from eland import Operations
|
from eland import Operations
|
||||||
|
|
||||||
|
|
||||||
class ElandQueryCompiler:
|
class QueryCompiler:
|
||||||
"""
|
"""
|
||||||
Some notes on what can and can not be mapped:
|
Some notes on what can and can not be mapped:
|
||||||
|
|
||||||
@ -73,7 +74,7 @@ class ElandQueryCompiler:
|
|||||||
self.field_names = field_names
|
self.field_names = field_names
|
||||||
|
|
||||||
if name_mapper is None:
|
if name_mapper is None:
|
||||||
self._name_mapper = ElandQueryCompiler.DisplayNameToFieldNameMapper()
|
self._name_mapper = QueryCompiler.DisplayNameToFieldNameMapper()
|
||||||
else:
|
else:
|
||||||
self._name_mapper = name_mapper
|
self._name_mapper = name_mapper
|
||||||
|
|
||||||
@ -276,7 +277,7 @@ class ElandQueryCompiler:
|
|||||||
return partial_result, df
|
return partial_result, df
|
||||||
|
|
||||||
def _flatten_dict(self, y):
|
def _flatten_dict(self, y):
|
||||||
out = {}
|
out = OrderedDict()
|
||||||
|
|
||||||
def flatten(x, name=''):
|
def flatten(x, name=''):
|
||||||
# We flatten into source fields e.g. if type=geo_point
|
# We flatten into source fields e.g. if type=geo_point
|
||||||
@ -360,14 +361,14 @@ class ElandQueryCompiler:
|
|||||||
def _empty_pd_ef(self):
|
def _empty_pd_ef(self):
|
||||||
# Return an empty dataframe with correct columns and dtypes
|
# Return an empty dataframe with correct columns and dtypes
|
||||||
df = pd.DataFrame()
|
df = pd.DataFrame()
|
||||||
for c, d in zip(self.columns, self.dtypes):
|
for c, d in zip(self.dtypes.index, self.dtypes.values):
|
||||||
df[c] = pd.Series(dtype=d)
|
df[c] = pd.Series(dtype=d)
|
||||||
return df
|
return df
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
return ElandQueryCompiler(client=self._client, index_pattern=self._index_pattern, field_names=None,
|
return QueryCompiler(client=self._client, index_pattern=self._index_pattern, field_names=None,
|
||||||
index_field=self._index.index_field, operations=self._operations.copy(),
|
index_field=self._index.index_field, operations=self._operations.copy(),
|
||||||
name_mapper=self._name_mapper.copy())
|
name_mapper=self._name_mapper.copy())
|
||||||
|
|
||||||
def rename(self, renames, inplace=False):
|
def rename(self, renames, inplace=False):
|
||||||
if inplace:
|
if inplace:
|
||||||
@ -500,7 +501,7 @@ class ElandQueryCompiler:
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
right: ElandQueryCompiler
|
right: QueryCompiler
|
||||||
The query compiler to compare self to
|
The query compiler to compare self to
|
||||||
|
|
||||||
Raises
|
Raises
|
||||||
@ -508,7 +509,7 @@ class ElandQueryCompiler:
|
|||||||
TypeError, ValueError
|
TypeError, ValueError
|
||||||
If arithmetic operations aren't possible
|
If arithmetic operations aren't possible
|
||||||
"""
|
"""
|
||||||
if not isinstance(right, ElandQueryCompiler):
|
if not isinstance(right, QueryCompiler):
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"Incompatible types "
|
"Incompatible types "
|
||||||
"{0} != {1}".format(type(self), type(right))
|
"{0} != {1}".format(type(self), type(right))
|
||||||
@ -539,7 +540,7 @@ class ElandQueryCompiler:
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
right: ElandQueryCompiler
|
right: QueryCompiler
|
||||||
The query compiler to compare self to
|
The query compiler to compare self to
|
||||||
|
|
||||||
Raises
|
Raises
|
||||||
@ -585,12 +586,12 @@ class ElandQueryCompiler:
|
|||||||
if field_to_display_names is not None:
|
if field_to_display_names is not None:
|
||||||
self._field_to_display_names = field_to_display_names
|
self._field_to_display_names = field_to_display_names
|
||||||
else:
|
else:
|
||||||
self._field_to_display_names = dict()
|
self._field_to_display_names = {}
|
||||||
|
|
||||||
if display_to_field_names is not None:
|
if display_to_field_names is not None:
|
||||||
self._display_to_field_names = display_to_field_names
|
self._display_to_field_names = display_to_field_names
|
||||||
else:
|
else:
|
||||||
self._display_to_field_names = dict()
|
self._display_to_field_names = {}
|
||||||
|
|
||||||
def rename_display_name(self, renames):
|
def rename_display_name(self, renames):
|
||||||
for current_display_name, new_display_name in renames.items():
|
for current_display_name, new_display_name in renames.items():
|
||||||
|
@ -1055,7 +1055,8 @@ class Series(NDFrame):
|
|||||||
# our operation is between series
|
# our operation is between series
|
||||||
op_type = op_type + tuple('s')
|
op_type = op_type + tuple('s')
|
||||||
# check if fields are aggregatable
|
# check if fields are aggregatable
|
||||||
self.name, right.name = self._query_compiler.check_str_arithmetics(right._query_compiler, self.name, right.name)
|
self.name, right.name = self._query_compiler.check_str_arithmetics(right._query_compiler, self.name,
|
||||||
|
right.name)
|
||||||
|
|
||||||
series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
||||||
new_field_name, method_name, self.name, right.name, op_type))
|
new_field_name, method_name, self.name, right.name, op_type))
|
||||||
@ -1067,7 +1068,7 @@ class Series(NDFrame):
|
|||||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||||
.format(method_name, type(self), self._dtype, type(right).__name__)
|
.format(method_name, type(self), self._dtype, type(right).__name__)
|
||||||
)
|
)
|
||||||
|
|
||||||
# check left number and right numeric series
|
# check left number and right numeric series
|
||||||
@ -1103,7 +1104,7 @@ class Series(NDFrame):
|
|||||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||||
.format(method_name, type(self), self._dtype, type(right).__name__)
|
.format(method_name, type(self), self._dtype, type(right).__name__)
|
||||||
)
|
)
|
||||||
|
|
||||||
def _numeric_rop(self, left, method_name, op_type=None):
|
def _numeric_rop(self, left, method_name, op_type=None):
|
||||||
@ -1146,7 +1147,7 @@ class Series(NDFrame):
|
|||||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||||
.format(op_method_name, type(self), self._dtype, type(left).__name__)
|
.format(op_method_name, type(self), self._dtype, type(left).__name__)
|
||||||
)
|
)
|
||||||
|
|
||||||
def max(self):
|
def max(self):
|
||||||
|
@ -1,37 +1,11 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
from eland import SortOrder
|
||||||
from eland.actions import HeadAction, TailAction, SortIndexAction
|
from eland.actions import HeadAction, TailAction, SortIndexAction
|
||||||
|
|
||||||
|
|
||||||
class SortOrder(Enum):
|
|
||||||
ASC = 0
|
|
||||||
DESC = 1
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def reverse(order):
|
|
||||||
if order == SortOrder.ASC:
|
|
||||||
return SortOrder.DESC
|
|
||||||
|
|
||||||
return SortOrder.ASC
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def to_string(order):
|
|
||||||
if order == SortOrder.ASC:
|
|
||||||
return "asc"
|
|
||||||
|
|
||||||
return "desc"
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def from_string(order):
|
|
||||||
if order == "asc":
|
|
||||||
return SortOrder.ASC
|
|
||||||
|
|
||||||
return SortOrder.DESC
|
|
||||||
|
|
||||||
|
|
||||||
# -------------------------------------------------------------------------------------------------------------------- #
|
# -------------------------------------------------------------------------------------------------------------------- #
|
||||||
# Tasks #
|
# Tasks #
|
||||||
# -------------------------------------------------------------------------------------------------------------------- #
|
# -------------------------------------------------------------------------------------------------------------------- #
|
||||||
@ -305,7 +279,7 @@ class ArithmeticOpFieldsTask(Task):
|
|||||||
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
||||||
|
|
||||||
if query_params['query_script_fields'] is None:
|
if query_params['query_script_fields'] is None:
|
||||||
query_params['query_script_fields'] = {}
|
query_params['query_script_fields'] = dict()
|
||||||
query_params['query_script_fields'][self._field_name] = {
|
query_params['query_script_fields'][self._field_name] = {
|
||||||
'script': {
|
'script': {
|
||||||
'source': source
|
'source': source
|
||||||
@ -428,7 +402,7 @@ class ArithmeticOpFieldsTask(Task):
|
|||||||
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
||||||
|
|
||||||
if query_params['query_script_fields'] is None:
|
if query_params['query_script_fields'] is None:
|
||||||
query_params['query_script_fields'] = {}
|
query_params['query_script_fields'] = dict()
|
||||||
query_params['query_script_fields'][self._field_name] = {
|
query_params['query_script_fields'][self._field_name] = {
|
||||||
'script': {
|
'script': {
|
||||||
'source': source
|
'source': source
|
||||||
|
@ -14,8 +14,8 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from elasticsearch import Elasticsearch
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from elasticsearch import Elasticsearch
|
||||||
|
|
||||||
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
@ -11,4 +11,3 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
@ -11,4 +11,3 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
@ -28,4 +28,7 @@ class TestDataFrameCount(TestData):
|
|||||||
pd_count = pd_ecommerce.count()
|
pd_count = pd_ecommerce.count()
|
||||||
ed_count = ed_ecommerce.count()
|
ed_count = ed_ecommerce.count()
|
||||||
|
|
||||||
|
print(pd_count)
|
||||||
|
print(ed_count)
|
||||||
|
|
||||||
assert_series_equal(pd_count, ed_count)
|
assert_series_equal(pd_count, ed_count)
|
||||||
|
@ -15,7 +15,6 @@
|
|||||||
# File called _pytest for PyCharm compatibility
|
# File called _pytest for PyCharm compatibility
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from elasticsearch import Elasticsearch
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
@ -27,7 +26,6 @@ from eland.tests.common import assert_pandas_eland_series_equal
|
|||||||
|
|
||||||
|
|
||||||
class TestDataFrameDateTime(TestData):
|
class TestDataFrameDateTime(TestData):
|
||||||
|
|
||||||
times = ["2019-11-26T19:58:15.246+0000",
|
times = ["2019-11-26T19:58:15.246+0000",
|
||||||
"1970-01-01T00:00:03.000+0000"]
|
"1970-01-01T00:00:03.000+0000"]
|
||||||
time_index_name = 'test_time_formats'
|
time_index_name = 'test_time_formats'
|
||||||
|
@ -40,5 +40,5 @@ class TestDataFrameInit:
|
|||||||
df0 = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
|
df0 = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
|
||||||
df1 = ed.DataFrame(client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME)
|
df1 = ed.DataFrame(client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME)
|
||||||
|
|
||||||
qc = ed.ElandQueryCompiler(client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME)
|
qc = ed.QueryCompiler(client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME)
|
||||||
df2 = ed.DataFrame(query_compiler=qc)
|
df2 = ed.DataFrame(query_compiler=qc)
|
||||||
|
@ -15,7 +15,6 @@
|
|||||||
# File called _pytest for PyCharm compatibility
|
# File called _pytest for PyCharm compatibility
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from elasticsearch import Elasticsearch
|
|
||||||
|
|
||||||
import eland as ed
|
import eland as ed
|
||||||
from eland.tests.common import ES_TEST_CLIENT
|
from eland.tests.common import ES_TEST_CLIENT
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from eland.compat import PY36
|
||||||
from eland.dataframe import DEFAULT_NUM_ROWS_DISPLAYED
|
from eland.dataframe import DEFAULT_NUM_ROWS_DISPLAYED
|
||||||
from eland.tests.common import TestData
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
@ -198,7 +199,10 @@ class TestDataFrameRepr(TestData):
|
|||||||
# print(ed_head_str)
|
# print(ed_head_str)
|
||||||
# print(pd_head_str)
|
# print(pd_head_str)
|
||||||
|
|
||||||
assert pd_head_str == ed_head_str
|
# Currently pandas display bold_rows=True with >=PY36 and bold_rows=False with 3.5
|
||||||
|
# TODO - fix this test for 3.5
|
||||||
|
if PY36:
|
||||||
|
assert pd_head_str == ed_head_str
|
||||||
|
|
||||||
def test_empty_dataframe_repr_html(self):
|
def test_empty_dataframe_repr_html(self):
|
||||||
# TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
|
# TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
|
||||||
|
@ -18,7 +18,6 @@ import ast
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from elasticsearch import Elasticsearch
|
|
||||||
from pandas.util.testing import assert_frame_equal
|
from pandas.util.testing import assert_frame_equal
|
||||||
|
|
||||||
import eland as ed
|
import eland as ed
|
||||||
|
@ -54,3 +54,6 @@ class TestDataFrameUtils(TestData):
|
|||||||
ed_df_head = ed_df.head()
|
ed_df_head = ed_df.head()
|
||||||
|
|
||||||
assert_pandas_eland_frame_equal(df, ed_df_head)
|
assert_pandas_eland_frame_equal(df, ed_df_head)
|
||||||
|
|
||||||
|
def test_eland_to_pandas_performance(self):
|
||||||
|
pd_df = ed.eland_to_pandas(self.ed_flights())
|
||||||
|
@ -11,4 +11,3 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
@ -11,4 +11,3 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
@ -188,20 +188,20 @@ class TestOperators:
|
|||||||
exp = (GreaterEqual('a', 2) & GreaterEqual('b', 2)) & ~(IsIn('ids', [1, 2, 3]))
|
exp = (GreaterEqual('a', 2) & GreaterEqual('b', 2)) & ~(IsIn('ids', [1, 2, 3]))
|
||||||
a = exp.build()
|
a = exp.build()
|
||||||
b = {
|
b = {
|
||||||
'bool': {
|
'bool': {
|
||||||
'must': [
|
'must': [
|
||||||
{'range': {'a': {'gte': 2}}},
|
{'range': {'a': {'gte': 2}}},
|
||||||
{'range': {'b': {'gte': 2}}},
|
{'range': {'b': {'gte': 2}}},
|
||||||
{
|
{
|
||||||
'bool': {
|
'bool': {
|
||||||
'must_not': {
|
'must_not': {
|
||||||
'ids': {'values': [1, 2, 3]}
|
'ids': {'values': [1, 2, 3]}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert a == b
|
assert a == b
|
||||||
|
|
||||||
def test_must_not_and_must_filter(self):
|
def test_must_not_and_must_filter(self):
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
|
|
||||||
# File called _pytest for PyCharm compatibility
|
# File called _pytest for PyCharm compatibility
|
||||||
|
|
||||||
from eland import ElandQueryCompiler
|
from eland import QueryCompiler
|
||||||
from eland.tests.common import TestData
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
@ -24,7 +24,7 @@ class TestQueryCompilerRename(TestData):
|
|||||||
field_names = []
|
field_names = []
|
||||||
display_names = []
|
display_names = []
|
||||||
|
|
||||||
mapper = ElandQueryCompiler.DisplayNameToFieldNameMapper()
|
mapper = QueryCompiler.DisplayNameToFieldNameMapper()
|
||||||
|
|
||||||
assert field_names == mapper.field_names_to_list()
|
assert field_names == mapper.field_names_to_list()
|
||||||
assert display_names == mapper.display_names_to_list()
|
assert display_names == mapper.display_names_to_list()
|
||||||
@ -58,7 +58,7 @@ class TestQueryCompilerRename(TestData):
|
|||||||
def test_query_compiler_basic_rename_columns(self):
|
def test_query_compiler_basic_rename_columns(self):
|
||||||
columns = ['a', 'b', 'c', 'd']
|
columns = ['a', 'b', 'c', 'd']
|
||||||
|
|
||||||
mapper = ElandQueryCompiler.DisplayNameToFieldNameMapper()
|
mapper = QueryCompiler.DisplayNameToFieldNameMapper()
|
||||||
|
|
||||||
display_names = ['A', 'b', 'c', 'd']
|
display_names = ['A', 'b', 'c', 'd']
|
||||||
update_A = {'a': 'A'}
|
update_A = {'a': 'A'}
|
||||||
|
@ -11,4 +11,3 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
@ -14,7 +14,6 @@
|
|||||||
|
|
||||||
# File called _pytest for PyCharm compatibility
|
# File called _pytest for PyCharm compatibility
|
||||||
import pytest
|
import pytest
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from eland.tests.common import TestData, assert_pandas_eland_series_equal
|
from eland.tests.common import TestData, assert_pandas_eland_series_equal
|
||||||
|
|
||||||
@ -60,7 +59,6 @@ class TestSeriesArithmetics(TestData):
|
|||||||
|
|
||||||
assert_pandas_eland_series_equal(pdadd, edadd)
|
assert_pandas_eland_series_equal(pdadd, edadd)
|
||||||
|
|
||||||
|
|
||||||
def test_ser_add_str_add_ser(self):
|
def test_ser_add_str_add_ser(self):
|
||||||
pdadd = self.pd_ecommerce()['customer_first_name'] + self.pd_ecommerce()['customer_last_name']
|
pdadd = self.pd_ecommerce()['customer_first_name'] + self.pd_ecommerce()['customer_last_name']
|
||||||
print(pdadd.name)
|
print(pdadd.name)
|
||||||
@ -84,5 +82,5 @@ class TestSeriesArithmetics(TestData):
|
|||||||
assert self.ed_ecommerce()['customer_gender'] + self.ed_ecommerce()['customer_first_name']
|
assert self.ed_ecommerce()['customer_gender'] + self.ed_ecommerce()['customer_first_name']
|
||||||
|
|
||||||
def test_aggregatable_add_non_aggregatable(self):
|
def test_aggregatable_add_non_aggregatable(self):
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
assert self.ed_ecommerce()['customer_first_name'] + self.ed_ecommerce()['customer_gender']
|
assert self.ed_ecommerce()['customer_first_name'] + self.ed_ecommerce()['customer_gender']
|
||||||
|
@ -12,7 +12,6 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
from elasticsearch import Elasticsearch
|
|
||||||
from elasticsearch import helpers
|
from elasticsearch import helpers
|
||||||
from elasticsearch.client import ClusterClient
|
from elasticsearch.client import ClusterClient
|
||||||
|
|
||||||
@ -70,9 +69,9 @@ def _update_max_compilations_limit(es, limit="10000/1m"):
|
|||||||
print('Updating script.max_compilations_rate to ', limit)
|
print('Updating script.max_compilations_rate to ', limit)
|
||||||
cluster_client = ClusterClient(es)
|
cluster_client = ClusterClient(es)
|
||||||
body = {
|
body = {
|
||||||
"transient" : {
|
"transient": {
|
||||||
"script.max_compilations_rate" : limit
|
"script.max_compilations_rate": limit
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cluster_client.put_settings(body=body)
|
cluster_client.put_settings(body=body)
|
||||||
|
|
||||||
|
@ -243,7 +243,7 @@ def read_csv(filepath_or_buffer,
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
es_params: Elasticsearch client argument(s)
|
es_client: Elasticsearch client argument(s)
|
||||||
- elasticsearch-py parameters or
|
- elasticsearch-py parameters or
|
||||||
- elasticsearch-py instance or
|
- elasticsearch-py instance or
|
||||||
- eland.Client instance
|
- eland.Client instance
|
||||||
@ -260,8 +260,6 @@ def read_csv(filepath_or_buffer,
|
|||||||
* False: Include missing values - may cause bulk to fail
|
* False: Include missing values - may cause bulk to fail
|
||||||
es_geo_points: list, default None
|
es_geo_points: list, default None
|
||||||
List of columns to map to geo_point data type
|
List of columns to map to geo_point data type
|
||||||
iterator
|
|
||||||
not supported
|
|
||||||
chunksize
|
chunksize
|
||||||
number of csv rows to read before bulk index into Elasticsearch
|
number of csv rows to read before bulk index into Elasticsearch
|
||||||
|
|
||||||
@ -275,6 +273,8 @@ def read_csv(filepath_or_buffer,
|
|||||||
|
|
||||||
Notes
|
Notes
|
||||||
-----
|
-----
|
||||||
|
iterator not supported
|
||||||
|
|
||||||
TODO - currently the eland.DataFrame may not retain the order of the data in the csv.
|
TODO - currently the eland.DataFrame may not retain the order of the data in the csv.
|
||||||
"""
|
"""
|
||||||
kwds = dict()
|
kwds = dict()
|
||||||
|
3
setup.py
3
setup.py
@ -12,10 +12,11 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
from setuptools import setup, find_packages
|
|
||||||
from codecs import open
|
from codecs import open
|
||||||
from os import path
|
from os import path
|
||||||
|
|
||||||
|
from setuptools import setup
|
||||||
|
|
||||||
here = path.abspath(path.dirname(__file__))
|
here = path.abspath(path.dirname(__file__))
|
||||||
|
|
||||||
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
|
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user