Updating docs + added supported methods doc

This commit is contained in:
Stephen Dodson 2019-11-19 10:42:23 +00:00
parent fb2a1fae7b
commit 9b4fe40305
7 changed files with 345 additions and 265 deletions

View File

@ -1,3 +1,6 @@
.. _implementation/dataframe_supported:
===============================
pandas.DataFrame supported APIs
===============================
@ -8,20 +11,18 @@ also welcome!
The following table is structured as follows: The first column contains the method name.
The second column is a flag for whether or not there is an implementation in Modin for
the method in the left column. ``Y`` stands for yes, ``N`` stands for no, ``P`` stands
for partial (meaning some parameters may not be supported yet), and ``D`` stands for
default to pandas.
the method in the left column. ``Y`` stands for yes, ``N`` stands for no.
The file at https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv represents a prioritised list.
+-------------------------+-------+------------------------------------------------+
| Method | Count | Notes |
+-------------------------+-------+------------------------------------------------+
| pd.read_csv | 1422 | Not implemented ed.read_es implemented instead |
| pd.read_csv | 1422 | y |
+-------------------------+-------+------------------------------------------------+
| pd.DataFrame | 886 | y |
+-------------------------+-------+------------------------------------------------+
| df.append | 792 | Not implemented |
| df.append | 792 | n |
+-------------------------+-------+------------------------------------------------+
| df.mean | 783 | y |
+-------------------------+-------+------------------------------------------------+
@ -31,407 +32,407 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+-------------------------+-------+------------------------------------------------+
| df.sum | 755 | y |
+-------------------------+-------+------------------------------------------------+
| df.to_csv | 693 | |
| df.to_csv | 693 | y |
+-------------------------+-------+------------------------------------------------+
| df.get | 669 | |
| df.get | 669 | y |
+-------------------------+-------+------------------------------------------------+
| df.mode | 653 | |
| df.mode | 653 | n |
+-------------------------+-------+------------------------------------------------+
| df.astype | 649 | |
| df.astype | 649 | n |
+-------------------------+-------+------------------------------------------------+
| df.sub | 637 | |
| df.sub | 637 | n |
+-------------------------+-------+------------------------------------------------+
| pd.concat | 582 | |
| pd.concat | 582 | n |
+-------------------------+-------+------------------------------------------------+
| df.apply | 577 | |
| df.apply | 577 | n |
+-------------------------+-------+------------------------------------------------+
| df.groupby | 557 | |
| df.groupby | 557 | n |
+-------------------------+-------+------------------------------------------------+
| df.join | 544 | |
| df.join | 544 | n |
+-------------------------+-------+------------------------------------------------+
| df.fillna | 543 | |
| df.fillna | 543 | n |
+-------------------------+-------+------------------------------------------------+
| df.max | 508 | |
| df.max | 508 | y |
+-------------------------+-------+------------------------------------------------+
| df.reset_index | 434 | |
| df.reset_index | 434 | n |
+-------------------------+-------+------------------------------------------------+
| pd.unique | 433 | |
| pd.unique | 433 | n |
+-------------------------+-------+------------------------------------------------+
| df.le | 405 | |
| df.le | 405 | n |
+-------------------------+-------+------------------------------------------------+
| df.count | 399 | |
| df.count | 399 | y |
+-------------------------+-------+------------------------------------------------+
| pd.value_counts | 397 | |
| pd.value_counts | 397 | n |
+-------------------------+-------+------------------------------------------------+
| df.sort_values | 390 | |
| df.sort_values | 390 | n |
+-------------------------+-------+------------------------------------------------+
| df.transform | 387 | |
| df.transform | 387 | n |
+-------------------------+-------+------------------------------------------------+
| df.merge | 376 | |
| df.merge | 376 | n |
+-------------------------+-------+------------------------------------------------+
| df.add | 346 | |
| df.add | 346 | n |
+-------------------------+-------+------------------------------------------------+
| df.isnull | 338 | |
| df.isnull | 338 | n |
+-------------------------+-------+------------------------------------------------+
| df.min | 321 | |
| df.min | 321 | y |
+-------------------------+-------+------------------------------------------------+
| df.copy | 314 | |
| df.copy | 314 | n |
+-------------------------+-------+------------------------------------------------+
| df.replace | 300 | |
| df.replace | 300 | n |
+-------------------------+-------+------------------------------------------------+
| df.std | 261 | |
| df.std | 261 | n |
+-------------------------+-------+------------------------------------------------+
| df.hist | 246 | |
| df.hist | 246 | y |
+-------------------------+-------+------------------------------------------------+
| df.filter | 234 | |
| df.filter | 234 | n |
+-------------------------+-------+------------------------------------------------+
| df.describe | 220 | |
| df.describe | 220 | y |
+-------------------------+-------+------------------------------------------------+
| df.ne | 218 | |
| df.ne | 218 | n |
+-------------------------+-------+------------------------------------------------+
| df.corr | 217 | |
| df.corr | 217 | n |
+-------------------------+-------+------------------------------------------------+
| df.median | 217 | |
| df.median | 217 | n |
+-------------------------+-------+------------------------------------------------+
| df.items | 212 | |
| df.items | 212 | n |
+-------------------------+-------+------------------------------------------------+
| pd.to_datetime | 204 | |
| pd.to_datetime | 204 | n |
+-------------------------+-------+------------------------------------------------+
| df.isin | 203 | |
| df.isin | 203 | n |
+-------------------------+-------+------------------------------------------------+
| df.dropna | 195 | |
| df.dropna | 195 | n |
+-------------------------+-------+------------------------------------------------+
| pd.get_dummies | 190 | |
| pd.get_dummies | 190 | n |
+-------------------------+-------+------------------------------------------------+
| df.rename | 185 | |
| df.rename | 185 | n |
+-------------------------+-------+------------------------------------------------+
| df.info | 180 | |
| df.info | 180 | y |
+-------------------------+-------+------------------------------------------------+
| df.set_index | 166 | |
| df.set_index | 166 | n |
+-------------------------+-------+------------------------------------------------+
| df.keys | 159 | |
| df.keys | 159 | y |
+-------------------------+-------+------------------------------------------------+
| df.sample | 155 | |
| df.sample | 155 | n |
+-------------------------+-------+------------------------------------------------+
| df.agg | 140 | |
| df.agg | 140 | y |
+-------------------------+-------+------------------------------------------------+
| df.where | 138 | |
| df.where | 138 | n |
+-------------------------+-------+------------------------------------------------+
| df.boxplot | 134 | |
| df.boxplot | 134 | n |
+-------------------------+-------+------------------------------------------------+
| df.clip | 116 | |
| df.clip | 116 | n |
+-------------------------+-------+------------------------------------------------+
| df.round | 116 | |
| df.round | 116 | n |
+-------------------------+-------+------------------------------------------------+
| df.abs | 101 | |
| df.abs | 101 | n |
+-------------------------+-------+------------------------------------------------+
| df.stack | 97 | |
| df.stack | 97 | n |
+-------------------------+-------+------------------------------------------------+
| df.tail | 94 | |
| df.tail | 94 | y |
+-------------------------+-------+------------------------------------------------+
| df.update | 92 | |
| df.update | 92 | n |
+-------------------------+-------+------------------------------------------------+
| df.iterrows | 90 | |
| df.iterrows | 90 | n |
+-------------------------+-------+------------------------------------------------+
| df.transpose | 87 | |
| df.transpose | 87 | n |
+-------------------------+-------+------------------------------------------------+
| df.any | 85 | |
| df.any | 85 | n |
+-------------------------+-------+------------------------------------------------+
| df.pipe | 80 | |
| df.pipe | 80 | n |
+-------------------------+-------+------------------------------------------------+
| pd.eval | 73 | |
| pd.eval | 73 | n |
+-------------------------+-------+------------------------------------------------+
| df.eval | 73 | |
| df.eval | 73 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_json | 72 | |
| pd.read_json | 72 | n |
+-------------------------+-------+------------------------------------------------+
| df.nunique | 70 | |
| df.nunique | 70 | y |
+-------------------------+-------+------------------------------------------------+
| df.pivot | 70 | |
| df.pivot | 70 | n |
+-------------------------+-------+------------------------------------------------+
| df.select | 68 | |
| df.select | 68 | n |
+-------------------------+-------+------------------------------------------------+
| df.as_matrix | 67 | |
| df.as_matrix | 67 | n |
+-------------------------+-------+------------------------------------------------+
| df.notnull | 66 | |
| df.notnull | 66 | n |
+-------------------------+-------+------------------------------------------------+
| df.cumsum | 66 | |
| df.cumsum | 66 | n |
+-------------------------+-------+------------------------------------------------+
| df.prod | 64 | |
| df.prod | 64 | n |
+-------------------------+-------+------------------------------------------------+
| df.unstack | 64 | |
| df.unstack | 64 | n |
+-------------------------+-------+------------------------------------------------+
| df.drop_duplicates | 63 | |
| df.drop_duplicates | 63 | n |
+-------------------------+-------+------------------------------------------------+
| df.div | 63 | |
| df.div | 63 | n |
+-------------------------+-------+------------------------------------------------+
| pd.crosstab | 59 | |
| pd.crosstab | 59 | n |
+-------------------------+-------+------------------------------------------------+
| df.select_dtypes | 57 | |
| df.select_dtypes | 57 | y |
+-------------------------+-------+------------------------------------------------+
| df.pow | 56 | |
| df.pow | 56 | n |
+-------------------------+-------+------------------------------------------------+
| df.sort_index | 56 | |
| df.sort_index | 56 | n |
+-------------------------+-------+------------------------------------------------+
| df.product | 52 | |
| df.product | 52 | n |
+-------------------------+-------+------------------------------------------------+
| df.isna | 51 | |
| df.isna | 51 | n |
+-------------------------+-------+------------------------------------------------+
| df.dot | 46 | |
| df.dot | 46 | n |
+-------------------------+-------+------------------------------------------------+
| pd.cut | 45 | |
| pd.cut | 45 | n |
+-------------------------+-------+------------------------------------------------+
| df.bool | 44 | |
| df.bool | 44 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_dict | 44 | |
| df.to_dict | 44 | n |
+-------------------------+-------+------------------------------------------------+
| df.diff | 44 | |
| df.diff | 44 | n |
+-------------------------+-------+------------------------------------------------+
| df.insert | 44 | |
| df.insert | 44 | n |
+-------------------------+-------+------------------------------------------------+
| df.pop | 44 | |
| df.pop | 44 | n |
+-------------------------+-------+------------------------------------------------+
| df.query | 43 | |
| df.query | 43 | y |
+-------------------------+-------+------------------------------------------------+
| df.var | 43 | |
| df.var | 43 | n |
+-------------------------+-------+------------------------------------------------+
| df.__init__ | 41 | |
| df.__init__ | 41 | y |
+-------------------------+-------+------------------------------------------------+
| pd.to_numeric | 39 | |
| pd.to_numeric | 39 | n |
+-------------------------+-------+------------------------------------------------+
| df.squeeze | 39 | |
| df.squeeze | 39 | n |
+-------------------------+-------+------------------------------------------------+
| df.ge | 37 | |
| df.ge | 37 | n |
+-------------------------+-------+------------------------------------------------+
| df.quantile | 37 | |
| df.quantile | 37 | n |
+-------------------------+-------+------------------------------------------------+
| df.reindex | 37 | |
| df.reindex | 37 | n |
+-------------------------+-------+------------------------------------------------+
| df.rolling | 35 | |
| df.rolling | 35 | n |
+-------------------------+-------+------------------------------------------------+
| pd.factorize | 32 | |
| pd.factorize | 32 | n |
+-------------------------+-------+------------------------------------------------+
| pd.melt | 31 | |
| pd.melt | 31 | n |
+-------------------------+-------+------------------------------------------------+
| df.melt | 31 | |
| df.melt | 31 | n |
+-------------------------+-------+------------------------------------------------+
| df.rank | 31 | |
| df.rank | 31 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_table | 30 | |
| pd.read_table | 30 | n |
+-------------------------+-------+------------------------------------------------+
| pd.pivot_table | 30 | |
| pd.pivot_table | 30 | n |
+-------------------------+-------+------------------------------------------------+
| df.idxmax | 30 | |
| df.idxmax | 30 | n |
+-------------------------+-------+------------------------------------------------+
| pd.test | 29 | |
| pd.test | 29 | n |
+-------------------------+-------+------------------------------------------------+
| df.iteritems | 29 | |
| df.iteritems | 29 | n |
+-------------------------+-------+------------------------------------------------+
| df.shift | 28 | |
| df.shift | 28 | n |
+-------------------------+-------+------------------------------------------------+
| df.mul | 28 | |
| df.mul | 28 | n |
+-------------------------+-------+------------------------------------------------+
| pd.qcut | 25 | |
| pd.qcut | 25 | n |
+-------------------------+-------+------------------------------------------------+
| df.set_value | 25 | |
| df.set_value | 25 | n |
+-------------------------+-------+------------------------------------------------+
| df.all | 24 | |
| df.all | 24 | n |
+-------------------------+-------+------------------------------------------------+
| df.skew | 24 | |
| df.skew | 24 | n |
+-------------------------+-------+------------------------------------------------+
| df.aggregate | 23 | |
| df.aggregate | 23 | y |
+-------------------------+-------+------------------------------------------------+
| pd.match | 22 | |
| pd.match | 22 | n |
+-------------------------+-------+------------------------------------------------+
| df.nlargest | 22 | |
| df.nlargest | 22 | n |
+-------------------------+-------+------------------------------------------------+
| df.multiply | 21 | |
| df.multiply | 21 | n |
+-------------------------+-------+------------------------------------------------+
| df.set_axis | 19 | |
| df.set_axis | 19 | n |
+-------------------------+-------+------------------------------------------------+
| df.eq | 18 | |
| df.eq | 18 | n |
+-------------------------+-------+------------------------------------------------+
| df.resample | 18 | |
| df.resample | 18 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_sql | 17 | |
| pd.read_sql | 17 | n |
+-------------------------+-------+------------------------------------------------+
| df.duplicated | 16 | |
| df.duplicated | 16 | n |
+-------------------------+-------+------------------------------------------------+
| pd.date_range | 16 | |
| pd.date_range | 16 | n |
+-------------------------+-------+------------------------------------------------+
| df.interpolate | 15 | |
| df.interpolate | 15 | n |
+-------------------------+-------+------------------------------------------------+
| df.memory_usage | 15 | |
| df.memory_usage | 15 | n |
+-------------------------+-------+------------------------------------------------+
| df.divide | 14 | |
| df.divide | 14 | n |
+-------------------------+-------+------------------------------------------------+
| df.cov | 13 | |
| df.cov | 13 | n |
+-------------------------+-------+------------------------------------------------+
| df.assign | 12 | |
| df.assign | 12 | n |
+-------------------------+-------+------------------------------------------------+
| df.subtract | 12 | |
| df.subtract | 12 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_pickle | 11 | |
| pd.read_pickle | 11 | n |
+-------------------------+-------+------------------------------------------------+
| df.applymap | 11 | |
| df.applymap | 11 | n |
+-------------------------+-------+------------------------------------------------+
| df.first | 11 | |
| df.first | 11 | n |
+-------------------------+-------+------------------------------------------------+
| df.kurt | 10 | |
| df.kurt | 10 | n |
+-------------------------+-------+------------------------------------------------+
| df.truncate | 10 | |
| df.truncate | 10 | n |
+-------------------------+-------+------------------------------------------------+
| df.get_value | 9 | |
| df.get_value | 9 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_hdf | 9 | |
| pd.read_hdf | 9 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_html | 9 | |
| df.to_html | 9 | y |
+-------------------------+-------+------------------------------------------------+
| pd.read_sql_query | 9 | |
| pd.read_sql_query | 9 | n |
+-------------------------+-------+------------------------------------------------+
| df.take | 8 | |
| df.take | 8 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_pickle | 7 | |
| df.to_pickle | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.itertuples | 7 | |
| df.itertuples | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_string | 7 | |
| df.to_string | 7 | y |
+-------------------------+-------+------------------------------------------------+
| df.last | 7 | |
| df.last | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.sem | 7 | |
| df.sem | 7 | n |
+-------------------------+-------+------------------------------------------------+
| pd.to_pickle | 7 | |
| pd.to_pickle | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_json | 7 | |
| df.to_json | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.idxmin | 7 | |
| df.idxmin | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.xs | 6 | |
| df.xs | 6 | n |
+-------------------------+-------+------------------------------------------------+
| df.combine | 6 | |
| df.combine | 6 | n |
+-------------------------+-------+------------------------------------------------+
| pd.rolling_mean | 6 | |
| pd.rolling_mean | 6 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_period | 6 | |
| df.to_period | 6 | n |
+-------------------------+-------+------------------------------------------------+
| df.convert_objects | 5 | |
| df.convert_objects | 5 | n |
+-------------------------+-------+------------------------------------------------+
| df.mask | 4 | |
| df.mask | 4 | n |
+-------------------------+-------+------------------------------------------------+
| df.pct_change | 4 | |
| df.pct_change | 4 | n |
+-------------------------+-------+------------------------------------------------+
| df.add_prefix | 4 | |
| df.add_prefix | 4 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_excel | 4 | |
| pd.read_excel | 4 | n |
+-------------------------+-------+------------------------------------------------+
| pd.rolling_std | 3 | |
| pd.rolling_std | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_records | 3 | |
| df.to_records | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.corrwith | 3 | |
| df.corrwith | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.swapaxes | 3 | |
| df.swapaxes | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.__iter__ | 3 | |
| df.__iter__ | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_sql | 3 | |
| df.to_sql | 3 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_feather | 3 | |
| pd.read_feather | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_feather | 3 | |
| df.to_feather | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.__len__ | 3 | |
| df.__len__ | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.kurtosis | 3 | |
| df.kurtosis | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.mod | 2 | |
| df.mod | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_sparse | 2 | |
| df.to_sparse | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.get_values | 2 | |
| df.get_values | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.__eq__ | 2 | |
| df.__eq__ | 2 | n |
+-------------------------+-------+------------------------------------------------+
| pd.bdate_range | 2 | |
| pd.bdate_range | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.get_dtype_counts | 2 | |
| df.get_dtype_counts | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.combine_first | 2 | |
| df.combine_first | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df._get_numeric_data | 2 | |
| df._get_numeric_data | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.nsmallest | 2 | |
| df.nsmallest | 2 | n |
+-------------------------+-------+------------------------------------------------+
| pd.scatter_matrix | 2 | |
| pd.scatter_matrix | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.rename_axis | 2 | |
| df.rename_axis | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.__setstate__ | 2 | |
| df.__setstate__ | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.cumprod | 2 | |
| df.cumprod | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.__getstate__ | 2 | |
| df.__getstate__ | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.equals | 2 | |
| df.equals | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.__getitem__ | 2 | |
| df.__getitem__ | 2 | y |
+-------------------------+-------+------------------------------------------------+
| df.clip_upper | 2 | |
| df.clip_upper | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.floordiv | 2 | |
| df.floordiv | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_excel | 2 | |
| df.to_excel | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.reindex_axis | 1 | |
| df.reindex_axis | 1 | n |
+-------------------------+-------+------------------------------------------------+
| pd.to_timedelta | 1 | |
| pd.to_timedelta | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.ewm | 1 | |
| df.ewm | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.tz_localize | 1 | |
| df.tz_localize | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.tz_convert | 1 | |
| df.tz_convert | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_hdf | 1 | |
| df.to_hdf | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.lookup | 1 | |
| df.lookup | 1 | n |
+-------------------------+-------+------------------------------------------------+
| pd.merge_ordered | 1 | |
| pd.merge_ordered | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.swaplevel | 1 | |
| df.swaplevel | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.first_valid_index | 1 | |
| df.first_valid_index | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.lt | 1 | |
| df.lt | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.add_suffix | 1 | |
| df.add_suffix | 1 | n |
+-------------------------+-------+------------------------------------------------+
| pd.rolling_median | 1 | |
| pd.rolling_median | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_dense | 1 | |
| df.to_dense | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.mad | 1 | |
| df.mad | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.align | 1 | |
| df.align | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.__copy__ | 1 | |
| df.__copy__ | 1 | n |
+-------------------------+-------+------------------------------------------------+
| pd.set_eng_float_format | 1 | |
| pd.set_eng_float_format | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.add_suffix | 1 | |
| df.add_suffix | 1 | n |
+-------------------------+-------+------------------------------------------------+
| pd.rolling_median | 1 | |
| pd.rolling_median | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_dense | 1 | |
| df.to_dense | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.mad | 1 | |
| df.mad | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.align | 1 | |
| df.align | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.__copy__ | 1 | |
| df.__copy__ | 1 | n |
+-------------------------+-------+------------------------------------------------+
| pd.set_eng_float_format | 1 | |
| pd.set_eng_float_format | 1 | n |
+-------------------------+-------+------------------------------------------------+
+---------------------------+---------------------------------+----------------------------------------------------+
@ -447,7 +448,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``add_suffix`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``agg`` | N | |
| ``agg`` | Y | |
| ``aggregate`` | | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``align`` | N | |
@ -512,7 +513,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``corrwith`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``count`` | N | |
| ``count`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``cov`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -524,29 +525,29 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``cumsum`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``describe`` | N | |
| ``describe`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``diff`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``div`` | N | See ``add`` |
| ``div`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``divide`` | N | See ``add`` |
| ``divide`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``dot`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``drop`` | N | |
| ``drop`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``drop_duplicates`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``dropna`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``dtypes`` | N | |
| ``dtypes`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``duplicated`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``empty`` | N | |
| ``empty`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``eq`` | N | See ``add`` |
| ``eq`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``equals`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -566,7 +567,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``first_valid_index`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``floordiv`` | N | See ``add`` |
| ``floordiv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``from_csv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -578,9 +579,9 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``ftypes`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``ge`` | N | See ``add`` |
| ``ge`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``get`` | N | |
| ``get`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``get_dtype_counts`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -592,11 +593,11 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``groupby`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``gt`` | N | See ``add`` |
| ``gt`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``head`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``hist`` | N | |
| ``hist`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``iat`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -608,7 +609,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``infer_objects`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``info`` | N | |
| ``info`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``insert`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -634,7 +635,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``join`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``keys`` | N | |
| ``keys`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``kurt`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -644,21 +645,21 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``last_valid_index`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``le`` | N | See ``add`` |
| ``le`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``loc`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``lookup`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``lt`` | N | See ``add`` |
| ``lt`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mad`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mask`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``max`` | N | |
| ``max`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mean`` | N | |
| ``mean`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``median`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -668,19 +669,19 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``merge`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``min`` | N | |
| ``min`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mod`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mode`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mul`` | N | See ``add`` |
| ``mul`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``multiply`` | N | See ``add`` |
| ``multiply`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``ndim`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``ne`` | N | See ``add`` |
| ``ne`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``nlargest`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -690,7 +691,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``nsmallest`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``nunique`` | N | |
| ``nunique`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pct_change`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -704,7 +705,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pop`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pow`` | N | See ``add`` |
| ``pow`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``prod`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -712,13 +713,13 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``quantile`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``query`` | N | |
| ``query`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``radd`` | N | See ``add`` |
| ``radd`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rank`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rdiv`` | N | See ``add`` |
| ``rdiv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``reindex`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -738,27 +739,27 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``reset_index`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rfloordiv`` | N | See ``add`` |
| ``rfloordiv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rmod`` | N | See ``add`` |
| ``rmod`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rmul`` | N | See ``add`` |
| ``rmul`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rolling`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``round`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rpow`` | N | See ``add`` |
| ``rpow`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rsub`` | N | See ``add`` |
| ``rsub`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rtruediv`` | N | See ``add`` |
| ``rtruediv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``sample`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``select`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``select_dtypes`` | N | |
| ``select_dtypes`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``sem`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -768,7 +769,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``set_value`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``shape`` | N | |
| ``shape`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``shift`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -792,11 +793,11 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``style`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``sub`` | N | See ``add`` |
| ``sub`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``subtract`` | N | See ``add`` |
| ``subtract`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``sum`` | N | |
| ``sum`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``swapaxes`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -808,7 +809,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_clipboard`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_csv`` | N | |
| ``to_csv`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_dense`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -822,7 +823,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_hdf`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_html`` | N | |
| ``to_html`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_json`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
@ -856,7 +857,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
+---------------------------+---------------------------------+----------------------------------------------------+
| ``transpose`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``truediv`` | N | See ``add`` |
| ``truediv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``truncate`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+

View File

@ -0,0 +1,61 @@
.. _implementation/details:
======================
Implementation Details
======================
The goal of an ``eland.DataFrame`` is to enable users who are familiar with ``pandas.DataFrame``
to access, explore and manipulate data that resides in Elasticsearch.
Ideally, all data should reside in Elasticsearch and not in memory.
This restricts the API, but allows access to huge data sets that do not fit into memory, and allows
use of powerful Elasticsearch features such as aggregations.
Pandas and 3rd Party Storage Systems
------------------------------------
Generally, integrations with `3rd party storage systems <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_
(SQL, Google Big Query etc.) involve accessing these systems and reading all external data into an
in-core pandas data structure. This also applies to `Apache Arrow <https://arrow.apache.org/docs/python/pandas.html>`_
structures.
Whilst this provides access to data in these systems, for large datasets this can require significant
in-core memory, and for systems such as Elasticsearch, bulk export of data can be an inefficient way
of exploring the data.
An alternative option is to create an API that proxies ``pandas.DataFrame``-like calls to Elasticsearch
queries and operations. This could allow the Elasticsearch cluster to perform operations such as
aggregations rather than exporting all the data and performing this operation in-core.
Implementation Options
----------------------
An option would be to replace the ``pandas.DataFrame`` backend in-core memory structures with Elasticsearch
accessors. This would allow full access to the ``pandas.DataFrame`` APIs. However, this has issues:
* If a ``pandas.DataFrame`` instance maps to an index, typical manipulation of a ``pandas.DataFrame``
may involve creating many derived ``pandas.DataFrame`` instances. Constructing an index per
``pandas.DataFrame`` may result in many Elasticsearch indexes and a significant load on Elasticsearch.
For example, ``df_a = df['a']`` should not require Elasticsearch indices ``df`` and ``df_a``
* Not all ``pandas.DataFrame`` APIs map to things we may want to do in Elasticsearch. In particular,
API calls that involve exporting all data from Elasticsearch into memory e.g. ``df.to_dict()``.
* The backend ``pandas.DataFrame`` structures are not easily abstractable and are deeply embedded in
the implementation.
Another option is to create an ``eland.DataFrame`` API that mimics appropriate aspects of
the ``pandas.DataFrame`` API. This resolves some of the issues above as:
* ``df_a = df['a']`` could be implemented as a change to the Elasticsearch query used, rather
than a new index
* Instead of supporting the entire ``pandas.DataFrame`` API we can support a subset appropriate for
Elasticsearch. If additional calls are required, we could create an ``eland.DataFrame._to_pandas()``
method which would explicitly export all data to a ``pandas.DataFrame``
* Creating a new ``eland.DataFrame`` API gives us full flexibility in terms of implementation. However,
it does create a large amount of work which may duplicate a lot of the ``pandas`` code - for example,
printing objects etc. - this creates maintenance issues etc.

View File

@ -0,0 +1,11 @@
.. _implementation:
====================
Implementation Notes
====================
.. toctree::
:maxdepth: 2
details.rst
dataframe_supported.rst

View File

@ -23,10 +23,17 @@ In general, the data resides in elasticsearch and not in memory, which allows el
:hidden:
reference/index
implementation/index
* :doc:`reference/index`
* :doc:`reference/io`
* :doc:`reference/general_utility_functions`
* :doc:`reference/dataframe`
* :doc:`reference/index`
* :doc:`reference/indexing`
* :doc:`implementation/index`
* :doc:`implementation/details`
* :doc:`implementation/dataframe_supported`

View File

@ -561,7 +561,7 @@ class DataFrame(NDFrame):
See Also
--------
:pandas_api_docs:`to_html` for argument details.
:pandas_api_docs:`pandas.DataFrame.to_html` for argument details.
"""
# In pandas calling 'to_string' without max_rows set, will dump ALL rows - we avoid this
# by limiting rows by default.
@ -621,7 +621,7 @@ class DataFrame(NDFrame):
See Also
--------
:pandas_api_docs:`to_string` for argument details.
:pandas_api_docs:`pandas.DataFrame.to_string` for argument details.
"""
# In pandas calling 'to_string' without max_rows set, will dump ALL rows - we avoid this
# by limiting rows by default.
@ -787,7 +787,7 @@ class DataFrame(NDFrame):
See Also
--------
:pandas_api_docs:`to_csv` for argument details.
:pandas_api_docs:`pandas.DataFrame.to_csv` for argument details.
"""
kwargs = {
"path_or_buf": path_or_buf,

View File

@ -287,7 +287,7 @@ class NDFrame:
"""
Return mean value for each numeric column
TODO - implement remainder of pandas arguments
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
Returns
-------
@ -321,7 +321,7 @@ class NDFrame:
"""
Return sum for each numeric column
TODO - implement remainder of pandas arguments
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
Returns
-------
@ -355,7 +355,7 @@ class NDFrame:
"""
Return the minimum value for each numeric column
TODO - implement remainder of pandas arguments
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
Returns
-------
@ -389,7 +389,7 @@ class NDFrame:
"""
Return the maximum value for each numeric column
TODO - implement remainder of pandas arguments
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
Returns
-------

View File

@ -8,7 +8,7 @@ from eland import Client
from eland import DataFrame
from eland import Mappings
_default_chunk_size = 10000
DEFAULT_CHUNK_SIZE = 10000
def read_es(es_params, index_pattern):
@ -80,7 +80,7 @@ def pandas_to_eland(pd_df, es_params, destination_index, if_exists='fail', chunk
eland.eland_to_pandas: Create a pandas.DataFrame from eland.DataFrame
"""
if chunksize is None:
chunksize = _default_chunk_size
chunksize = DEFAULT_CHUNK_SIZE
client = Client(es_params)
@ -99,7 +99,7 @@ def pandas_to_eland(pd_df, es_params, destination_index, if_exists='fail', chunk
client.index_delete(index=destination_index)
client.index_create(index=destination_index, body=mapping)
# elif if_exists == "append":
# TODO validate mapping is compatible
# TODO validate mappings are compatible
else:
client.index_create(index=destination_index, body=mapping)
@ -226,7 +226,7 @@ def read_csv(filepath_or_buffer,
**Modifies an Elasticsearch index**
**Note iteration not supported**
**Note pandas iteration options not supported**
Parameters
----------
@ -248,17 +248,17 @@ def read_csv(filepath_or_buffer,
es_geo_points: list, default None
List of columns to map to geo_point data type
iterator
ignored
not supported
chunksize
number of csv rows to read before bulk index into Elasticsearch
Other Parameters
----------------
Parameters derived from :pandas_api_docs:`read_csv`.
Parameters derived from :pandas_api_docs:`pandas.read_csv`.
See Also
--------
:pandas_api_docs:`read_csv` - for all parameters
:pandas_api_docs:`pandas.read_csv` - for all parameters
Notes
-----
@ -318,7 +318,7 @@ def read_csv(filepath_or_buffer,
)
if chunksize is None:
kwds.update(chunksize=_default_chunk_size)
kwds.update(chunksize=DEFAULT_CHUNK_SIZE)
client = Client(es_client)