mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Merge pull request #51 from stevedodson/master
Updating docs + added supported methods doc
This commit is contained in:
commit
885a0a4aba
@ -1,3 +1,6 @@
|
||||
.. _implementation/dataframe_supported:
|
||||
|
||||
===============================
|
||||
pandas.DataFrame supported APIs
|
||||
===============================
|
||||
|
||||
@ -8,20 +11,18 @@ also welcome!
|
||||
|
||||
The table below is structured as follows: the first column contains the method name.
|
||||
The second column is a flag for whether or not there is an implementation in eland for
|
||||
the method in the left column. ``Y`` stands for yes, ``N`` stands for no, ``P`` stands
|
||||
for partial (meaning some parameters may not be supported yet), and ``D`` stands for
|
||||
default to pandas.
|
||||
the method in the left column. ``Y`` stands for yes, ``N`` stands for no.
|
||||
|
||||
See https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv for the prioritised list of methods that the table below is based on.
|
||||
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| Method | Count | Notes |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.read_csv | 1422 | Not implemented ed.read_es implemented instead |
|
||||
| pd.read_csv | 1422 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.DataFrame | 886 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.append | 792 | Not implemented |
|
||||
| df.append | 792 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.mean | 783 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
@ -31,407 +32,407 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.sum | 755 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_csv | 693 | |
|
||||
| df.to_csv | 693 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.get | 669 | |
|
||||
| df.get | 669 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.mode | 653 | |
|
||||
| df.mode | 653 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.astype | 649 | |
|
||||
| df.astype | 649 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.sub | 637 | |
|
||||
| df.sub | 637 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.concat | 582 | |
|
||||
| pd.concat | 582 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.apply | 577 | |
|
||||
| df.apply | 577 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.groupby | 557 | |
|
||||
| df.groupby | 557 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.join | 544 | |
|
||||
| df.join | 544 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.fillna | 543 | |
|
||||
| df.fillna | 543 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.max | 508 | |
|
||||
| df.max | 508 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.reset_index | 434 | |
|
||||
| df.reset_index | 434 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.unique | 433 | |
|
||||
| pd.unique | 433 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.le | 405 | |
|
||||
| df.le | 405 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.count | 399 | |
|
||||
| df.count | 399 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.value_counts | 397 | |
|
||||
| pd.value_counts | 397 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.sort_values | 390 | |
|
||||
| df.sort_values | 390 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.transform | 387 | |
|
||||
| df.transform | 387 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.merge | 376 | |
|
||||
| df.merge | 376 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.add | 346 | |
|
||||
| df.add | 346 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.isnull | 338 | |
|
||||
| df.isnull | 338 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.min | 321 | |
|
||||
| df.min | 321 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.copy | 314 | |
|
||||
| df.copy | 314 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.replace | 300 | |
|
||||
| df.replace | 300 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.std | 261 | |
|
||||
| df.std | 261 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.hist | 246 | |
|
||||
| df.hist | 246 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.filter | 234 | |
|
||||
| df.filter | 234 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.describe | 220 | |
|
||||
| df.describe | 220 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.ne | 218 | |
|
||||
| df.ne | 218 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.corr | 217 | |
|
||||
| df.corr | 217 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.median | 217 | |
|
||||
| df.median | 217 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.items | 212 | |
|
||||
| df.items | 212 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.to_datetime | 204 | |
|
||||
| pd.to_datetime | 204 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.isin | 203 | |
|
||||
| df.isin | 203 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.dropna | 195 | |
|
||||
| df.dropna | 195 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.get_dummies | 190 | |
|
||||
| pd.get_dummies | 190 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.rename | 185 | |
|
||||
| df.rename | 185 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.info | 180 | |
|
||||
| df.info | 180 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.set_index | 166 | |
|
||||
| df.set_index | 166 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.keys | 159 | |
|
||||
| df.keys | 159 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.sample | 155 | |
|
||||
| df.sample | 155 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.agg | 140 | |
|
||||
| df.agg | 140 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.where | 138 | |
|
||||
| df.where | 138 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.boxplot | 134 | |
|
||||
| df.boxplot | 134 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.clip | 116 | |
|
||||
| df.clip | 116 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.round | 116 | |
|
||||
| df.round | 116 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.abs | 101 | |
|
||||
| df.abs | 101 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.stack | 97 | |
|
||||
| df.stack | 97 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.tail | 94 | |
|
||||
| df.tail | 94 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.update | 92 | |
|
||||
| df.update | 92 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.iterrows | 90 | |
|
||||
| df.iterrows | 90 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.transpose | 87 | |
|
||||
| df.transpose | 87 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.any | 85 | |
|
||||
| df.any | 85 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.pipe | 80 | |
|
||||
| df.pipe | 80 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.eval | 73 | |
|
||||
| pd.eval | 73 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.eval | 73 | |
|
||||
| df.eval | 73 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.read_json | 72 | |
|
||||
| pd.read_json | 72 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.nunique | 70 | |
|
||||
| df.nunique | 70 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.pivot | 70 | |
|
||||
| df.pivot | 70 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.select | 68 | |
|
||||
| df.select | 68 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.as_matrix | 67 | |
|
||||
| df.as_matrix | 67 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.notnull | 66 | |
|
||||
| df.notnull | 66 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.cumsum | 66 | |
|
||||
| df.cumsum | 66 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.prod | 64 | |
|
||||
| df.prod | 64 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.unstack | 64 | |
|
||||
| df.unstack | 64 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.drop_duplicates | 63 | |
|
||||
| df.drop_duplicates | 63 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.div | 63 | |
|
||||
| df.div | 63 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.crosstab | 59 | |
|
||||
| pd.crosstab | 59 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.select_dtypes | 57 | |
|
||||
| df.select_dtypes | 57 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.pow | 56 | |
|
||||
| df.pow | 56 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.sort_index | 56 | |
|
||||
| df.sort_index | 56 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.product | 52 | |
|
||||
| df.product | 52 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.isna | 51 | |
|
||||
| df.isna | 51 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.dot | 46 | |
|
||||
| df.dot | 46 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.cut | 45 | |
|
||||
| pd.cut | 45 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.bool | 44 | |
|
||||
| df.bool | 44 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_dict | 44 | |
|
||||
| df.to_dict | 44 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.diff | 44 | |
|
||||
| df.diff | 44 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.insert | 44 | |
|
||||
| df.insert | 44 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.pop | 44 | |
|
||||
| df.pop | 44 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.query | 43 | |
|
||||
| df.query | 43 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.var | 43 | |
|
||||
| df.var | 43 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.__init__ | 41 | |
|
||||
| df.__init__ | 41 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.to_numeric | 39 | |
|
||||
| pd.to_numeric | 39 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.squeeze | 39 | |
|
||||
| df.squeeze | 39 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.ge | 37 | |
|
||||
| df.ge | 37 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.quantile | 37 | |
|
||||
| df.quantile | 37 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.reindex | 37 | |
|
||||
| df.reindex | 37 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.rolling | 35 | |
|
||||
| df.rolling | 35 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.factorize | 32 | |
|
||||
| pd.factorize | 32 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.melt | 31 | |
|
||||
| pd.melt | 31 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.melt | 31 | |
|
||||
| df.melt | 31 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.rank | 31 | |
|
||||
| df.rank | 31 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.read_table | 30 | |
|
||||
| pd.read_table | 30 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.pivot_table | 30 | |
|
||||
| pd.pivot_table | 30 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.idxmax | 30 | |
|
||||
| df.idxmax | 30 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.test | 29 | |
|
||||
| pd.test | 29 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.iteritems | 29 | |
|
||||
| df.iteritems | 29 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.shift | 28 | |
|
||||
| df.shift | 28 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.mul | 28 | |
|
||||
| df.mul | 28 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.qcut | 25 | |
|
||||
| pd.qcut | 25 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.set_value | 25 | |
|
||||
| df.set_value | 25 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.all | 24 | |
|
||||
| df.all | 24 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.skew | 24 | |
|
||||
| df.skew | 24 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.aggregate | 23 | |
|
||||
| df.aggregate | 23 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.match | 22 | |
|
||||
| pd.match | 22 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.nlargest | 22 | |
|
||||
| df.nlargest | 22 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.multiply | 21 | |
|
||||
| df.multiply | 21 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.set_axis | 19 | |
|
||||
| df.set_axis | 19 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.eq | 18 | |
|
||||
| df.eq | 18 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.resample | 18 | |
|
||||
| df.resample | 18 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.read_sql | 17 | |
|
||||
| pd.read_sql | 17 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.duplicated | 16 | |
|
||||
| df.duplicated | 16 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.date_range | 16 | |
|
||||
| pd.date_range | 16 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.interpolate | 15 | |
|
||||
| df.interpolate | 15 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.memory_usage | 15 | |
|
||||
| df.memory_usage | 15 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.divide | 14 | |
|
||||
| df.divide | 14 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.cov | 13 | |
|
||||
| df.cov | 13 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.assign | 12 | |
|
||||
| df.assign | 12 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.subtract | 12 | |
|
||||
| df.subtract | 12 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.read_pickle | 11 | |
|
||||
| pd.read_pickle | 11 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.applymap | 11 | |
|
||||
| df.applymap | 11 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.first | 11 | |
|
||||
| df.first | 11 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.kurt | 10 | |
|
||||
| df.kurt | 10 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.truncate | 10 | |
|
||||
| df.truncate | 10 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.get_value | 9 | |
|
||||
| df.get_value | 9 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.read_hdf | 9 | |
|
||||
| pd.read_hdf | 9 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_html | 9 | |
|
||||
| df.to_html | 9 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.read_sql_query | 9 | |
|
||||
| pd.read_sql_query | 9 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.take | 8 | |
|
||||
| df.take | 8 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_pickle | 7 | |
|
||||
| df.to_pickle | 7 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.itertuples | 7 | |
|
||||
| df.itertuples | 7 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_string | 7 | |
|
||||
| df.to_string | 7 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.last | 7 | |
|
||||
| df.last | 7 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.sem | 7 | |
|
||||
| df.sem | 7 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.to_pickle | 7 | |
|
||||
| pd.to_pickle | 7 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_json | 7 | |
|
||||
| df.to_json | 7 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.idxmin | 7 | |
|
||||
| df.idxmin | 7 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.xs | 6 | |
|
||||
| df.xs | 6 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.combine | 6 | |
|
||||
| df.combine | 6 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.rolling_mean | 6 | |
|
||||
| pd.rolling_mean | 6 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_period | 6 | |
|
||||
| df.to_period | 6 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.convert_objects | 5 | |
|
||||
| df.convert_objects | 5 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.mask | 4 | |
|
||||
| df.mask | 4 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.pct_change | 4 | |
|
||||
| df.pct_change | 4 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.add_prefix | 4 | |
|
||||
| df.add_prefix | 4 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.read_excel | 4 | |
|
||||
| pd.read_excel | 4 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.rolling_std | 3 | |
|
||||
| pd.rolling_std | 3 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_records | 3 | |
|
||||
| df.to_records | 3 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.corrwith | 3 | |
|
||||
| df.corrwith | 3 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.swapaxes | 3 | |
|
||||
| df.swapaxes | 3 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.__iter__ | 3 | |
|
||||
| df.__iter__ | 3 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_sql | 3 | |
|
||||
| df.to_sql | 3 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.read_feather | 3 | |
|
||||
| pd.read_feather | 3 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_feather | 3 | |
|
||||
| df.to_feather | 3 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.__len__ | 3 | |
|
||||
| df.__len__ | 3 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.kurtosis | 3 | |
|
||||
| df.kurtosis | 3 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.mod | 2 | |
|
||||
| df.mod | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_sparse | 2 | |
|
||||
| df.to_sparse | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.get_values | 2 | |
|
||||
| df.get_values | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.__eq__ | 2 | |
|
||||
| df.__eq__ | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.bdate_range | 2 | |
|
||||
| pd.bdate_range | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.get_dtype_counts | 2 | |
|
||||
| df.get_dtype_counts | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.combine_first | 2 | |
|
||||
| df.combine_first | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df._get_numeric_data | 2 | |
|
||||
| df._get_numeric_data | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.nsmallest | 2 | |
|
||||
| df.nsmallest | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.scatter_matrix | 2 | |
|
||||
| pd.scatter_matrix | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.rename_axis | 2 | |
|
||||
| df.rename_axis | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.__setstate__ | 2 | |
|
||||
| df.__setstate__ | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.cumprod | 2 | |
|
||||
| df.cumprod | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.__getstate__ | 2 | |
|
||||
| df.__getstate__ | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.equals | 2 | |
|
||||
| df.equals | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.__getitem__ | 2 | |
|
||||
| df.__getitem__ | 2 | y |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.clip_upper | 2 | |
|
||||
| df.clip_upper | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.floordiv | 2 | |
|
||||
| df.floordiv | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_excel | 2 | |
|
||||
| df.to_excel | 2 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.reindex_axis | 1 | |
|
||||
| df.reindex_axis | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.to_timedelta | 1 | |
|
||||
| pd.to_timedelta | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.ewm | 1 | |
|
||||
| df.ewm | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.tz_localize | 1 | |
|
||||
| df.tz_localize | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.tz_convert | 1 | |
|
||||
| df.tz_convert | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_hdf | 1 | |
|
||||
| df.to_hdf | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.lookup | 1 | |
|
||||
| df.lookup | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.merge_ordered | 1 | |
|
||||
| pd.merge_ordered | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.swaplevel | 1 | |
|
||||
| df.swaplevel | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.first_valid_index | 1 | |
|
||||
| df.first_valid_index | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.lt | 1 | |
|
||||
| df.lt | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.add_suffix | 1 | |
|
||||
| df.add_suffix | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.rolling_median | 1 | |
|
||||
| pd.rolling_median | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_dense | 1 | |
|
||||
| df.to_dense | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.mad | 1 | |
|
||||
| df.mad | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.align | 1 | |
|
||||
| df.align | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.__copy__ | 1 | |
|
||||
| df.__copy__ | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.set_eng_float_format | 1 | |
|
||||
| pd.set_eng_float_format | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.add_suffix | 1 | |
|
||||
| df.add_suffix | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.rolling_median | 1 | |
|
||||
| pd.rolling_median | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.to_dense | 1 | |
|
||||
| df.to_dense | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.mad | 1 | |
|
||||
| df.mad | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.align | 1 | |
|
||||
| df.align | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| df.__copy__ | 1 | |
|
||||
| df.__copy__ | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
| pd.set_eng_float_format | 1 | |
|
||||
| pd.set_eng_float_format | 1 | n |
|
||||
+-------------------------+-------+------------------------------------------------+
|
||||
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -447,7 +448,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``add_suffix`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``agg`` | N | |
|
||||
| ``agg`` | Y | |
|
||||
| ``aggregate`` | | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``align`` | N | |
|
||||
@ -512,7 +513,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``corrwith`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``count`` | N | |
|
||||
| ``count`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``cov`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -524,29 +525,29 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``cumsum`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``describe`` | N | |
|
||||
| ``describe`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``diff`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``div`` | N | See ``add`` |
|
||||
| ``div`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``divide`` | N | See ``add`` |
|
||||
| ``divide`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``dot`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``drop`` | N | |
|
||||
| ``drop`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``drop_duplicates`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``dropna`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``dtypes`` | N | |
|
||||
| ``dtypes`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``duplicated`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``empty`` | N | |
|
||||
| ``empty`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``eq`` | N | See ``add`` |
|
||||
| ``eq`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``equals`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -566,7 +567,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``first_valid_index`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``floordiv`` | N | See ``add`` |
|
||||
| ``floordiv`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``from_csv`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -578,9 +579,9 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``ftypes`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``ge`` | N | See ``add`` |
|
||||
| ``ge`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``get`` | N | |
|
||||
| ``get`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``get_dtype_counts`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -592,11 +593,11 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``groupby`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``gt`` | N | See ``add`` |
|
||||
| ``gt`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``head`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``hist`` | N | |
|
||||
| ``hist`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``iat`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -608,7 +609,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``infer_objects`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``info`` | N | |
|
||||
| ``info`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``insert`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -634,7 +635,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``join`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``keys`` | N | |
|
||||
| ``keys`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``kurt`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -644,21 +645,21 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``last_valid_index`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``le`` | N | See ``add`` |
|
||||
| ``le`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``loc`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``lookup`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``lt`` | N | See ``add`` |
|
||||
| ``lt`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``mad`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``mask`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``max`` | N | |
|
||||
| ``max`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``mean`` | N | |
|
||||
| ``mean`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``median`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -668,19 +669,19 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``merge`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``min`` | N | |
|
||||
| ``min`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``mod`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``mode`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``mul`` | N | See ``add`` |
|
||||
| ``mul`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``multiply`` | N | See ``add`` |
|
||||
| ``multiply`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``ndim`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``ne`` | N | See ``add`` |
|
||||
| ``ne`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``nlargest`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -690,7 +691,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``nsmallest`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``nunique`` | N | |
|
||||
| ``nunique`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``pct_change`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -704,7 +705,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``pop`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``pow`` | N | See ``add`` |
|
||||
| ``pow`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``prod`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -712,13 +713,13 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``quantile`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``query`` | N | |
|
||||
| ``query`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``radd`` | N | See ``add`` |
|
||||
| ``radd`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``rank`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``rdiv`` | N | See ``add`` |
|
||||
| ``rdiv`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``reindex`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -738,27 +739,27 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``reset_index`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``rfloordiv`` | N | See ``add`` |
|
||||
| ``rfloordiv`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``rmod`` | N | See ``add`` |
|
||||
| ``rmod`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``rmul`` | N | See ``add`` |
|
||||
| ``rmul`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``rolling`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``round`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``rpow`` | N | See ``add`` |
|
||||
| ``rpow`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``rsub`` | N | See ``add`` |
|
||||
| ``rsub`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``rtruediv`` | N | See ``add`` |
|
||||
| ``rtruediv`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``sample`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``select`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``select_dtypes`` | N | |
|
||||
| ``select_dtypes`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``sem`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -768,7 +769,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``set_value`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``shape`` | N | |
|
||||
| ``shape`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``shift`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -792,11 +793,11 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``style`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``sub`` | N | See ``add`` |
|
||||
| ``sub`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``subtract`` | N | See ``add`` |
|
||||
| ``subtract`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``sum`` | N | |
|
||||
| ``sum`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``swapaxes`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -808,7 +809,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``to_clipboard`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``to_csv`` | N | |
|
||||
| ``to_csv`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``to_dense`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -822,7 +823,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``to_hdf`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``to_html`` | N | |
|
||||
| ``to_html`` | Y | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``to_json`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
@ -856,7 +857,7 @@ https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv rep
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``transpose`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``truediv`` | N | See ``add`` |
|
||||
| ``truediv`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
||||
| ``truncate`` | N | |
|
||||
+---------------------------+---------------------------------+----------------------------------------------------+
|
61
docs/source/implementation/details.rst
Normal file
61
docs/source/implementation/details.rst
Normal file
@ -0,0 +1,61 @@
|
||||
.. _implementation/details:
|
||||
|
||||
======================
|
||||
Implementation Details
|
||||
======================
|
||||
|
||||
The goal of an ``eland.DataFrame`` is to enable users who are familiar with ``pandas.DataFrame``
|
||||
to access, explore and manipulate data that resides in Elasticsearch.
|
||||
|
||||
Ideally, all data should reside in Elasticsearch and not in memory.
|
||||
This restricts the API, but allows access to huge data sets that do not fit into memory, and allows
|
||||
use of powerful Elasticsearch features such as aggregations.
|
||||
|
||||
|
||||
Pandas and 3rd Party Storage Systems
|
||||
------------------------------------
|
||||
|
||||
Generally, integrations with `3rd party storage systems <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_
|
||||
(SQL, Google Big Query etc.) involve accessing these systems and reading all external data into an
|
||||
in-core pandas data structure. This also applies to `Apache Arrow <https://arrow.apache.org/docs/python/pandas.html>`_
|
||||
structures.
|
||||
|
||||
Whilst this provides access to data in these systems, for large datasets this can require significant
|
||||
in-core memory, and for systems such as Elasticsearch, bulk export of data can be an inefficient way
|
||||
of exploring the data.
|
||||
|
||||
An alternative option is to create an API that proxies ``pandas.DataFrame``-like calls to Elasticsearch
|
||||
queries and operations. This could allow the Elasticsearch cluster to perform operations such as
|
||||
aggregations rather than exporting all the data and performing this operation in-core.
|
||||
|
||||
Implementation Options
|
||||
----------------------
|
||||
|
||||
An option would be to replace the ``pandas.DataFrame`` backend in-core memory structures with Elasticsearch
|
||||
accessors. This would allow full access to the ``pandas.DataFrame`` APIs. However, this has issues:
|
||||
|
||||
* If a ``pandas.DataFrame`` instance maps to an index, typical manipulation of a ``pandas.DataFrame``
|
||||
may involve creating many derived ``pandas.DataFrame`` instances. Constructing an index per
|
||||
``pandas.DataFrame`` may result in many Elasticsearch indexes and a significant load on Elasticsearch.
|
||||
For example, ``df_a = df['a']`` should not require Elasticsearch indices ``df`` and ``df_a``.
|
||||
|
||||
* Not all ``pandas.DataFrame`` APIs map to things we may want to do in Elasticsearch. In particular,
|
||||
API calls that involve exporting all data from Elasticsearch into memory e.g. ``df.to_dict()``.
|
||||
|
||||
* The backend ``pandas.DataFrame`` structures are not easily abstractable and are deeply embedded in
|
||||
the implementation.
|
||||
|
||||
Another option is to create an ``eland.DataFrame`` API that mimics appropriate aspects of
|
||||
the ``pandas.DataFrame`` API. This resolves some of the issues above as:
|
||||
|
||||
* ``df_a = df['a']`` could be implemented as a change to the Elasticsearch query used, rather
|
||||
than a new index
|
||||
|
||||
* Instead of supporting the entire ``pandas.DataFrame`` API we can support a subset appropriate for
|
||||
Elasticsearch. If additional calls are required, we could create an ``eland.DataFrame._to_pandas()``
|
||||
method which would explicitly export all data to a ``pandas.DataFrame``
|
||||
|
||||
* Creating a new ``eland.DataFrame`` API gives us full flexibility in terms of implementation. However,
|
||||
it does create a large amount of work which may duplicate a lot of the ``pandas`` code - for example,
|
||||
printing objects etc. - this creates maintenance issues etc.
|
||||
|
11
docs/source/implementation/index.rst
Normal file
11
docs/source/implementation/index.rst
Normal file
@ -0,0 +1,11 @@
|
||||
.. _implementation:
|
||||
|
||||
====================
|
||||
Implementation Notes
|
||||
====================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
details.rst
|
||||
dataframe_supported.rst
|
@ -23,10 +23,17 @@ In general, the data resides in elasticsearch and not in memory, which allows el
|
||||
:hidden:
|
||||
|
||||
reference/index
|
||||
implementation/index
|
||||
|
||||
* :doc:`reference/index`
|
||||
|
||||
* :doc:`reference/io`
|
||||
* :doc:`reference/general_utility_functions`
|
||||
* :doc:`reference/dataframe`
|
||||
* :doc:`reference/index`
|
||||
* :doc:`reference/indexing`
|
||||
|
||||
* :doc:`implementation/index`
|
||||
|
||||
* :doc:`implementation/details`
|
||||
* :doc:`implementation/dataframe_supported`
|
||||
|
||||
|
@ -561,7 +561,7 @@ class DataFrame(NDFrame):
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`to_html` for argument details.
|
||||
:pandas_api_docs:`pandas.DataFrame.to_html` for argument details.
|
||||
"""
|
||||
# In pandas calling 'to_string' without max_rows set, will dump ALL rows - we avoid this
|
||||
# by limiting rows by default.
|
||||
@ -621,7 +621,7 @@ class DataFrame(NDFrame):
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`to_string` for argument details.
|
||||
:pandas_api_docs:`pandas.DataFrame.to_string` for argument details.
|
||||
"""
|
||||
# In pandas calling 'to_string' without max_rows set, will dump ALL rows - we avoid this
|
||||
# by limiting rows by default.
|
||||
@ -787,7 +787,7 @@ class DataFrame(NDFrame):
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`to_csv` for argument details.
|
||||
:pandas_api_docs:`pandas.DataFrame.to_csv` for argument details.
|
||||
"""
|
||||
kwargs = {
|
||||
"path_or_buf": path_or_buf,
|
||||
|
@ -452,28 +452,23 @@ class Mappings:
|
||||
numeric_source_fields: list of str
|
||||
List of source fields where pd_dtype == (int64 or float64 or bool)
|
||||
"""
|
||||
if columns is not None:
|
||||
if include_bool == True:
|
||||
return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
|
||||
((self._mappings_capabilities.pd_dtype == 'int64') |
|
||||
(self._mappings_capabilities.pd_dtype == 'float64') |
|
||||
(self._mappings_capabilities.pd_dtype == 'bool'))].reindex(
|
||||
columns).index.tolist()
|
||||
else:
|
||||
return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
|
||||
((self._mappings_capabilities.pd_dtype == 'int64') |
|
||||
(self._mappings_capabilities.pd_dtype == 'float64'))].reindex(
|
||||
columns).index.tolist()
|
||||
if include_bool == True:
|
||||
df = self._mappings_capabilities[(self._mappings_capabilities._source == True) &
|
||||
((self._mappings_capabilities.pd_dtype == 'int64') |
|
||||
(self._mappings_capabilities.pd_dtype == 'float64') |
|
||||
(self._mappings_capabilities.pd_dtype == 'bool'))]
|
||||
else:
|
||||
if include_bool == True:
|
||||
return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
|
||||
((self._mappings_capabilities.pd_dtype == 'int64') |
|
||||
(self._mappings_capabilities.pd_dtype == 'float64') |
|
||||
(self._mappings_capabilities.pd_dtype == 'bool'))].index.tolist()
|
||||
else:
|
||||
return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
|
||||
((self._mappings_capabilities.pd_dtype == 'int64') |
|
||||
(self._mappings_capabilities.pd_dtype == 'float64'))].index.tolist()
|
||||
df = self._mappings_capabilities[(self._mappings_capabilities._source == True) &
|
||||
((self._mappings_capabilities.pd_dtype == 'int64') |
|
||||
(self._mappings_capabilities.pd_dtype == 'float64'))]
|
||||
# if columns exists, filter index with columns
|
||||
if columns is not None:
|
||||
# reindex adds NA for non-existing columns (non-numeric), so drop these after reindex
|
||||
df = df.reindex(columns)
|
||||
df.dropna(inplace=True)
|
||||
|
||||
# return as list
|
||||
return df.index.to_list()
|
||||
|
||||
def source_fields(self):
|
||||
"""
|
||||
|
@ -287,7 +287,7 @@ class NDFrame:
|
||||
"""
|
||||
Return mean value for each numeric column
|
||||
|
||||
TODO - implement remainder of pandas arguments
|
||||
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||
|
||||
Returns
|
||||
-------
|
||||
@ -321,7 +321,7 @@ class NDFrame:
|
||||
"""
|
||||
Return sum for each numeric column
|
||||
|
||||
TODO - implement remainder of pandas arguments
|
||||
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||
|
||||
Returns
|
||||
-------
|
||||
@ -355,7 +355,7 @@ class NDFrame:
|
||||
"""
|
||||
Return the minimum value for each numeric column
|
||||
|
||||
TODO - implement remainder of pandas arguments
|
||||
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||
|
||||
Returns
|
||||
-------
|
||||
@ -389,7 +389,7 @@ class NDFrame:
|
||||
"""
|
||||
Return the maximum value for each numeric column
|
||||
|
||||
TODO - implement remainder of pandas arguments
|
||||
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||
|
||||
Returns
|
||||
-------
|
||||
@ -488,16 +488,16 @@ class NDFrame:
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = ed.DataFrame('localhost', 'flights', columns=['AvgTicketPrice', 'FlightDelay'])
|
||||
>>> df = ed.DataFrame('localhost', 'flights', columns=['AvgTicketPrice', 'FlightDelayMin'])
|
||||
>>> df.describe() # ignoring percentiles as they don't generate consistent results
|
||||
AvgTicketPrice FlightDelay
|
||||
count 13059.000000 13059.000000
|
||||
mean 628.253689 0.251168
|
||||
std 266.386661 0.433685
|
||||
min 100.020531 0.000000
|
||||
AvgTicketPrice FlightDelayMin
|
||||
count 13059.000000 13059.000000
|
||||
mean 628.253689 47.335171
|
||||
std 266.386661 96.743006
|
||||
min 100.020531 0.000000
|
||||
...
|
||||
...
|
||||
...
|
||||
max 1199.729004 1.000000
|
||||
max 1199.729004 360.000000
|
||||
"""
|
||||
return self._query_compiler.describe()
|
||||
|
@ -4,41 +4,54 @@ from pandas.util.testing import assert_series_equal
|
||||
|
||||
from eland.tests.common import TestData
|
||||
|
||||
import eland as ed
|
||||
|
||||
|
||||
class TestDataFrameMetrics(TestData):
|
||||
|
||||
def test_mean(self):
|
||||
funcs = ['max', 'min', 'mean', 'sum']
|
||||
|
||||
def test_flights_metrics(self):
|
||||
pd_flights = self.pd_flights()
|
||||
ed_flights = self.ed_flights()
|
||||
|
||||
pd_mean = pd_flights.mean(numeric_only=True)
|
||||
ed_mean = ed_flights.mean(numeric_only=True)
|
||||
for func in self.funcs:
|
||||
pd_metric = getattr(pd_flights, func)(numeric_only=True)
|
||||
ed_metric = getattr(ed_flights, func)(numeric_only=True)
|
||||
|
||||
assert_series_equal(pd_mean, ed_mean)
|
||||
assert_series_equal(pd_metric, ed_metric)
|
||||
|
||||
def test_sum(self):
|
||||
pd_flights = self.pd_flights()
|
||||
ed_flights = self.ed_flights()
|
||||
def test_ecommerce_selected_non_numeric_source_fields(self):
|
||||
# None of these are numeric
|
||||
columns = ['category', 'currency', 'customer_birth_date', 'customer_first_name', 'user']
|
||||
|
||||
pd_sum = pd_flights.sum(numeric_only=True)
|
||||
ed_sum = ed_flights.sum(numeric_only=True)
|
||||
pd_ecommerce = self.pd_ecommerce()[columns]
|
||||
ed_ecommerce = self.ed_ecommerce()[columns]
|
||||
|
||||
assert_series_equal(pd_sum, ed_sum)
|
||||
for func in self.funcs:
|
||||
assert_series_equal(getattr(pd_ecommerce, func)(numeric_only=True), getattr(ed_ecommerce, func)(numeric_only=True),
|
||||
check_less_precise=True)
|
||||
|
||||
def test_min(self):
|
||||
pd_flights = self.pd_flights()
|
||||
ed_flights = self.ed_flights()
|
||||
def test_ecommerce_selected_mixed_numeric_source_fields(self):
|
||||
# Some of these are numeric
|
||||
columns = ['category', 'currency', 'taxless_total_price', 'customer_birth_date',
|
||||
'total_quantity', 'customer_first_name', 'user']
|
||||
|
||||
pd_min = pd_flights.min(numeric_only=True)
|
||||
ed_min = ed_flights.min(numeric_only=True)
|
||||
pd_ecommerce = self.pd_ecommerce()[columns]
|
||||
ed_ecommerce = self.ed_ecommerce()[columns]
|
||||
|
||||
assert_series_equal(pd_min, ed_min)
|
||||
for func in self.funcs:
|
||||
assert_series_equal(getattr(pd_ecommerce, func)(numeric_only=True), getattr(ed_ecommerce, func)(numeric_only=True),
|
||||
check_less_precise=True)
|
||||
|
||||
def test_max(self):
|
||||
pd_flights = self.pd_flights()
|
||||
ed_flights = self.ed_flights()
|
||||
|
||||
pd_max = pd_flights.max(numeric_only=True)
|
||||
ed_max = ed_flights.max(numeric_only=True)
|
||||
def test_ecommerce_selected_all_numeric_source_fields(self):
|
||||
# All of these are numeric
|
||||
columns = ['total_quantity', 'taxful_total_price', 'taxless_total_price']
|
||||
|
||||
assert_series_equal(pd_max, ed_max)
|
||||
pd_ecommerce = self.pd_ecommerce()[columns]
|
||||
ed_ecommerce = self.ed_ecommerce()[columns]
|
||||
|
||||
for func in self.funcs:
|
||||
assert_series_equal(getattr(pd_ecommerce, func)(numeric_only=True), getattr(ed_ecommerce, func)(numeric_only=True),
|
||||
check_less_precise=True)
|
||||
|
77
eland/tests/mappings/test_numeric_source_fields_pytest.py
Normal file
77
eland/tests/mappings/test_numeric_source_fields_pytest.py
Normal file
@ -0,0 +1,77 @@
|
||||
# File called _pytest for PyCharm compatibility
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
from eland.tests.common import TestData
|
||||
|
||||
|
||||
class TestMappingsNumericSourceFields(TestData):
|
||||
|
||||
def test_flights_numeric_source_fields(self):
|
||||
ed_flights = self.ed_flights()
|
||||
pd_flights = self.pd_flights()
|
||||
|
||||
ed_numeric = ed_flights._query_compiler._mappings.numeric_source_fields(columns=None, include_bool=False)
|
||||
pd_numeric = pd_flights.select_dtypes(include=np.number)
|
||||
|
||||
assert pd_numeric.columns.to_list() == ed_numeric
|
||||
|
||||
def test_ecommerce_selected_non_numeric_source_fields(self):
|
||||
columns = ['category', 'currency', 'customer_birth_date', 'customer_first_name', 'user']
|
||||
"""
|
||||
Note: none of these are numeric
|
||||
category object
|
||||
currency object
|
||||
customer_birth_date datetime64[ns]
|
||||
customer_first_name object
|
||||
user object
|
||||
"""
|
||||
|
||||
ed_ecommerce = self.ed_ecommerce()[columns]
|
||||
pd_ecommerce = self.pd_ecommerce()[columns]
|
||||
|
||||
ed_numeric = ed_ecommerce._query_compiler._mappings.numeric_source_fields(columns=columns, include_bool=False)
|
||||
pd_numeric = pd_ecommerce.select_dtypes(include=np.number)
|
||||
|
||||
assert pd_numeric.columns.to_list() == ed_numeric
|
||||
|
||||
def test_ecommerce_selected_mixed_numeric_source_fields(self):
|
||||
columns = ['category', 'currency', 'customer_birth_date', 'customer_first_name', 'total_quantity', 'user']
|
||||
|
||||
"""
|
||||
Note: one is numeric
|
||||
category object
|
||||
currency object
|
||||
customer_birth_date datetime64[ns]
|
||||
customer_first_name object
|
||||
total_quantity int64
|
||||
user object
|
||||
"""
|
||||
|
||||
ed_ecommerce = self.ed_ecommerce()[columns]
|
||||
pd_ecommerce = self.pd_ecommerce()[columns]
|
||||
|
||||
ed_numeric = ed_ecommerce._query_compiler._mappings.numeric_source_fields(columns=columns, include_bool=False)
|
||||
pd_numeric = pd_ecommerce.select_dtypes(include=np.number)
|
||||
|
||||
assert pd_numeric.columns.to_list() == ed_numeric
|
||||
|
||||
def test_ecommerce_selected_all_numeric_source_fields(self):
|
||||
columns = ['total_quantity', 'taxful_total_price', 'taxless_total_price']
|
||||
|
||||
"""
|
||||
Note: all are numeric
|
||||
total_quantity int64
|
||||
taxful_total_price float64
|
||||
taxless_total_price float64
|
||||
"""
|
||||
|
||||
ed_ecommerce = self.ed_ecommerce()[columns]
|
||||
pd_ecommerce = self.pd_ecommerce()[columns]
|
||||
|
||||
ed_numeric = ed_ecommerce._query_compiler._mappings.numeric_source_fields(columns=columns, include_bool=False)
|
||||
pd_numeric = pd_ecommerce.select_dtypes(include=np.number)
|
||||
|
||||
assert pd_numeric.columns.to_list() == ed_numeric
|
@ -8,7 +8,7 @@ from eland import Client
|
||||
from eland import DataFrame
|
||||
from eland import Mappings
|
||||
|
||||
_default_chunk_size = 10000
|
||||
DEFAULT_CHUNK_SIZE = 10000
|
||||
|
||||
|
||||
def read_es(es_params, index_pattern):
|
||||
@ -80,7 +80,7 @@ def pandas_to_eland(pd_df, es_params, destination_index, if_exists='fail', chunk
|
||||
eland.eland_to_pandas: Create a pandas.Dataframe from eland.DataFrame
|
||||
"""
|
||||
if chunksize is None:
|
||||
chunksize = _default_chunk_size
|
||||
chunksize = DEFAULT_CHUNK_SIZE
|
||||
|
||||
client = Client(es_params)
|
||||
|
||||
@ -99,7 +99,7 @@ def pandas_to_eland(pd_df, es_params, destination_index, if_exists='fail', chunk
|
||||
client.index_delete(index=destination_index)
|
||||
client.index_create(index=destination_index, body=mapping)
|
||||
# elif if_exists == "append":
|
||||
# TODO validate mapping is compatible
|
||||
# TODO validate mappings are compatible
|
||||
else:
|
||||
client.index_create(index=destination_index, body=mapping)
|
||||
|
||||
@ -226,7 +226,7 @@ def read_csv(filepath_or_buffer,
|
||||
|
||||
**Modifies an Elasticsearch index**
|
||||
|
||||
**Note iteration not supported**
|
||||
**Note pandas iteration options not supported**
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@ -248,17 +248,17 @@ def read_csv(filepath_or_buffer,
|
||||
es_geo_points: list, default None
|
||||
List of columns to map to geo_point data type
|
||||
iterator
|
||||
ignored
|
||||
not supported
|
||||
chunksize
|
||||
number of csv rows to read before bulk index into Elasticsearch
|
||||
|
||||
Other Parameters
|
||||
----------------
|
||||
Parameters derived from :pandas_api_docs:`read_csv`.
|
||||
Parameters derived from :pandas_api_docs:`pandas.read_csv`.
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`read_csv` - for all parameters
|
||||
:pandas_api_docs:`pandas.read_csv` - for all parameters
|
||||
|
||||
Notes
|
||||
-----
|
||||
@ -318,7 +318,7 @@ def read_csv(filepath_or_buffer,
|
||||
)
|
||||
|
||||
if chunksize is None:
|
||||
kwds.update(chunksize=_default_chunk_size)
|
||||
kwds.update(chunksize=DEFAULT_CHUNK_SIZE)
|
||||
|
||||
client = Client(es_client)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user