diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..df00727 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +# Compiled python modules. +*.pyc + +# Setuptools distribution folder. +/dist/ + +# Python egg metadata, regenerated from source files by setuptools. +/*.egg-info + +# PyCharm files +.idea/ + +# pytest files +.pytest_cache/1 diff --git a/LICENSE b/LICENSE index 261eeb9..7376ffc 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,223 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ +ELASTIC LICENSE AGREEMENT - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +PLEASE READ CAREFULLY THIS ELASTIC LICENSE AGREEMENT (THIS "AGREEMENT"), WHICH +CONSTITUTES A LEGALLY BINDING AGREEMENT AND GOVERNS ALL OF YOUR USE OF ALL OF +THE ELASTIC SOFTWARE WITH WHICH THIS AGREEMENT IS INCLUDED ("ELASTIC SOFTWARE") +THAT IS PROVIDED IN OBJECT CODE FORMAT, AND, IN ACCORDANCE WITH SECTION 2 BELOW, +CERTAIN OF THE ELASTIC SOFTWARE THAT IS PROVIDED IN SOURCE CODE FORMAT. BY +INSTALLING OR USING ANY OF THE ELASTIC SOFTWARE GOVERNED BY THIS AGREEMENT, YOU +ARE ASSENTING TO THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE +WITH SUCH TERMS AND CONDITIONS, YOU MAY NOT INSTALL OR USE THE ELASTIC SOFTWARE +GOVERNED BY THIS AGREEMENT. IF YOU ARE INSTALLING OR USING THE SOFTWARE ON +BEHALF OF A LEGAL ENTITY, YOU REPRESENT AND WARRANT THAT YOU HAVE THE ACTUAL +AUTHORITY TO AGREE TO THE TERMS AND CONDITIONS OF THIS AGREEMENT ON BEHALF OF +SUCH ENTITY. - 1. Definitions. +Posted Date: April 20, 2018 - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. +This Agreement is entered into by and between Elasticsearch BV ("Elastic") and +You, or the legal entity on behalf of whom You are acting (as applicable, +"You"). - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. +1. OBJECT CODE END USER LICENSES, RESTRICTIONS AND THIRD PARTY OPEN SOURCE +SOFTWARE - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. + 1.1 Object Code End User License. Subject to the terms and conditions of + Section 1.2 of this Agreement, Elastic hereby grants to You, AT NO CHARGE and + for so long as you are not in breach of any provision of this Agreement, a + License to the Basic Features and Functions of the Elastic Software. - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. + 1.2 Reservation of Rights; Restrictions. As between Elastic and You, Elastic + and its licensors own all right, title and interest in and to the Elastic + Software, and except as expressly set forth in Sections 1.1, and 2.1 of this + Agreement, no other license to the Elastic Software is granted to You under + this Agreement, by implication, estoppel or otherwise. You agree not to: (i) + reverse engineer or decompile, decrypt, disassemble or otherwise reduce any + Elastic Software provided to You in Object Code, or any portion thereof, to + Source Code, except and only to the extent any such restriction is prohibited + by applicable law, (ii) except as expressly permitted in this Agreement, + prepare derivative works from, modify, copy or use the Elastic Software Object + Code or the Commercial Software Source Code in any manner; (iii) except as + expressly permitted in Section 1.1 above, transfer, sell, rent, lease, + distribute, sublicense, loan or otherwise transfer, Elastic Software Object + Code, in whole or in part, to any third party; (iv) use Elastic Software + Object Code for providing time-sharing services, any software-as-a-service, + service bureau services or as part of an application services provider or + other service offering (collectively, "SaaS Offering") where obtaining access + to the Elastic Software or the features and functions of the Elastic Software + is a primary reason or substantial motivation for users of the SaaS Offering + to access and/or use the SaaS Offering ("Prohibited SaaS Offering"); (v) + circumvent the limitations on use of Elastic Software provided to You in + Object Code format that are imposed or preserved by any License Key, or (vi) + alter or remove any Marks and Notices in the Elastic Software. If You have any + question as to whether a specific SaaS Offering constitutes a Prohibited SaaS + Offering, or are interested in obtaining Elastic's permission to engage in + commercial or non-commercial distribution of the Elastic Software, please + contact elastic_license@elastic.co. - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. + 1.3 Third Party Open Source Software. The Commercial Software may contain or + be provided with third party open source libraries, components, utilities and + other open source software (collectively, "Open Source Software"), which Open + Source Software may have applicable license terms as identified on a website + designated by Elastic. Notwithstanding anything to the contrary herein, use of + the Open Source Software shall be subject to the license terms and conditions + applicable to such Open Source Software, to the extent required by the + applicable licensor (which terms shall not restrict the license rights granted + to You hereunder, but may contain additional rights). To the extent any + condition of this Agreement conflicts with any license to the Open Source + Software, the Open Source Software license will govern with respect to such + Open Source Software only. Elastic may also separately provide you with + certain open source software that is licensed by Elastic. Your use of such + Elastic open source software will not be governed by this Agreement, but by + the applicable open source license terms. - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. +2. COMMERCIAL SOFTWARE SOURCE CODE - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). + 2.1 Limited License. Subject to the terms and conditions of Section 2.2 of + this Agreement, Elastic hereby grants to You, AT NO CHARGE and for so long as + you are not in breach of any provision of this Agreement, a limited, + non-exclusive, non-transferable, fully paid up royalty free right and license + to the Commercial Software in Source Code format, without the right to grant + or authorize sublicenses, to prepare Derivative Works of the Commercial + Software, provided You (i) do not hack the licensing mechanism, or otherwise + circumvent the intended limitations on the use of Elastic Software to enable + features other than Basic Features and Functions or those features You are + entitled to as part of a Subscription, and (ii) use the resulting object code + only for reasonable testing purposes. - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. + 2.2 Restrictions. Nothing in Section 2.1 grants You the right to (i) use the + Commercial Software Source Code other than in accordance with Section 2.1 + above, (ii) use a Derivative Work of the Commercial Software outside of a + Non-production Environment, in any production capacity, on a temporary or + permanent basis, or (iii) transfer, sell, rent, lease, distribute, sublicense, + loan or otherwise make available the Commercial Software Source Code, in whole + or in part, to any third party. Notwithstanding the foregoing, You may + maintain a copy of the repository in which the Source Code of the Commercial + Software resides and that copy may be publicly accessible, provided that you + include this Agreement with Your copy of the repository. - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." +3. TERMINATION - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. + 3.1 Termination. This Agreement will automatically terminate, whether or not + You receive notice of such Termination from Elastic, if You breach any of its + provisions. - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. + 3.2 Post Termination. Upon any termination of this Agreement, for any reason, + You shall promptly cease the use of the Elastic Software in Object Code format + and cease use of the Commercial Software in Source Code format. For the + avoidance of doubt, termination of this Agreement will not affect Your right + to use Elastic Software, in either Object Code or Source Code formats, made + available under the Apache License Version 2.0. - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. + 3.3 Survival. Sections 1.2, 2.2. 3.3, 4 and 5 shall survive any termination or + expiration of this Agreement. - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: +4. DISCLAIMER OF WARRANTIES AND LIMITATION OF LIABILITY - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and + 4.1 Disclaimer of Warranties. TO THE MAXIMUM EXTENT PERMITTED UNDER APPLICABLE + LAW, THE ELASTIC SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + AND ELASTIC AND ITS LICENSORS MAKE NO WARRANTIES WHETHER EXPRESSED, IMPLIED OR + STATUTORY REGARDING OR RELATING TO THE ELASTIC SOFTWARE. TO THE MAXIMUM EXTENT + PERMITTED UNDER APPLICABLE LAW, ELASTIC AND ITS LICENSORS SPECIFICALLY + DISCLAIM ALL IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE AND NON-INFRINGEMENT WITH RESPECT TO THE ELASTIC SOFTWARE, AND WITH + RESPECT TO THE USE OF THE FOREGOING. FURTHER, ELASTIC DOES NOT WARRANT RESULTS + OF USE OR THAT THE ELASTIC SOFTWARE WILL BE ERROR FREE OR THAT THE USE OF THE + ELASTIC SOFTWARE WILL BE UNINTERRUPTED. - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and + 4.2 Limitation of Liability. IN NO EVENT SHALL ELASTIC OR ITS LICENSORS BE + LIABLE TO YOU OR ANY THIRD PARTY FOR ANY DIRECT OR INDIRECT DAMAGES, + INCLUDING, WITHOUT LIMITATION, FOR ANY LOSS OF PROFITS, LOSS OF USE, BUSINESS + INTERRUPTION, LOSS OF DATA, COST OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY + SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, IN CONNECTION WITH + OR ARISING OUT OF THE USE OR INABILITY TO USE THE ELASTIC SOFTWARE, OR THE + PERFORMANCE OF OR FAILURE TO PERFORM THIS AGREEMENT, WHETHER ALLEGED AS A + BREACH OF CONTRACT OR TORTIOUS CONDUCT, INCLUDING NEGLIGENCE, EVEN IF ELASTIC + HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and +5. MISCELLANEOUS - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. + This Agreement completely and exclusively states the entire agreement of the + parties regarding the subject matter herein, and it supersedes, and its terms + govern, all prior proposals, agreements, or other communications between the + parties, oral or written, regarding such subject matter. This Agreement may be + modified by Elastic from time to time, and any such modifications will be + effective upon the "Posted Date" set forth at the top of the modified + Agreement. If any provision hereof is held unenforceable, this Agreement will + continue without said provision and be interpreted to reflect the original + intent of the parties. This Agreement and any non-contractual obligation + arising out of or in connection with it, is governed exclusively by Dutch law. + This Agreement shall not be governed by the 1980 UN Convention on Contracts + for the International Sale of Goods. All disputes arising out of or in + connection with this Agreement, including its existence and validity, shall be + resolved by the courts with jurisdiction in Amsterdam, The Netherlands, except + where mandatory law provides for the courts at another location in The + Netherlands to have jurisdiction. The parties hereby irrevocably waive any and + all claims and defenses either might otherwise have in any such action or + proceeding in any of such courts based upon any alleged lack of personal + jurisdiction, improper venue, forum non conveniens or any similar claim or + defense. A breach or threatened breach, by You of Section 2 may cause + irreparable harm for which damages at law may not provide adequate relief, and + therefore Elastic shall be entitled to seek injunctive relief without being + required to post a bond. You may not assign this Agreement (including by + operation of law in connection with a merger or acquisition), in whole or in + part to any third party without the prior written consent of Elastic, which + may be withheld or granted by Elastic in its sole and absolute discretion. + Any assignment in violation of the preceding sentence is void. Notices to + Elastic may also be sent to legal@elastic.co. - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. +6. DEFINITIONS - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. + The following terms have the meanings ascribed: - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. + 6.1 "Affiliate" means, with respect to a party, any entity that controls, is + controlled by, or which is under common control with, such party, where + "control" means ownership of at least fifty percent (50%) of the outstanding + voting shares of the entity, or the contractual right to establish policy for, + and manage the operations of, the entity. - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. + 6.2 "Basic Features and Functions" means those features and functions of the + Elastic Software that are eligible for use under a Basic license, as set forth + at https://www.elastic.co/subscriptions, as may be modified by Elastic from + time to time. - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. + 6.3 "Commercial Software" means the Elastic Software Source Code in any file + containing a header stating the contents are subject to the Elastic License or + which is contained in the repository folder labeled "x-pack", unless a LICENSE + file present in the directory subtree declares a different license. - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. + 6.4 "Derivative Work of the Commercial Software" means, for purposes of this + Agreement, any modification(s) or enhancement(s) to the Commercial Software, + which represent, as a whole, an original work of authorship. - END OF TERMS AND CONDITIONS + 6.5 "License" means a limited, non-exclusive, non-transferable, fully paid up, + royalty free, right and license, without the right to grant or authorize + sublicenses, solely for Your internal business operations to (i) install and + use the applicable Features and Functions of the Elastic Software in Object + Code, and (ii) permit Contractors and Your Affiliates to use the Elastic + software as set forth in (i) above, provided that such use by Contractors must + be solely for Your benefit and/or the benefit of Your Affiliates, and You + shall be responsible for all acts and omissions of such Contractors and + Affiliates in connection with their use of the Elastic software that are + contrary to the terms and conditions of this Agreement. - APPENDIX: How to apply the Apache License to your work. + 6.6 "License Key" means a sequence of bytes, including but not limited to a + JSON blob, that is used to enable certain features and functions of the + Elastic Software. - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. + 6.7 "Marks and Notices" means all Elastic trademarks, trade names, logos and + notices present on the Documentation as originally provided by Elastic. - Copyright [yyyy] [name of copyright owner] + 6.8 "Non-production Environment" means an environment for development, testing + or quality assurance, where software is not used for production purposes. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at + 6.9 "Object Code" means any form resulting from mechanical transformation or + translation of Source Code form, including but not limited to compiled object + code, generated documentation, and conversions to other media types. - http://www.apache.org/licenses/LICENSE-2.0 + 6.10 "Source Code" means the preferred form of computer software for making + modifications, including but not limited to software source code, + documentation source, and configuration files. - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + 6.11 "Subscription" means the right to receive Support Services and a License + to the Commercial Software. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..9561fb1 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include README.rst diff --git a/README.md b/README.rst similarity index 100% rename from README.md rename to README.rst diff --git a/eland/DataFrame.py b/eland/DataFrame.py index 1f49216..5251c51 100644 --- a/eland/DataFrame.py +++ b/eland/DataFrame.py @@ -12,6 +12,10 @@ class DataFrame(): self.index_pattern = index_pattern self.client.indices().exists(index_pattern) + + @staticmethod + def _flatten_results(prefix, results, result): + # TODO @staticmethod def _es_results_to_pandas(results): diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..515fe57 --- /dev/null +++ b/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup + +def readme(): + with open('README.rst') as f: + return f.read() + +setup(name='eland', + version='0.1', + description='Python elasticsearch client to analyse, explore and manipulate data that resides in elasticsearch', + url='http://github.com/elastic/eland', + author='Stephen Dodson', + author_email='sjd171@gmail.com', + license='ELASTIC LICENSE', + packages=['eland'], + install_requires=[ + 'elasticsearch', + 'elasticsearch_dsl', + 'pandas' + ], + zip_safe=False) diff --git a/test.py b/test.py deleted file mode 100644 index e73163c..0000000 --- a/test.py +++ /dev/null @@ -1,7 +0,0 @@ -import eland as ed - -df = ed.read_es('localhost', 'kibana_sample_data_flights') - -print(df.head()) - -print(df.describe()) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dataframe/__init__.py b/tests/dataframe/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dataframe/common.py b/tests/dataframe/common.py new file mode 100644 index 0000000..2258c0b --- /dev/null +++ b/tests/dataframe/common.py @@ -0,0 +1,16 @@ +import eland as ed + +import pandas as pd + +# Create pandas and eland data frames +_pd_df = pd.read_json('flights.json.gz', lines=True) +_ed_df = ed.read_es('localhost', 'flights') + +class TestData: + + def pandas_frame(self): + return _pd_df + + def eland_frame(self): + return _ed_df + diff --git a/tests/dataframe/test_indexing.py b/tests/dataframe/test_indexing.py new file mode 100644 index 0000000..3ad2659 --- /dev/null +++ b/tests/dataframe/test_indexing.py @@ -0,0 +1,19 @@ +import pytest + +import eland as ed + +from eland.tests.dataframe.common import TestData + +from pandas.util.testing import ( + assert_almost_equal, assert_frame_equal, assert_series_equal) + +class TestDataFrameIndexing(TestData): + + def test_head(self): + pd_head = self.pd_df.head() + ed_head = self.ed_df.head() + + assert_frame_equal(pd_head, ed_head) + + + diff --git a/flights.json.gz b/tests/flights.json.gz similarity index 100% rename from flights.json.gz rename to tests/flights.json.gz diff --git a/tests/index_flights.py b/tests/index_flights.py new file mode 100644 index 0000000..a1cb304 --- /dev/null +++ b/tests/index_flights.py @@ -0,0 +1,38 @@ +import pandas as pd +from elasticsearch import Elasticsearch +from elasticsearch import helpers + +if __name__ == '__main__': + + # Read json file and index records into Elasticsearch + json_file_name = 'flights.json.gz' + index_name = 'flights' + + # Create connection to Elasticsearch - use defaults1 + es = Elasticsearch() + + # Delete index + print("Deleting index:", index_name) + es.indices.delete(index=index_name, ignore=[400, 404]) + + df = pd.read_json(json_file_name, lines=True) + + actions = [] + n = 0 + + print("Adding", df.shape[0], "items to index:", index_name) + for index, row in df.iterrows(): + action = {"_index": index_name, '_source': row.to_dict()} + + actions.append(action) + + n = n + 1 + + if n % 10000 == 0: + helpers.bulk(es, actions) + actions = [] + + helpers.bulk(es, actions) + actions = [] + + print("Done", es.cat.indices(index_name)) diff --git a/test.ipynb b/tests/test.ipynb similarity index 100% rename from test.ipynb rename to tests/test.ipynb