Remove eland.Client, use Elasticsearch directly
This commit is contained in:
parent 29af76101e
commit 064d43b9ef
@@ -25,7 +25,6 @@ from eland._version import (
     __maintainer_email__,
 )
 from eland.common import *
-from eland.client import *
 from eland.filter import *
 from eland.index import *
 from eland.field_mappings import *
@@ -1,61 +0,0 @@
-# Copyright 2019 Elasticsearch BV
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from elasticsearch import Elasticsearch
-from elasticsearch import helpers
-
-
-class Client:
-    """
-    eland client - implemented as facade to control access to Elasticsearch methods
-    """
-
-    def __init__(self, es=None):
-        if isinstance(es, Elasticsearch):
-            self._es = es
-        elif isinstance(es, Client):
-            self._es = es._es
-        else:
-            self._es = Elasticsearch(es)
-
-    def index_create(self, **kwargs):
-        return self._es.indices.create(**kwargs)
-
-    def index_delete(self, **kwargs):
-        return self._es.indices.delete(**kwargs)
-
-    def index_exists(self, **kwargs):
-        return self._es.indices.exists(**kwargs)
-
-    def get_mapping(self, **kwargs):
-        return self._es.indices.get_mapping(**kwargs)
-
-    def bulk(self, actions, refresh=False):
-        return helpers.bulk(self._es, actions, refresh=refresh)
-
-    def scan(self, **kwargs):
-        return helpers.scan(self._es, **kwargs)
-
-    def search(self, **kwargs):
-        return self._es.search(**kwargs)
-
-    def field_caps(self, **kwargs):
-        return self._es.field_caps(**kwargs)
-
-    def count(self, **kwargs):
-        count_json = self._es.count(**kwargs)
-        return count_json["count"]
-
-    def perform_request(self, method, url, headers=None, params=None, body=None):
-        return self._es.transport.perform_request(method, url, headers, params, body)
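For orientation only (not part of the commit), the removed facade methods map onto direct elasticsearch-py calls roughly as sketched below; the host and index names are placeholders:

    from elasticsearch import Elasticsearch
    from elasticsearch.helpers import bulk, scan

    es = Elasticsearch("localhost:9200")  # placeholder host

    es.indices.create(index="example")        # was Client.index_create
    es.indices.exists(index="example")        # was Client.index_exists
    es.indices.get_mapping(index="example")   # was Client.get_mapping
    es.count(index="example")["count"]        # was Client.count
    bulk(es, [], refresh=True)                # was Client.bulk
    for hit in scan(es, index="example"):     # was Client.scan
        pass
    es.indices.delete(index="example")        # was Client.index_delete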
@@ -15,9 +15,10 @@
 # Default number of rows displayed (different to pandas where ALL could be displayed)
 import warnings
 from enum import Enum
-from typing import Union
+from typing import Union, List, Tuple

 import pandas as pd
+from elasticsearch import Elasticsearch

 DEFAULT_NUM_ROWS_DISPLAYED = 60

@@ -259,3 +260,11 @@ def elasticsearch_date_to_pandas_date(
         )
     # TODO investigate how we could generate this just once for a bulk read.
     return pd.to_datetime(value)
+
+
+def ensure_es_client(
+    es_client: Union[str, List[str], Tuple[str, ...], Elasticsearch]
+) -> Elasticsearch:
+    if not isinstance(es_client, Elasticsearch):
+        return Elasticsearch(es_client)
+    return es_client
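A quick sketch of what the new ensure_es_client helper accepts (the hosts below are placeholders):

    from elasticsearch import Elasticsearch

    from eland.common import ensure_es_client

    es = ensure_es_client("localhost:9200")                   # connection string
    es = ensure_es_client(["localhost:9200"])                 # list of hosts
    es = ensure_es_client(Elasticsearch("localhost:9200"))    # instance is passed through unchanged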
@@ -46,8 +46,7 @@ class DataFrame(NDFrame):
     ----------
     client: Elasticsearch client argument(s) (e.g. 'localhost:9200')
         - elasticsearch-py parameters or
-        - elasticsearch-py instance or
-        - eland.Client instance
+        - elasticsearch-py instance
     index_pattern: str
         Elasticsearch index pattern. This can contain wildcards. (e.g. 'flights')
     columns: list of str, optional
@@ -67,7 +67,7 @@ class FieldMappings:
     """
     Parameters
     ----------
-    client: eland.Client
+    client: elasticsearch.Elasticsearch
         Elasticsearch client

     index_pattern: str
@@ -82,7 +82,7 @@ class FieldMappings:
                 f"or index_pattern {client} {index_pattern}",
             )

-        get_mapping = client.get_mapping(index=index_pattern)
+        get_mapping = client.indices.get_mapping(index=index_pattern)

         # Get all fields (including all nested) and then all field_caps
         all_fields = FieldMappings._extract_fields_from_mapping(get_mapping)
@@ -42,8 +42,7 @@ class ImportedMLModel(MLModel):
     ----------
     es_client: Elasticsearch client argument(s)
         - elasticsearch-py parameters or
-        - elasticsearch-py instance or
-        - eland.Client instance
+        - elasticsearch-py instance

     model_id: str
         The unique identifier of the trained inference model in Elasticsearch.
@@ -160,9 +159,8 @@ class ImportedMLModel(MLModel):
         serialized_model = str(serializer.serialize_and_compress_model())[
             2:-1
         ]  # remove `b` and str quotes
-        self._client.perform_request(
-            "PUT",
-            "/_ml/inference/" + self._model_id,
+        self._client.ml.put_trained_model(
+            model_id=self._model_id,
             body={
                 "input": {"field_names": feature_names},
                 "compressed_definition": serialized_model,
@@ -229,9 +227,7 @@ class ImportedMLModel(MLModel):
         else:
             raise NotImplementedError(f"Prediction for type {type(X)}, not supported")

-        results = self._client.perform_request(
-            "POST",
-            "/_ingest/pipeline/_simulate",
+        results = self._client.ingest.simulate(
             body={
                 "pipeline": {
                     "processors": [
@@ -245,7 +241,7 @@ class ImportedMLModel(MLModel):
                     ]
                 },
                 "docs": docs,
-            },
+            }
         )

         y = [
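Standalone, the two replacement calls look roughly like this; the model id, field names and documents are illustrative placeholders, not values taken from the commit:

    from elasticsearch import Elasticsearch

    es = Elasticsearch("localhost:9200")  # placeholder host

    # Store a pre-serialized model definition through the ML namespace.
    es.ml.put_trained_model(
        model_id="example-model",
        body={
            "input": {"field_names": ["f0", "f1"]},
            "compressed_definition": "<compressed model definition>",
        },
    )

    # Run documents through an inference processor without creating a pipeline.
    results = es.ingest.simulate(
        body={
            "pipeline": {
                "processors": [{"inference": {"model_id": "example-model"}}]
            },
            "docs": [{"_source": {"f0": 1.0, "f1": 2.0}}],
        }
    )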
@@ -13,8 +13,7 @@
 # limitations under the License.

 import elasticsearch
-
-from eland import Client
+from eland.common import ensure_es_client


 class MLModel:
@@ -37,13 +36,12 @@ class MLModel:
         ----------
         es_client: Elasticsearch client argument(s)
             - elasticsearch-py parameters or
-            - elasticsearch-py instance or
-            - eland.Client instance
+            - elasticsearch-py instance

         model_id: str
             The unique identifier of the trained inference model in Elasticsearch.
         """
-        self._client = Client(es_client)
+        self._client = ensure_es_client(es_client)
         self._model_id = model_id

     def delete_model(self):
@@ -53,6 +51,6 @@ class MLModel:
         If model doesn't exist, ignore failure.
         """
         try:
-            self._client.perform_request("DELETE", "/_ml/inference/" + self._model_id)
-        except elasticsearch.exceptions.NotFoundError:
+            self._client.ml.delete_trained_model(model_id=self._model_id)
+        except elasticsearch.NotFoundError:
             pass
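A minimal sketch of the new deletion path (host and model id are placeholders):

    import elasticsearch
    from elasticsearch import Elasticsearch

    es = Elasticsearch("localhost:9200")  # placeholder host

    try:
        es.ml.delete_trained_model(model_id="example-model")
    except elasticsearch.NotFoundError:
        # Same behaviour as the commit: a missing model is not an error.
        pass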
@@ -57,7 +57,7 @@ class NDFrame(ABC):

         Parameters
         ----------
-        client : eland.Client
+        client : elasticsearch.Elasticsearch
             A reference to a Elasticsearch python client
         """
         if query_compiler is None:
@@ -16,6 +16,7 @@ import warnings

 import pandas as pd
 from pandas.core.dtypes.common import is_datetime_or_timedelta_dtype
+from elasticsearch.helpers import scan

 from eland import (
     Index,
@@ -114,7 +115,7 @@ class Operations:

             field_exists_count = query_compiler._client.count(
                 index=query_compiler._index_pattern, body=body.to_count_body()
-            )
+            )["count"]
             counts[field] = field_exists_count

         return pd.Series(data=counts, index=fields)
@@ -702,8 +703,10 @@ class Operations:
             raise
         else:
             is_scan = True
-            es_results = query_compiler._client.scan(
-                index=query_compiler._index_pattern, query=body
+            es_results = scan(
+                client=query_compiler._client,
+                index=query_compiler._index_pattern,
+                query=body,
             )
         # create post sort
         if sort_params is not None:
@@ -739,7 +742,7 @@ class Operations:

         return query_compiler._client.count(
             index=query_compiler._index_pattern, body=body.to_count_body()
-        )
+        )["count"]

     def _validate_index_operation(self, query_compiler, items):
         if not isinstance(items, list):
@@ -772,7 +775,7 @@ class Operations:

         return query_compiler._client.count(
             index=query_compiler._index_pattern, body=body.to_count_body()
-        )
+        )["count"]

     def drop_index_values(self, query_compiler, field, items):
         self._validate_index_operation(query_compiler, items)
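The same two patterns in isolation (host, index name and query are placeholders):

    from elasticsearch import Elasticsearch
    from elasticsearch.helpers import scan

    es = Elasticsearch("localhost:9200")  # placeholder host

    # es.count returns a dict; the raw hit count sits under "count".
    total = es.count(index="example", body={"query": {"match_all": {}}})["count"]

    # helpers.scan streams every matching document, handling scroll internally.
    for hit in scan(client=es, index="example", query={"query": {"match_all": {}}}):
        print(hit["_source"])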
@@ -18,7 +18,6 @@ import numpy as np
 import pandas as pd

 from eland import (
-    Client,
     DEFAULT_PROGRESS_REPORTING_NUM_ROWS,
     elasticsearch_date_to_pandas_date,
 )
@@ -26,6 +25,7 @@ from eland import FieldMappings
 from eland import Index
 from eland import Operations
 from eland.filter import QueryFilter
+from eland.common import ensure_es_client


 class QueryCompiler:
@@ -67,13 +67,13 @@ class QueryCompiler:
     ):
         # Implement copy as we don't deep copy the client
         if to_copy is not None:
-            self._client = Client(to_copy._client)
+            self._client = to_copy._client
             self._index_pattern = to_copy._index_pattern
             self._index = Index(self, to_copy._index.index_field)
             self._operations = copy.deepcopy(to_copy._operations)
             self._mappings = copy.deepcopy(to_copy._mappings)
         else:
-            self._client = Client(client)
+            self._client = ensure_es_client(client)
             self._index_pattern = index_pattern
             # Get and persist mappings, this allows us to correctly
             # map returned types from Elasticsearch to pandas datatypes
@@ -519,10 +519,10 @@ class QueryCompiler:
         if not isinstance(right, QueryCompiler):
             raise TypeError(f"Incompatible types {type(self)} != {type(right)}")

-        if self._client._es != right._client._es:
+        if self._client != right._client:
             raise ValueError(
                 f"Can not perform arithmetic operations across different clients"
-                f"{self._client._es} != {right._client._es}"
+                f"{self._client} != {right._client}"
             )

         if self._index.index_field != right._index.index_field:
@@ -63,7 +63,7 @@ class Series(NDFrame):

     Parameters
     ----------
-    client : eland.Client
+    client : elasticsearch.Elasticsearch
         A reference to a Elasticsearch python client

     index_pattern : str
@@ -1046,7 +1046,7 @@ class Series(NDFrame):
         return a op b

         a & b == Series
-        a & b must share same eland.Client, index_pattern and index_field
+        a & b must share same Elasticsearch client, index_pattern and index_field
         a == Series, b == numeric or string

         Naming of the resulting Series
@@ -1,13 +0,0 @@
-# Copyright 2019 Elasticsearch BV
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
@@ -1,40 +0,0 @@
-# Copyright 2019 Elasticsearch BV
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# File called _pytest for PyCharm compatability
-import elasticsearch
-import pytest
-
-import eland as ed
-from eland.tests import ES_TEST_CLIENT
-from eland.tests.common import TestData
-
-
-class TestClientEq(TestData):
-    def test_perform_request(self):
-        client = ed.Client(ES_TEST_CLIENT)
-
-        response = client.perform_request("GET", "/_cat/indices/flights")
-
-        # yellow open flights TNUv0iysQSi7F-N5ykWfWQ 1 1 13059 0 5.7mb 5.7mb
-        tokens = response.split(" ")
-
-        assert tokens[2] == "flights"
-        assert tokens[6] == "13059"
-
-    def test_bad_perform_request(self):
-        client = ed.Client(ES_TEST_CLIENT)
-
-        with pytest.raises(elasticsearch.exceptions.NotFoundError):
-            client.perform_request("GET", "/_cat/indices/non_existant_index")
@@ -24,7 +24,7 @@ class TestAggregatables(TestData):
     @pytest.mark.filterwarnings("ignore:Aggregations not supported")
     def test_ecommerce_all_aggregatables(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=ECOMMERCE_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=ECOMMERCE_INDEX_NAME
         )

         aggregatables = ed_field_mappings.aggregatable_field_names()
@@ -89,7 +89,7 @@ class TestAggregatables(TestData):
         }

         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT),
+            client=ES_TEST_CLIENT,
             index_pattern=ECOMMERCE_INDEX_NAME,
             display_names=expected.values(),
         )
@@ -100,14 +100,14 @@ class TestAggregatables(TestData):

     def test_ecommerce_single_aggregatable_field(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=ECOMMERCE_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=ECOMMERCE_INDEX_NAME
         )

         assert "user" == ed_field_mappings.aggregatable_field_name("user")

     def test_ecommerce_single_keyword_aggregatable_field(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=ECOMMERCE_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=ECOMMERCE_INDEX_NAME
         )

         assert (
@@ -117,7 +117,7 @@ class TestAggregatables(TestData):

     def test_ecommerce_single_non_existant_field(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=ECOMMERCE_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=ECOMMERCE_INDEX_NAME
         )

         with pytest.raises(KeyError):
@@ -126,7 +126,7 @@ class TestAggregatables(TestData):
     @pytest.mark.filterwarnings("ignore:Aggregations not supported")
     def test_ecommerce_single_non_aggregatable_field(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=ECOMMERCE_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=ECOMMERCE_INDEX_NAME
        )

         assert None is ed_field_mappings.aggregatable_field_name("customer_gender")
@@ -64,7 +64,7 @@ class TestDateTime(TestData):

     def test_all_formats(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=self.time_index_name
+            client=ES_TEST_CLIENT, index_pattern=self.time_index_name
         )

         # do a rename so display_name for a field is different to es_field_name
@@ -23,7 +23,7 @@ from eland.tests.common import TestData
 class TestDisplayNames(TestData):
     def test_init_all_fields(self):
         field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         expected = self.pd_flights().columns.to_list()
@@ -34,7 +34,7 @@ class TestDisplayNames(TestData):
         expected = ["timestamp", "DestWeather", "DistanceKilometers", "AvgTicketPrice"]

         field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT),
+            client=ES_TEST_CLIENT,
             index_pattern=FLIGHTS_INDEX_NAME,
             display_names=expected,
         )
@@ -51,7 +51,7 @@ class TestDisplayNames(TestData):
         ]

         field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         field_mappings.display_names = expected
@@ -75,7 +75,7 @@ class TestDisplayNames(TestData):
         ]

         field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         with pytest.raises(KeyError):
@@ -87,7 +87,7 @@ class TestDisplayNames(TestData):

     def test_invalid_list_type_display_names(self):
         field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         # not a list like object
@@ -23,7 +23,7 @@ from eland.tests.common import TestData
 class TestDTypes(TestData):
     def test_all_fields(self):
         field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         pd_flights = self.pd_flights()
@@ -34,7 +34,7 @@ class TestDTypes(TestData):
         expected = ["timestamp", "DestWeather", "DistanceKilometers", "AvgTicketPrice"]

         field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT),
+            client=ES_TEST_CLIENT,
             index_pattern=FLIGHTS_INDEX_NAME,
             display_names=expected,
         )
@@ -25,7 +25,7 @@ from eland.tests.common import TestData
 class TestFieldNamePDDType(TestData):
     def test_all_formats(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         pd_flights = self.pd_flights()
@@ -39,7 +39,7 @@ class TestFieldNamePDDType(TestData):

     def test_non_existant(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         with pytest.raises(KeyError):
@@ -25,7 +25,7 @@ from eland.tests.common import TestData
 class TestGetFieldNames(TestData):
     def test_get_field_names_all(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )
         pd_flights = self.pd_flights()

@@ -38,7 +38,7 @@ class TestGetFieldNames(TestData):
     def test_get_field_names_selected(self):
         expected = ["Carrier", "AvgTicketPrice"]
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT),
+            client=ES_TEST_CLIENT,
             index_pattern=FLIGHTS_INDEX_NAME,
             display_names=expected,
         )
@@ -53,7 +53,7 @@ class TestGetFieldNames(TestData):
     def test_get_field_names_scripted(self):
         expected = ["Carrier", "AvgTicketPrice"]
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT),
+            client=ES_TEST_CLIENT,
             index_pattern=FLIGHTS_INDEX_NAME,
             display_names=expected,
         )
@@ -24,7 +24,7 @@ from eland.tests.common import TestData
 class TestMetricSourceFields(TestData):
     def test_flights_all_metric_source_fields(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )
         pd_flights = self.pd_flights()

@@ -38,7 +38,7 @@ class TestMetricSourceFields(TestData):

     def test_flights_all_metric_source_fields_and_bool(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )
         pd_flights = self.pd_flights()

@@ -54,7 +54,7 @@ class TestMetricSourceFields(TestData):

     def test_flights_all_metric_source_fields_bool_and_timestamp(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )
         pd_flights = self.pd_flights()

@@ -87,7 +87,7 @@ class TestMetricSourceFields(TestData):
         user object
         """
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT),
+            client=ES_TEST_CLIENT,
             index_pattern=ECOMMERCE_INDEX_NAME,
             display_names=field_names,
         )
@@ -120,7 +120,7 @@ class TestMetricSourceFields(TestData):
         user object
         """
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT),
+            client=ES_TEST_CLIENT,
             index_pattern=ECOMMERCE_INDEX_NAME,
             display_names=field_names,
         )
@@ -143,7 +143,7 @@ class TestMetricSourceFields(TestData):
         taxless_total_price float64
         """
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT),
+            client=ES_TEST_CLIENT,
             index_pattern=ECOMMERCE_INDEX_NAME,
             display_names=field_names,
         )
@@ -22,7 +22,7 @@ from eland.tests.common import TestData
 class TestRename(TestData):
     def test_single_rename(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         pd_flights_column_series = self.pd_flights().columns.to_series()
@@ -47,7 +47,7 @@ class TestRename(TestData):

     def test_non_exists_rename(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         pd_flights_column_series = self.pd_flights().columns.to_series()
@@ -71,7 +71,7 @@ class TestRename(TestData):

     def test_exists_and_non_exists_rename(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         pd_flights_column_series = self.pd_flights().columns.to_series()
@@ -104,7 +104,7 @@ class TestRename(TestData):

     def test_multi_rename(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         pd_flights_column_series = self.pd_flights().columns.to_series()
@@ -25,7 +25,7 @@ from eland.tests.common import TestData
 class TestScriptedFields(TestData):
     def test_add_new_scripted_field(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         ed_field_mappings.add_scripted_field(
@@ -44,7 +44,7 @@ class TestScriptedFields(TestData):

     def test_add_duplicate_scripted_field(self):
         ed_field_mappings = ed.FieldMappings(
-            client=ed.Client(ES_TEST_CLIENT), index_pattern=FLIGHTS_INDEX_NAME
+            client=ES_TEST_CLIENT, index_pattern=FLIGHTS_INDEX_NAME
         )

         ed_field_mappings.add_scripted_field(
@@ -14,7 +14,6 @@

 import pandas as pd
 from elasticsearch import helpers
-from elasticsearch.client import ClusterClient

 from eland.tests import (
     FLIGHTS_FILE_NAME,
@@ -85,9 +84,8 @@ def _setup_data(es):

 def _update_max_compilations_limit(es, limit="10000/1m"):
     print("Updating script.max_compilations_rate to ", limit)
-    cluster_client = ClusterClient(es)
     body = {"transient": {"script.max_compilations_rate": limit}}
-    cluster_client.put_settings(body=body)
+    es.cluster.put_settings(body=body)


 def _setup_test_mappings(es):
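In isolation, the replacement for the standalone ClusterClient is simply the cluster namespace on the client itself (host is a placeholder; the setting value is the one used in the test setup):

    from elasticsearch import Elasticsearch

    es = Elasticsearch("localhost:9200")  # placeholder host

    es.cluster.put_settings(
        body={"transient": {"script.max_compilations_rate": "10000/1m"}}
    )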
@@ -17,9 +17,11 @@ import csv
 import pandas as pd
 from pandas.io.parsers import _c_parser_defaults

-from eland import Client, DEFAULT_CHUNK_SIZE
+from eland import DEFAULT_CHUNK_SIZE
 from eland import DataFrame
 from eland import FieldMappings
+from eland.common import ensure_es_client
+from elasticsearch.helpers import bulk


 def read_es(es_client, es_index_pattern):
@@ -32,8 +34,7 @@ def read_es(es_client, es_index_pattern):
     ----------
     es_client: Elasticsearch client argument(s)
         - elasticsearch-py parameters or
-        - elasticsearch-py instance or
-        - eland.Client instance
+        - elasticsearch-py instance
     es_index_pattern: str
         Elasticsearch index pattern

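With the facade gone, the public entry points accept either elasticsearch-py parameters or an Elasticsearch instance directly; for example (host and index name are placeholders):

    import eland as ed
    from elasticsearch import Elasticsearch

    # Both forms are accepted after this change.
    df = ed.read_es("localhost:9200", "flights")
    df = ed.read_es(Elasticsearch("localhost:9200"), "flights")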
@@ -69,8 +70,7 @@ def pandas_to_eland(
     ----------
     es_client: Elasticsearch client argument(s)
         - elasticsearch-py parameters or
-        - elasticsearch-py instance or
-        - eland.Client instance
+        - elasticsearch-py instance
     es_dest_index: str
         Name of Elasticsearch index to be appended to
     es_if_exists : {'fail', 'replace', 'append'}, default 'fail'
@@ -164,12 +164,11 @@ def pandas_to_eland(
     if chunksize is None:
         chunksize = DEFAULT_CHUNK_SIZE

-    client = Client(es_client)
-
     mapping = FieldMappings._generate_es_mappings(pd_df, es_geo_points)
+    es_client = ensure_es_client(es_client)

     # If table exists, check if_exists parameter
-    if client.index_exists(index=es_dest_index):
+    if es_client.indices.exists(index=es_dest_index):
         if es_if_exists == "fail":
             raise ValueError(
                 f"Could not create the index [{es_dest_index}] because it "
@@ -178,12 +177,12 @@ def pandas_to_eland(
                 f"'append' or 'replace' data."
             )
         elif es_if_exists == "replace":
-            client.index_delete(index=es_dest_index)
-            client.index_create(index=es_dest_index, body=mapping)
+            es_client.indices.delete(index=es_dest_index)
+            es_client.indices.create(index=es_dest_index, body=mapping)
         # elif if_exists == "append":
         # TODO validate mapping are compatible
     else:
-        client.index_create(index=es_dest_index, body=mapping)
+        es_client.indices.create(index=es_dest_index, body=mapping)

     # Now add data
     actions = []
@@ -208,14 +207,11 @@ def pandas_to_eland(
         n = n + 1

         if n % chunksize == 0:
-            client.bulk(actions, refresh=es_refresh)
+            bulk(client=es_client, actions=actions, refresh=es_refresh)
             actions = []

-    client.bulk(actions, refresh=es_refresh)
-
-    ed_df = DataFrame(client, es_dest_index)
-
-    return ed_df
+    bulk(client=es_client, actions=actions, refresh=es_refresh)
+    return DataFrame(es_client, es_dest_index)


 def eland_to_pandas(ed_df, show_progress=False):
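The helpers.bulk call used above, shown standalone (host, index and document are placeholders):

    from elasticsearch import Elasticsearch
    from elasticsearch.helpers import bulk

    es = Elasticsearch("localhost:9200")  # placeholder host

    actions = [
        {"_index": "example", "_id": 1, "_source": {"field": "value"}},
    ]

    # Replaces the facade's Client.bulk(); refresh is forwarded unchanged.
    bulk(client=es, actions=actions, refresh=True)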
@@ -357,8 +353,7 @@ def read_csv(
     ----------
     es_client: Elasticsearch client argument(s)
         - elasticsearch-py parameters or
-        - elasticsearch-py instance or
-        - eland.Client instance
+        - elasticsearch-py instance
     es_dest_index: str
         Name of Elasticsearch index to be appended to
     es_if_exists : {'fail', 'replace', 'append'}, default 'fail'
@@ -490,8 +485,6 @@ def read_csv(
     if chunksize is None:
         kwds.update(chunksize=DEFAULT_CHUNK_SIZE)

-    client = Client(es_client)
-
     # read csv in chunks to pandas DataFrame and dump to eland DataFrame (and Elasticsearch)
     reader = pd.read_csv(filepath_or_buffer, **kwds)

@@ -500,7 +493,7 @@ def read_csv(
         if first_write:
             pandas_to_eland(
                 chunk,
-                client,
+                es_client,
                 es_dest_index,
                 es_if_exists=es_if_exists,
                 chunksize=chunksize,
@@ -512,7 +505,7 @@ def read_csv(
         else:
             pandas_to_eland(
                 chunk,
-                client,
+                es_client,
                 es_dest_index,
                 es_if_exists="append",
                 chunksize=chunksize,
@@ -522,6 +515,4 @@ def read_csv(
             )

     # Now create an eland.DataFrame that references the new index
-    ed_df = DataFrame(client, es_dest_index)
-
-    return ed_df
+    return DataFrame(es_client, es_dest_index)