mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Adding a new movie dataset to the tests. (#646)
This commit is contained in:
parent
0f91224daf
commit
05c5859b8a
@ -163,6 +163,59 @@ ECOMMERCE_MAPPING = {
|
||||
ECOMMERCE_FILE_NAME = ROOT_DIR + "/ecommerce.json.gz"
|
||||
ECOMMERCE_DF_FILE_NAME = ROOT_DIR + "/ecommerce_df.json.gz"
|
||||
|
||||
MOVIES_INDEX_NAME = "movies"
|
||||
MOVIES_FILE_NAME = ROOT_DIR + "/movies.json.gz"
|
||||
# Elasticsearch index mapping for the movies test dataset.
#
# Scalar attributes use a single concrete type. The list-like descriptive
# attributes (genres, directors, writers, actors, country) are mapped as
# "text" with an additional "keyword" sub-field, so each one is available
# both as "<name>" and as "<name>.keyword".
MOVIES_MAPPING = {
    "mappings": {
        "properties": {
            "type": {"type": "keyword"},
            "title": {"type": "text"},
            "year": {"type": "integer"},
            "rated": {"type": "keyword"},
            "released": {"type": "date"},
            "plot": {"type": "text"},
            "awards": {"type": "text"},
            "poster": {"type": "keyword"},
            "id": {"type": "keyword"},
            "metascore": {"type": "float"},
            "imdbRating": {"type": "float"},
            "imdbVotes": {"type": "integer"},
            "language": {"type": "keyword"},
            "runtime": {"type": "integer"},
            "genres": {
                "type": "text",
                "fields": {
                    "keyword": {"type": "keyword"},
                },
            },
            "directors": {
                "type": "text",
                "fields": {
                    "keyword": {"type": "keyword"},
                },
            },
            "writers": {
                "type": "text",
                "fields": {
                    "keyword": {"type": "keyword"},
                },
            },
            "actors": {
                "type": "text",
                "fields": {
                    "keyword": {"type": "keyword"},
                },
            },
            "country": {
                "type": "text",
                "fields": {
                    "keyword": {"type": "keyword"},
                },
            },
        }
    }
}
|
||||
|
||||
TEST_MAPPING1 = {
|
||||
"mappings": {
|
||||
"properties": {
|
||||
|
BIN
tests/movies.json.gz
Normal file
BIN
tests/movies.json.gz
Normal file
Binary file not shown.
@ -30,6 +30,9 @@ from tests import (
|
||||
FLIGHTS_MAPPING,
|
||||
FLIGHTS_SMALL_FILE_NAME,
|
||||
FLIGHTS_SMALL_INDEX_NAME,
|
||||
MOVIES_FILE_NAME,
|
||||
MOVIES_INDEX_NAME,
|
||||
MOVIES_MAPPING,
|
||||
TEST_MAPPING1,
|
||||
TEST_MAPPING1_INDEX_NAME,
|
||||
TEST_NESTED_USER_GROUP_DOCS,
|
||||
@ -41,6 +44,7 @@ DATA_LIST = [
|
||||
(FLIGHTS_FILE_NAME, FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING),
|
||||
(FLIGHTS_SMALL_FILE_NAME, FLIGHTS_SMALL_INDEX_NAME, FLIGHTS_MAPPING),
|
||||
(ECOMMERCE_FILE_NAME, ECOMMERCE_INDEX_NAME, ECOMMERCE_MAPPING),
|
||||
(MOVIES_FILE_NAME, MOVIES_INDEX_NAME, MOVIES_MAPPING),
|
||||
]
|
||||
|
||||
|
||||
@ -58,18 +62,20 @@ def _setup_data(es):
|
||||
es.indices.create(index=index_name, **mapping)
|
||||
|
||||
df = pd.read_json(json_file_name, lines=True)
|
||||
|
||||
actions = []
|
||||
n = 0
|
||||
|
||||
print("Adding", df.shape[0], "items to index:", index_name)
|
||||
for index, row in df.iterrows():
|
||||
values = row.to_dict()
|
||||
values = row.dropna().to_dict()
|
||||
# make timestamp datetime 2018-01-01T12:09:35
|
||||
# values['timestamp'] = datetime.strptime(values['timestamp'], '%Y-%m-%dT%H:%M:%S')
|
||||
|
||||
# Use integer as id field for repeatable results
|
||||
action = {"_index": index_name, "_source": values, "_id": str(n)}
|
||||
# Use id field as document id from the row if the field exists.
|
||||
# Else, use integer as id field for repeatable results
|
||||
# document_id = values['id'] if 'id' in values else str(n)
|
||||
document_id = values["id"] if "id" in values else str(n)
|
||||
action = {"_index": index_name, "_source": values, "_id": document_id}
|
||||
|
||||
actions.append(action)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user