mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Adding a new movie dataset to the tests. (#646)
This commit is contained in:
parent
0f91224daf
commit
05c5859b8a
@ -163,6 +163,59 @@ ECOMMERCE_MAPPING = {
|
|||||||
ECOMMERCE_FILE_NAME = ROOT_DIR + "/ecommerce.json.gz"
|
ECOMMERCE_FILE_NAME = ROOT_DIR + "/ecommerce.json.gz"
|
||||||
ECOMMERCE_DF_FILE_NAME = ROOT_DIR + "/ecommerce_df.json.gz"
|
ECOMMERCE_DF_FILE_NAME = ROOT_DIR + "/ecommerce_df.json.gz"
|
||||||
|
|
||||||
|
MOVIES_INDEX_NAME = "movies"
|
||||||
|
MOVIES_FILE_NAME = ROOT_DIR + "/movies.json.gz"
|
||||||
|
MOVIES_MAPPING = {
|
||||||
|
"mappings": {
|
||||||
|
"properties": {
|
||||||
|
"type": {"type": "keyword"},
|
||||||
|
"title": {"type": "text"},
|
||||||
|
"year": {"type": "integer"},
|
||||||
|
"rated": {"type": "keyword"},
|
||||||
|
"released": {"type": "date"},
|
||||||
|
"plot": {"type": "text"},
|
||||||
|
"awards": {"type": "text"},
|
||||||
|
"poster": {"type": "keyword"},
|
||||||
|
"id": {"type": "keyword"},
|
||||||
|
"metascore": {"type": "float"},
|
||||||
|
"imdbRating": {"type": "float"},
|
||||||
|
"imdbVotes": {"type": "integer"},
|
||||||
|
"language": {"type": "keyword"},
|
||||||
|
"runtime": {"type": "integer"},
|
||||||
|
"genres": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {"type": "keyword"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"directors": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {"type": "keyword"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"writers": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {"type": "keyword"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"actors": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {"type": "keyword"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"country": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {"type": "keyword"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST_MAPPING1 = {
|
TEST_MAPPING1 = {
|
||||||
"mappings": {
|
"mappings": {
|
||||||
"properties": {
|
"properties": {
|
||||||
|
BIN
tests/movies.json.gz
Normal file
BIN
tests/movies.json.gz
Normal file
Binary file not shown.
@ -30,6 +30,9 @@ from tests import (
|
|||||||
FLIGHTS_MAPPING,
|
FLIGHTS_MAPPING,
|
||||||
FLIGHTS_SMALL_FILE_NAME,
|
FLIGHTS_SMALL_FILE_NAME,
|
||||||
FLIGHTS_SMALL_INDEX_NAME,
|
FLIGHTS_SMALL_INDEX_NAME,
|
||||||
|
MOVIES_FILE_NAME,
|
||||||
|
MOVIES_INDEX_NAME,
|
||||||
|
MOVIES_MAPPING,
|
||||||
TEST_MAPPING1,
|
TEST_MAPPING1,
|
||||||
TEST_MAPPING1_INDEX_NAME,
|
TEST_MAPPING1_INDEX_NAME,
|
||||||
TEST_NESTED_USER_GROUP_DOCS,
|
TEST_NESTED_USER_GROUP_DOCS,
|
||||||
@ -41,6 +44,7 @@ DATA_LIST = [
|
|||||||
(FLIGHTS_FILE_NAME, FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING),
|
(FLIGHTS_FILE_NAME, FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING),
|
||||||
(FLIGHTS_SMALL_FILE_NAME, FLIGHTS_SMALL_INDEX_NAME, FLIGHTS_MAPPING),
|
(FLIGHTS_SMALL_FILE_NAME, FLIGHTS_SMALL_INDEX_NAME, FLIGHTS_MAPPING),
|
||||||
(ECOMMERCE_FILE_NAME, ECOMMERCE_INDEX_NAME, ECOMMERCE_MAPPING),
|
(ECOMMERCE_FILE_NAME, ECOMMERCE_INDEX_NAME, ECOMMERCE_MAPPING),
|
||||||
|
(MOVIES_FILE_NAME, MOVIES_INDEX_NAME, MOVIES_MAPPING),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -58,18 +62,20 @@ def _setup_data(es):
|
|||||||
es.indices.create(index=index_name, **mapping)
|
es.indices.create(index=index_name, **mapping)
|
||||||
|
|
||||||
df = pd.read_json(json_file_name, lines=True)
|
df = pd.read_json(json_file_name, lines=True)
|
||||||
|
|
||||||
actions = []
|
actions = []
|
||||||
n = 0
|
n = 0
|
||||||
|
|
||||||
print("Adding", df.shape[0], "items to index:", index_name)
|
print("Adding", df.shape[0], "items to index:", index_name)
|
||||||
for index, row in df.iterrows():
|
for index, row in df.iterrows():
|
||||||
values = row.to_dict()
|
values = row.dropna().to_dict()
|
||||||
# make timestamp datetime 2018-01-01T12:09:35
|
# make timestamp datetime 2018-01-01T12:09:35
|
||||||
# values['timestamp'] = datetime.strptime(values['timestamp'], '%Y-%m-%dT%H:%M:%S')
|
# values['timestamp'] = datetime.strptime(values['timestamp'], '%Y-%m-%dT%H:%M:%S')
|
||||||
|
|
||||||
# Use integer as id field for repeatable results
|
# Use id field as document id from the row if the field exists.
|
||||||
action = {"_index": index_name, "_source": values, "_id": str(n)}
|
# Else, use integer as id field for repeatable results
|
||||||
|
# document_id = values['id'] if 'id' in values else str(n)
|
||||||
|
document_id = values["id"] if "id" in values else str(n)
|
||||||
|
action = {"_index": index_name, "_source": values, "_id": document_id}
|
||||||
|
|
||||||
actions.append(action)
|
actions.append(action)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user