mirror of
https://github.com/elastic/eland.git
synced 2025-07-24 00:00:39 +08:00
164 lines
4.8 KiB
Python
164 lines
4.8 KiB
Python
# Licensed to Elasticsearch B.V under one or more agreements.
|
|
# Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
|
|
# See the LICENSE file in the project root for more information
|
|
|
|
import warnings
|
|
from copy import deepcopy
|
|
from typing import Optional, Dict, List, Any
|
|
|
|
from eland.filter import BooleanFilter, NotNull, IsNull, IsIn
|
|
|
|
|
|
class Query:
    """
    Simple class to manage building Elasticsearch queries.

    A ``Query`` holds two pieces of state:

    * ``_query`` - a boolean filter that becomes the ``"query"`` section of
      a request body. Filters added later are combined with the existing
      one using the ``&`` operator.
    * ``_aggs`` - a dict mapping aggregation name to aggregation body that
      becomes the ``"aggs"`` section of a request body.
    """

    def __init__(self, query: Optional["Query"] = None):
        """
        Create an empty query, or an independent copy of ``query``.

        Parameters
        ----------
        query:
            Optional existing ``Query`` to copy. Its filter and aggregations
            are deep-copied so later changes to this instance do not affect
            the original.
        """
        # type defs
        self._query: BooleanFilter
        self._aggs: Dict[str, Any]

        if query is None:
            self._query = BooleanFilter()
            self._aggs = {}
        else:
            # Deep copy the incoming query so we can change it
            self._query = deepcopy(query._query)
            self._aggs = deepcopy(query._aggs)

    def exists(self, field: str, must: bool = True) -> None:
        """
        Add exists query
        https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-exists-query.html

        Parameters
        ----------
        field:
            Name of the field to test.
        must:
            If True add a ``NotNull(field)`` filter, otherwise add an
            ``IsNull(field)`` filter.
        """
        self.update_boolean_filter(NotNull(field) if must else IsNull(field))

    def ids(self, items: List[Any], must: bool = True) -> None:
        """
        Add ids query
        https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-ids-query.html

        Parameters
        ----------
        items:
            The ids to filter on.
        must:
            If True add ``IsIn("ids", items)``, otherwise add its negation
            (``~IsIn("ids", items)``).
        """
        id_filter = IsIn("ids", items)
        self.update_boolean_filter(id_filter if must else ~id_filter)

    def terms(self, field: str, items: List[str], must: bool = True) -> None:
        """
        Add terms query
        https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html

        Parameters
        ----------
        field:
            Name of the field to filter on.
        items:
            The terms to filter on.
        must:
            If True add ``IsIn(field, items)``, otherwise add its negation
            (``~IsIn(field, items)``).
        """
        terms_filter = IsIn(field, items)
        self.update_boolean_filter(terms_filter if must else ~terms_filter)

    def terms_aggs(self, name: str, func: str, field: str, es_size: int) -> None:
        """
        Add terms agg e.g

        "aggs": {
            "name": {
                "terms": {
                    "field": "Airline",
                    "size": 10
                }
            }
        }

        ``func`` is used as the aggregation type key (``"terms"`` in the
        example above). An existing aggregation called ``name`` is replaced.
        """
        self._aggs[name] = {func: {"field": field, "size": es_size}}

    def metric_aggs(self, name: str, func: str, field: str) -> None:
        """
        Add metric agg e.g

        "aggs": {
            "name": {
                "max": {
                    "field": "AvgTicketPrice"
                }
            }
        }

        ``func`` is used as the aggregation type key (``"max"`` in the
        example above). An existing aggregation called ``name`` is replaced.
        """
        self._aggs[name] = {func: {"field": field}}

    def hist_aggs(
        self,
        name: str,
        field: str,
        min_aggs: Dict[str, Any],
        max_aggs: Dict[str, Any],
        num_bins: int,
    ) -> None:
        """
        Add histogram agg e.g.

        "aggs": {
            "name": {
                "histogram": {
                    "field": "AvgTicketPrice",
                    "interval": (max_aggs[field] - min_aggs[field]) / num_bins,
                    "offset": min_aggs[field]
                }
            }
        }

        Parameters
        ----------
        name:
            Name of the aggregation.
        field:
            Field to build the histogram over; also the key looked up in
            ``min_aggs`` and ``max_aggs``.
        min_aggs, max_aggs:
            Mappings of field name to that field's minimum / maximum value.
        num_bins:
            Number of buckets the ``[min, max]`` range is divided into.

        Notes
        -----
        If the computed interval is zero (min equals max) no aggregation is
        added. ``field`` must be present in both mappings and ``num_bins``
        must be non-zero, otherwise the lookup / division raises.
        """
        # Named lower/upper rather than min/max to avoid shadowing builtins.
        lower = min_aggs[field]
        upper = max_aggs[field]

        interval = (upper - lower) / num_bins

        # A zero-width range cannot be bucketed, so skip the agg entirely.
        if interval != 0:
            self._aggs[name] = {
                "histogram": {"field": field, "interval": interval, "offset": lower}
            }

    def to_search_body(self) -> Dict[str, Any]:
        """
        Build the body for a search request.

        Returns
        -------
        dict
            Contains ``"aggs"`` only if aggregations have been added and
            ``"query"`` only if the filter is not empty; may be ``{}``.
        """
        body: Dict[str, Any] = {}
        if self._aggs:
            body["aggs"] = self._aggs
        if not self._query.empty():
            body["query"] = self._query.build()
        return body

    def to_count_body(self) -> Optional[Dict[str, Any]]:
        """
        Build the body for a count request.

        Aggregations are not included in the returned body, so a warning is
        issued if any have been added.

        Returns
        -------
        dict or None
            ``{"query": ...}`` if a filter is set, otherwise ``None``.
        """
        if self._aggs:
            warnings.warn(f"Requesting count for agg query {self}")
        if self._query.empty():
            return None
        return {"query": self._query.build()}

    def update_boolean_filter(self, boolean_filter: "BooleanFilter") -> None:
        """
        Combine ``boolean_filter`` with the current filter.

        If the current filter is empty it is replaced by ``boolean_filter``;
        otherwise the two are combined with the ``&`` operator.
        """
        if self._query.empty():
            self._query = boolean_filter
        else:
            self._query = self._query & boolean_filter

    def __repr__(self) -> str:
        """Return the ``repr`` of the search body this query would produce."""
        return repr(self.to_search_body())
|