Compare commits

No commits in common. "main" and "7.5.1a4" have entirely different histories.

478 changed files with 13960 additions and 41933 deletions
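The "no commits in common" note above means the two refs share no merge base, so the comparison is effectively "remove everything from `main`, add everything from `7.5.1a4`". A minimal sketch of confirming that locally, assuming a clone with both refs fetched:

```bash
# Prints the best common ancestor if one exists; with unrelated histories it
# prints nothing and exits non-zero, so the fallback message is shown.
git merge-base main 7.5.1a4 || echo "no common ancestor: unrelated histories"
```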

@@ -1,9 +0,0 @@
ARG PYTHON_VERSION=3.9
FROM python:${PYTHON_VERSION}
ENV FORCE_COLOR=1
WORKDIR /code/eland
RUN python -m pip install nox
COPY . .

@@ -1,11 +0,0 @@
#!/usr/bin/env bash
set -eo pipefail
export LC_ALL=en_US.UTF-8
echo "--- Building the Wolfi image"
# Building the linux/arm64 image takes about one hour on Buildkite, which is too slow
docker build --file Dockerfile.wolfi .
echo "--- Building the public image"
docker build .
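The comment in the script above notes that only the host-architecture image is built in CI because emulated arm64 builds are too slow. For reference, a hedged sketch of the multi-arch build that is skipped; it uses standard `docker buildx` flags, assumes a builder with QEMU emulation, and without `--push` the result stays in the build cache:

```bash
# Cross-build both architectures; expect the linux/arm64 leg to be slow under emulation.
docker buildx build --platform linux/amd64,linux/arm64 --file Dockerfile.wolfi .
```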

@@ -1,8 +0,0 @@
#!/usr/bin/env bash
docker build --file .buildkite/Dockerfile --tag elastic/eland --build-arg PYTHON_VERSION=${PYTHON_VERSION} .
docker run \
--name doc_build \
--rm \
elastic/eland \
bash -c "apt-get update && apt-get install --yes pandoc && nox -s docs"

@@ -1,7 +0,0 @@
#!/usr/bin/env bash
docker build --file .buildkite/Dockerfile --tag elastic/eland --build-arg PYTHON_VERSION=${PYTHON_VERSION} .
docker run \
--name linter \
--rm \
elastic/eland \
nox -s lint

@@ -1,50 +0,0 @@
steps:
- label: ":terminal: Lint code"
env:
PYTHON_VERSION: 3
agents:
provider: "gcp"
machineType: "n2-standard-2"
commands:
- ./.buildkite/lint-code.sh
- label: ":books: Build documentation"
env:
PYTHON_VERSION: 3.9-bookworm
agents:
provider: "gcp"
machineType: "n2-standard-2"
commands:
- ./.buildkite/build-documentation.sh
- label: ":docker: Build Wolfi image"
env:
PYTHON_VERSION: 3.11-bookworm
agents:
provider: "gcp"
machineType: "n2-standard-2"
commands:
- ./.buildkite/build-docker-images.sh
- label: ":python: {{ matrix.python }} :elasticsearch: {{ matrix.stack }} :pandas: {{ matrix.pandas }}"
agents:
provider: "gcp"
machineType: "n2-standard-4"
env:
PYTHON_VERSION: "{{ matrix.python }}"
PANDAS_VERSION: "{{ matrix.pandas }}"
TEST_SUITE: "xpack"
ELASTICSEARCH_VERSION: "{{ matrix.stack }}"
matrix:
setup:
# Python and pandas versions need to be added to the nox configuration too
# (in the decorators of the test method in noxfile.py)
pandas:
- '1.5.0'
- '2.2.3'
python:
- '3.12'
- '3.11'
- '3.10'
- '3.9'
stack:
- '9.0.0'
- '9.1.0-SNAPSHOT'
command: ./.buildkite/run-tests
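Each matrix cell above is expanded into environment variables that the test script reads. A hedged example of reproducing a single cell outside Buildkite; the exact behaviour depends on `.buildkite/run-tests`:

```bash
# One cell of the matrix: Python 3.12, pandas 2.2.3, Elasticsearch 9.0.0, xpack suite.
PYTHON_VERSION=3.12 PANDAS_VERSION=2.2.3 TEST_SUITE=xpack \
  ELASTICSEARCH_VERSION=9.0.0 ./.buildkite/run-tests
```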

@@ -1,28 +0,0 @@
{
"jobs": [
{
"enabled": true,
"pipeline_slug": "eland",
"allow_org_users": true,
"allowed_repo_permissions": ["admin", "write"],
"build_on_commit": true,
"build_on_comment": true,
"trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))",
"always_trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))",
"skip_ci_labels": ["skip-ci"],
"skip_ci_on_only_changed": ["\\.md$"]
},
{
"enabled": true,
"pipeline_slug": "docs-build-pr",
"allow_org_users": true,
"allowed_repo_permissions": ["admin", "write"],
"build_on_commit": true,
"build_on_comment": true,
"trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))",
"always_trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))",
"skip_ci_labels": ["skip-ci"],
"skip_ci_on_only_changed": ["\\.md$"]
}
]
}
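The `trigger_comment_regex` above decides which pull request comments start a build. A quick, hedged way to sanity-check the pattern from a shell, assuming GNU grep with `-P` (Perl-compatible) support:

```bash
# Both comment styles below should match the trigger pattern.
regex='^(?:(?:buildkite\W+)?(?:build|test)\W+(?:this|it))'
echo "buildkite build this" | grep -Pq "$regex" && echo "triggers a build"
echo "test it"              | grep -Pq "$regex" && echo "triggers a build"
```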

@@ -1,28 +0,0 @@
steps:
- input: "Build parameters"
fields:
- text: "Release version"
key: "RELEASE_VERSION"
default: ""
format: "\\d{1,}.\\d{1,}.\\d{1,}"
hint: "The version to release e.g. '8.10.0' (without the v prefix)."
- select: "Environment"
key: "ENVIRONMENT"
options:
- label: "Staging"
value: "staging"
- label: "Production"
value: "production"
- wait
- label: "Release Docker Artifacts for Eland"
command: |
set -eo pipefail
export RELEASE_VERSION=$(buildkite-agent meta-data get RELEASE_VERSION)
export ENVIRONMENT=$(buildkite-agent meta-data get ENVIRONMENT)
export BUILDKIT_PROGRESS=plain
bash .buildkite/release-docker/run.sh
# Run on GCP to use `docker`
agents:
provider: gcp
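The `format` field above constrains the release version to three dot-separated numbers. A hedged local equivalent of that check; note the pipeline's regex leaves the dots unescaped, while this sketch escapes them:

```bash
# Accepts strings like "8.10.0" and rejects anything else.
[[ "8.10.0" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] && echo "valid release version"
```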

@@ -1,37 +0,0 @@
#!/usr/bin/env bash
set -eo pipefail
export LC_ALL=en_US.UTF-8
echo "Publishing Eland $RELEASE_VERSION Docker image to $ENVIRONMENT"
set +x
# login to docker registry
docker_registry=$(vault read -field registry "secret/ci/elastic-eland/container-library/eland-$ENVIRONMENT")
docker_username=$(vault read -field username "secret/ci/elastic-eland/container-library/eland-$ENVIRONMENT")
docker_password=$(vault read -field password "secret/ci/elastic-eland/container-library/eland-$ENVIRONMENT")
echo "$docker_password" | docker login "$docker_registry" --username "$docker_username" --password-stdin
unset docker_username docker_password
set -x
tmp_dir=$(mktemp --directory)
pushd "$tmp_dir"
git clone https://github.com/elastic/eland
pushd eland
git checkout "v${RELEASE_VERSION}"
git --no-pager show
# Create builder that supports QEMU emulation (needed for linux/arm64)
docker buildx rm --force eland-multiarch-builder || true
docker buildx create --name eland-multiarch-builder --bootstrap --use
docker buildx build --push \
--file Dockerfile.wolfi \
--tag "$docker_registry/eland/eland:$RELEASE_VERSION" \
--tag "$docker_registry/eland/eland:latest" \
--platform linux/amd64,linux/arm64 \
"$PWD"
popd
popd
rm -rf "$tmp_dir"
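After the multi-arch push above, the published manifest can be inspected to confirm both platforms landed. A hedged follow-up step; the registry and tag simply mirror the script's variables and require the same login:

```bash
# Lists the per-architecture images behind the pushed tag.
docker buildx imagetools inspect "$docker_registry/eland/eland:$RELEASE_VERSION"
```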

.ci/Dockerfile Normal file

@@ -0,0 +1,10 @@
ARG PYTHON_VERSION=3.7
FROM python:${PYTHON_VERSION}
WORKDIR /code/eland
COPY requirements-dev.txt .
RUN pip install -r requirements-dev.txt
COPY . .

.ci/certs/ca.crt Executable file

@@ -0,0 +1,20 @@
-----BEGIN CERTIFICATE-----
MIIDSTCCAjGgAwIBAgIUIwN+0zglsexRKwE1RGHvlCcmrdwwDQYJKoZIhvcNAQEL
BQAwNDEyMDAGA1UEAxMpRWxhc3RpYyBDZXJ0aWZpY2F0ZSBUb29sIEF1dG9nZW5l
cmF0ZWQgQ0EwHhcNMTkwMjEzMDcyMjQwWhcNMjIwMjEyMDcyMjQwWjA0MTIwMAYD
VQQDEylFbGFzdGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTCC
ASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBANILs0JO0e7x29zeVx21qalK
XKdX+AMlGJPH75wWO/Jq6YHtxt1wYIg762krOBXfG6JsFSOIwIv5VrzGGRGjSPt9
OXQyXrDDiQvsBT3rpzLNdDs7KMl2tZswwv7w9ujgud0cYnS1MOpn81rfPc73DvMg
xuhplofDx6fn3++PjVRU2FNiIVWyEoaxRjCeGPMBubKZYaYbQA6vYM4Z+ByG727B
AyAER3t7xmvYti/EoO2hv2HQk5zgcj/Oq3AJKhnt8LH8fnfm3TnYNM1htvXqhN05
vsvhvm2PHfnA5qLlSr/3W0aI/U/PqfsFDCgyRV097sMIaKkmavb0Ue7aQ7lgtp0C
AwEAAaNTMFEwHQYDVR0OBBYEFDRKlCMowWR1rwxE0d1lTEQe5O71MB8GA1UdIwQY
MBaAFDRKlCMowWR1rwxE0d1lTEQe5O71MA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZI
hvcNAQELBQADggEBAKbCJ95EBpeuvF70KEt6QU70k/SH1NRvM9YzKryV0D975Jvu
HOSm9HgSTULeAUFZIa4oYyf3QUfVoI+2T/aQrfXA3gfrJWsHURkyNmiHOFAbYHqi
xA6i249G2GTEjc1+le/M2N2CcDKAmurW6vSGK4upXQbPd6KmnhHREX74zkWjnOa+
+tibbSSOCT4Tmja2DbBxAPuivU9IB1g/hIUmbYQqKffQrBJA0658tz6w63a/Q7xN
pCvvbSgiMZ6qcVIcJkBT2IooYie+ax45pQECHthgIUcQAzfmIfqlU0Qfl8rDgAmn
0c1o6HQjKGU2aVGgSRuaaiHaSZjbPIZVS51sOoI=
-----END CERTIFICATE-----

.ci/certs/ca.pem Normal file

@@ -0,0 +1,20 @@
-----BEGIN CERTIFICATE-----
MIIDSTCCAjGgAwIBAgIUIwN+0zglsexRKwE1RGHvlCcmrdwwDQYJKoZIhvcNAQEL
BQAwNDEyMDAGA1UEAxMpRWxhc3RpYyBDZXJ0aWZpY2F0ZSBUb29sIEF1dG9nZW5l
cmF0ZWQgQ0EwHhcNMTkwMjEzMDcyMjQwWhcNMjIwMjEyMDcyMjQwWjA0MTIwMAYD
VQQDEylFbGFzdGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTCC
ASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBANILs0JO0e7x29zeVx21qalK
XKdX+AMlGJPH75wWO/Jq6YHtxt1wYIg762krOBXfG6JsFSOIwIv5VrzGGRGjSPt9
OXQyXrDDiQvsBT3rpzLNdDs7KMl2tZswwv7w9ujgud0cYnS1MOpn81rfPc73DvMg
xuhplofDx6fn3++PjVRU2FNiIVWyEoaxRjCeGPMBubKZYaYbQA6vYM4Z+ByG727B
AyAER3t7xmvYti/EoO2hv2HQk5zgcj/Oq3AJKhnt8LH8fnfm3TnYNM1htvXqhN05
vsvhvm2PHfnA5qLlSr/3W0aI/U/PqfsFDCgyRV097sMIaKkmavb0Ue7aQ7lgtp0C
AwEAAaNTMFEwHQYDVR0OBBYEFDRKlCMowWR1rwxE0d1lTEQe5O71MB8GA1UdIwQY
MBaAFDRKlCMowWR1rwxE0d1lTEQe5O71MA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZI
hvcNAQELBQADggEBAKbCJ95EBpeuvF70KEt6QU70k/SH1NRvM9YzKryV0D975Jvu
HOSm9HgSTULeAUFZIa4oYyf3QUfVoI+2T/aQrfXA3gfrJWsHURkyNmiHOFAbYHqi
xA6i249G2GTEjc1+le/M2N2CcDKAmurW6vSGK4upXQbPd6KmnhHREX74zkWjnOa+
+tibbSSOCT4Tmja2DbBxAPuivU9IB1g/hIUmbYQqKffQrBJA0658tz6w63a/Q7xN
pCvvbSgiMZ6qcVIcJkBT2IooYie+ax45pQECHthgIUcQAzfmIfqlU0Qfl8rDgAmn
0c1o6HQjKGU2aVGgSRuaaiHaSZjbPIZVS51sOoI=
-----END CERTIFICATE-----

.ci/certs/testnode.crt Executable file

@@ -0,0 +1,19 @@
-----BEGIN CERTIFICATE-----
MIIDIjCCAgqgAwIBAgIUI4QU6jA1dYSCbdIA6oAb2TBEluowDQYJKoZIhvcNAQEL
BQAwNDEyMDAGA1UEAxMpRWxhc3RpYyBDZXJ0aWZpY2F0ZSBUb29sIEF1dG9nZW5l
cmF0ZWQgQ0EwHhcNMTkwMjEzMDcyMzEzWhcNMjIwMjEyMDcyMzEzWjATMREwDwYD
VQQDEwhpbnN0YW5jZTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAJeT
yOy6EAScZxrULKjHePciiz38grivCrhFFV+dThaRCcl3DhDzb9Eny5q5iEw3WvLQ
Rqmf01jncNIhaocTt66VqveXaMubbE8O0LcG6e4kpFO+JtnVF8JTARTc+ux/1uD6
hO1VG/HItM7WQrQxh4hfB2u1AX2YQtoqEtXXEC+UHWfl4QzuzXjBnKCkO/L9/6Tf
yNFQWXxKnIiTs8Xm9sEhhSCBJPlLTQu+MX4vR2Uwj5XZmflDUr+ZTenl9qYxL6b3
SWhh/qEl4GAj1+tS7ZZOxE0237mUh3IIFYSWSaMm8K2m/BYHkLNWL5B1dMic0lsv
osSoYrQuCef4HQMCitsCAwEAAaNNMEswHQYDVR0OBBYEFFMg4l1GLW8lYbwASY+r
YeWYRzIiMB8GA1UdIwQYMBaAFDRKlCMowWR1rwxE0d1lTEQe5O71MAkGA1UdEwQC
MAAwDQYJKoZIhvcNAQELBQADggEBAEQrgh1xALpumQTzsjxFRGque/vlKTgRs5Kh
xtgapr6wjIbdq7dagee+4yNOKzS5lGVXCgwrJlHESv9qY0uumT/33vK2uduJ7NAd
fR2ZzyBnhMX+mkYhmGrGYCTUMUIwOIQYa4Evis4W+LHmCIDG03l7gLHfdIBe9VMO
pDZum8f6ng0MM49s8/rXODNYKw8kFyUhnfChqMi/2yggb1uUIfKlJJIchkgYjE13
zuC+fjo029Pq1jeMIdxugLf/7I/8NiW1Yj9aCXevUXG1qzHFEuKAinBXYOZO/vWS
LaEqOhwrzNynwgGpYAr7Rfgv4AflltYIIav4PZT03P7fbyAAf8s=
-----END CERTIFICATE-----

.ci/certs/testnode.key Executable file

@@ -0,0 +1,27 @@
-----BEGIN RSA PRIVATE KEY-----
MIIEpQIBAAKCAQEAl5PI7LoQBJxnGtQsqMd49yKLPfyCuK8KuEUVX51OFpEJyXcO
EPNv0SfLmrmITDda8tBGqZ/TWOdw0iFqhxO3rpWq95doy5tsTw7Qtwbp7iSkU74m
2dUXwlMBFNz67H/W4PqE7VUb8ci0ztZCtDGHiF8Ha7UBfZhC2ioS1dcQL5QdZ+Xh
DO7NeMGcoKQ78v3/pN/I0VBZfEqciJOzxeb2wSGFIIEk+UtNC74xfi9HZTCPldmZ
+UNSv5lN6eX2pjEvpvdJaGH+oSXgYCPX61Ltlk7ETTbfuZSHcggVhJZJoybwrab8
FgeQs1YvkHV0yJzSWy+ixKhitC4J5/gdAwKK2wIDAQABAoIBAQCRFTJna/xy/WUu
59FLR4qAOj8++JgCwACpue4oU7/vl6nffSYokWoAr2+RzG4qTX2vFi3cpA8+dGCn
sLZvTi8tWzKGxBTZdg2oakzaMzLr74SeZ052iCGyrZJGbvF6Ny7srr1XEXSq6+os
ZCb6pMHOhO7saBdiKMAsY8MdjTl/33AduuE6ztqv+L92xTr2g4QlbT1KvWlEgppU
k4Gy7zdETkPBTSH/17ZwyGJoJICIAhbL4IpmOM4dPIg8nFkVPPpy6p0z4uGjtgnK
nreZ2EKMzCafBaHn7A77gpi0OrQdl6pe0fsGqv/323YjCJPbwwl5TsoNq44DzwiX
3M7XiVJxAoGBAOCne56vdN4uZmCgLVGT2JSUNVPOu4bfjrxWH6cslzrPT2Zhp3lO
M4axZ3gmcervV252YEZXntXDHHCSfrECllRN1WFD63XmyQ/CkhuvZkkeRHfzL1TE
EdqHOTqs4sRETZ7+RITFC81DZQkWWOKeyXMjyPBqd7RnThQHijB1c8Y5AoGBAKy6
CVKBx+zz5crVD0tz4UhOmz1wRNN0CL0l+FXRuFSgbzMIvwpfiqe25crgeLHe2M2/
TogdWbjZ2nUZQTzoRsSkQ6cKHpj+G/gWurp/UcHHXFVwgLSPF7c3KHDtiYq7Vqw0
bvmhM03LI6+ZIPRV7hLBr7WP7UmpAiREMF7tTnmzAoGBAIkx3w3WywFQxtblmyeB
qbd7F2IaE23XoxyjX+tBEQ4qQqwcoSE0v8TXHIBEwjceeX+NLVhn9ClJYVniLRq+
oL3VVqVyzB4RleJZCc98e3PV1yyFx/b1Uo3pHOsXX9lKeTjKwV9v0rhFGzPEgP3M
yOvXA8TG0FnM6OLUg/D6GX0JAoGAMuHS4TVOGeV3ahr9mHKYiN5vKNgrzka+VEod
L9rJ/FQOrfADpyCiDen5I5ygsXU+VM3oanyK88NpcVlxOGoMft0M+OYoQVWKE7lO
ZKYhBX6fGqQ7pfUJPXXIOgwfmni5fZ0sm+j63g3bg10OsiumKGxaQJgXhL1+3gQg
Y7ZwibUCgYEAlZoFFvkMLjpOSaHk1z5ZZnt19X0QUIultBwkumSqMPm+Ks7+uDrx
thGUCoz4ecr/ci4bIUY7mB+zfAbqnBOMxreJqCRbAIuRypo1IlWkTp8DywoDOfMW
NfzjVmzJ7EJu44nGmVAi1jw4Pbseivvi1ujMCoPgaE8I1uSh144bwN8=
-----END RSA PRIVATE KEY-----

.ci/jobs/defaults.yml Executable file

@@ -0,0 +1,78 @@
---
##### GLOBAL METADATA
- meta:
cluster: clients-ci
##### JOB DEFAULTS
- job:
project-type: matrix
logrotate:
daysToKeep: 30
numToKeep: 100
parameters:
- string:
name: branch_specifier
default: refs/heads/master
description: the Git branch specifier to build (<branchName>, <tagName>,
<commitId>, etc.)
properties:
- github:
url: https://github.com/elastic/eland
- inject:
properties-content: HOME=$JENKINS_HOME
concurrent: true
node: flyweight
scm:
- git:
name: origin
credentials-id: f6c7695a-671e-4f4f-a331-acdce44ff9ba
reference-repo: /var/lib/jenkins/.git-references/eland.git
branches:
- ${branch_specifier}
url: git@github.com:elastic/eland.git
basedir: ''
wipe-workspace: 'True'
triggers:
- github
axes:
- axis:
type: slave
name: label
values:
- linux
- axis:
type: yaml
filename: .ci/test-matrix.yml
name: ELASTICSEARCH_VERSION
- axis:
type: yaml
filename: .ci/test-matrix.yml
name: TEST_SUITE
- axis:
type: yaml
filename: .ci/test-matrix.yml
name: PYTHON_VERSION
yaml-strategy:
exclude-key: exclude
filename: .ci/test-matrix.yml
wrappers:
- ansicolor
- timeout:
type: absolute
timeout: 120
fail: true
- timestamps
- workspace-cleanup
builders:
- shell: |-
#!/usr/local/bin/runbld
.ci/run-tests
publishers:
- email:
recipients: infra-root+build@elastic.co
- junit:
results: "build/output/*-junit.xml"
allow-empty-results: true

@@ -0,0 +1,14 @@
---
- job:
name: elastic+eland+master
display-name: 'elastic / eland # master'
description: Eland is a data science client with a Pandas-like interface
junit_results: "*-junit.xml"
parameters:
- string:
name: branch_specifier
default: refs/heads/master
description: The Git branch specifier to build
triggers:
- github
- timed: '@daily'

@@ -0,0 +1,19 @@
---
- job:
name: elastic+eland+pull-request
display-name: 'elastic / eland # pull-request'
description: Testing of eland pull requests.
scm:
- git:
branches:
- ${ghprbActualCommit}
refspec: +refs/pull/*:refs/remotes/origin/pr/*
triggers:
- github-pull-request:
org-list:
- elastic
allow-whitelist-orgs-as-admins: true
github-hooks: true
status-context: clients-ci
cancel-builds-on-update: true
publishers: []

@ -16,12 +16,7 @@ fi
set -euxo pipefail
# realpath on MacOS use different flags than on Linux
if [[ "$OSTYPE" == "darwin"* ]]; then
SCRIPT_PATH=$(dirname $(realpath $0))
else
SCRIPT_PATH=$(dirname $(realpath -s $0))
fi
SCRIPT_PATH=$(dirname $(realpath -s $0))
moniker=$(echo "$ELASTICSEARCH_VERSION" | tr -C "[:alnum:]" '-')
suffix=rest-test
@ -32,6 +27,10 @@ CLUSTER_NAME=${CLUSTER_NAME-${moniker}${suffix}}
HTTP_PORT=${HTTP_PORT-9200}
ELASTIC_PASSWORD=${ELASTIC_PASSWORD-changeme}
SSL_CERT=${SSL_CERT-"${SCRIPT_PATH}/certs/testnode.crt"}
SSL_KEY=${SSL_KEY-"${SCRIPT_PATH}/certs/testnode.key"}
SSL_CA=${SSL_CA-"${SCRIPT_PATH}/certs/ca.crt"}
SSL_CA_PEM=${SSL_CA-"${SCRIPT_PATH}/certs/ca.pem"}
DETACH=${DETACH-false}
CLEANUP=${CLEANUP-false}
@ -42,11 +41,6 @@ NETWORK_NAME=${NETWORK_NAME-"$network_default"}
set +x
# Set vm.max_map_count kernel setting to 262144 if we're in CI
if [[ "$BUILDKITE" == "true" ]]; then
sudo sysctl -w vm.max_map_count=262144
fi
function cleanup_volume {
if [[ "$(docker volume ls -q -f name=$1)" ]]; then
echo -e "\033[34;1mINFO:\033[0m Removing volume $1\033[0m"
@ -54,7 +48,7 @@ function cleanup_volume {
fi
}
function container_running {
if [[ "$(docker ps -q -f name=$1)" ]]; then
if [[ "$(docker ps -q -f name=$1)" ]]; then
return 0;
else return 1;
fi
@ -116,12 +110,6 @@ environment=($(cat <<-END
--env node.attr.testattr=test
--env path.repo=/tmp
--env repositories.url.allowed_urls=http://snapshot.test*
--env ELASTIC_PASSWORD=$ELASTIC_PASSWORD
--env xpack.license.self_generated.type=trial
--env xpack.security.enabled=false
--env xpack.security.http.ssl.enabled=false
--env xpack.security.transport.ssl.enabled=false
--env xpack.ml.max_machine_memory_percent=90
END
))
@ -130,31 +118,54 @@ volumes=($(cat <<-END
END
))
url="http://elastic:$ELASTIC_PASSWORD@$NODE_NAME"
if [[ "$ELASTICSEARCH_VERSION" != *oss* ]]; then
environment+=($(cat <<-END
--env ELASTIC_PASSWORD=$ELASTIC_PASSWORD
--env xpack.license.self_generated.type=trial
--env xpack.security.enabled=true
--env xpack.security.http.ssl.enabled=true
--env xpack.security.http.ssl.verification_mode=certificate
--env xpack.security.http.ssl.key=certs/testnode.key
--env xpack.security.http.ssl.certificate=certs/testnode.crt
--env xpack.security.http.ssl.certificate_authorities=certs/ca.crt
--env xpack.security.transport.ssl.enabled=true
--env xpack.security.transport.ssl.key=certs/testnode.key
--env xpack.security.transport.ssl.certificate=certs/testnode.crt
--env xpack.security.transport.ssl.certificate_authorities=certs/ca.crt
END
))
volumes+=($(cat <<-END
--volume $SSL_CERT:/usr/share/elasticsearch/config/certs/testnode.crt
--volume $SSL_KEY:/usr/share/elasticsearch/config/certs/testnode.key
--volume $SSL_CA:/usr/share/elasticsearch/config/certs/ca.crt
--volume $SSL_CA_PEM:/usr/share/elasticsearch/config/certs/ca.pem
END
))
fi
# Pull the container, retry on failures up to 5 times with
# short delays between each attempt. Fixes most transient network errors.
docker_pull_attempts=0
until [ "$docker_pull_attempts" -ge 5 ]
do
docker pull docker.elastic.co/elasticsearch/$ELASTICSEARCH_VERSION && break
docker_pull_attempts=$((docker_pull_attempts+1))
sleep 10
done
url="http://$NODE_NAME"
if [[ "$ELASTICSEARCH_VERSION" != *oss* ]]; then
url="https://elastic:$ELASTIC_PASSWORD@$NODE_NAME"
fi
cert_validation_flags="--insecure"
if [[ "$NODE_NAME" == "instance" ]]; then
cert_validation_flags="--cacert /usr/share/elasticsearch/config/certs/ca.pem --resolve ${NODE_NAME}:443:127.0.0.1"
fi
echo -e "\033[34;1mINFO:\033[0m Starting container $NODE_NAME \033[0m"
set -x
docker run \
--name "$NODE_NAME" \
--network "$NETWORK_NAME" \
--env ES_JAVA_OPTS=-"Xms2g -Xmx2g" \
--env ES_JAVA_OPTS=-"Xms1g -Xmx1g" \
"${environment[@]}" \
"${volumes[@]}" \
--publish "$HTTP_PORT":9200 \
--ulimit nofile=65536:65536 \
--ulimit memlock=-1:-1 \
--detach="$DETACH" \
--health-cmd="curl --insecure --fail $url:9200/_cluster/health || exit 1" \
--health-cmd="curl $cert_validation_flags --fail $url:9200/_cluster/health || exit 1" \
--health-interval=2s \
--health-retries=20 \
--health-timeout=2s \

@ -12,7 +12,7 @@
# When run in CI the test-matrix is used to define additional variables
# TEST_SUITE -- `xpack`
# TEST_SUITE -- either `oss` or `xpack`, defaults to `oss` in `run-tests`
#
PYTHON_VERSION=${PYTHON_VERSION-3.8}
@ -21,11 +21,10 @@ echo -e "\033[34;1mINFO:\033[0m VERSION ${ELASTICSEARCH_VERSION}\033[0m"
echo -e "\033[34;1mINFO:\033[0m CONTAINER ${ELASTICSEARCH_CONTAINER}\033[0m"
echo -e "\033[34;1mINFO:\033[0m TEST_SUITE ${TEST_SUITE}\033[0m"
echo -e "\033[34;1mINFO:\033[0m PYTHON_VERSION ${PYTHON_VERSION}\033[0m"
echo -e "\033[34;1mINFO:\033[0m PANDAS_VERSION ${PANDAS_VERSION}\033[0m"
echo -e "\033[1m>>>>> Build [elastic/eland container] >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"
docker build --file .buildkite/Dockerfile --tag elastic/eland --build-arg PYTHON_VERSION=${PYTHON_VERSION} .
docker build --file .ci/Dockerfile --tag elastic/eland --build-arg PYTHON_VERSION=${PYTHON_VERSION} .
echo -e "\033[1m>>>>> Run [elastic/eland container] >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"
@ -36,4 +35,4 @@ docker run \
--name eland-test-runner \
--rm \
elastic/eland \
nox -s "test-${PYTHON_VERSION}(pandas_version='${PANDAS_VERSION}')"
./run_build.sh

@ -9,12 +9,13 @@ if [[ -z $ELASTICSEARCH_VERSION ]]; then
fi
set -euxo pipefail
TEST_SUITE=${TEST_SUITE-xpack}
NODE_NAME=localhost
PANDAS_VERSION=${PANDAS_VERSION-1.5.0}
TEST_SUITE=${TEST_SUITE-oss}
NODE_NAME=instance
elasticsearch_image=elasticsearch
elasticsearch_url=http://elastic:changeme@${NODE_NAME}:9200
elasticsearch_url=https://elastic:changeme@${NODE_NAME}:9200
if [[ $TEST_SUITE != "xpack" ]]; then
elasticsearch_image=elasticsearch-${TEST_SUITE}
elasticsearch_url=http://${NODE_NAME}:9200
@ -27,7 +28,7 @@ function cleanup {
NODE_NAME=${NODE_NAME} \
NETWORK_NAME=elasticsearch \
CLEANUP=true \
bash ./.buildkite/run-elasticsearch.sh
bash ./.ci/run-elasticsearch.sh
# Report status and exit
if [[ "$status" == "0" ]]; then
echo -e "\n\033[32;1mSUCCESS run-tests\033[0m"
@ -39,20 +40,19 @@ function cleanup {
}
trap cleanup EXIT
echo "--- :elasticsearch: Starting Elasticsearch"
echo -e "\033[1m>>>>> Start [$ELASTICSEARCH_VERSION container] >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"
ELASTICSEARCH_VERSION=${elasticsearch_image}:${ELASTICSEARCH_VERSION} \
NODE_NAME=${NODE_NAME} \
NETWORK_NAME=host \
NETWORK_NAME=elasticsearch \
DETACH=true \
bash .buildkite/run-elasticsearch.sh
bash .ci/run-elasticsearch.sh
echo "+++ :python: Run tests"
echo -e "\033[1m>>>>> Repository specific tests >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"
ELASTICSEARCH_CONTAINER=${elasticsearch_image}:${ELASTICSEARCH_VERSION} \
NETWORK_NAME=host \
NETWORK_NAME=elasticsearch \
NODE_NAME=${NODE_NAME} \
ELASTICSEARCH_URL=${elasticsearch_url} \
TEST_SUITE=${TEST_SUITE} \
PANDAS_VERSION=${PANDAS_VERSION} \
bash .buildkite/run-repository.sh
bash .ci/run-repository.sh
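The script above wires one combination from `.ci/test-matrix.yml` into the container scripts. A hedged example of driving it by hand for a single cell, using values that appear in the matrix; paths and defaults are as shown in the diff:

```bash
# Run the oss suite against a 7.5 snapshot with Python 3.8, mirroring one matrix cell.
ELASTICSEARCH_VERSION=7.5-SNAPSHOT TEST_SUITE=oss PYTHON_VERSION=3.8 \
  bash .ci/run-tests
```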

.ci/test-matrix.yml Executable file

@@ -0,0 +1,18 @@
---
ELASTICSEARCH_VERSION:
- 8.0.0-SNAPSHOT
- 7.5-SNAPSHOT
TEST_SUITE:
- oss
- xpack
PYTHON_VERSION:
- 3.8
- 3.7
- 3.6
- 3.5.3
exclude: ~

@@ -1,62 +1,3 @@
# docs and example
docs/*
example/*
# Git
.git
# Nox
.nox
# Compiled python modules.
*.pyc
__pycache__/
# Setuptools distribution folder.
dist/
# Build folder
build/
# pytest results
tests/dataframe/results/*csv
result_images/
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
eland.egg-info/
# PyCharm files
.idea/
# vscode files
.vscode/
# pytest files
.pytest_cache/
# Ignore MacOSX files
.DS_Store
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# Environments
.env
.venv
.nox
env/
venv/
ENV/
env.bak/
venv.bak/
.mypy_cache
# Coverage
.coverage

@@ -1,26 +0,0 @@
name: Backport
on:
pull_request_target:
types:
- closed
- labeled
jobs:
backport:
name: Backport
runs-on: ubuntu-latest
# Only react to merged PRs for security reasons.
# See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target.
if: >
github.event.pull_request.merged
&& (
github.event.action == 'closed'
|| (
github.event.action == 'labeled'
&& contains(github.event.label.name, 'backport')
)
)
steps:
- uses: tibdex/backport@9565281eda0731b1d20c4025c43339fb0a23812e # v2.0.4
with:
github_token: ${{ secrets.GITHUB_TOKEN }}

@@ -1,19 +0,0 @@
name: docs-build
on:
push:
branches:
- main
pull_request_target: ~
merge_group: ~
jobs:
docs-preview:
uses: elastic/docs-builder/.github/workflows/preview-build.yml@main
with:
path-pattern: docs/**
permissions:
deployments: write
id-token: write
contents: read
pull-requests: write

@@ -1,14 +0,0 @@
name: docs-cleanup
on:
pull_request_target:
types:
- closed
jobs:
docs-preview:
uses: elastic/docs-builder/.github/workflows/preview-cleanup.yml@main
permissions:
contents: none
id-token: write
deployments: write

.gitignore vendored

@@ -1,6 +1,5 @@
# Compiled python modules.
*.pyc
__pycache__/
# Setuptools distribution folder.
dist/
@ -12,19 +11,18 @@ build/
docs/build/
# pytest results
tests/dataframe/results/*csv
eland/tests/dataframe/results/
result_images/
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
eland.egg-info/
# PyCharm files
.idea/
# vscode files
.vscode/
.vscode/*
# pytest files
.pytest_cache/
@ -45,13 +43,8 @@ ipython_config.py
# Environments
.env
.venv
.nox
env/
venv/
ENV/
env.bak/
venv.bak/
.mypy_cache
# Coverage
.coverage

@@ -1,14 +0,0 @@
version: 2
build:
os: ubuntu-22.04
tools:
python: "3.11"
python:
install:
- path: .
- requirements: docs/requirements-docs.txt
sphinx:
configuration: docs/sphinx/conf.py

@@ -1,784 +0,0 @@
=========
Changelog
=========
9.0.1 (2025-04-30)
------------------
* Forbid Elasticsearch 8 client or server (`#780 <https://github.com/elastic/eland/pull/780>`_)
* Fix DeBERTa tokenization (`#769 <https://github.com/elastic/eland/pull/769>`_)
* Upgrade PyTorch to 2.5.1 (`#785 <https://github.com/elastic/eland/pull/785>`_)
* Upgrade LightGBM to 4.6.0 (`#782 <https://github.com/elastic/eland/pull/782>`_)
9.0.0 (2025-04-15)
------------------
* Drop Python 3.8, Support Python 3.12 (`#743 <https://github.com/elastic/eland/pull/743>`_)
* Support Pandas 2 (`#742 <https://github.com/elastic/eland/pull/742>`_)
* Upgrade transformers to 4.47 (`#752 <https://github.com/elastic/eland/pull/752>`_)
* Remove ML model export as sklearn Pipeline (`#744 <https://github.com/elastic/eland/pull/744>`_)
* Allow scikit-learn 1.5 (`#729 <https://github.com/elastic/eland/pull/729>`_)
* Migrate docs from AsciiDoc to Markdown (`#762 <https://github.com/elastic/eland/pull/762>`_)
8.17.0 (2025-01-07)
-------------------
* Support sparse embedding models such as SPLADE-v3-DistilBERT (`#740 <https://github.com/elastic/eland/pull/740>`_)
8.16.0 (2024-11-13)
-------------------
* Add deprecation warning for ESGradientBoostingModel subclasses (`#738 <https://github.com/elastic/eland/pull/738>`_)
8.15.4 (2024-10-17)
-------------------
* Revert "Allow reading Elasticsearch certs in Wolfi image" (`#734 <https://github.com/elastic/eland/pull/734>`_)
8.15.3 (2024-10-09)
-------------------
* Added support for DeBERTa-V2 tokenizer (`#717 <https://github.com/elastic/eland/pull/717>`_)
* Fixed ``--ca-cert`` with a shared Elasticsearch Docker volume (`#732 <https://github.com/elastic/eland/pull/732>`_)
8.15.2 (2024-10-02)
-------------------
* Fixed Docker image build (`#728 <https://github.com/elastic/eland/pull/728>`_)
8.15.1 (2024-10-01)
-------------------
* Upgraded PyTorch to version 2.3.1, which is compatible with Elasticsearch 8.15.2 or above (`#718 <https://github.com/elastic/eland/pull/718>`_)
* Migrated to distroless Wolfi base Docker image (`#720 <https://github.com/elastic/eland/pull/720>`_)
8.15.0 (2024-08-12)
-------------------
* Added a default truncation of ``second`` for text similarity (`#713 <https://github.com/elastic/eland/pull/713>`_)
* Added note about using text_similarity for rerank in the CLI (`#716 <https://github.com/elastic/eland/pull/716>`_)
* Added support for lists in result hits (`#707 <https://github.com/elastic/eland/pull/707>`_)
* Removed input fields from exported LTR models (`#708 <https://github.com/elastic/eland/pull/708>`_)
8.14.0 (2024-06-10)
-------------------
Added
^^^^^
* Added Elasticsearch Serverless support in DataFrames (`#690`_, contributed by `@AshokChoudhary11`_) and eland_import_hub_model (`#698`_)
Fixed
^^^^^
* Fixed Python 3.8 support (`#695`_, contributed by `@bartbroere`_)
* Fixed non _source fields missing from the results hits (`#693`_, contributed by `@bartbroere`_)
.. _@AshokChoudhary11: https://github.com/AshokChoudhary11
.. _#690: https://github.com/elastic/eland/pull/690
.. _#693: https://github.com/elastic/eland/pull/693
.. _#695: https://github.com/elastic/eland/pull/695
.. _#698: https://github.com/elastic/eland/pull/698
8.13.1 (2024-05-03)
-------------------
Added
^^^^^
* Added support for HTTP proxies in eland_import_hub_model (`#688`_)
.. _#688: https://github.com/elastic/eland/pull/688
8.13.0 (2024-03-27)
-------------------
Added
^^^^^
* Added support for Python 3.11 (`#681`_)
* Added ``eland.DataFrame.to_json`` function (`#661`_, contributed by `@bartbroere`_)
* Added override option to specify the model's max input size (`#674`_)
Changed
^^^^^^^
* Upgraded torch to 2.1.2 (`#671`_)
* Mirrored pandas' ``lineterminator`` instead of ``line_terminator`` in ``to_csv`` (`#595`_, contributed by `@bartbroere`_)
.. _#595: https://github.com/elastic/eland/pull/595
.. _#661: https://github.com/elastic/eland/pull/661
.. _#671: https://github.com/elastic/eland/pull/671
.. _#674: https://github.com/elastic/eland/pull/674
.. _#681: https://github.com/elastic/eland/pull/681
8.12.1 (2024-01-30)
-------------------
Fixed
^^^^^
* Fix missing value support for XGBRanker (`#654`_)
.. _#654: https://github.com/elastic/eland/pull/654
8.12.0 (2024-01-18)
-------------------
Added
^^^^^
* Supported XGBRanker model (`#649`_)
* Accepted LTR (Learning to rank) model config when importing model (`#645`_, `#651`_)
* Added LTR feature logger (`#648`_)
* Added ``prefix_string`` config option to the import model hub script (`#642`_)
* Made online retail analysis notebook runnable in Colab (`#641`_)
* Added new movie dataset to the tests (`#646`_)
.. _#641: https://github.com/elastic/eland/pull/641
.. _#642: https://github.com/elastic/eland/pull/642
.. _#645: https://github.com/elastic/eland/pull/645
.. _#646: https://github.com/elastic/eland/pull/646
.. _#648: https://github.com/elastic/eland/pull/648
.. _#649: https://github.com/elastic/eland/pull/649
.. _#651: https://github.com/elastic/eland/pull/651
8.11.1 (2023-11-22)
-------------------
Added
^^^^^
* Make demo notebook runnable in Colab (`#630`_)
Changed
^^^^^^^
* Bump Shap version to 0.43 (`#636`_)
Fixed
^^^^^
* Fix failed import of Sentence Transformer RoBERTa models (`#637`_)
.. _#630: https://github.com/elastic/eland/pull/630
.. _#636: https://github.com/elastic/eland/pull/636
.. _#637: https://github.com/elastic/eland/pull/637
8.11.0 (2023-11-08)
-------------------
Added
^^^^^
* Support E5 small multilingual model (`#625`_)
Changed
^^^^^^^
* Stream writes in ``ed.DataFrame.to_csv()`` (`#579`_)
* Improve memory estimation for NLP models (`#568`_)
Fixed
^^^^^
* Fixed deprecations in preparation of Pandas 2.0 support (`#602`_, `#603`_, contributed by `@bartbroere`_)
.. _#568: https://github.com/elastic/eland/pull/568
.. _#579: https://github.com/elastic/eland/pull/579
.. _#602: https://github.com/elastic/eland/pull/602
.. _#603: https://github.com/elastic/eland/pull/603
.. _#625: https://github.com/elastic/eland/pull/625
8.10.1 (2023-10-11)
-------------------
Fixed
^^^^^
* Fixed direct usage of TransformerModel (`#619`_)
.. _#619: https://github.com/elastic/eland/pull/619
8.10.0 (2023-10-09)
-------------------
Added
^^^^^
* Published pre-built Docker images to docker.elastic.co/eland/eland (`#613`_)
* Allowed importing private HuggingFace models (`#608`_)
* Added Apple Silicon (arm64) support to Docker image (`#615`_)
* Allowed importing some DPR models like ance-dpr-context-multi (`#573`_)
* Allowed using the Pandas API without monitoring/main permissions (`#581`_)
Changed
^^^^^^^
* Updated Docker image to Debian 12 Bookworm (`#613`_)
* Reduced Docker image size by not installing unused PyTorch GPU support on amd64 (`#615`_)
* Reduced model chunk size to 1MB (`#605`_)
Fixed
^^^^^
* Fixed deprecations in preparation of Pandas 2.0 support (`#593`_, `#596`_, contributed by `@bartbroere`_)
.. _@bartbroere: https://github.com/bartbroere
.. _#613: https://github.com/elastic/eland/pull/613
.. _#608: https://github.com/elastic/eland/pull/608
.. _#615: https://github.com/elastic/eland/pull/615
.. _#573: https://github.com/elastic/eland/pull/573
.. _#581: https://github.com/elastic/eland/pull/581
.. _#605: https://github.com/elastic/eland/pull/605
.. _#593: https://github.com/elastic/eland/pull/593
.. _#596: https://github.com/elastic/eland/pull/596
8.9.0 (2023-08-24)
------------------
Added
^^^^^
* Simplify embedding model support and loading (`#569`_)
* Make eland_import_hub_model easier to find on Windows (`#559`_)
* Update trained model inference endpoint (`#556`_)
* Add BertJapaneseTokenizer support with bert_ja tokenization configuration (`#534`_)
* Add ability to upload xlm-roberta tokenized models (`#518`_)
* Tolerate different model output formats when measuring embedding size (`#535`_)
* Generate valid NLP model id from file path (`#541`_)
* Upgrade torch to 1.13.1 and check the cluster version before uploading a NLP model (`#522`_)
* Set embedding_size config parameter for Text Embedding models (`#532`_)
* Add support for the pass_through task (`#526`_)
Fixed
^^^^^
* Fixed black to comply with the code style (`#557`_)
* Fixed No module named 'torch' (`#553`_)
* Fix autosummary directive by removing hack autosummaries (`#548`_)
* Prevent TypeError with None check (`#525`_)
.. _#518: https://github.com/elastic/eland/pull/518
.. _#522: https://github.com/elastic/eland/pull/522
.. _#525: https://github.com/elastic/eland/pull/525
.. _#526: https://github.com/elastic/eland/pull/526
.. _#532: https://github.com/elastic/eland/pull/532
.. _#534: https://github.com/elastic/eland/pull/534
.. _#535: https://github.com/elastic/eland/pull/535
.. _#541: https://github.com/elastic/eland/pull/541
.. _#548: https://github.com/elastic/eland/pull/548
.. _#553: https://github.com/elastic/eland/pull/553
.. _#556: https://github.com/elastic/eland/pull/556
.. _#557: https://github.com/elastic/eland/pull/557
.. _#559: https://github.com/elastic/eland/pull/559
.. _#569: https://github.com/elastic/eland/pull/569
8.7.0 (2023-03-30)
------------------
Added
^^^^^
* Added a new NLP model task type "text_similarity" (`#486`_)
* Added a new NLP model task type "text_expansion" (`#520`_)
* Added support for exporting an Elastic ML model as a scikit-learn pipeline via ``MLModel.export_model()`` (`#509`_)
Fixed
^^^^^
* Fixed an issue that occurred when LightGBM was installed but libomp wasn't installed on the system. (`#499`_)
.. _#486: https://github.com/elastic/eland/pull/486
.. _#499: https://github.com/elastic/eland/pull/499
.. _#509: https://github.com/elastic/eland/pull/509
.. _#520: https://github.com/elastic/eland/pull/520
8.3.0 (2022-07-11)
------------------
Added
^^^^^
* Added a new NLP model task type "auto" which infers the task type based on model configuration and architecture (`#475`_)
Changed
^^^^^^^
* Changed required version of 'torch' package to `>=1.11.0,<1.12` to match required PyTorch version for Elasticsearch 8.3 (was `>=1.9.0,<2`) (`#479`_)
* Changed the default value of the `--task-type` parameter for the `eland_import_hub_model` CLI to be "auto" (`#475`_)
Fixed
^^^^^
* Fixed decision tree classifier serialization to account for probabilities (`#465`_)
* Fixed PyTorch model quantization (`#472`_)
.. _#465: https://github.com/elastic/eland/pull/465
.. _#472: https://github.com/elastic/eland/pull/472
.. _#475: https://github.com/elastic/eland/pull/475
.. _#479: https://github.com/elastic/eland/pull/479
8.2.0 (2022-05-09)
------------------
Added
^^^^^
* Added support for passing Cloud ID via ``--cloud-id`` to ``eland_import_hub_model`` CLI tool (`#462`_)
* Added support for authenticating via ``--es-username``, ``--es-password``, and ``--es-api-key`` to the ``eland_import_hub_model`` CLI tool (`#461`_)
* Added support for XGBoost 1.6 (`#458`_)
* Added support for ``question_answering`` NLP tasks (`#457`_)
.. _#457: https://github.com/elastic/eland/pull/457
.. _#458: https://github.com/elastic/eland/pull/458
.. _#461: https://github.com/elastic/eland/pull/461
.. _#462: https://github.com/elastic/eland/pull/462
8.1.0 (2022-03-31)
------------------
Added
^^^^^
* Added support for ``eland.Series.unique()`` (`#448`_, contributed by `@V1NAY8`_)
* Added ``--ca-certs`` and ``--insecure`` options to ``eland_import_hub_model`` for configuring TLS (`#441`_)
.. _#448: https://github.com/elastic/eland/pull/448
.. _#441: https://github.com/elastic/eland/pull/441
8.0.0 (2022-02-10)
------------------
Added
^^^^^
* Added support for Natural Language Processing (NLP) models using PyTorch (`#394`_)
* Added new extra ``eland[pytorch]`` for installing all dependencies needed for PyTorch (`#394`_)
* Added a CLI script ``eland_import_hub_model`` for uploading HuggingFace models to Elasticsearch (`#403`_)
* Added support for v8.0 of the Python Elasticsearch client (`#415`_)
* Added a warning if Eland detects it's communicating with an incompatible Elasticsearch version (`#419`_)
* Added support for ``number_samples`` to LightGBM and Scikit-Learn models (`#397`_, contributed by `@V1NAY8`_)
* Added ability to use datetime types for filtering dataframes (`#284`_, contributed by `@Fju`_)
* Added pandas ``datetime64`` type to use the Elasticsearch ``date`` type (`#425`_, contributed by `@Ashton-Sidhu`_)
* Added ``es_verify_mapping_compatibility`` parameter to disable schema enforcement with ``pandas_to_eland`` (`#423`_, contributed by `@Ashton-Sidhu`_)
Changed
^^^^^^^
* Changed ``to_pandas()`` to only use Point-in-Time and ``search_after`` instead of using Scroll APIs
for pagination.
.. _@Fju: https://github.com/Fju
.. _@Ashton-Sidhu: https://github.com/Ashton-Sidhu
.. _#419: https://github.com/elastic/eland/pull/419
.. _#415: https://github.com/elastic/eland/pull/415
.. _#397: https://github.com/elastic/eland/pull/397
.. _#394: https://github.com/elastic/eland/pull/394
.. _#403: https://github.com/elastic/eland/pull/403
.. _#284: https://github.com/elastic/eland/pull/284
.. _#425: https://github.com/elastic/eland/pull/425
.. _#423: https://github.com/elastic/eland/pull/423
7.14.1b1 (2021-08-30)
---------------------
Added
^^^^^
* Added support for ``DataFrame.iterrows()`` and ``DataFrame.itertuples()`` (`#380`_, contributed by `@kxbin`_)
Performance
^^^^^^^^^^^
* Simplified result collectors to increase performance transforming Elasticsearch results to pandas (`#378`_, contributed by `@V1NAY8`_)
* Changed search pagination function to yield batches of hits (`#379`_)
.. _@kxbin: https://github.com/kxbin
.. _#378: https://github.com/elastic/eland/pull/378
.. _#379: https://github.com/elastic/eland/pull/379
.. _#380: https://github.com/elastic/eland/pull/380
7.14.0b1 (2021-08-09)
---------------------
Added
^^^^^
* Added support for Pandas 1.3.x (`#362`_, contributed by `@V1NAY8`_)
* Added support for LightGBM 3.x (`#362`_, contributed by `@V1NAY8`_)
* Added ``DataFrame.idxmax()`` and ``DataFrame.idxmin()`` methods (`#353`_, contributed by `@V1NAY8`_)
* Added type hints to ``eland.ndframe`` and ``eland.operations`` (`#366`_, contributed by `@V1NAY8`_)
Removed
^^^^^^^
* Removed support for Pandas <1.2 (`#364`_)
* Removed support for Python 3.6 to match Pandas (`#364`_)
Changed
^^^^^^^
* Changed paginated search function to use `Point-in-Time`_ and `Search After`_ features
instead of Scroll when connected to Elasticsearch 7.12+ (`#370`_ and `#376`_, contributed by `@V1NAY8`_)
* Optimized the ``FieldMappings.aggregate_field_name()`` method (`#373`_, contributed by `@V1NAY8`_)
.. _Point-in-Time: https://www.elastic.co/guide/en/elasticsearch/reference/current/point-in-time-api.html
.. _Search After: https://www.elastic.co/guide/en/elasticsearch/reference/7.14/paginate-search-results.html#search-after
.. _#353: https://github.com/elastic/eland/pull/353
.. _#362: https://github.com/elastic/eland/pull/362
.. _#364: https://github.com/elastic/eland/pull/364
.. _#366: https://github.com/elastic/eland/pull/366
.. _#370: https://github.com/elastic/eland/pull/370
.. _#373: https://github.com/elastic/eland/pull/373
.. _#376: https://github.com/elastic/eland/pull/376
7.13.0b1 (2021-06-22)
---------------------
Added
^^^^^
* Added ``DataFrame.quantile()``, ``Series.quantile()``, and
``DataFrameGroupBy.quantile()`` aggregations (`#318`_ and `#356`_, contributed by `@V1NAY8`_)
Changed
^^^^^^^
* Changed the error raised when ``es_index_pattern`` doesn't point to any indices
to be more user-friendly (`#346`_)
Fixed
^^^^^
* Fixed a warning about conflicting field types when wildcards are used
in ``es_index_pattern`` (`#346`_)
* Fixed sorting when using ``DataFrame.groupby()`` with ``dropna``
(`#322`_, contributed by `@V1NAY8`_)
* Fixed deprecated usage ``numpy.int`` in favor of ``numpy.int_`` (`#354`_, contributed by `@V1NAY8`_)
.. _#318: https://github.com/elastic/eland/pull/318
.. _#322: https://github.com/elastic/eland/pull/322
.. _#346: https://github.com/elastic/eland/pull/346
.. _#354: https://github.com/elastic/eland/pull/354
.. _#356: https://github.com/elastic/eland/pull/356
7.10.1b1 (2021-01-12)
---------------------
Added
^^^^^
* Added support for Pandas 1.2.0 (`#336`_)
* Added ``DataFrame.mode()`` and ``Series.mode()`` aggregation (`#323`_, contributed by `@V1NAY8`_)
* Added support for ``pd.set_option("display.max_rows", None)``
(`#308`_, contributed by `@V1NAY8`_)
* Added Elasticsearch storage usage to ``df.info()`` (`#321`_, contributed by `@V1NAY8`_)
Removed
^^^^^^^
* Removed deprecated aliases ``read_es``, ``read_csv``, ``DataFrame.info_es``,
and ``MLModel(overwrite=True)`` (`#331`_, contributed by `@V1NAY8`_)
.. _#336: https://github.com/elastic/eland/pull/336
.. _#331: https://github.com/elastic/eland/pull/331
.. _#323: https://github.com/elastic/eland/pull/323
.. _#321: https://github.com/elastic/eland/pull/321
.. _#308: https://github.com/elastic/eland/pull/308
7.10.0b1 (2020-10-29)
---------------------
Added
^^^^^
* Added ``DataFrame.groupby()`` method with all aggregations
(`#278`_, `#291`_, `#292`_, `#300`_ contributed by `@V1NAY8`_)
* Added ``es_match()`` method to ``DataFrame`` and ``Series`` for
filtering rows with full-text search (`#301`_)
* Added support for type hints of the ``elasticsearch-py`` package (`#295`_)
* Added support for passing dictionaries to ``es_type_overrides`` parameter
in the ``pandas_to_eland()`` function to directly control the field mapping
generated in Elasticsearch (`#310`_)
* Added ``es_dtypes`` property to ``DataFrame`` and ``Series`` (`#285`_)
Changed
^^^^^^^
* Changed ``pandas_to_eland()`` to use the ``parallel_bulk()``
helper instead of single-threaded ``bulk()`` helper to improve
performance (`#279`_, contributed by `@V1NAY8`_)
* Changed the ``es_type_overrides`` parameter in ``pandas_to_eland()``
to raise ``ValueError`` if an unknown column is given (`#302`_)
* Changed ``DataFrame.filter()`` to preserve the order of items
(`#283`_, contributed by `@V1NAY8`_)
* Changed when setting ``es_type_overrides={"column": "text"}`` in
``pandas_to_eland()`` will automatically add the ``column.keyword``
sub-field so that aggregations are available for the field as well (`#310`_)
Fixed
^^^^^
* Fixed ``Series.__repr__`` when the series is empty (`#306`_)
.. _#278: https://github.com/elastic/eland/pull/278
.. _#279: https://github.com/elastic/eland/pull/279
.. _#283: https://github.com/elastic/eland/pull/283
.. _#285: https://github.com/elastic/eland/pull/285
.. _#291: https://github.com/elastic/eland/pull/291
.. _#292: https://github.com/elastic/eland/pull/292
.. _#295: https://github.com/elastic/eland/pull/295
.. _#300: https://github.com/elastic/eland/pull/300
.. _#301: https://github.com/elastic/eland/pull/301
.. _#302: https://github.com/elastic/eland/pull/302
.. _#306: https://github.com/elastic/eland/pull/306
.. _#310: https://github.com/elastic/eland/pull/310
7.9.1a1 (2020-09-29)
--------------------
Added
^^^^^
* Added the ``predict()`` method and ``model_type``,
``feature_names``, and ``results_field`` properties
to ``MLModel`` (`#266`_)
Deprecated
^^^^^^^^^^
* Deprecated ``ImportedMLModel`` in favor of
``MLModel.import_model(...)`` (`#266`_)
Changed
^^^^^^^
* Changed DataFrame aggregations to use ``numeric_only=None``
instead of ``numeric_only=True`` by default. This is the same
behavior as Pandas (`#270`_, contributed by `@V1NAY8`_)
Fixed
^^^^^
* Fixed ``DataFrame.agg()`` when given a string instead of a list of
aggregations will now properly return a ``Series`` instead of
a ``DataFrame`` (`#263`_, contributed by `@V1NAY8`_)
.. _#263: https://github.com/elastic/eland/pull/263
.. _#266: https://github.com/elastic/eland/pull/266
.. _#270: https://github.com/elastic/eland/pull/270
7.9.0a1 (2020-08-18)
--------------------
Added
^^^^^
* Added support for Pandas v1.1 (`#253`_)
* Added support for LightGBM ``LGBMRegressor`` and ``LGBMClassifier`` to ``ImportedMLModel`` (`#247`_, `#252`_)
* Added support for ``multi:softmax`` and ``multi:softprob`` XGBoost operators to ``ImportedMLModel`` (`#246`_)
* Added column names to ``DataFrame.__dir__()`` for better auto-completion support (`#223`_, contributed by `@leonardbinet`_)
* Added support for ``es_if_exists='append'`` to ``pandas_to_eland()`` (`#217`_)
* Added support for aggregating datetimes with ``nunique`` and ``mean`` (`#253`_)
* Added ``es_compress_model_definition`` parameter to ``ImportedMLModel`` constructor (`#220`_)
* Added ``.size`` and ``.ndim`` properties to ``DataFrame`` and ``Series`` (`#231`_ and `#233`_)
* Added ``.dtype`` property to ``Series`` (`#258`_)
* Added support for using ``pandas.Series`` with ``Series.isin()`` (`#231`_)
* Added type hints to many APIs in ``DataFrame`` and ``Series`` (`#231`_)
Deprecated
^^^^^^^^^^
* Deprecated the ``overwrite`` parameter in favor of ``es_if_exists`` in ``ImportedMLModel`` constructor (`#249`_, contributed by `@V1NAY8`_)
Changed
^^^^^^^
* Changed aggregations for datetimes to be higher precision when available (`#253`_)
Fixed
^^^^^
* Fixed ``ImportedMLModel.predict()`` to fail when ``errors`` are present in the ``ingest.simulate`` response (`#220`_)
* Fixed ``Series.median()`` aggregation to return a scalar instead of ``pandas.Series`` (`#253`_)
* Fixed ``Series.describe()`` to return a ``pandas.Series`` instead of ``pandas.DataFrame`` (`#258`_)
* Fixed ``DataFrame.mean()`` and ``Series.mean()`` dtype (`#258`_)
* Fixed ``DataFrame.agg()`` aggregations when using ``extended_stats`` Elasticsearch aggregation (`#253`_)
.. _@leonardbinet: https://github.com/leonardbinet
.. _@V1NAY8: https://github.com/V1NAY8
.. _#217: https://github.com/elastic/eland/pull/217
.. _#220: https://github.com/elastic/eland/pull/220
.. _#223: https://github.com/elastic/eland/pull/223
.. _#231: https://github.com/elastic/eland/pull/231
.. _#233: https://github.com/elastic/eland/pull/233
.. _#246: https://github.com/elastic/eland/pull/246
.. _#247: https://github.com/elastic/eland/pull/247
.. _#249: https://github.com/elastic/eland/pull/249
.. _#252: https://github.com/elastic/eland/pull/252
.. _#253: https://github.com/elastic/eland/pull/253
.. _#258: https://github.com/elastic/eland/pull/258
7.7.0a1 (2020-05-20)
--------------------
Added
^^^^^
* Added the package to Conda Forge, install via
``conda install -c conda-forge eland`` (`#209`_)
* Added ``DataFrame.sample()`` and ``Series.sample()`` for querying
a random sample of data from the index (`#196`_, contributed by `@mesejo`_)
* Added ``Series.isna()`` and ``Series.notna()`` for filtering out
missing, ``NaN`` or null values from a column (`#210`_, contributed by `@mesejo`_)
* Added ``DataFrame.filter()`` and ``Series.filter()`` for reducing an axis
using a sequence of items or a pattern (`#212`_)
* Added ``DataFrame.to_pandas()`` and ``Series.to_pandas()`` for converting
an Eland dataframe or series into a Pandas dataframe or series inline (`#208`_)
* Added support for XGBoost v1.0.0 (`#200`_)
Deprecated
^^^^^^^^^^
* Deprecated ``info_es()`` in favor of ``es_info()`` (`#208`_)
* Deprecated ``eland.read_csv()`` in favor of ``eland.csv_to_eland()`` (`#208`_)
* Deprecated ``eland.read_es()`` in favor of ``eland.DataFrame()`` (`#208`_)
Changed
^^^^^^^
* Changed ``var`` and ``std`` aggregations to use sample instead of
population in line with Pandas (`#185`_)
* Changed painless scripts to use ``source`` rather than ``inline`` to improve
script caching performance (`#191`_, contributed by `@mesejo`_)
* Changed minimum ``elasticsearch`` Python library version to v7.7.0 (`#207`_)
* Changed name of ``Index.field_name`` to ``Index.es_field_name`` (`#208`_)
Fixed
^^^^^
* Fixed ``DeprecationWarning`` raised from ``pandas.Series`` when an
an empty series was created without specifying ``dtype`` (`#188`_, contributed by `@mesejo`_)
* Fixed a bug when filtering columns on complex combinations of and and or (`#204`_)
* Fixed an issue where ``DataFrame.shape`` would return a larger value than
in the index if a sized operation like ``.head(X)`` was applied to the data
frame (`#205`_, contributed by `@mesejo`_)
* Fixed issue where both ``scikit-learn`` and ``xgboost`` libraries were
required to use ``eland.ml.ImportedMLModel``, now only one library is
required to use this feature (`#206`_)
.. _#200: https://github.com/elastic/eland/pull/200
.. _#201: https://github.com/elastic/eland/pull/201
.. _#204: https://github.com/elastic/eland/pull/204
.. _#205: https://github.com/elastic/eland/pull/205
.. _#206: https://github.com/elastic/eland/pull/206
.. _#207: https://github.com/elastic/eland/pull/207
.. _#191: https://github.com/elastic/eland/pull/191
.. _#210: https://github.com/elastic/eland/pull/210
.. _#185: https://github.com/elastic/eland/pull/185
.. _#188: https://github.com/elastic/eland/pull/188
.. _#196: https://github.com/elastic/eland/pull/196
.. _#208: https://github.com/elastic/eland/pull/208
.. _#209: https://github.com/elastic/eland/pull/209
.. _#212: https://github.com/elastic/eland/pull/212
7.6.0a5 (2020-04-14)
--------------------
Added
^^^^^
* Added support for Pandas v1.0.0 (`#141`_, contributed by `@mesejo`_)
* Added ``use_pandas_index_for_es_ids`` parameter to ``pandas_to_eland()`` (`#154`_)
* Added ``es_type_overrides`` parameter to ``pandas_to_eland()`` (`#181`_)
* Added ``NDFrame.var()``, ``.std()`` and ``.median()`` aggregations (`#175`_, `#176`_, contributed by `@mesejo`_)
* Added ``DataFrame.es_query()`` to allow modifying ES queries directly (`#156`_)
* Added ``eland.__version__`` (`#153`_, contributed by `@mesejo`_)
Removed
^^^^^^^
* Removed support for Python 3.5 (`#150`_)
* Removed ``eland.Client()`` interface, use
``elasticsearch.Elasticsearch()`` client instead (`#166`_)
* Removed all private objects from top-level ``eland`` namespace (`#170`_)
* Removed ``geo_points`` from ``pandas_to_eland()`` in favor of ``es_type_overrides`` (`#181`_)
Changed
^^^^^^^
* Changed ML model serialization to be slightly smaller (`#159`_)
* Changed minimum ``elasticsearch`` Python library version to v7.6.0 (`#181`_)
Fixed
^^^^^
* Fixed ``inference_config`` being required on ML models for ES >=7.8 (`#174`_)
* Fixed unpacking for ``DataFrame.aggregate("median")`` (`#161`_)
.. _@mesejo: https://github.com/mesejo
.. _#141: https://github.com/elastic/eland/pull/141
.. _#150: https://github.com/elastic/eland/pull/150
.. _#153: https://github.com/elastic/eland/pull/153
.. _#154: https://github.com/elastic/eland/pull/154
.. _#156: https://github.com/elastic/eland/pull/156
.. _#159: https://github.com/elastic/eland/pull/159
.. _#161: https://github.com/elastic/eland/pull/161
.. _#166: https://github.com/elastic/eland/pull/166
.. _#170: https://github.com/elastic/eland/pull/170
.. _#174: https://github.com/elastic/eland/pull/174
.. _#175: https://github.com/elastic/eland/pull/175
.. _#176: https://github.com/elastic/eland/pull/176
.. _#181: https://github.com/elastic/eland/pull/181
7.6.0a4 (2020-03-23)
--------------------
Changed
^^^^^^^
* Changed requirement for ``xgboost`` from ``>=0.90`` to ``==0.90``
Fixed
^^^^^
* Fixed issue in ``DataFrame.info()`` when called on an empty frame (`#135`_)
* Fixed issues where many ``_source`` fields would generate
a ``too_long_frame`` error (`#135`_, `#137`_)
.. _#135: https://github.com/elastic/eland/pull/135
.. _#137: https://github.com/elastic/eland/pull/137

@@ -1,4 +1,5 @@
# Contributing to eland
Contributing to eland
=====================
Eland is an open source project and we love to receive contributions
from our community --- you! There are many ways to contribute, from
@ -6,7 +7,8 @@ writing tutorials or blog posts, improving the documentation, submitting
bug reports and feature requests or writing code which can be
incorporated into eland itself.
## Bug reports
Bug reports
-----------
If you think you have found a bug in eland, first make sure that you are
testing against the [latest version of
@ -27,7 +29,8 @@ lies with your query, when actually it depends on how your data is
indexed. The easier it is for us to recreate your problem, the faster it
is likely to be fixed.
## Feature requests
Feature requests
----------------
If you find yourself wishing for a feature that doesn\'t exist in eland,
you are probably not alone. There are bound to be others out there with
@ -37,7 +40,8 @@ list](https://github.com/elastic/eland/issues) on GitHub which describes
the feature you would like to see, why you need it, and how it should
work.
## Contributing code and documentation changes
Contributing code and documentation changes
-------------------------------------------
If you have a bugfix or new feature that you would like to contribute to
eland, please find or open an issue about it first. Talk about what you
@ -62,7 +66,7 @@ individual projects can be found below.
You will need to fork the main eland code or documentation repository
and clone it to your local machine. See [github help
page](https://docs.github.com/en/free-pro-team@latest/github/getting-started-with-github/fork-a-repo) for help.
page](https://help.github.com/articles/fork-a-repo) for help.
Further instructions for specific projects are given below.
@ -70,69 +74,58 @@ Further instructions for specific projects are given below.
Once your changes and tests are ready to submit for review:
1. Run the linter and test suite to ensure your changes do not break the existing code:
1. Test your changes
(TODO Add link to the testing document)
Run the test suite to make sure that nothing is broken (TODO add
link to testing doc).
``` bash
# Run Auto-format, lint, mypy type checker for your changes
$ nox -s format
2. Sign the Contributor License Agreement
# Launch Elasticsearch with a trial licence and ML enabled
$ docker run --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" -e "xpack.security.enabled=false" -e "xpack.license.self_generated.type=trial" docker.elastic.co/elasticsearch/elasticsearch:9.0.0
Please make sure you have signed our [Contributor License
Agreement](https://www.elastic.co/contributor-agreement/). We are
not asking you to assign copyright to us, but to give us the right
to distribute your code without restriction. We ask this of all
contributors in order to assure our users of the origin and
continuing existence of the code. You only need to sign the CLA
once.
# See all test suites
$ nox -l
# Run a specific test suite
$ nox -rs "test-3.12(pandas_version='2.2.3')"
# Run a specific test
$ nox -rs "test-3.12(pandas_version='2.2.3')" -- -k test_learning_to_rank
```
2. Sign the Contributor License Agreement
Please make sure you have signed our [Contributor License Agreement](https://www.elastic.co/contributor-agreement/).
We are not asking you to assign copyright to us, but to give us the right to distribute your code without restriction.
We ask this of all contributors in order to assure our users of the origin and continuing existence of the code.
You only need to sign the CLA once.
3. Rebase your changes
3. Rebase your changes
Update your local repository with the most recent code from the main
eland repository, and rebase your branch on top of the latest main
eland repository, and rebase your branch on top of the latest master
branch. We prefer your initial changes to be squashed into a single
commit. Later, if we ask you to make changes, add them as separate
commits. This makes them easier to review. As a final step before
merging we will either ask you to squash all commits yourself or
we\'ll do it for you.
4. Submit a pull request
4. Submit a pull request
Push your local changes to your forked copy of the repository and
[submit a pull
request](https://docs.github.com/en/free-pro-team@latest/github/collaborating-with-issues-and-pull-requests/proposing-changes-to-your-work-with-pull-requests) .
In the pull request, choose a title which sums up the changes that you
request](https://help.github.com/articles/using-pull-requests). In
the pull request, choose a title which sums up the changes that you
have made, and in the body provide more details about what your
changes do. Also mention the number of the issue where discussion
has taken place, eg "Closes \#123".
Then sit back and wait. There will probably be discussion about the pull
request and, if any changes are needed, we would love to work with you
to get your pull request merged into `eland` .
to get your pull request merged into eland.
Please adhere to the general guideline that you should never force push
to a publicly shared branch. Once you have opened your pull request, you
should consider your branch publicly shared. Instead of force pushing
you can just add incremental commits; this is generally easier on your
reviewers. If you need to pick up changes from main, you can merge
main into your branch. A reviewer might ask you to rebase a
reviewers. If you need to pick up changes from master, you can merge
master into your branch. A reviewer might ask you to rebase a
long-running pull request in which case force pushing is okay for that
request. Note that squashing at the end of the review process should
also not be done, that can be done when the pull request is [integrated
via GitHub](https://github.com/blog/2141-squash-your-commits).
## Contributing to the eland codebase
Contributing to the eland codebase
----------------------------------
**Repository:** <https://github.com/elastic/eland>
@ -143,91 +136,27 @@ currently using a minimum version of PyCharm 2019.2.4.
(All commands should be run from module root)
* Create a new project via 'Check out from Version Control'->'Git' on the "Welcome to PyCharm" page (or other)
* Enter the URL to your fork of eland (e.g. `git@github.com:stevedodson/eland.git`)
* Click 'Yes' for 'Checkout from Version Control'
* Configure PyCharm environment:
  * In 'Preferences' configure a 'Project: eland'->'Project Interpreter'. Generally, we recommend creating a virtual environment (TODO link to installing for python version support).
  * In 'Preferences' set 'Tools'->'Python Integrated Tools'->'Default test runner' to `pytest`
  * In 'Preferences' set 'Tools'->'Python Integrated Tools'->'Docstring format' to `numpy`
* Install development requirements. Open a terminal in the virtual environment and run
``` bash
> pip install -r requirements-dev.txt
```
* Set up an Elasticsearch instance with Docker
``` bash
> ELASTICSEARCH_VERSION=elasticsearch:8.17.0 BUILDKITE=false .buildkite/run-elasticsearch.sh
```
* Now check `http://localhost:9200`
* Install local `eland` module (required to execute notebook tests)
``` bash
> python setup.py install
```
* To set up the test environment:
``` bash
> python -m tests.setup_tests
```
(Note this modifies Elasticsearch indices)
* To validate installation, open python console and run
``` python
>>> import eland as ed
>>> ed_df = ed.DataFrame('http://localhost:9200', 'flights')
```
* To run the automatic formatter and check for lint issues run
``` bash
> nox -s format
```
* To test specific versions of Python run
``` bash
> nox -s test-3.12
```
### Documentation
* [Install pandoc on your system](https://pandoc.org/installing.html). For Ubuntu or Debian you can do
``` bash
> sudo apt-get install -y pandoc
```
* Install documentation requirements. Open a terminal in the virtual environment and run
``` bash
> pip install -r docs/requirements-docs.txt
```
* To verify/generate documentation run
``` bash
> nox -s docs
```

View File

@ -1,28 +0,0 @@
# syntax=docker/dockerfile:1
FROM python:3.10-slim
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt-get update && apt-get install -y \
build-essential \
pkg-config \
cmake \
libzip-dev \
libjpeg-dev
ADD . /eland
WORKDIR /eland
ARG TARGETPLATFORM
RUN --mount=type=cache,target=/root/.cache/pip \
if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
python3 -m pip install \
--no-cache-dir --disable-pip-version-check --extra-index-url https://download.pytorch.org/whl/cpu \
torch==2.5.1+cpu .[all]; \
else \
python3 -m pip install \
--no-cache-dir --disable-pip-version-check \
.[all]; \
fi
CMD ["/bin/sh"]

View File

@ -1,42 +0,0 @@
# syntax=docker/dockerfile:1
FROM docker.elastic.co/wolfi/python:3.10-dev AS builder
WORKDIR /eland
ENV VIRTUAL_ENV=/eland/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
ADD . /eland
ARG TARGETPLATFORM
RUN --mount=type=cache,target=/root/.cache/pip \
if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
python3 -m pip install \
--no-cache-dir --disable-pip-version-check --extra-index-url https://download.pytorch.org/whl/cpu \
torch==2.5.1+cpu .[all]; \
else \
python3 -m pip install \
--no-cache-dir --disable-pip-version-check \
.[all]; \
fi
FROM docker.elastic.co/wolfi/python:3.10
WORKDIR /eland
ENV VIRTUAL_ENV=/eland/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
COPY --from=builder /eland /eland
# The eland_import_hub_model script is intended to be executed by a shell,
# which will see its shebang line and then execute it with the Python
# interpreter of the virtual environment. We want to keep this behavior even
# with Wolfi so that users can use the image as before. To do that, we use two
# tricks:
#
# * copy /bin/sh (that is, busybox's ash) from the builder image
# * revert to Docker's default entrypoint, which is the only way to pass
# parameters to `eland_import_hub_model` without needing quotes.
#
COPY --from=builder /bin/sh /bin/sh
ENTRYPOINT []

View File

@ -1,3 +1 @@
include LICENSE.txt
include README.md
include eland/py.typed

View File

@ -50,6 +50,3 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
This product contains an adapted version of the "us-national-parks" dataset, https://data.world/kevinnayar/us-national-parks, by Kevin Nayar, https://data.world/kevinnayar, is licensed under CC BY, https://creativecommons.org/licenses/by/4.0/legalcode

444
README.md
View File

@ -1,280 +1,256 @@
<div align="center">
<a href="https://github.com/elastic/eland">
<img src="https://raw.githubusercontent.com/elastic/eland/main/docs/sphinx/logo/eland.png" width="30%"
alt="Eland" />
</a>
</div>
<br />
<div align="center">
<a href="https://pypi.org/project/eland"><img src="https://img.shields.io/pypi/v/eland.svg" alt="PyPI Version"></a>
<a href="https://anaconda.org/conda-forge/eland"><img src="https://img.shields.io/conda/vn/conda-forge/eland"
alt="Conda Version"></a>
<a href="https://pepy.tech/project/eland"><img src="https://static.pepy.tech/badge/eland" alt="Downloads"></a>
<a href="https://pypi.org/project/eland"><img src="https://img.shields.io/pypi/status/eland.svg"
alt="Package Status"></a>
<a href="https://buildkite.com/elastic/eland"><img src="https://badge.buildkite.com/d92340e800bc06a7c7c02a71b8d42fcb958bd18c25f99fe2d9.svg" alt="Build Status"></a>
<a href="https://github.com/elastic/eland/blob/main/LICENSE.txt"><img src="https://img.shields.io/pypi/l/eland.svg"
alt="License"></a>
<a href="https://eland.readthedocs.io"><img
src="https://readthedocs.org/projects/eland/badge/?version=latest" alt="Documentation Status"></a>
</div>
## About
Eland is a Python Elasticsearch client for exploring and analyzing data in Elasticsearch with a familiar
Pandas-compatible API.
Where possible the package uses existing Python APIs and data structures to make it easy to switch between numpy,
pandas, or scikit-learn and their Elasticsearch-powered equivalents. In general, the data resides in Elasticsearch and
not in memory, which allows Eland to access large datasets stored in Elasticsearch.
Eland also provides tools to upload trained machine learning models from common libraries like
[scikit-learn](https://scikit-learn.org), [XGBoost](https://xgboost.readthedocs.io), and
[LightGBM](https://lightgbm.readthedocs.io) into Elasticsearch.
## Getting Started
Eland can be installed from [PyPI](https://pypi.org/project/eland) with Pip:
```bash
$ python -m pip install eland
```
If using Eland to upload NLP models to Elasticsearch install the PyTorch extras:
```bash
$ python -m pip install 'eland[pytorch]'
```
Eland can also be installed from [Conda Forge](https://anaconda.org/conda-forge/eland) with Conda:
```bash
$ conda install -c conda-forge eland
```
### Compatibility
- Supports Python 3.9, 3.10, 3.11 and 3.12.
- Supports Pandas 1.5 and 2.
- Supports Elasticsearch 8+ clusters, recommended 8.16 or later for all features to work.
If you are using the NLP with PyTorch feature, make sure your Eland minor version matches the minor
version of your Elasticsearch cluster. For all other features it is sufficient for the major versions
to match (a quick way to check this is sketched after this list).
- You need to install the appropriate version of PyTorch to import an NLP model. Run `python -m pip
install 'eland[pytorch]'` to install that version.
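A minimal sketch of such a version check, assuming a local cluster URL as a placeholder (this is not an official Eland utility, just an illustration of the rule above):

```python
# Minimal sketch (not part of Eland): compare the Eland client version with the
# Elasticsearch cluster version to spot minor-version mismatches.
import eland as ed
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # placeholder cluster URL
es_version = es.info()["version"]["number"]

es_major, es_minor = es_version.split(".")[:2]
ed_major, ed_minor = ed.__version__.split(".")[:2]

if (es_major, es_minor) != (ed_major, ed_minor):
    print(f"Eland {ed.__version__} and Elasticsearch {es_version} differ in their "
          "minor version; NLP model import may not be compatible.")
```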
### Prerequisites
Users installing Eland on Debian-based distributions may need to install prerequisite packages for the transitive
dependencies of Eland:
```bash
$ sudo apt-get install -y \
build-essential pkg-config cmake \
python3-dev libzip-dev libjpeg-dev
```
Note that other distributions such as CentOS, RedHat, Arch, etc. may require using a different package manager and
specifying different package names.
### Docker
If you want to use Eland without installing it just to run the available scripts, use the Docker
image.
It can be used interactively:
```bash
$ docker run -it --rm --network host docker.elastic.co/eland/eland
```
Running installed scripts is also possible without an interactive shell, e.g.:
```bash
$ docker run -it --rm --network host \
docker.elastic.co/eland/eland \
eland_import_hub_model \
--url http://host.docker.internal:9200/ \
--hub-model-id elastic/distilbert-base-cased-finetuned-conll03-english \
--task-type ner
```
### Connecting to Elasticsearch
Eland uses the [Elasticsearch low level client](https://elasticsearch-py.readthedocs.io) to connect to Elasticsearch.
This client supports a range of [connection options and authentication options](https://elasticsearch-py.readthedocs.io/en/stable/api.html#elasticsearch).
You can pass either an instance of `elasticsearch.Elasticsearch` to Eland APIs
or a string containing the host to connect to:
```python
import eland as ed
# Connecting to an Elasticsearch instance running on 'http://localhost:9200'
df = ed.DataFrame("http://localhost:9200", es_index_pattern="flights")
# Connecting to an Elastic Cloud instance
from elasticsearch import Elasticsearch
es = Elasticsearch(
cloud_id="cluster-name:...",
basic_auth=("elastic", "<password>")
)
df = ed.DataFrame(es, es_index_pattern="flights")
```
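An API key can be used instead of a password when connecting to Elastic Cloud; a short sketch (the `cloud_id` and `api_key` values are placeholders):

```python
import eland as ed
from elasticsearch import Elasticsearch

# Connect with an API key instead of basic auth (placeholder credentials)
es = Elasticsearch(cloud_id="cluster-name:...", api_key="<api-key>")
df = ed.DataFrame(es, es_index_pattern="flights")
```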
## DataFrames in Eland
`eland.DataFrame` wraps an Elasticsearch index in a Pandas-like API
and defers all processing and filtering of data to Elasticsearch
instead of your local machine. This means you can process large
amounts of data within Elasticsearch from a Jupyter Notebook
without overloading your machine.
➤ [Eland DataFrame API documentation](https://eland.readthedocs.io/en/latest/reference/dataframe.html)
➤ [Advanced examples in a Jupyter Notebook](https://eland.readthedocs.io/en/latest/examples/demo_notebook.html)
```python
>>> import eland as ed
>>> # Connect to 'flights' index via localhost Elasticsearch node
>>> df = ed.DataFrame('http://localhost:9200', 'flights')
# eland.DataFrame instance has the same API as pandas.DataFrame
# except all data is in Elasticsearch. See .info() memory usage.
>>> df.head()
AvgTicketPrice Cancelled ... dayOfWeek timestamp
0 841.265642 False ... 0 2018-01-01 00:00:00
1 882.982662 False ... 0 2018-01-01 18:27:00
2 190.636904 False ... 0 2018-01-01 17:11:14
3 181.694216 True ... 0 2018-01-01 10:33:28
4 730.041778 False ... 0 2018-01-01 05:13:00
[5 rows x 27 columns]
>>> df.info()
<class 'eland.dataframe.DataFrame'>
Index: 13059 entries, 0 to 13058
Data columns (total 27 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 AvgTicketPrice 13059 non-null float64
1 Cancelled 13059 non-null bool
2 Carrier 13059 non-null object
...
24 OriginWeather 13059 non-null object
25 dayOfWeek 13059 non-null int64
26 timestamp 13059 non-null datetime64[ns]
dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)
memory usage: 80.0 bytes
Elasticsearch storage usage: 5.043 MB
>>> df.describe()
AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin FlightTimeHour FlightTimeMin dayOfWeek
count 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000
mean 628.253689 7092.142457 4406.853010 47.335171 8.518797 511.127842 2.835975
std 266.386661 4578.263193 2844.800855 96.743006 5.579019 334.741135 1.939365
min 100.020531 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 410.008918 2470.545974 1535.126118 0.000000 4.194976 251.738513 1.000000
50% 640.362667 7612.072403 4729.922470 0.000000 8.385816 503.148975 3.000000
75% 842.254990 9735.082407 6049.459005 15.000000 12.009396 720.534532 4.141221
max 1199.729004 19881.482422 12353.780273 360.000000 31.715034 1902.901978 6.000000
>>> df[['Carrier', 'AvgTicketPrice', 'Cancelled']]
Carrier AvgTicketPrice Cancelled
0 Kibana Airlines 841.265642 False
1 Logstash Airways 882.982662 False
2 Logstash Airways 190.636904 False
3 Kibana Airlines 181.694216 True
4 Kibana Airlines 730.041778 False
... ... ... ...
13054 Logstash Airways 1080.446279 False
13055 Logstash Airways 646.612941 False
13056 Logstash Airways 997.751876 False
13057 JetBeats 1102.814465 False
13058 JetBeats 858.144337 False
[13059 rows x 3 columns]
# Filtering of rows using comparisons
>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head()
AvgTicketPrice Cancelled ... dayOfWeek timestamp
8 960.869736 True ... 0 2018-01-01 12:09:35
26 975.812632 True ... 0 2018-01-01 15:38:32
311 946.358410 True ... 0 2018-01-01 11:51:12
651 975.383864 True ... 2 2018-01-03 21:13:17
950 907.836523 True ... 2 2018-01-03 05:14:51
[5 rows x 27 columns]
# Running aggregations across an index
>>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std'])
DistanceKilometers AvgTicketPrice
sum 9.261629e+07 8.204365e+06
min 0.000000e+00 1.000205e+02
std 4.578263e+03 2.663867e+02
>>> df[['Carrier', 'Origin', 'Dest']].nunique()
Carrier 4
Origin 156
Dest 156
dtype: int64
>>> s = df.AvgTicketPrice * 2 + df.DistanceKilometers - df.FlightDelayMin
>>> s
0 18174.857422
1 10589.365723
2 381.273804
3 739.126221
4 14818.327637
...
13054 10219.474121
13055 8381.823975
13056 12661.157104
13057 20819.488281
13058 18315.431274
Length: 13059, dtype: float64
>>> print(s.info_es())
index_pattern: flights
Index:
index_field: _id
is_source_field: False
Mappings:
capabilities:
es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name
NaN script_field_None False double None float64 True True True script_field_None
Operations:
tasks: []
size: None
sort_params: None
_source: ['script_field_None']
body: {'script_fields': {'script_field_None': {'script': {'source': "(((doc['AvgTicketPrice'].value * 2) + doc['DistanceKilometers'].value) - doc['FlightDelayMin'].value)"}}}}
post_processing: []
>>> pd_df = ed.eland_to_pandas(df)
>>> pd_df.head()
AvgTicketPrice Cancelled Carrier ... OriginWeather dayOfWeek timestamp
0 841.265642 False Kibana Airlines ... Sunny 0 2018-01-01 00:00:00
1 882.982662 False Logstash Airways ... Clear 0 2018-01-01 18:27:00
2 190.636904 False Logstash Airways ... Rain 0 2018-01-01 17:11:14
3 181.694216 True Kibana Airlines ... Thunder & Lightning 0 2018-01-01 10:33:28
4 730.041778 False Kibana Airlines ... Damaging Wind 0 2018-01-01 05:13:00
[5 rows x 27 columns]
```
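The conversion also works in the other direction via `ed.pandas_to_eland`, which writes a pandas DataFrame into an Elasticsearch index. A short sketch, assuming `pd_df` from the example above (the destination index name is illustrative):

```python
import eland as ed

# Write the pandas DataFrame back into Elasticsearch as a new index
ed_df = ed.pandas_to_eland(
    pd_df,
    es_client="http://localhost:9200",
    es_dest_index="flights-copy",   # illustrative index name
    es_if_exists="replace",
    es_refresh=True,
)
```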
See [docs](https://eland.readthedocs.io/en/latest) and [demo_notebook.ipynb](https://eland.readthedocs.io/en/latest/examples/demo_notebook.html) for more examples.
## Machine Learning in Eland
### Regression and classification
Eland allows transforming trained regression and classification models from scikit-learn, XGBoost, and LightGBM
libraries to be serialized and used as an inference model in Elasticsearch.
➤ [Eland Machine Learning API documentation](https://eland.readthedocs.io/en/latest/reference/ml.html)
➤ [Read more about Machine Learning in Elasticsearch](https://www.elastic.co/guide/en/machine-learning/current/ml-getting-started.html)
```python
>>> from sklearn import datasets
>>> from xgboost import XGBClassifier
>>> from eland.ml import MLModel

# Train and exercise an XGBoost ML model locally
>>> training_data = datasets.make_classification(n_features=5)
>>> xgb_model = XGBClassifier(booster="gbtree")
>>> xgb_model.fit(training_data[0], training_data[1])

>>> xgb_model.predict(training_data[0])
[0 1 1 0 1 0 0 0 1 0]

# Import the model into Elasticsearch
>>> es_model = MLModel.import_model(
    es_client="http://localhost:9200",
    model_id="xgb-classifier",
    model=xgb_model,
    feature_names=["f0", "f1", "f2", "f3", "f4"],
)

# Exercise the ML model in Elasticsearch with the training data
>>> es_model.predict(training_data[0])
[0 1 1 0 1 0 0 0 1 0]
```
### NLP with PyTorch
For NLP tasks, Eland allows importing PyTorch trained BERT models into Elasticsearch. Models can be either plain PyTorch
models, or supported [transformers](https://huggingface.co/transformers) models from the
[Hugging Face model hub](https://huggingface.co/models).
```bash
$ eland_import_hub_model \
  --url http://localhost:9200/ \
  --hub-model-id elastic/distilbert-base-cased-finetuned-conll03-english \
  --task-type ner \
  --start
```
The example above will automatically start a model deployment. This is a
good shortcut for initial experimentation, but for anything that needs
good throughput you should omit the `--start` argument from the Eland
command line and instead start the model using the ML UI in Kibana.
The `--start` argument will deploy the model with one allocation and one
thread per allocation, which will not offer good performance. When starting
the model deployment using the ML UI in Kibana or the Elasticsearch
[API](https://www.elastic.co/guide/en/elasticsearch/reference/current/start-trained-model-deployment.html)
you will be able to set the threading options to make the best use of your
hardware.
```python
>>> import elasticsearch
>>> from pathlib import Path
>>> from eland.common import es_version
>>> from eland.ml.pytorch import PyTorchModel
>>> from eland.ml.pytorch.transformers import TransformerModel

>>> es = elasticsearch.Elasticsearch("http://elastic:mlqa_admin@localhost:9200")
>>> es_cluster_version = es_version(es)

# Load a Hugging Face transformers model directly from the model hub
>>> tm = TransformerModel(model_id="elastic/distilbert-base-cased-finetuned-conll03-english", task_type="ner", es_version=es_cluster_version)
Downloading: 100%|██████████| 257/257 [00:00<00:00, 108kB/s]
Downloading: 100%|██████████| 954/954 [00:00<00:00, 372kB/s]
Downloading: 100%|██████████| 208k/208k [00:00<00:00, 668kB/s]
Downloading: 100%|██████████| 112/112 [00:00<00:00, 43.9kB/s]
Downloading: 100%|██████████| 249M/249M [00:23<00:00, 11.2MB/s]

# Export the model in a TorchScript representation which Elasticsearch uses
>>> tmp_path = "models"
>>> Path(tmp_path).mkdir(parents=True, exist_ok=True)
>>> model_path, config, vocab_path = tm.save(tmp_path)

# Import model into Elasticsearch
>>> ptm = PyTorchModel(es, tm.elasticsearch_model_id())
>>> ptm.import_model(model_path=model_path, config_path=None, vocab_path=vocab_path, config=config)
100%|██████████| 63/63 [00:12<00:00, 5.02it/s]
```
## Why eland?
Naming is difficult, but as we had to call it something:
* eland: elastic and data
* eland: 'Elk/Moose' in Dutch (Alces alces)
* [Elandsgracht](https://goo.gl/maps/3hGBMqeGRcsBJfKx8): Amsterdam street near Elastic's Amsterdam office
[Pronunciation](https://commons.wikimedia.org/wiki/File:Nl-eland.ogg): /ˈeːlɑnt/

View File

@ -1,94 +0,0 @@
# Declare a Backstage Component that represents the Eland application.
---
# yaml-language-server: $schema=https://json.schemastore.org/catalog-info.json
apiVersion: backstage.io/v1alpha1
kind: Component
metadata:
name: eland
description: Python Client and Toolkit for DataFrames, Big Data, Machine Learning and ETL in Elasticsearch
annotations:
backstage.io/source-location: url:https://github.com/elastic/eland/
github.com/project-slug: elastic/eland
github.com/team-slug: elastic/ml-core
buildkite.com/project-slug: elastic/eland
tags:
- elasticsearch
- python
- machine-learning
- big-data
- etl
links:
- title: Eland docs
url: https://eland.readthedocs.io/
spec:
type: application
owner: group:ml-core
lifecycle: production
dependsOn:
- resource:eland-pipeline
- resource:eland-releaser-docker-pipeline
# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/e57ee3bed7a6f73077a3f55a38e76e40ec87a7cf/rre.schema.json
---
apiVersion: backstage.io/v1alpha1
kind: Resource
metadata:
name: eland-pipeline
description: Run Eland tests
links:
- title: Pipeline
url: https://buildkite.com/elastic/eland
spec:
type: buildkite-pipeline
owner: group:ml-core
system: buildkite
implementation:
apiVersion: buildkite.elastic.dev/v1
kind: Pipeline
metadata:
name: Eland
description: Eland Python
spec:
pipeline_file: .buildkite/pipeline.yml
repository: elastic/eland
teams:
ml-core: {}
devtools-team: {}
es-docs: {}
everyone:
access_level: READ_ONLY
# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/e57ee3bed7a6f73077a3f55a38e76e40ec87a7cf/rre.schema.json
---
apiVersion: backstage.io/v1alpha1
kind: Resource
metadata:
name: eland-release-docker-pipeline
description: Release Docker Artifacts for Eland
links:
- title: Pipeline
url: https://buildkite.com/elastic/eland-release-docker
spec:
type: buildkite-pipeline
owner: group:ml-core
system: buildkite
implementation:
apiVersion: buildkite.elastic.dev/v1
kind: Pipeline
metadata:
name: Eland - Release Docker
description: Release Docker Artifacts for Eland
spec:
pipeline_file: .buildkite/release-docker/pipeline.yml
provider_settings:
trigger_mode: none
repository: elastic/eland
teams:
ml-core: {}
devtools-team: {}
everyone:
access_level: READ_ONLY

View File

@ -5,7 +5,7 @@
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = sphinx
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".

View File

@ -1,8 +0,0 @@
project: 'Eland Python client'
cross_links:
- docs-content
toc:
- toc: reference
subs:
es: "Elasticsearch"
ml: "machine learning"

View File

@ -7,7 +7,7 @@ REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=sphinx
set SOURCEDIR=source
set BUILDDIR=build
if "%1" == "" goto help

View File

@ -1,63 +0,0 @@
---
mapped_pages:
- https://www.elastic.co/guide/en/elasticsearch/client/eland/current/dataframes.html
---
# Data Frames [dataframes]
`eland.DataFrame` wraps an Elasticsearch index in a Pandas-like API and defers all processing and filtering of data to Elasticsearch instead of your local machine. This means you can process large amounts of data within Elasticsearch from a Jupyter Notebook without overloading your machine.
```python
>>> import eland as ed
>>>
# Connect to 'flights' index via localhost Elasticsearch node
>>> df = ed.DataFrame('http://localhost:9200', 'flights')
# eland.DataFrame instance has the same API as pandas.DataFrame
# except all data is in Elasticsearch. See .info() memory usage.
>>> df.head()
AvgTicketPrice Cancelled ... dayOfWeek timestamp
0 841.265642 False ... 0 2018-01-01 00:00:00
1 882.982662 False ... 0 2018-01-01 18:27:00
2 190.636904 False ... 0 2018-01-01 17:11:14
3 181.694216 True ... 0 2018-01-01 10:33:28
4 730.041778 False ... 0 2018-01-01 05:13:00
[5 rows x 27 columns]
>>> df.info()
<class 'eland.dataframe.DataFrame'>
Index: 13059 entries, 0 to 13058
Data columns (total 27 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 AvgTicketPrice 13059 non-null float64
1 Cancelled 13059 non-null bool
2 Carrier 13059 non-null object
...
24 OriginWeather 13059 non-null object
25 dayOfWeek 13059 non-null int64
26 timestamp 13059 non-null datetime64[ns]
dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)
memory usage: 80.0 bytes
Elasticsearch storage usage: 5.043 MB
# Filtering of rows using comparisons
>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head()
AvgTicketPrice Cancelled ... dayOfWeek timestamp
8 960.869736 True ... 0 2018-01-01 12:09:35
26 975.812632 True ... 0 2018-01-01 15:38:32
311 946.358410 True ... 0 2018-01-01 11:51:12
651 975.383864 True ... 2 2018-01-03 21:13:17
950 907.836523 True ... 2 2018-01-03 05:14:51
[5 rows x 27 columns]
# Running aggregations across an index
>>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std'])
DistanceKilometers AvgTicketPrice
sum 9.261629e+07 8.204365e+06
min 0.000000e+00 1.000205e+02
std 4.578263e+03 2.663867e+02
```

View File

@ -1,90 +0,0 @@
---
mapped_pages:
- https://www.elastic.co/guide/en/elasticsearch/client/eland/current/index.html
- https://www.elastic.co/guide/en/elasticsearch/client/eland/current/overview.html
navigation_title: Eland
---
# Eland Python client [overview]
Eland is a Python client and toolkit for DataFrames and {{ml}} in {{es}}. Full documentation is available on [Read the Docs](https://eland.readthedocs.io). Source code is available on [GitHub](https://github.com/elastic/eland).
## Compatibility [_compatibility]
* Supports Python 3.9+ and Pandas 1.5
* Supports {{es}} 8+ clusters, recommended 8.16 or later for all features to work. Make sure your Eland major version matches the major version of your Elasticsearch cluster.
The recommended way to set your requirements in your `setup.py` or `requirements.txt` is:
```
# Elasticsearch 8.x
eland>=8,<9
```
```
# Elasticsearch 7.x
eland>=7,<8
```
## Getting Started [_getting_started]
Create a `DataFrame` object connected to an {{es}} cluster running on `http://localhost:9200`:
```python
>>> import eland as ed
>>> df = ed.DataFrame(
... es_client="http://localhost:9200",
... es_index_pattern="flights",
... )
>>> df
AvgTicketPrice Cancelled ... dayOfWeek timestamp
0 841.265642 False ... 0 2018-01-01 00:00:00
1 882.982662 False ... 0 2018-01-01 18:27:00
2 190.636904 False ... 0 2018-01-01 17:11:14
3 181.694216 True ... 0 2018-01-01 10:33:28
4 730.041778 False ... 0 2018-01-01 05:13:00
... ... ... ... ... ...
13054 1080.446279 False ... 6 2018-02-11 20:42:25
13055 646.612941 False ... 6 2018-02-11 01:41:57
13056 997.751876 False ... 6 2018-02-11 04:09:27
13057 1102.814465 False ... 6 2018-02-11 08:28:21
13058 858.144337 False ... 6 2018-02-11 14:54:34
[13059 rows x 27 columns]
```
### Elastic Cloud [_elastic_cloud]
You can also connect Eland to an Elasticsearch instance in Elastic Cloud:
```python
>>> import eland as ed
>>> from elasticsearch import Elasticsearch
# First instantiate an 'Elasticsearch' instance connected to Elastic Cloud
>>> es = Elasticsearch(cloud_id="...", api_key="...")
# then wrap the client in an Eland DataFrame:
>>> df = ed.DataFrame(es, es_index_pattern="flights")
>>> df.head(5)
AvgTicketPrice Cancelled ... dayOfWeek timestamp
0 841.265642 False ... 0 2018-01-01 00:00:00
1 882.982662 False ... 0 2018-01-01 18:27:00
2 190.636904 False ... 0 2018-01-01 17:11:14
3 181.694216 True ... 0 2018-01-01 10:33:28
4 730.041778 False ... 0 2018-01-01 05:13:00
[5 rows x 27 columns]
```
Eland can be used for complex queries and aggregations:
```python
>>> df[df.Carrier != "Kibana Airlines"].groupby("Carrier").mean(numeric_only=False)
AvgTicketPrice Cancelled timestamp
Carrier
ES-Air 630.235816 0.129814 2018-01-21 20:45:00.200000000
JetBeats 627.457373 0.134698 2018-01-21 14:43:18.112400635
Logstash Airways 624.581974 0.125188 2018-01-21 16:14:50.711798340
```

View File

@ -1,19 +0,0 @@
---
mapped_pages:
- https://www.elastic.co/guide/en/elasticsearch/client/eland/current/installation.html
---
# Installation [installation]
Eland can be installed with [pip](https://pip.pypa.io) from [PyPI](https://pypi.org/project/eland). We recommend [using a virtual environment](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/) when installing with pip:
```sh
$ python -m pip install eland
```
Alternatively, Eland can be installed with [Conda](https://docs.conda.io) from [Conda Forge](https://anaconda.org/conda-forge/eland):
```sh
$ conda install -c conda-forge eland
```

View File

@ -1,199 +0,0 @@
---
mapped_pages:
- https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html
---
# Machine Learning [machine-learning]
## Trained models [ml-trained-models]
Eland allows transforming *some*
[trained models](https://eland.readthedocs.io/en/latest/reference/api/eland.ml.MLModel.import_model.html#parameters) from scikit-learn, XGBoost,
and LightGBM libraries to be serialized and used as an inference model in {{es}}.
```python
>>> from sklearn import datasets
>>> from xgboost import XGBClassifier
>>> from eland.ml import MLModel
# Train and exercise an XGBoost ML model locally
>>> training_data = datasets.make_classification(n_features=5)
>>> xgb_model = XGBClassifier(booster="gbtree")
>>> xgb_model.fit(training_data[0], training_data[1])
>>> xgb_model.predict(training_data[0])
[0 1 1 0 1 0 0 0 1 0]
# Import the model into Elasticsearch
>>> es_model = MLModel.import_model(
es_client="http://localhost:9200",
model_id="xgb-classifier",
model=xgb_model,
feature_names=["f0", "f1", "f2", "f3", "f4"],
)
# Exercise the ML model in Elasticsearch with the training data
>>> es_model.predict(training_data[0])
[0 1 1 0 1 0 0 0 1 0]
```
## Natural language processing (NLP) with PyTorch [ml-nlp-pytorch]
::::{important}
You need to install the appropriate version of PyTorch to import an NLP model. Run `python -m pip install 'eland[pytorch]'` to install that version.
::::
For NLP tasks, Eland enables you to import PyTorch models into {{es}}. Use the `eland_import_hub_model` script to download and install supported [transformer models](https://huggingface.co/transformers) from the [Hugging Face model hub](https://huggingface.co/models). For example:
```bash
eland_import_hub_model <authentication> \ <1>
--url http://localhost:9200/ \ <2>
--hub-model-id elastic/distilbert-base-cased-finetuned-conll03-english \ <3>
--task-type ner \ <4>
--start
```
1. Use an authentication method to access your cluster. Refer to [Authentication methods](machine-learning.md#ml-nlp-pytorch-auth).
2. The cluster URL. Alternatively, use `--cloud-id`.
3. Specify the identifier for the model in the Hugging Face model hub.
4. Specify the type of NLP task. Supported values are `fill_mask`, `ner`, `question_answering`, `text_classification`, `text_embedding`, `text_expansion`, `text_similarity` and `zero_shot_classification`.
For more information about the available options, run `eland_import_hub_model` with the `--help` option.
```bash
eland_import_hub_model --help
```
### Import model with Docker [ml-nlp-pytorch-docker]
::::{important}
To use the Docker container, you need to clone the Eland repository: [https://github.com/elastic/eland](https://github.com/elastic/eland)
::::
If you want to use Eland without installing it, you can use the Docker image:
You can use the container interactively:
```bash
docker run -it --rm --network host docker.elastic.co/eland/eland
```
Running installed scripts is also possible without an interactive shell, for example:
```bash
docker run -it --rm docker.elastic.co/eland/eland \
eland_import_hub_model \
--url $ELASTICSEARCH_URL \
--hub-model-id elastic/distilbert-base-uncased-finetuned-conll03-english \
--start
```
Replace the `$ELASTICSEARCH_URL` with the URL for your Elasticsearch cluster. For authentication purposes, include an administrator username and password in the URL in the following format: `https://username:password@host:port`.
### Install models in an air-gapped environment [ml-nlp-pytorch-air-gapped]
You can install models in a restricted or closed network by pointing the `eland_import_hub_model` script to local files.
For an offline install of a Hugging Face model, the model first needs to be cloned locally. Git and [Git Large File Storage](https://git-lfs.com/) must be installed on your system.
1. Select a model you want to use from Hugging Face. Refer to the [compatible third party model](docs-content://explore-analyze/machine-learning/nlp/ml-nlp-model-ref.md) list for more information on the supported architectures.
2. Clone the selected model from Hugging Face by using the model URL. For example:
```bash
git clone https://huggingface.co/dslim/bert-base-NER
```
This command results in a local copy of the model in the directory `bert-base-NER`.
3. Use the `eland_import_hub_model` script with the `--hub-model-id` set to the directory of the cloned model to install it:
```bash
eland_import_hub_model \
--url 'XXXX' \
--hub-model-id /PATH/TO/MODEL \
--task-type ner \
--es-username elastic --es-password XXX \
--es-model-id bert-base-ner
```
If you use the Docker image to run `eland_import_hub_model` you must bind mount the model directory, so the container can read the files:
```bash
docker run --mount type=bind,source=/PATH/TO/MODEL,destination=/model,readonly -it --rm docker.elastic.co/eland/eland \
eland_import_hub_model \
--url 'XXXX' \
--hub-model-id /model \
--task-type ner \
--es-username elastic --es-password XXX \
--es-model-id bert-base-ner
```
Once it's uploaded to {{es}}, the model will have the ID specified by `--es-model-id`. If it is not set, the model ID is derived from `--hub-model-id`; spaces and path delimiters are converted to double underscores `__`, as sketched below.
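For illustration only, a hypothetical sketch of that normalisation (this is not Eland's actual implementation):

```python
# Hypothetical helper (not part of Eland): derive a default Elasticsearch model
# ID from a Hugging Face hub ID by replacing spaces and path delimiters.
def derive_es_model_id(hub_model_id: str) -> str:
    out = hub_model_id
    for ch in (" ", "/", "\\"):
        out = out.replace(ch, "__")
    return out

print(derive_es_model_id("elastic/distilbert-base-cased-finetuned-conll03-english"))
# elastic__distilbert-base-cased-finetuned-conll03-english
```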
### Connect to Elasticsearch through a proxy [ml-nlp-pytorch-proxy]
Behind the scenes, Eland uses the `requests` Python library, which [allows configuring proxies through an environment variable](https://requests.readthedocs.io/en/latest/user/advanced/#proxies). For example, to use an HTTP proxy to connect to an HTTPS Elasticsearch cluster, you need to set the `HTTPS_PROXY` environment variable when invoking Eland:
```bash
HTTPS_PROXY=http://proxy-host:proxy-port eland_import_hub_model ...
```
If you disabled security on your Elasticsearch cluster, you should use `HTTP_PROXY` instead.
### Authentication methods [ml-nlp-pytorch-auth]
The following authentication options are available when using the import script:
* Elasticsearch username and password authentication (specified with the `-u` and `-p` options):
```bash
eland_import_hub_model -u <username> -p <password> --cloud-id <cloud-id> ...
```
These `-u` and `-p` options also work when you use `--url`.
* Elasticsearch username and password authentication (embedded in the URL):
```bash
eland_import_hub_model --url https://<user>:<password>@<hostname>:<port> ...
```
* Elasticsearch API key authentication:
```bash
eland_import_hub_model --es-api-key <api-key> --url https://<hostname>:<port> ...
```
* HuggingFace Hub access token (for private models):
```bash
eland_import_hub_model --hub-access-token <access-token> ...
```
### TLS/SSL [ml-nlp-pytorch-tls]
The following TLS/SSL options for Elasticsearch are available when using the import script:
* Specify alternate CA bundle to verify the cluster certificate:
```bash
eland_import_hub_model --ca-certs CA_CERTS ...
```
* Disable TLS/SSL verification altogether (strongly discouraged):
```bash
eland_import_hub_model --insecure ...
```

View File

@ -1,6 +0,0 @@
project: 'Eland reference'
toc:
- file: index.md
- file: installation.md
- file: dataframes.md
- file: machine-learning.md

View File

@ -1,5 +1,7 @@
elasticsearch>=7.0.5
pandas==0.25.3
matplotlib
nbval
sphinx==5.3.0
pytest>=5.2.1
git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
numpydoc>=0.9.0
nbsphinx
furo

View File

@ -1,19 +1,16 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Copyright 2019 Elasticsearch BV
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Configuration file for the Sphinx documentation builder.
#
@ -23,13 +20,12 @@
# -- Path setup --------------------------------------------------------------
import datetime
import os
import sys
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath("../sphinxext"))
sys.path.extend(
@ -41,8 +37,8 @@ sys.path.extend(
# -- Project information -----------------------------------------------------
project = "eland"
copyright = f"{datetime.date.today().year}, Elasticsearch BV"
project = 'eland'
copyright = '2019, Elasticsearch B.V.'
# The full version, including alpha/beta/rc tags
import eland
@ -57,16 +53,16 @@ release = version
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
'sphinx.ext.autodoc',
"sphinx.ext.doctest",
"sphinx.ext.extlinks",
'numpydoc',
"matplotlib.sphinxext.plot_directive",
"sphinx.ext.todo",
"nbsphinx",
]
doctest_global_setup = """
doctest_global_setup = '''
try:
import eland as ed
except ImportError:
@ -75,21 +71,13 @@ try:
import pandas as pd
except ImportError:
pd = None
"""
'''
extlinks = {
"pandas_api_docs": (
"https://pandas.pydata.org/pandas-docs/stable/reference/api/%s.html",
"",
),
"pandas_user_guide": (
"https://pandas.pydata.org/pandas-docs/stable/user_guide/%s.html",
"Pandas User Guide/",
),
"es_api_docs": (
"https://www.elastic.co/guide/en/elasticsearch/reference/current/%s.html",
"",
),
'pandas_api_docs': ('https://pandas.pydata.org/pandas-docs/version/0.25.3/reference/api/%s.html', ''),
'pandas_user_guide': (
'https://pandas.pydata.org/pandas-docs/version/0.25.3/user_guide/%s.html', 'Pandas User Guide/'),
'es_api_docs': ('https://www.elastic.co/guide/en/elasticsearch/reference/current/%s.html', '')
}
numpydoc_attributes_as_param_list = False
@ -104,19 +92,20 @@ plot_pre_code = """import numpy as np
import eland as ed"""
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["**.ipynb_checkpoints"]
exclude_patterns = ['**.ipynb_checkpoints']
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = "furo"
#
# html_theme = 'sphinx_rtd_theme'
html_theme = "pandas_sphinx_theme"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
@ -126,4 +115,4 @@ html_theme = "furo"
html_logo = "logo/eland.png"
html_favicon = "logo/eland_favicon.png"
master_doc = "index"
master_doc = 'index'

View File

@ -1,5 +1,5 @@
=====================
Contributing to Eland
Contributing to eland
=====================
Eland is an open source project and we love to receive contributions
@ -57,7 +57,7 @@ approach before writing too much code.
Note that it is unlikely the project will merge refactors for the sake
of refactoring. These types of pull requests have a high cost to
maintainers in reviewing and testing with little to no tangible benefit.
This especially includes changes generated by tools.
This especially includes changes generated by tools.
The process for contributing to any of the `Elastic
repositories <https://github.com/elastic/>`__ is similar. Details for
@ -67,8 +67,8 @@ Fork and clone the repository
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You will need to fork the main eland code or documentation repository
and clone it to your local machine. See `Github fork a repo
page <https://docs.github.com/en/free-pro-team@latest/github/getting-started-with-github/fork-a-repo>`__ - for help.
and clone it to your local machine. See `github help
page <https://help.github.com/articles/fork-a-repo>`__ for help.
Further instructions for specific projects are given below.
@ -77,17 +77,10 @@ Submitting your changes
Once your changes and tests are ready to submit for review:
1. Run the linter and test suite to ensure your changes do not break the existing code:
(TODO Add link to the testing document)
1. Test your changes
.. code-block:: bash
# Run Auto-format, lint, mypy type checker for your changes
$ nox -s format
# Run the test suite
$ pytest --doctest-modules eland/ tests/
$ pytest --nbval tests/notebook/
Run the test suite to make sure that nothing is broken (TODO add link
to testing doc).
2. Sign the Contributor License Agreement
@ -101,7 +94,7 @@ Once your changes and tests are ready to submit for review:
3. Rebase your changes
Update your local repository with the most recent code from the main
eland repository, and rebase your branch on top of the latest main
eland repository, and rebase your branch on top of the latest master
branch. We prefer your initial changes to be squashed into a single
commit. Later, if we ask you to make changes, add them as separate
commits. This makes them easier to review. As a final step before
@ -112,7 +105,7 @@ Once your changes and tests are ready to submit for review:
Push your local changes to your forked copy of the repository and
`submit a pull
request <https://docs.github.com/en/free-pro-team@latest/github/collaborating-with-issues-and-pull-requests/proposing-changes-to-your-work-with-pull-requests>`__. In
request <https://help.github.com/articles/using-pull-requests>`__. In
the pull request, choose a title which sums up the changes that you
have made, and in the body provide more details about what your
changes do. Also mention the number of the issue where discussion has
@ -120,14 +113,14 @@ Once your changes and tests are ready to submit for review:
Then sit back and wait. There will probably be discussion about the pull
request and, if any changes are needed, we would love to work with you
to get your pull request merged into `eland`.
to get your pull request merged into eland.
Please adhere to the general guideline that you should never force push
to a publicly shared branch. Once you have opened your pull request, you
should consider your branch publicly shared. Instead of force pushing
you can just add incremental commits; this is generally easier on your
reviewers. If you need to pick up changes from main, you can merge
main into your branch. A reviewer might ask you to rebase a
reviewers. If you need to pick up changes from master, you can merge
master into your branch. A reviewer might ask you to rebase a
long-running pull request in which case force pushing is okay for that
request. Note that squashing at the end of the review process should
also not be done, that can be done when the pull request is `integrated
@ -146,78 +139,28 @@ Configuring PyCharm And Running Tests
(All commands should be run from module root)
- Create a new project via \'Check out from Version
Control\'-\>\'Git\' on the \"Welcome to PyCharm\" page <or other>
- Enter the URL to your fork of eland
<e.g. `git@github.com:stevedodson/eland.git`>
- Click \'Yes\' for \'Checkout from Version Control\'
- Configure PyCharm environment:
In \'Preferences\' configure a \'Project: eland\'-\>\'Project Interpreter\'. Generally, we recommend creating a virtual environment.
(TODO link to installing for python version support)
- In \'Preferences\' set \'Tools\'-\>\'Python Integrated
Tools\'-\>\'Default test runner\' to `pytest`
- In \'Preferences\' set \'Tools\'-\>\'Python Integrated
Tools\'-\>\'Docstring format\' to `numpy`
- Install development requirements. Open terminal in virtual environment and run
.. code-block:: bash
pip install -r requirements-dev.txt
- Setup Elasticsearch instance with docker
.. code-block:: bash
ELASTICSEARCH_VERSION=elasticsearch:7.x-SNAPSHOT .ci/run-elasticsearch.sh`
- Check `http://localhost:9200` to verify if ElasticSearch Instance is running.
- Install local `eland` module <required to execute notebook tests>
.. code-block:: bash
python setup.py install
- To setup test environment -*note this modifies Elasticsearch indices* run
.. code-block:: bash
python -m tests.setup_tests
- To validate installation, open python console and run
.. code-block:: bash
import eland as ed
ed_df = ed.DataFrame('localhost', 'flights')
- To run the automatic formatter and check for lint issues
.. code-block:: bash
run `nox -s format`
- To test specific versions of Python run
.. code-block:: bash
nox -s test-3.12
- Create a new project via 'Check out from Version Control'->'Git'
on the "Welcome to PyCharm" page (or other)
- Enter the URL to your fork of eland
(e.g. ``git@github.com:stevedodson/eland.git``)
- Click 'Yes' for 'Checkout from Version Control'
- Configure PyCharm environment:
- In 'Preferences' configure a 'Project: eland'->'Project Interpreter'.
Generally, we recommend creating a virtual environment (TODO link to
installing for python version support).
- In 'Preferences' set 'Tools'->'Python Integrated Tools'->'Default
test runner' to ``pytest``
- In 'Preferences' set 'Tools'->'Python Integrated Tools'->'Docstring
format' to ``numpy``
- Install development requirements. Open terminal in virtual
environment and run ``pip install -r requirements-dev.txt``
- Setup Elasticsearch instance (assumes ``localhost:9200``), and run
``python -m eland.tests.setup_tests`` to setup test environment -
*note this modifies Elasticsearch indices*
- Run ``pytest --doctest-modules`` to validate install
Documentation
~~~~~~~~~~~~~
- `Install pandoc on your system <https://pandoc.org/installing.html>`__ -
.. code-block:: bash
# For Ubuntu or Debian
sudo apt-get install -y pandoc
- Install documentation requirements. Open terminal in virtual environment and run
.. code-block:: bash
pip install -r docs/requirements-docs.txt
- To verify/generate documentation run
.. code-block:: bash
nox -s docs
- Install documentation requirements. Open terminal in virtual
environment and run ``pip install -r requirements-dev.txt``

View File

@ -8,4 +8,3 @@ Development
:maxdepth: 2
contributing.rst
implementation.rst

File diff suppressed because one or more lines are too long

View File

@ -8,5 +8,4 @@ Examples
:maxdepth: 3
demo_notebook
introduction_to_eland_webinar
online_retail_analysis

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,882 @@
.. _implementation/dataframe_supported:
===============================
pandas.DataFrame supported APIs
===============================
The following table lists both implemented and not implemented methods. If you need
an operation that is listed as not implemented, feel free to open an issue at
http://github.com/elastic/eland, or give a thumbs up to already created issues. Contributions are
also welcome!
The following table is structured as follows: the first column contains the method name,
the second column a usage count taken from the prioritised list linked below, and the third
column a flag for whether or not there is an implementation in eland for the method: ``y`` stands for yes, ``n`` stands for no.
https://github.com/adgirish/kaggleScape/blob/master/results/annotResults.csv represents a prioritised list.
+-------------------------+-------+------------------------------------------------+
| Method | Count | Notes |
+-------------------------+-------+------------------------------------------------+
| pd.read_csv | 1422 | y |
+-------------------------+-------+------------------------------------------------+
| pd.DataFrame | 886 | y |
+-------------------------+-------+------------------------------------------------+
| df.append | 792 | n |
+-------------------------+-------+------------------------------------------------+
| df.mean | 783 | y |
+-------------------------+-------+------------------------------------------------+
| df.head | 783 | y |
+-------------------------+-------+------------------------------------------------+
| df.drop | 761 | y |
+-------------------------+-------+------------------------------------------------+
| df.sum | 755 | y |
+-------------------------+-------+------------------------------------------------+
| df.to_csv | 693 | y |
+-------------------------+-------+------------------------------------------------+
| df.get | 669 | y |
+-------------------------+-------+------------------------------------------------+
| df.mode | 653 | n |
+-------------------------+-------+------------------------------------------------+
| df.astype | 649 | n |
+-------------------------+-------+------------------------------------------------+
| df.sub | 637 | n |
+-------------------------+-------+------------------------------------------------+
| pd.concat | 582 | n |
+-------------------------+-------+------------------------------------------------+
| df.apply | 577 | n |
+-------------------------+-------+------------------------------------------------+
| df.groupby | 557 | n |
+-------------------------+-------+------------------------------------------------+
| df.join | 544 | n |
+-------------------------+-------+------------------------------------------------+
| df.fillna | 543 | n |
+-------------------------+-------+------------------------------------------------+
| df.max | 508 | y |
+-------------------------+-------+------------------------------------------------+
| df.reset_index | 434 | n |
+-------------------------+-------+------------------------------------------------+
| pd.unique | 433 | n |
+-------------------------+-------+------------------------------------------------+
| df.le | 405 | n |
+-------------------------+-------+------------------------------------------------+
| df.count | 399 | y |
+-------------------------+-------+------------------------------------------------+
| pd.value_counts | 397 | y |
+-------------------------+-------+------------------------------------------------+
| df.sort_values | 390 | n |
+-------------------------+-------+------------------------------------------------+
| df.transform | 387 | n |
+-------------------------+-------+------------------------------------------------+
| df.merge | 376 | n |
+-------------------------+-------+------------------------------------------------+
| df.add | 346 | n |
+-------------------------+-------+------------------------------------------------+
| df.isnull | 338 | n |
+-------------------------+-------+------------------------------------------------+
| df.min | 321 | y |
+-------------------------+-------+------------------------------------------------+
| df.copy | 314 | n |
+-------------------------+-------+------------------------------------------------+
| df.replace | 300 | n |
+-------------------------+-------+------------------------------------------------+
| df.std | 261 | n |
+-------------------------+-------+------------------------------------------------+
| df.hist | 246 | y |
+-------------------------+-------+------------------------------------------------+
| df.filter | 234 | n |
+-------------------------+-------+------------------------------------------------+
| df.describe | 220 | y |
+-------------------------+-------+------------------------------------------------+
| df.ne | 218 | n |
+-------------------------+-------+------------------------------------------------+
| df.corr | 217 | n |
+-------------------------+-------+------------------------------------------------+
| df.median | 217 | n |
+-------------------------+-------+------------------------------------------------+
| df.items | 212 | n |
+-------------------------+-------+------------------------------------------------+
| pd.to_datetime | 204 | n |
+-------------------------+-------+------------------------------------------------+
| df.isin | 203 | n |
+-------------------------+-------+------------------------------------------------+
| df.dropna | 195 | n |
+-------------------------+-------+------------------------------------------------+
| pd.get_dummies | 190 | n |
+-------------------------+-------+------------------------------------------------+
| df.rename | 185 | n |
+-------------------------+-------+------------------------------------------------+
| df.info | 180 | y |
+-------------------------+-------+------------------------------------------------+
| df.set_index | 166 | n |
+-------------------------+-------+------------------------------------------------+
| df.keys | 159 | y |
+-------------------------+-------+------------------------------------------------+
| df.sample | 155 | n |
+-------------------------+-------+------------------------------------------------+
| df.agg | 140 | y |
+-------------------------+-------+------------------------------------------------+
| df.where | 138 | n |
+-------------------------+-------+------------------------------------------------+
| df.boxplot | 134 | n |
+-------------------------+-------+------------------------------------------------+
| df.clip | 116 | n |
+-------------------------+-------+------------------------------------------------+
| df.round | 116 | n |
+-------------------------+-------+------------------------------------------------+
| df.abs | 101 | n |
+-------------------------+-------+------------------------------------------------+
| df.stack | 97 | n |
+-------------------------+-------+------------------------------------------------+
| df.tail | 94 | y |
+-------------------------+-------+------------------------------------------------+
| df.update | 92 | n |
+-------------------------+-------+------------------------------------------------+
| df.iterrows | 90 | n |
+-------------------------+-------+------------------------------------------------+
| df.transpose | 87 | n |
+-------------------------+-------+------------------------------------------------+
| df.any | 85 | n |
+-------------------------+-------+------------------------------------------------+
| df.pipe | 80 | n |
+-------------------------+-------+------------------------------------------------+
| pd.eval | 73 | n |
+-------------------------+-------+------------------------------------------------+
| df.eval | 73 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_json | 72 | n |
+-------------------------+-------+------------------------------------------------+
| df.nunique | 70 | y |
+-------------------------+-------+------------------------------------------------+
| df.pivot | 70 | n |
+-------------------------+-------+------------------------------------------------+
| df.select | 68 | n |
+-------------------------+-------+------------------------------------------------+
| df.as_matrix | 67 | n |
+-------------------------+-------+------------------------------------------------+
| df.notnull | 66 | n |
+-------------------------+-------+------------------------------------------------+
| df.cumsum | 66 | n |
+-------------------------+-------+------------------------------------------------+
| df.prod | 64 | n |
+-------------------------+-------+------------------------------------------------+
| df.unstack | 64 | n |
+-------------------------+-------+------------------------------------------------+
| df.drop_duplicates | 63 | n |
+-------------------------+-------+------------------------------------------------+
| df.div | 63 | n |
+-------------------------+-------+------------------------------------------------+
| pd.crosstab | 59 | n |
+-------------------------+-------+------------------------------------------------+
| df.select_dtypes | 57 | y |
+-------------------------+-------+------------------------------------------------+
| df.pow | 56 | n |
+-------------------------+-------+------------------------------------------------+
| df.sort_index | 56 | n |
+-------------------------+-------+------------------------------------------------+
| df.product | 52 | n |
+-------------------------+-------+------------------------------------------------+
| df.isna | 51 | n |
+-------------------------+-------+------------------------------------------------+
| df.dot | 46 | n |
+-------------------------+-------+------------------------------------------------+
| pd.cut | 45 | n |
+-------------------------+-------+------------------------------------------------+
| df.bool | 44 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_dict | 44 | n |
+-------------------------+-------+------------------------------------------------+
| df.diff | 44 | n |
+-------------------------+-------+------------------------------------------------+
| df.insert | 44 | n |
+-------------------------+-------+------------------------------------------------+
| df.pop | 44 | n |
+-------------------------+-------+------------------------------------------------+
| df.query | 43 | y |
+-------------------------+-------+------------------------------------------------+
| df.var | 43 | n |
+-------------------------+-------+------------------------------------------------+
| df.__init__ | 41 | y |
+-------------------------+-------+------------------------------------------------+
| pd.to_numeric | 39 | n |
+-------------------------+-------+------------------------------------------------+
| df.squeeze | 39 | n |
+-------------------------+-------+------------------------------------------------+
| df.ge | 37 | n |
+-------------------------+-------+------------------------------------------------+
| df.quantile | 37 | n |
+-------------------------+-------+------------------------------------------------+
| df.reindex | 37 | n |
+-------------------------+-------+------------------------------------------------+
| df.rolling | 35 | n |
+-------------------------+-------+------------------------------------------------+
| pd.factorize | 32 | n |
+-------------------------+-------+------------------------------------------------+
| pd.melt | 31 | n |
+-------------------------+-------+------------------------------------------------+
| df.melt | 31 | n |
+-------------------------+-------+------------------------------------------------+
| df.rank | 31 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_table | 30 | n |
+-------------------------+-------+------------------------------------------------+
| pd.pivot_table | 30 | n |
+-------------------------+-------+------------------------------------------------+
| df.idxmax | 30 | n |
+-------------------------+-------+------------------------------------------------+
| pd.test | 29 | n |
+-------------------------+-------+------------------------------------------------+
| df.iteritems | 29 | n |
+-------------------------+-------+------------------------------------------------+
| df.shift | 28 | n |
+-------------------------+-------+------------------------------------------------+
| df.mul | 28 | n |
+-------------------------+-------+------------------------------------------------+
| pd.qcut | 25 | n |
+-------------------------+-------+------------------------------------------------+
| df.set_value | 25 | n |
+-------------------------+-------+------------------------------------------------+
| df.all | 24 | n |
+-------------------------+-------+------------------------------------------------+
| df.skew | 24 | n |
+-------------------------+-------+------------------------------------------------+
| df.aggregate | 23 | y |
+-------------------------+-------+------------------------------------------------+
| pd.match | 22 | n |
+-------------------------+-------+------------------------------------------------+
| df.nlargest | 22 | n |
+-------------------------+-------+------------------------------------------------+
| df.multiply | 21 | n |
+-------------------------+-------+------------------------------------------------+
| df.set_axis | 19 | n |
+-------------------------+-------+------------------------------------------------+
| df.eq | 18 | n |
+-------------------------+-------+------------------------------------------------+
| df.resample | 18 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_sql | 17 | n |
+-------------------------+-------+------------------------------------------------+
| df.duplicated | 16 | n |
+-------------------------+-------+------------------------------------------------+
| pd.date_range | 16 | n |
+-------------------------+-------+------------------------------------------------+
| df.interpolate | 15 | n |
+-------------------------+-------+------------------------------------------------+
| df.memory_usage | 15 | n |
+-------------------------+-------+------------------------------------------------+
| df.divide | 14 | n |
+-------------------------+-------+------------------------------------------------+
| df.cov | 13 | n |
+-------------------------+-------+------------------------------------------------+
| df.assign | 12 | n |
+-------------------------+-------+------------------------------------------------+
| df.subtract | 12 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_pickle | 11 | n |
+-------------------------+-------+------------------------------------------------+
| df.applymap | 11 | n |
+-------------------------+-------+------------------------------------------------+
| df.first | 11 | n |
+-------------------------+-------+------------------------------------------------+
| df.kurt | 10 | n |
+-------------------------+-------+------------------------------------------------+
| df.truncate | 10 | n |
+-------------------------+-------+------------------------------------------------+
| df.get_value | 9 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_hdf | 9 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_html | 9 | y |
+-------------------------+-------+------------------------------------------------+
| pd.read_sql_query | 9 | n |
+-------------------------+-------+------------------------------------------------+
| df.take | 8 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_pickle | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.itertuples | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_string | 7 | y |
+-------------------------+-------+------------------------------------------------+
| df.last | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.sem | 7 | n |
+-------------------------+-------+------------------------------------------------+
| pd.to_pickle | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_json | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.idxmin | 7 | n |
+-------------------------+-------+------------------------------------------------+
| df.xs | 6 | n |
+-------------------------+-------+------------------------------------------------+
| df.combine | 6 | n |
+-------------------------+-------+------------------------------------------------+
| pd.rolling_mean | 6 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_period | 6 | n |
+-------------------------+-------+------------------------------------------------+
| df.convert_objects | 5 | n |
+-------------------------+-------+------------------------------------------------+
| df.mask | 4 | n |
+-------------------------+-------+------------------------------------------------+
| df.pct_change | 4 | n |
+-------------------------+-------+------------------------------------------------+
| df.add_prefix | 4 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_excel | 4 | n |
+-------------------------+-------+------------------------------------------------+
| pd.rolling_std | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_records | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.corrwith | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.swapaxes | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.__iter__ | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_sql | 3 | n |
+-------------------------+-------+------------------------------------------------+
| pd.read_feather | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_feather | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.__len__ | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.kurtosis | 3 | n |
+-------------------------+-------+------------------------------------------------+
| df.mod | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_sparse | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.get_values | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.__eq__ | 2 | n |
+-------------------------+-------+------------------------------------------------+
| pd.bdate_range | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.get_dtype_counts | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.combine_first | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df._get_numeric_data | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.nsmallest | 2 | n |
+-------------------------+-------+------------------------------------------------+
| pd.scatter_matrix | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.rename_axis | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.__setstate__ | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.cumprod | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.__getstate__ | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.equals | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.__getitem__ | 2 | y |
+-------------------------+-------+------------------------------------------------+
| df.clip_upper | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.floordiv | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_excel | 2 | n |
+-------------------------+-------+------------------------------------------------+
| df.reindex_axis | 1 | n |
+-------------------------+-------+------------------------------------------------+
| pd.to_timedelta | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.ewm | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.tz_localize | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.tz_convert | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_hdf | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.lookup | 1 | n |
+-------------------------+-------+------------------------------------------------+
| pd.merge_ordered | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.swaplevel | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.first_valid_index | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.lt | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.add_suffix | 1 | n |
+-------------------------+-------+------------------------------------------------+
| pd.rolling_median | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.to_dense | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.mad | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.align | 1 | n |
+-------------------------+-------+------------------------------------------------+
| df.__copy__ | 1 | n |
+-------------------------+-------+------------------------------------------------+
| pd.set_eng_float_format | 1 | n |
+-------------------------+-------+------------------------------------------------+
+---------------------------+---------------------------------+----------------------------------------------------+
| DataFrame method | Eland Implementation? (Y/N/P/D) | Notes for Current implementation |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``T`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``abs`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``add`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``add_prefix`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``add_suffix`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``agg`` | Y | |
| ``aggregate`` | | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``align`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``all`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``any`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``append`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``apply`` | N | See ``agg`` |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``applymap`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``as_blocks`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``as_matrix`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``asfreq`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``asof`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``assign`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``astype`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``at`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``at_time`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``axes`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``between_time`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``bfill`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``blocks`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``bool`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``boxplot`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``clip`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``clip_lower`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``clip_upper`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``combine`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``combine_first`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``compound`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``consolidate`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``convert_objects`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``copy`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``corr`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``corrwith`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``count`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``cov`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``cummax`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``cummin`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``cumprod`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``cumsum`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``describe`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``diff`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``div`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``divide`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``dot`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``drop`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``drop_duplicates`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``dropna`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``dtypes`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``duplicated`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``empty`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``eq`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``equals`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``eval`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``ewm`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``expanding`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``ffill`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``fillna`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``filter`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``first`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``first_valid_index`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``floordiv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``from_csv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``from_dict`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``from_items`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``from_records`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``ftypes`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``ge`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``get`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``get_dtype_counts`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``get_ftype_counts`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``get_value`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``get_values`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``groupby`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``gt`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``head`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``hist`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``iat`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``idxmax`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``idxmin`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``iloc`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``infer_objects`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``info`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``insert`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``interpolate`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``is_copy`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``isin`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``isna`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``isnull`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``items`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``iteritems`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``iterrows`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``itertuples`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``ix`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``join`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``keys`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``kurt`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``kurtosis`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``last`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``last_valid_index`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``le`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``loc`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``lookup`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``lt`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mad`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mask`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``max`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mean`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``median`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``melt`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``memory_usage`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``merge`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``min`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mod`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mode`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``mul`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``multiply`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``ndim`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``ne`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``nlargest`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``notna`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``notnull`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``nsmallest`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``nunique`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pct_change`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pipe`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pivot`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pivot_table`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``plot`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pop`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pow`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``prod`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``product`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``quantile`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``query`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``radd`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rank`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rdiv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``reindex`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``reindex_axis`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``reindex_like`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rename`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rename_axis`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``reorder_levels`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``replace`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``resample`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``reset_index`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rfloordiv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rmod`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rmul`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rolling`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``round`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rpow`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rsub`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``rtruediv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``sample`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``select`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``select_dtypes`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``sem`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``set_axis`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``set_index`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``set_value`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``shape`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``shift`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``size`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``skew`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``slice_shift`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``sort_index`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``sort_values`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``sortlevel`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``squeeze`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``stack`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``std`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``style`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``sub`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``subtract`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``sum`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``swapaxes`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``swaplevel`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``tail`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``take`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_clipboard`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_csv`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_dense`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_dict`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_excel`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_feather`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_gbq`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_hdf`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_html`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_json`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_latex`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_msgpack`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_panel`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_parquet`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_period`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_pickle`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_records`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_sparse`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_sql`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_stata`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_string`` | Y | Default sets `max_rows=60` |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_timestamp`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``to_xarray`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``transform`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``transpose`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``truediv`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``truncate`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``tshift`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``tz_convert`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``tz_localize`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``unstack`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``update`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``values`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``var`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``where`` | N | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``xs`` | N | Deprecated in pandas |
+---------------------------+---------------------------------+----------------------------------------------------+
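
For orientation, the methods marked ``Y`` above can be exercised roughly as follows. This is a minimal sketch, not part of the change set: it assumes a locally running cluster, an index named ``flights`` with an ``AvgTicketPrice`` field (as in the Kibana sample data), and a recent eland release, since the exact ``DataFrame`` constructor arguments have changed between versions.

.. code-block:: python

    import eland as ed
    from elasticsearch import Elasticsearch

    # Wrap an existing Elasticsearch index as an eland DataFrame.
    # No documents are pulled into local memory at this point.
    es = Elasticsearch("http://localhost:9200")
    df = ed.DataFrame(es, es_index_pattern="flights")

    # Operations marked "Y" in the table are translated into
    # Elasticsearch queries and aggregations.
    print(df.shape)      # (document count, mapped field count)
    print(df.dtypes)     # dtypes derived from the index mapping
    print(df.head(5))    # fetches only the first few documents

    # describe(), sum(), mean(), min() and max() run as server-side aggregations.
    print(df[["AvgTicketPrice"]].describe())

    # query() accepts pandas-style expressions and is pushed down to Elasticsearch.
    expensive = df.query("AvgTicketPrice > 500")
    print(expensive.shape)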

View File

@ -15,9 +15,9 @@ use of powerful Elasticsearch features such as aggregations.
Pandas and 3rd Party Storage Systems
------------------------------------
Generally, integrations with `3rd party storage systems <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_
Generally, integrations with [3rd party storage systems](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html)
(SQL, Google Big Query etc.) involve accessing these systems and reading all external data into an
in-core pandas data structure. This also applies to `Apache Arrow <https://arrow.apache.org/docs/python/pandas.html>`_
in-core pandas data structure. This also applies to [Apache Arrow](https://arrow.apache.org/docs/python/pandas.html)
structures.
Whilst this provides access to data in these systems, for large datasets this can require significant
@ -58,3 +58,4 @@ the ``pandas.DataFrame`` API. This resolves some of the issues above as:
* Creating a new ``eland.DataFrame`` API gives us full flexibility in terms of implementation. However,
it does create a large amount of work which may duplicate a lot of the ``pandas`` code - for example,
printing objects - and that duplication creates a maintenance burden.
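
To make the trade-off concrete, here is a hedged, illustrative sketch of the two access patterns this file describes: reading everything into an in-core pandas structure versus deferring work to Elasticsearch through an ``eland.DataFrame``. The SQLite URL, table name, index name and field name are placeholders, not anything referenced in this change.

.. code-block:: python

    import pandas as pd
    import eland as ed

    # In-core pattern: the entire result set is read into local memory
    # before any analysis can start (the URL form requires SQLAlchemy).
    orders_pd = pd.read_sql("SELECT * FROM orders", "sqlite:///orders.db")
    print(orders_pd.memory_usage(deep=True).sum())

    # Deferred pattern: the eland DataFrame is a lightweight reference to an
    # Elasticsearch index, and reductions run server-side as aggregations.
    orders_ed = ed.DataFrame("http://localhost:9200", es_index_pattern="orders")
    print(orders_ed["total"].sum())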

View File

@ -0,0 +1,11 @@
.. _implementation:
====================
Implementation Notes
====================
.. toctree::
   :maxdepth: 2

   details.rst
   dataframe_supported.rst

docs/source/index.rst Normal file (51 lines added)
View File

@ -0,0 +1,51 @@
.. eland documentation master file, created by
.. module:: eland
****************************************************************
eland: pandas-like data analysis toolkit backed by Elasticsearch
****************************************************************
**Date**: |today| **Version**: |version|
**Useful links**:
`Source Repository <https://github.com/elastic/eland>`__ |
`Issues & Ideas <https://github.com/elastic/eland/issues>`__ |
`Q&A Support <https://discuss.elastic.co>`__ |
:mod:`eland` is an open source, Apache 2.0-licensed Elasticsearch Python client to analyse, explore and manipulate data that resides in Elasticsearch.
Where possible the package uses existing Python APIs and data structures to make it easy to switch from NumPy, pandas and scikit-learn to their Elasticsearch-powered equivalents.
In general, the data resides in Elasticsearch and not in memory, which allows eland to access large datasets stored in Elasticsearch.
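
As a quick, illustrative round trip (a sketch only: it assumes a locally running cluster and a recent eland release in which ``pandas_to_eland`` and ``eland_to_pandas`` are available):

.. code-block:: python

    import pandas as pd
    import eland as ed
    from elasticsearch import Elasticsearch

    es = Elasticsearch("http://localhost:9200")

    # Index a small pandas DataFrame into Elasticsearch ...
    pd_df = pd.DataFrame({"city": ["Paris", "Oslo"], "population": [2_148_000, 697_000]})
    ed_df = ed.pandas_to_eland(pd_df, es, "cities", es_if_exists="replace", es_refresh=True)

    # ... operate on it while the data stays in Elasticsearch ...
    print(ed_df["population"].mean())

    # ... and only materialise it locally when that is genuinely needed.
    local_df = ed.eland_to_pandas(ed_df)
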
.. toctree::
   :maxdepth: 2
   :hidden:

   reference/index
   implementation/index
   development/index
   examples/index
* :doc:`reference/index`
* :doc:`reference/io`
* :doc:`reference/general_utility_functions`
* :doc:`reference/dataframe`
* :doc:`reference/series`
* :doc:`reference/indexing`
* :doc:`implementation/index`
* :doc:`implementation/details`
* :doc:`implementation/dataframe_supported`
* :doc:`development/index`
* :doc:`development/contributing`
* :doc:`examples/index`
* :doc:`examples/demo_notebook`
* :doc:`examples/online_retail_analysis`

BIN
docs/source/logo/eland.png Normal file

New binary image (21 KiB); contents not shown.
A second new binary image (14 KiB) is also included; its path is not shown in this view.

View File

Binary image (36 KiB before and after); contents not shown.

View File

Binary image (16 KiB before and after); contents not shown.

View File

@ -0,0 +1,6 @@
eland.DataFrame.agg
===================
.. currentmodule:: eland
.. automethod:: DataFrame.agg

View File

@ -0,0 +1,6 @@
eland.DataFrame.aggregate
=========================
.. currentmodule:: eland
.. automethod:: DataFrame.aggregate

View File

@ -0,0 +1,6 @@
eland.DataFrame.columns
=======================
.. currentmodule:: eland
.. autoattribute:: DataFrame.columns

View File

@ -1,6 +1,6 @@
eland.Series.nunique
====================
eland.DataFrame.count
=====================
.. currentmodule:: eland
.. automethod:: Series.nunique
.. automethod:: DataFrame.count

View File

@ -0,0 +1,6 @@
eland.DataFrame.describe
========================
.. currentmodule:: eland
.. automethod:: DataFrame.describe

View File

@ -0,0 +1,6 @@
eland.DataFrame.drop
====================
.. currentmodule:: eland
.. automethod:: DataFrame.drop

View File

@ -0,0 +1,6 @@
eland.DataFrame.dtypes
======================
.. currentmodule:: eland
.. autoattribute:: DataFrame.dtypes

View File

@ -0,0 +1,6 @@
eland.DataFrame.empty
=====================
.. currentmodule:: eland
.. autoattribute:: DataFrame.empty

View File

@ -0,0 +1,6 @@
eland.DataFrame.get
===================
.. currentmodule:: eland
.. automethod:: DataFrame.get

View File

@ -0,0 +1,6 @@
eland.DataFrame.head
====================
.. currentmodule:: eland
.. automethod:: DataFrame.head

View File

@ -0,0 +1,8 @@
eland.DataFrame.hist
====================
.. currentmodule:: eland
.. automethod:: DataFrame.hist
.. image:: eland-DataFrame-hist-1.png

View File

@ -0,0 +1,6 @@
eland.DataFrame.index
=====================
.. currentmodule:: eland
.. autoattribute:: DataFrame.index

View File

@ -0,0 +1,6 @@
eland.DataFrame.info
====================
.. currentmodule:: eland
.. automethod:: DataFrame.info

View File

@ -0,0 +1,6 @@
eland.DataFrame.info_es
=======================
.. currentmodule:: eland
.. automethod:: DataFrame.info_es

View File

@ -0,0 +1,6 @@
eland.DataFrame.keys
====================
.. currentmodule:: eland
.. automethod:: DataFrame.keys

View File

@ -0,0 +1,6 @@
eland.DataFrame.max
===================
.. currentmodule:: eland
.. automethod:: DataFrame.max

View File

@ -0,0 +1,6 @@
eland.DataFrame.mean
====================
.. currentmodule:: eland
.. automethod:: DataFrame.mean

View File

@ -0,0 +1,6 @@
eland.DataFrame.min
===================
.. currentmodule:: eland
.. automethod:: DataFrame.min

View File

@ -0,0 +1,6 @@
eland.DataFrame.nunique
=======================
.. currentmodule:: eland
.. automethod:: DataFrame.nunique

View File

@ -0,0 +1,6 @@
eland.DataFrame.query
=====================
.. currentmodule:: eland
.. automethod:: DataFrame.query

View File

@ -0,0 +1,18 @@
eland.DataFrame
================
.. currentmodule:: eland
.. autoclass:: DataFrame
..
   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.

.. autosummary::
   :toctree:

   DataFrame.abs
   DataFrame.add

View File

@ -0,0 +1,6 @@
eland.DataFrame.select_dtypes
=============================
.. currentmodule:: eland
.. automethod:: DataFrame.select_dtypes

View File

@ -0,0 +1,6 @@
eland.DataFrame.shape
=====================
.. currentmodule:: eland
.. autoattribute:: DataFrame.shape

View File

@ -0,0 +1,6 @@
eland.DataFrame.sum
===================
.. currentmodule:: eland
.. automethod:: DataFrame.sum

View File

@ -0,0 +1,6 @@
eland.DataFrame.tail
====================
.. currentmodule:: eland
.. automethod:: DataFrame.tail

View File

@ -0,0 +1,6 @@
eland.DataFrame.to_csv
======================
.. currentmodule:: eland
.. automethod:: DataFrame.to_csv

View File

@ -0,0 +1,6 @@
eland.DataFrame.to_html
=======================
.. currentmodule:: eland
.. automethod:: DataFrame.to_html

View File

@ -0,0 +1,6 @@
eland.DataFrame.to_numpy
========================
.. currentmodule:: eland
.. automethod:: DataFrame.to_numpy

View File

@ -0,0 +1,6 @@
eland.DataFrame.to_string
=========================
.. currentmodule:: eland
.. automethod:: DataFrame.to_string

View File

@ -0,0 +1,6 @@
eland.DataFrame.values
======================
.. currentmodule:: eland
.. autoattribute:: DataFrame.values

View File

@ -0,0 +1,6 @@
eland.Index
===========
.. currentmodule:: eland
.. autoclass:: Index

View File

@ -0,0 +1,6 @@
eland.Series.add
================
.. currentmodule:: eland
.. automethod:: Series.add

View File

@ -0,0 +1,6 @@
eland.Series.describe
=====================
.. currentmodule:: eland
.. automethod:: Series.describe

View File

@ -0,0 +1,6 @@
eland.Series.div
================
.. currentmodule:: eland
.. automethod:: Series.div

View File

@ -0,0 +1,6 @@
eland.Series.empty
==================
.. currentmodule:: eland
.. autoattribute:: Series.empty

View File

@ -0,0 +1,6 @@
eland.Series.floordiv
=====================
.. currentmodule:: eland
.. automethod:: Series.floordiv

View File

@ -0,0 +1,6 @@
eland.Series.head
=================
.. currentmodule:: eland
.. automethod:: Series.head

View File

@ -0,0 +1,8 @@
eland.Series.hist
====================
.. currentmodule:: eland
.. automethod:: Series.hist
.. image:: eland-Series-hist-2.png

View File

@ -0,0 +1,6 @@
eland.Series.index
==================
.. currentmodule:: eland
.. autoattribute:: Series.index

View File

@ -0,0 +1,6 @@
eland.Series.info_es
====================
.. currentmodule:: eland
.. automethod:: Series.info_es

View File

@ -0,0 +1,6 @@
eland.Series.max
================
.. currentmodule:: eland
.. automethod:: Series.max

View File

@ -0,0 +1,6 @@
eland.Series.mean
=================
.. currentmodule:: eland
.. automethod:: Series.mean

Some files were not shown because too many files have changed in this diff.