Skip to content

Commit

Permalink
perf: add/change scripts for tests (datahub-project#3840)
Browse files Browse the repository at this point in the history
  • Loading branch information
anshbansal authored Jan 17, 2022
1 parent 7d986ec commit 38e27a8
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 4 deletions.
15 changes: 15 additions & 0 deletions perf-test/locustfiles/get_entities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import random

from locust import HttpUser, constant, task


class GetEntityUser(HttpUser):
    """Locust user that GETs random test dataset entities by URN.

    Reads back the synthetic BigQuery datasets that the ingest perf
    scripts create (IDs 1..100000).
    """

    # One second pause between consecutive tasks per simulated user.
    wait_time = constant(1)

    @task
    def entities(self):
        """Fetch one random dataset entity."""
        # `dataset_id` instead of `id` so we don't shadow the builtin.
        dataset_id = random.randint(1, 100000)
        # Group every URN lookup under one stats name so locust's report
        # doesn't explode into 100k distinct entries.
        self.client.get(
            f"/entities/urn:li:dataset:(urn:li:dataPlatform:bigquery,test_dataset_{dataset_id},PROD)",
            name="/entities?[urn]",
        )
4 changes: 0 additions & 4 deletions perf-test/locustfiles/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
class IngestUser(HttpUser):
wait_time = constant(1)

@task
def config(self):
self.client.get("/config")

@task
def ingest(self):
proposed_snapshot = self._build_snapshot(random.randint(1, 100000))
Expand Down
69 changes: 69 additions & 0 deletions perf-test/locustfiles/ingest_small.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import json
import random

from datahub.emitter.serialization_helper import pre_json_transform
from datahub.metadata.com.linkedin.pegasus2avro.common import (
BrowsePaths,
Owner,
Ownership,
OwnershipType,
)
from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProperties
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
from locust import HttpUser, constant, task


class IngestUser(HttpUser):
    """
    Same as the ingest test except we draw from a small ID pool (1..100),
    so most requests hit the update-existing-aspect code path.
    """

    # One second pause between consecutive tasks per simulated user.
    wait_time = constant(1)

    @task
    def ingest(self):
        """POST one dataset snapshot for a random ID in [1, 100]."""
        proposed_snapshot = self._build_snapshot(random.randint(1, 100))
        snapshot_fqn = (
            f"com.linkedin.metadata.snapshot.{proposed_snapshot.RECORD_SCHEMA.name}"
        )
        self.client.post(
            "/entities?action=ingest",
            json.dumps(
                {
                    "entity": {
                        "value": {
                            snapshot_fqn: pre_json_transform(proposed_snapshot.to_obj())
                        }
                    }
                }
            ),
        )

    def _build_snapshot(self, id: int):
        # Assemble a DatasetSnapshot with properties, ownership and browse paths.
        urn = self._build_urn(id)
        return DatasetSnapshot(
            urn,
            [
                self._build_properties(),
                self._build_ownership(id),
                self._build_browsepaths(id),
            ],
        )

    def _build_urn(self, id: int):
        # Synthetic BigQuery dataset URN keyed by the numeric test ID.
        return f"urn:li:dataset:(urn:li:dataPlatform:bigquery,test_dataset_{id},PROD)"

    def _build_properties(self):
        # Static description aspect shared by every test dataset.
        return DatasetProperties(description="This is a great dataset")

    def _build_browsepaths(self, id: int):
        return BrowsePaths([f"/perf/testing/path/{id}"])

    def _build_ownership(self, id: int):
        # One per-dataset owner plus a shared "common" owner.
        return Ownership(
            [
                Owner(f"urn:li:corpuser:test_{id}", OwnershipType.DATAOWNER),
                Owner("urn:li:corpuser:common", OwnershipType.DATAOWNER),
            ]
        )
84 changes: 84 additions & 0 deletions perf-test/scripts/ingest_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import json
from multiprocessing.pool import ThreadPool as Pool

import requests
from datahub.emitter.serialization_helper import pre_json_transform
from datahub.metadata.com.linkedin.pegasus2avro.common import (
BrowsePaths,
Owner,
Ownership,
OwnershipType,
)
from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProperties
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot


def _build_snapshot(dataset_id: int):
    """Assemble the full DatasetSnapshot (properties, ownership, browse paths)."""
    aspects = [
        _build_properties(),
        _build_ownership(dataset_id),
        _build_browsepaths(dataset_id),
    ]
    return DatasetSnapshot(_build_urn(dataset_id), aspects)


def _build_urn(id: int):
return f"urn:li:dataset:(urn:li:dataPlatform:bigquery,test_dataset_{id},PROD)"


def _build_properties():
    """Build the static description aspect shared by all test datasets."""
    description = "This is a great dataset"
    return DatasetProperties(description=description)


def _build_browsepaths(dataset_id: int):
    """Build a single synthetic browse path aspect for *dataset_id*."""
    path = f"/perf/testing/path/{dataset_id}"
    return BrowsePaths([path])


def _build_ownership(dataset_id: int):
    """Ownership aspect: one per-dataset owner plus a shared common owner."""
    owners = [
        Owner(f"urn:li:corpuser:test_{dataset_id}", OwnershipType.DATAOWNER),
        Owner("urn:li:corpuser:common", OwnershipType.DATAOWNER),
    ]
    return Ownership(owners)


def main(url: str, id: int):
    """POST one dataset snapshot to the DataHub GMS ingest endpoint.

    Args:
        url: Base URL of the GMS instance (e.g. http://localhost:8080).
        id: Numeric test dataset ID used to build the snapshot.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
    """
    proposed_snapshot = _build_snapshot(id)
    snapshot_fqn = (
        f"com.linkedin.metadata.snapshot.{proposed_snapshot.RECORD_SCHEMA.name}"
    )
    response = requests.post(
        f"{url}/entities?action=ingest",
        data=json.dumps(
            {
                "entity": {
                    "value": {
                        snapshot_fqn: pre_json_transform(proposed_snapshot.to_obj())
                    }
                }
            }
        ),
    )
    # Surface server-side failures instead of silently discarding them.
    response.raise_for_status()


def worker(index: int):
    """Ingest dataset *index*, logging (not raising) any failure.

    Catches Exception broadly because `requests` raises RequestException
    subclasses — never RuntimeError, which is what this originally caught
    and so would have let every network/HTTP error escape — and one failed
    dataset should not kill the whole load run.
    """
    try:
        main("http://localhost:8080", index)
    except Exception as e:
        print(f"error with {index}: {e}")


if __name__ == "__main__":

    POOL_SIZE = 10
    DATASETS = 100000

    # Fan the per-dataset ingest calls out across a small thread pool;
    # results are discarded (worker handles its own error reporting).
    thread_pool = Pool(POOL_SIZE)
    for dataset_id in range(1, DATASETS + 1):
        thread_pool.apply_async(worker, (dataset_id,))
    thread_pool.close()
    thread_pool.join()
4 changes: 4 additions & 0 deletions perf-test/scripts/ingest_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Ramped ingest load test: four stages of increasing concurrency against a
# local GMS. -u = peak user count, -r = users spawned per second, -t = stage
# duration; --autostart/--autoquit run headless and exit when done.
# Stage 1: warm-up, 10 users for 30s.
locust -f ./locustfiles/ingest.py --autostart --autoquit 1 --only-summary -t 30s --host http://localhost:8080 -u 10 -r 1
# Stage 2: 100 users for 150s.
locust -f ./locustfiles/ingest.py --autostart --autoquit 1 --only-summary -t 150s --host http://localhost:8080 -u 100 -r 2
# Stage 3: 500 users for 300s.
locust -f ./locustfiles/ingest.py --autostart --autoquit 1 --only-summary -t 300s --host http://localhost:8080 -u 500 -r 5
# Stage 4: 1000 users for 300s.
locust -f ./locustfiles/ingest.py --autostart --autoquit 1 --only-summary -t 300s --host http://localhost:8080 -u 1000 -r 10

0 comments on commit 38e27a8

Please sign in to comment.