Skip to content

Commit

Permalink
Releases v0.12.0 (#251)
Browse files (browse the repository at this point in the history)
  • Loading branch information
wjsi authored Oct 3, 2024
1 parent ea82bb7 commit 8b5d1db
Show file tree
Hide file tree
Showing 298 changed files with 18,891 additions and 13,259 deletions.
2 changes: 1 addition & 1 deletion License
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ BSD 3-Clause
BSD 2-Clause
------------

- python-tblib:1.3.2
- python-tblib:3.0.0


MIT License
Expand Down
1 change: 0 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
recursive-include odps/static *.*
recursive-include odps/internal/static *.*
prune odps/static/ui/node_modules
include requirements.txt
global-include odps/**/*.yml
Expand Down
13 changes: 10 additions & 3 deletions benchmarks/perf_storage_api_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@

import logging
import sys
import time
import threading
import time

import pytest

from odps.apis.storage_api.conftest import storage_api_client # noqa: F401

if sys.version_info[0] == 3:
from odps.apis.storage_api import *
else:
Expand Down Expand Up @@ -87,7 +88,10 @@ def test_read_thread(storage_api_client):
global global_total_record
read_performance_threads = []
for i in range(0, thread_num):
read_performance_thread = threading.Thread(target=read_performance, args=[storage_api_client,])
read_performance_thread = threading.Thread(
target=read_performance,
args=[storage_api_client],
)
read_performance_threads.append(read_performance_thread)

start = time.time()
Expand All @@ -104,7 +108,10 @@ def test_read_thread(storage_api_client):
time.sleep(1)
now = time.time()
now_count = global_total_record
logger.info("index: %d, read, %f records per second" % (count, (now_count - start_count) / (now - start)))
logger.info(
"index: %d, read, %f records per second"
% (count, (now_count - start_count) / (now - start))
)

if judge and cal_count < 5:
cal_total_count += (now_count - start_count) / (now - start)
Expand Down
57 changes: 35 additions & 22 deletions benchmarks/perf_tabletunnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# limitations under the License.

from __future__ import print_function

import cProfile
import json
import os
Expand All @@ -26,12 +27,14 @@

if bool(json.loads(os.getenv("FORCE_PY", "0"))):
from odps import options

options.force_py = True

from datetime import datetime

from odps.compat import Decimal
from odps.conftest import odps, tunnel # noqa: F401
from odps.models import TableSchema
from datetime import datetime

# remember to reset False before committing
ENABLE_PROFILE = bool(json.loads(os.getenv("ENABLE_PROFILE", "0")))
Expand All @@ -40,14 +43,16 @@
COMPRESS_DATA = True
BUFFER_SIZE = 1024 * 1024
DATA_AMOUNT = 100000
STRING_LITERAL = "Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
STRING_LITERAL = (
"Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
)
NUMERIC_ONLY = bool(json.loads(os.getenv("NUMERIC_ONLY", "0")))


@pytest.fixture
def schema():
fields = ['a', 'b', 'c', 'd', 'e', 'f']
types = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
fields = ["a", "b", "c", "d", "e", "f"]
types = ["bigint", "double", "datetime", "boolean", "string", "decimal"]
return TableSchema.from_lists(fields, types)


Expand All @@ -61,52 +66,54 @@ def profiled():
finally:
if ENABLE_PROFILE:
if DUMP_PROFILE:
pr.dump_stats('profile.out')
pr.dump_stats("profile.out")
p = Stats(pr)
p.strip_dirs()
p.sort_stats('time')
p.sort_stats("time")
p.print_stats(40)
p.print_callees('types.py:846\(validate_value', 20)
p.print_callees('types.py:828\(_validate_primitive_value', 20)
p.print_callees('tabletunnel.py:185\(write', 20)
p.print_callees("types.py:846\(validate_value", 20)
p.print_callees("types.py:828\(_validate_primitive_value", 20)
p.print_callees("tabletunnel.py:185\(write", 20)


def test_write(odps, schema, tunnel):
table_name = 'pyodps_test_tunnel_write_performance'
table_name = "pyodps_test_tunnel_write_performance"
odps.create_table(table_name, schema, if_not_exists=True)
ss = tunnel.create_upload_session(table_name)
r = ss.new_record()

start = time.time()
with ss.open_record_writer(0) as writer, profiled():
for i in range(DATA_AMOUNT):
r[0] = 2**63-1
r[0] = 2**63 - 1
r[1] = 0.0001
r[2] = datetime(2015, 11, 11) if not NUMERIC_ONLY else None
r[3] = True
r[4] = STRING_LITERAL if not NUMERIC_ONLY else None
r[5] = Decimal('3.15') if not NUMERIC_ONLY else None
r[5] = Decimal("3.15") if not NUMERIC_ONLY else None
writer.write(r)
n_bytes = writer.n_bytes
print(n_bytes, 'bytes', float(n_bytes) / 1024 / 1024 / (time.time() - start), 'MiB/s')
print(
n_bytes, "bytes", float(n_bytes) / 1024 / 1024 / (time.time() - start), "MiB/s"
)
ss.commit([0])
odps.delete_table(table_name, if_exists=True)


def test_read(odps, schema, tunnel):
table_name = 'pyodps_test_tunnel_read_performance'
table_name = "pyodps_test_tunnel_read_performance"
odps.delete_table(table_name, if_exists=True)
t = odps.create_table(table_name, schema)

def gen_data():
for i in range(DATA_AMOUNT):
r = t.new_record()
r[0] = 2 ** 63 - 1
r[0] = 2**63 - 1
r[1] = 0.0001
r[2] = datetime(2015, 11, 11) if not NUMERIC_ONLY else None
r[3] = True
r[4] = STRING_LITERAL if not NUMERIC_ONLY else None
r[5] = Decimal('3.15') if not NUMERIC_ONLY else None
r[5] = Decimal("3.15") if not NUMERIC_ONLY else None
yield r

odps.write_table(t, gen_data())
Expand All @@ -119,28 +126,34 @@ def gen_data():
for _ in reader:
cnt += 1
n_bytes = reader.n_bytes
print(n_bytes, 'bytes', float(n_bytes) / 1024 / 1024 / (time.time() - start), 'MiB/s')
print(
n_bytes, "bytes", float(n_bytes) / 1024 / 1024 / (time.time() - start), "MiB/s"
)
assert DATA_AMOUNT == cnt
odps.delete_table(table_name, if_exists=True)


def test_buffered_write(odps, schema, tunnel):
table_name = 'test_tunnel_bufferred_write'
table_name = "test_tunnel_bufferred_write"
odps.create_table(table_name, schema, if_not_exists=True)
ss = tunnel.create_upload_session(table_name)
r = ss.new_record()

start = time.time()
with ss.open_record_writer(buffer_size=BUFFER_SIZE, compress=COMPRESS_DATA) as writer:
with ss.open_record_writer(
buffer_size=BUFFER_SIZE, compress=COMPRESS_DATA
) as writer:
for i in range(DATA_AMOUNT):
r[0] = 2**63-1
r[0] = 2**63 - 1
r[1] = 0.0001
r[2] = datetime(2015, 11, 11) if not NUMERIC_ONLY else None
r[3] = True
r[4] = STRING_LITERAL if not NUMERIC_ONLY else None
r[5] = Decimal('3.15') if not NUMERIC_ONLY else None
r[5] = Decimal("3.15") if not NUMERIC_ONLY else None
writer.write(r)
n_bytes = writer.n_bytes
print(n_bytes, 'bytes', float(n_bytes) / 1024 / 1024 / (time.time() - start), 'MiB/s')
print(
n_bytes, "bytes", float(n_bytes) / 1024 / 1024 / (time.time() - start), "MiB/s"
)
ss.commit(writer.get_blocks_written())
odps.delete_table(table_name, if_exists=True)
28 changes: 16 additions & 12 deletions benchmarks/perf_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,60 +21,64 @@

import pytest

from odps.models import TableSchema, Record
from odps.models import Record, TableSchema

COMPRESS_DATA = True
BUFFER_SIZE = 1024 * 1024
DATA_AMOUNT = 100000
STRING_LITERAL = "Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
STRING_LITERAL = (
"Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
)


@pytest.fixture
def schema():
pr = cProfile.Profile()
pr.enable()
fields = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
types = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
fields = ["bigint", "double", "datetime", "boolean", "string", "decimal"]
types = ["bigint", "double", "datetime", "boolean", "string", "decimal"]
try:
yield TableSchema.from_lists(fields, types)
schema = TableSchema.from_lists(fields, types)
schema.build_snapshot()
yield schema
finally:
p = Stats(pr)
p.strip_dirs()
p.sort_stats('cumtime')
p.sort_stats("cumtime")
p.print_stats(40)


def test_set_record_field_bigint(schema):
r = Record(schema=schema)
for i in range(10**6):
r['bigint'] = 2**63-1
r["bigint"] = 2**63 - 1


def test_set_record_field_double(schema):
r = Record(schema=schema)
for i in range(10**6):
r['double'] = 0.0001
r["double"] = 0.0001


def test_set_record_field_boolean(schema):
r = Record(schema=schema)
for i in range(10**6):
r['boolean'] = False
r["boolean"] = False


def test_set_record_field_string(schema):
r = Record(schema=schema)
for i in range(10**6):
r['string'] = STRING_LITERAL
r["string"] = STRING_LITERAL


def test_write_set_record_field_datetime(schema):
r = Record(schema=schema)
for i in range(10**6):
r['datetime'] = datetime(2016, 1, 1)
r["datetime"] = datetime(2016, 1, 1)


def test_set_record_field_decimal(schema):
r = Record(schema=schema)
for i in range(10**6):
r['decimal'] = Decimal('1.111111')
r["decimal"] = Decimal("1.111111")
Loading

0 comments on commit 8b5d1db

Please sign in to comment.