Releases v0.12.0 (#251)

aliyun · Oct 3, 2024 · 8b5d1db · 8b5d1db
1 parent ea82bb7
commit 8b5d1db
Show file tree

Hide file tree

Showing 298 changed files with 18,891 additions and 13,259 deletions.
diff --git a/License b/License
@@ -233,7 +233,7 @@ BSD 3-Clause
 BSD 2-Clause
 ------------
 
-- python-tblib:1.3.2
+- python-tblib:3.0.0
 
 
 MIT License

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,5 +1,4 @@
 recursive-include odps/static *.*
-recursive-include odps/internal/static *.*
 prune odps/static/ui/node_modules
 include requirements.txt
 global-include odps/**/*.yml

diff --git a/benchmarks/perf_storage_api_arrow.py b/benchmarks/perf_storage_api_arrow.py
@@ -14,12 +14,13 @@
 
 import logging
 import sys
-import time
 import threading
+import time
 
 import pytest
 
 from odps.apis.storage_api.conftest import storage_api_client  # noqa: F401
+
 if sys.version_info[0] == 3:
     from odps.apis.storage_api import *
 else:
@@ -87,7 +88,10 @@ def test_read_thread(storage_api_client):
     global global_total_record
     read_performance_threads = []
     for i in range(0, thread_num):
-        read_performance_thread = threading.Thread(target=read_performance, args=[storage_api_client,])
+        read_performance_thread = threading.Thread(
+            target=read_performance,
+            args=[storage_api_client],
+        )
         read_performance_threads.append(read_performance_thread)
 
     start = time.time()
@@ -104,7 +108,10 @@ def test_read_thread(storage_api_client):
         time.sleep(1)
         now = time.time()
         now_count = global_total_record
-        logger.info("index: %d, read, %f records per second" % (count, (now_count - start_count) / (now - start)))
+        logger.info(
+            "index: %d, read, %f records per second"
+            % (count, (now_count - start_count) / (now - start))
+        )
 
         if judge and cal_count < 5:
             cal_total_count += (now_count - start_count) / (now - start)

diff --git a/benchmarks/perf_tabletunnel.py b/benchmarks/perf_tabletunnel.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 from __future__ import print_function
+
 import cProfile
 import json
 import os
@@ -26,12 +27,14 @@
 
 if bool(json.loads(os.getenv("FORCE_PY", "0"))):
     from odps import options
+
     options.force_py = True
 
+from datetime import datetime
+
 from odps.compat import Decimal
 from odps.conftest import odps, tunnel  # noqa: F401
 from odps.models import TableSchema
-from datetime import datetime
 
 # remember to reset False before committing
 ENABLE_PROFILE = bool(json.loads(os.getenv("ENABLE_PROFILE", "0")))
@@ -40,14 +43,16 @@
 COMPRESS_DATA = True
 BUFFER_SIZE = 1024 * 1024
 DATA_AMOUNT = 100000
-STRING_LITERAL = "Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
+STRING_LITERAL = (
+    "Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
+)
 NUMERIC_ONLY = bool(json.loads(os.getenv("NUMERIC_ONLY", "0")))
 
 
 @pytest.fixture
 def schema():
-    fields = ['a', 'b', 'c', 'd', 'e', 'f']
-    types = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
+    fields = ["a", "b", "c", "d", "e", "f"]
+    types = ["bigint", "double", "datetime", "boolean", "string", "decimal"]
     return TableSchema.from_lists(fields, types)
 
 
@@ -61,52 +66,54 @@ def profiled():
     finally:
         if ENABLE_PROFILE:
             if DUMP_PROFILE:
-                pr.dump_stats('profile.out')
+                pr.dump_stats("profile.out")
             p = Stats(pr)
             p.strip_dirs()
-            p.sort_stats('time')
+            p.sort_stats("time")
             p.print_stats(40)
-            p.print_callees('types.py:846\(validate_value', 20)
-            p.print_callees('types.py:828\(_validate_primitive_value', 20)
-            p.print_callees('tabletunnel.py:185\(write', 20)
+            p.print_callees("types.py:846\(validate_value", 20)
+            p.print_callees("types.py:828\(_validate_primitive_value", 20)
+            p.print_callees("tabletunnel.py:185\(write", 20)
 
 
 def test_write(odps, schema, tunnel):
-    table_name = 'pyodps_test_tunnel_write_performance'
+    table_name = "pyodps_test_tunnel_write_performance"
     odps.create_table(table_name, schema, if_not_exists=True)
     ss = tunnel.create_upload_session(table_name)
     r = ss.new_record()
 
     start = time.time()
     with ss.open_record_writer(0) as writer, profiled():
         for i in range(DATA_AMOUNT):
-            r[0] = 2**63-1
+            r[0] = 2**63 - 1
             r[1] = 0.0001
             r[2] = datetime(2015, 11, 11) if not NUMERIC_ONLY else None
             r[3] = True
             r[4] = STRING_LITERAL if not NUMERIC_ONLY else None
-            r[5] = Decimal('3.15') if not NUMERIC_ONLY else None
+            r[5] = Decimal("3.15") if not NUMERIC_ONLY else None
             writer.write(r)
         n_bytes = writer.n_bytes
-    print(n_bytes, 'bytes', float(n_bytes) / 1024 / 1024 / (time.time() - start), 'MiB/s')
+    print(
+        n_bytes, "bytes", float(n_bytes) / 1024 / 1024 / (time.time() - start), "MiB/s"
+    )
     ss.commit([0])
     odps.delete_table(table_name, if_exists=True)
 
 
 def test_read(odps, schema, tunnel):
-    table_name = 'pyodps_test_tunnel_read_performance'
+    table_name = "pyodps_test_tunnel_read_performance"
     odps.delete_table(table_name, if_exists=True)
     t = odps.create_table(table_name, schema)
 
     def gen_data():
         for i in range(DATA_AMOUNT):
             r = t.new_record()
-            r[0] = 2 ** 63 - 1
+            r[0] = 2**63 - 1
             r[1] = 0.0001
             r[2] = datetime(2015, 11, 11) if not NUMERIC_ONLY else None
             r[3] = True
             r[4] = STRING_LITERAL if not NUMERIC_ONLY else None
-            r[5] = Decimal('3.15') if not NUMERIC_ONLY else None
+            r[5] = Decimal("3.15") if not NUMERIC_ONLY else None
             yield r
 
     odps.write_table(t, gen_data())
@@ -119,28 +126,34 @@ def gen_data():
         for _ in reader:
             cnt += 1
         n_bytes = reader.n_bytes
-    print(n_bytes, 'bytes', float(n_bytes) / 1024 / 1024 / (time.time() - start), 'MiB/s')
+    print(
+        n_bytes, "bytes", float(n_bytes) / 1024 / 1024 / (time.time() - start), "MiB/s"
+    )
     assert DATA_AMOUNT == cnt
     odps.delete_table(table_name, if_exists=True)
 
 
 def test_buffered_write(odps, schema, tunnel):
-    table_name = 'test_tunnel_bufferred_write'
+    table_name = "test_tunnel_bufferred_write"
     odps.create_table(table_name, schema, if_not_exists=True)
     ss = tunnel.create_upload_session(table_name)
     r = ss.new_record()
 
     start = time.time()
-    with ss.open_record_writer(buffer_size=BUFFER_SIZE, compress=COMPRESS_DATA) as writer:
+    with ss.open_record_writer(
+        buffer_size=BUFFER_SIZE, compress=COMPRESS_DATA
+    ) as writer:
         for i in range(DATA_AMOUNT):
-            r[0] = 2**63-1
+            r[0] = 2**63 - 1
             r[1] = 0.0001
             r[2] = datetime(2015, 11, 11) if not NUMERIC_ONLY else None
             r[3] = True
             r[4] = STRING_LITERAL if not NUMERIC_ONLY else None
-            r[5] = Decimal('3.15') if not NUMERIC_ONLY else None
+            r[5] = Decimal("3.15") if not NUMERIC_ONLY else None
             writer.write(r)
         n_bytes = writer.n_bytes
-    print(n_bytes, 'bytes', float(n_bytes) / 1024 / 1024 / (time.time() - start), 'MiB/s')
+    print(
+        n_bytes, "bytes", float(n_bytes) / 1024 / 1024 / (time.time() - start), "MiB/s"
+    )
     ss.commit(writer.get_blocks_written())
     odps.delete_table(table_name, if_exists=True)
diff --git a/benchmarks/perf_types.py b/benchmarks/perf_types.py
@@ -21,60 +21,64 @@
 
 import pytest
 
-from odps.models import TableSchema, Record
+from odps.models import Record, TableSchema
 
 COMPRESS_DATA = True
 BUFFER_SIZE = 1024 * 1024
 DATA_AMOUNT = 100000
-STRING_LITERAL = "Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
+STRING_LITERAL = (
+    "Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
+)
 
 
 @pytest.fixture
 def schema():
     pr = cProfile.Profile()
     pr.enable()
-    fields = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
-    types = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
+    fields = ["bigint", "double", "datetime", "boolean", "string", "decimal"]
+    types = ["bigint", "double", "datetime", "boolean", "string", "decimal"]
     try:
-        yield TableSchema.from_lists(fields, types)
+        schema = TableSchema.from_lists(fields, types)
+        schema.build_snapshot()
+        yield schema
     finally:
         p = Stats(pr)
         p.strip_dirs()
-        p.sort_stats('cumtime')
+        p.sort_stats("cumtime")
         p.print_stats(40)
 
 
 def test_set_record_field_bigint(schema):
     r = Record(schema=schema)
     for i in range(10**6):
-        r['bigint'] = 2**63-1
+        r["bigint"] = 2**63 - 1
 
 
 def test_set_record_field_double(schema):
     r = Record(schema=schema)
     for i in range(10**6):
-        r['double'] = 0.0001
+        r["double"] = 0.0001
 
 
 def test_set_record_field_boolean(schema):
     r = Record(schema=schema)
     for i in range(10**6):
-        r['boolean'] = False
+        r["boolean"] = False
 
 
 def test_set_record_field_string(schema):
     r = Record(schema=schema)
     for i in range(10**6):
-        r['string'] = STRING_LITERAL
+        r["string"] = STRING_LITERAL
 
 
 def test_write_set_record_field_datetime(schema):
     r = Record(schema=schema)
     for i in range(10**6):
-        r['datetime'] = datetime(2016, 1, 1)
+        r["datetime"] = datetime(2016, 1, 1)
 
 
 def test_set_record_field_decimal(schema):
     r = Record(schema=schema)
     for i in range(10**6):
-        r['decimal'] = Decimal('1.111111')
+        r["decimal"] = Decimal("1.111111")