From 998e6d35d859d4fe3539fb209acb050373dea26e Mon Sep 17 00:00:00 2001
From: Richard Taylor
Date: Thu, 25 Jan 2024 18:11:31 +0000
Subject: [PATCH] #1622: Fix more cases (including CI)

# Summary

The CI tests identified some issues that don't show up on a normal test run.
This commit fixes those issues. It also highlighted numerous areas that lacked
test coverage for the case where the caller had already opened the resource.

The indexer has some notable changes, but the biggest area affected is the
parsers when writing from an already opened source. This commit adds unit
tests for the indexer and all the parser formats covering this case, and
fixes the code so it no longer relies on nested contexts.

# Tests

- Set up the required databases for CI by copying the commands from the
  GitHub Actions workflow
- Run `hatch run +py=3.11 ci:test` and ensure all tests pass and coverage
  remains sufficient
- Run `hatch run test` (in case it differs) and ensure all tests pass and
  coverage remains sufficient

This also means that all linting etc. has been run.
---
 frictionless/analyzer/analyzer.py           |  3 +-
 frictionless/formats/gsheets/parser.py      |  3 +-
 frictionless/formats/html/parser.py         |  3 +-
 frictionless/formats/pandas/parser.py       |  3 +-
 frictionless/formats/qsv/adapter.py         |  3 +-
 frictionless/formats/spss/parser.py         |  6 +-
 frictionless/formats/sql/adapter.py         |  3 +-
 frictionless/formats/sql/parser.py          |  3 +-
 frictionless/indexer/indexer.py             | 60 +++++++++++--------
 frictionless/steps/table/table_debug.py     |  5 +-
 frictionless/steps/table/table_validate.py  | 13 ++--
 tests/analyzer/test_resource.py             | 26 ++++++++
 tests/formats/csv/test_parser.py            | 14 +++++
 tests/formats/excel/parsers/test_xls.py     | 13 ++++
 tests/formats/excel/parsers/test_xlsx.py    | 13 ++++
 tests/formats/gsheets/test_parser.py        | 15 ++---
 tests/formats/html/test_parser.py           | 14 +++++
 tests/formats/inline/test_parser.py         | 12 ++++
 tests/formats/json/parsers/test_json.py     | 17 ++++++
 tests/formats/json/parsers/test_jsonl.py    | 15 +++++
 tests/formats/ods/test_parser.py            | 16 +++++
 tests/formats/pandas/test_parser.py         | 13 ++++
 tests/formats/parquet/test_parser.py        | 17 ++++++
 tests/formats/spss/test_parser.py           | 15 +++++
 .../sql/databases/duckdb/test_parser.py     | 13 ++++
 .../sql/databases/mysql/test_parser.py      | 29 +++++++++
 .../sql/databases/postgresql/test_parser.py | 29 +++++++++
 tests/formats/sql/test_parser.py            | 13 ++++
 tests/formats/yaml/test_parser.py           | 17 ++++++
 tests/indexer/test_resource.py              | 15 +++++
 tests/steps/table/test_table_debug.py       | 33 ++++++++++
 31 files changed, 405 insertions(+), 49 deletions(-)
 create mode 100644 tests/steps/table/test_table_debug.py

diff --git a/frictionless/analyzer/analyzer.py b/frictionless/analyzer/analyzer.py
index 3ab1c89884..df24da5481 100644
--- a/frictionless/analyzer/analyzer.py
+++ b/frictionless/analyzer/analyzer.py
@@ -34,7 +34,8 @@ def analyze_table_resource(
     # Iterate rows
     columns_data: Dict[str, List[Any]] = {}
     numeric = ["integer", "numeric", "number"]
-    with resource:
+    # Use a copy of the resource to avoid side effects (see #1622)
+    with resource.to_copy() as resource:
         for row in resource.row_stream:
             null_columns = 0
             for field_name in row:
diff --git a/frictionless/formats/gsheets/parser.py b/frictionless/formats/gsheets/parser.py
index 7cf81b2777..523118132f 100644
--- a/frictionless/formats/gsheets/parser.py
+++ b/frictionless/formats/gsheets/parser.py
@@ -53,7 +53,8 @@ def write_row_stream(self, source: TableResource):
         sh = gc.open_by_key(key)
         wks = sh.worksheet_by_id(gid) if gid else sh[0]  # type: ignore
         data: List[Any] = []
-        with source:
+        # Use a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             data.append(source.schema.field_names)
             for row in source.row_stream:
                 data.append(row.to_list())
diff --git a/frictionless/formats/html/parser.py b/frictionless/formats/html/parser.py
index a304685ef3..a3d0a5934c 100644
--- a/frictionless/formats/html/parser.py
+++ b/frictionless/formats/html/parser.py
@@ -57,7 +57,8 @@ def read_cell_stream_create(self) -> types.ICellStream:
     # It will give us an ability to support HtmlDialect
     def write_row_stream(self, source: TableResource):
         html = "<html><body><table>\n"
-        with source:
+        # Use a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             html += "<tr>"
             for name in source.schema.field_names:
                 html += f"<td>{name}</td>"
diff --git a/frictionless/formats/pandas/parser.py b/frictionless/formats/pandas/parser.py
index ab7b3389a0..28c2bd4f6c 100644
--- a/frictionless/formats/pandas/parser.py
+++ b/frictionless/formats/pandas/parser.py
@@ -128,7 +128,8 @@ def write_row_stream(self, source: TableResource):
         data_rows: List[Tuple[Any]] = []
         index_rows: List[Tuple[Any]] = []
         fixed_types = {}
-        with source:
+        # Use a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             for row in source.row_stream:
                 data_values: List[Any] = []
                 index_values: List[Any] = []
diff --git a/frictionless/formats/qsv/adapter.py b/frictionless/formats/qsv/adapter.py
index eae77976f6..2b18a7b371 100644
--- a/frictionless/formats/qsv/adapter.py
+++ b/frictionless/formats/qsv/adapter.py
@@ -27,7 +27,8 @@ def read_schema(self, resource: Resource) -> Schema:
         command = [self.qsv_path, "stats", "--infer-dates", "--dates-whitelist", "all"]
         process = sp.Popen(command, stdout=sp.PIPE, stdin=sp.PIPE)
         # TODO: Use FileResource here (or future resource.stream_bytes())
-        with resource:
+        # Use a copy of the resource to avoid side effects (see #1622)
+        with resource.to_copy() as resource:
             while True:
                 chunk = resource.read_bytes(size=BLOCK_SIZE)
                 if not chunk:
diff --git a/frictionless/formats/spss/parser.py b/frictionless/formats/spss/parser.py
index 0b706bdf9f..9a40054fd0 100644
--- a/frictionless/formats/spss/parser.py
+++ b/frictionless/formats/spss/parser.py
@@ -99,7 +99,8 @@ def write_row_stream(self, source: TableResource):
 
         # Write rows
         with sav.SavWriter(self.resource.normpath, ioUtf8=True, **spss_schema) as writer:  # type: ignore
-            with source:
+            # Use a copy of the source to avoid side effects (see #1622)
+            with source.to_copy() as source:
                 for row in source.row_stream:  # type: ignore
                     cells: List[Any] = []
                     for field in source.schema.fields:  # type: ignore
@@ -130,7 +131,8 @@ def __write_convert_schema(self, source: TableResource):
             "varTypes": {},
             "formats": {},
         }
-        with source:
+        # Use a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             # Add fields
             sizes: Dict[str, int] = {}
             mapping = self.__write_convert_type()
diff --git a/frictionless/formats/sql/adapter.py b/frictionless/formats/sql/adapter.py
index 5f49b7b4b5..554798a358 100644
--- a/frictionless/formats/sql/adapter.py
+++ b/frictionless/formats/sql/adapter.py
@@ -109,7 +109,8 @@ def write_package(self, package: Package):
         for table in self.metadata.sorted_tables:
             if package.has_table_resource(table.name):
                 resource = package.get_table_resource(table.name)
-                with resource:
+                # Use a copy of the resource to avoid side effects (see #1622)
+                with resource.to_copy() as resource:
                     self.write_row_stream(resource.row_stream, table_name=table.name)
         return models.PublishResult(
             url=self.engine.url.render_as_string(hide_password=True),
diff --git a/frictionless/formats/sql/parser.py b/frictionless/formats/sql/parser.py
index d9475e53fc..3e1d68883e 100644
--- a/frictionless/formats/sql/parser.py
+++ b/frictionless/formats/sql/parser.py
@@ -51,6 +51,7 @@ def write_row_stream(self, source: TableResource):
         adapter = SqlAdapter(engine, control=control)
         if not adapter:
             raise FrictionlessException(f"Not supported source: {self.resource.normpath}")
-        with source:
+        # Write from a copy to prevent side effects (see #1622)
+        with source.to_copy() as source:
             adapter.write_schema(source.schema, table_name=control.table)
             adapter.write_row_stream(source.row_stream, table_name=control.table)
diff --git a/frictionless/indexer/indexer.py b/frictionless/indexer/indexer.py
index e689315d41..8277987ba8 100644
--- a/frictionless/indexer/indexer.py
+++ b/frictionless/indexer/indexer.py
@@ -45,20 +45,24 @@ def __attrs_post_init__(self):
 
     def index(self) -> Optional[Report]:
         self.prepare_resource()
-        with self.resource:
-            # Index is resouce-based operation not supporting FKs
-            if self.resource.schema.foreign_keys:
-                self.resource.schema.foreign_keys = []
-            self.create_table()
-            while True:
-                try:
-                    return self.populate_table()
-                except Exception:
-                    if self.fast and self.use_fallback:
-                        self.fast = False
-                        continue
-                    self.delete_table()
-                    raise
+
+        # Infer resource if needed
+        if self.resource.closed:
+            self.resource.infer()
+
+        # Index is a resource-based operation not supporting FKs
+        if self.resource.schema.foreign_keys:
+            self.resource.schema.foreign_keys = []
+        self.create_table()
+        while True:
+            try:
+                return self.populate_table()
+            except Exception:
+                if self.fast and self.use_fallback:
+                    self.fast = False
+                    continue
+                self.delete_table()
+                raise
 
     def prepare_resource(self):
         if self.qsv_path:
@@ -108,10 +112,12 @@ def populate_table_fast_sqlite(self):
         sql_command = f".import '|cat -' \"{self.table_name}\""
         command = ["sqlite3", "-csv", self.adapter.engine.url.database, sql_command]
         process = subprocess.Popen(command, stdin=PIPE, stdout=PIPE)
-        for line_number, line in enumerate(self.resource.byte_stream, start=1):
-            if line_number > 1:
-                process.stdin.write(line)  # type: ignore
-            self.report_progress(f"{self.resource.stats.bytes} bytes")
+        # Iterate over a copy of the resource to avoid side effects (see #1622)
+        with self.resource.to_copy() as resource:
+            for line_number, line in enumerate(resource.byte_stream, start=1):
+                if line_number > 1:
+                    process.stdin.write(line)  # type: ignore
+                self.report_progress(f"{self.resource.stats.bytes} bytes")
         process.stdin.close()  # type: ignore
         process.wait()
 
@@ -119,14 +125,16 @@ def populate_table_fast_postgresql(self):
         database_url = self.adapter.engine.url.render_as_string(hide_password=False)
         with platform.psycopg.connect(database_url) as connection:
             with connection.cursor() as cursor:
-                query = 'COPY "%s" FROM STDIN CSV HEADER' % self.table_name
-                with cursor.copy(query) as copy:  # type: ignore
-                    while True:
-                        chunk = self.resource.read_bytes(size=settings.BLOCK_SIZE)
-                        if not chunk:
-                            break
-                        copy.write(chunk)
-                        self.report_progress(f"{self.resource.stats.bytes} bytes")
+                # Iterate over a copy of the resource to avoid side effects (see #1622)
+                with self.resource.to_copy() as resource:
+                    query = 'COPY "%s" FROM STDIN CSV HEADER' % self.table_name
+                    with cursor.copy(query) as copy:  # type: ignore
+                        while True:
+                            chunk = resource.read_bytes(size=settings.BLOCK_SIZE)
+                            if not chunk:
+                                break
+                            copy.write(chunk)
+                            self.report_progress(f"{self.resource.stats.bytes} bytes")
 
     def delete_table(self):
         self.adapter.delete_resource(self.table_name)
diff --git a/frictionless/steps/table/table_debug.py b/frictionless/steps/table/table_debug.py
index b5175bfd9b..1810785368 100644
--- a/frictionless/steps/table/table_debug.py
+++ b/frictionless/steps/table/table_debug.py
@@ -33,8 +33,9 @@ def transform_resource(self, resource: Resource):
 
         # Data
        def data():  # type: ignore
-            with current:
-                for row in current.row_stream:  # type: ignore
+            # Use a copy of the source to avoid side effects (see #1622)
+            with current.to_copy() as current_copy:
+                for row in current_copy.row_stream:  # type: ignore
                     self.function(row)  # type: ignore
                     yield row
diff --git a/frictionless/steps/table/table_validate.py b/frictionless/steps/table/table_validate.py
index 1d17bd1afd..dba4d2ff92 100644
--- a/frictionless/steps/table/table_validate.py
+++ b/frictionless/steps/table/table_validate.py
@@ -29,11 +29,14 @@ def transform_resource(self, resource: Resource):
 
         # Data
        def data():  # type: ignore
-            with current:
-                if not current.header.valid:  # type: ignore
-                    raise FrictionlessException(error=current.header.errors[0])  # type: ignore
-                yield current.header  # type: ignore
-                for row in current.row_stream:  # type: ignore
+            # Use a copy of the source to avoid side effects (see #1622)
+            with current.to_copy() as current_copy:  # type: ignore
+                if not current_copy.header.valid:  # type: ignore
+                    raise FrictionlessException(
+                        error=current_copy.header.errors[0]  # type: ignore
+                    )  # type: ignore
+                yield current_copy.header  # type: ignore
+                for row in current_copy.row_stream:  # type: ignore
                     if not row.valid:  # type: ignore
                         raise FrictionlessException(error=row.errors[0])  # type: ignore
                     yield row
diff --git a/tests/analyzer/test_resource.py b/tests/analyzer/test_resource.py
index 53da72cc12..f572afb9e7 100644
--- a/tests/analyzer/test_resource.py
+++ b/tests/analyzer/test_resource.py
@@ -241,3 +241,29 @@ def test_analyze_resource_detailed_with_invalid_data():
     assert analysis["rowsWithNullValues"] == 3
     assert analysis["notNullRows"] == 1
     assert analysis["variableTypes"] == {"integer": 3, "string": 1}
+
+
+def test_analyze_resource_is_independent_bug_1622():
+    # Test that we can analyze a resource without side effects
+    resource = TableResource(path="data/analysis-data.csv")
+    with resource:
+        analysis = resource.analyze()
+        assert list(analysis.keys()) == [
+            "variableTypes",
+            "notNullRows",
+            "rowsWithNullValues",
+            "fieldStats",
+            "averageRecordSizeInBytes",
+            "timeTaken",
+            "md5",
+            "sha256",
+            "bytes",
+            "fields",
+            "rows",
+        ]
+        assert round(analysis["averageRecordSizeInBytes"]) == 85
+        assert analysis["fields"] == 11
+        assert analysis["rows"] == 9
+        assert analysis["rowsWithNullValues"] == 2
+        assert analysis["notNullRows"] == 7
+        assert analysis["variableTypes"] == {}
diff --git a/tests/formats/csv/test_parser.py b/tests/formats/csv/test_parser.py
index 2bf4bca368..6978352174 100644
--- a/tests/formats/csv/test_parser.py
+++ b/tests/formats/csv/test_parser.py
@@ -344,3 +344,17 @@ def test_csv_parser_proper_quote_issue_493():
     resource.infer()
     assert resource.dialect.to_descriptor() == {}
     assert len(resource.schema.fields) == 126
+
+
+@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows")
+def test_csv_parser_write_independent_issue_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.csv")))
+        source.write(target)
+        with target:
+            assert target.header == ["id", "name"]
+            assert target.read_rows() == [
+                {"id": 1, "name": "english"},
+                {"id": 2, "name": "中国人"},
+            ]
diff --git a/tests/formats/excel/parsers/test_xls.py b/tests/formats/excel/parsers/test_xls.py
index 73e5a02213..9668a32ee1 100644
--- a/tests/formats/excel/parsers/test_xls.py
+++ b/tests/formats/excel/parsers/test_xls.py
@@ -169,3 +169,16 @@ def test_xls_parser_cast_int_to_string_1251():
         {"A": "001", "B": "b", "C": "1", "D": "a", "E": 1},
         {"A": "002", "B": "c", "C": "1", "D": "1", "E": 1},
     ]
+
+
+def test_xls_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.xls")))
+        source.write(target)
+        with target:
+            assert target.header == ["id", "name"]
+            assert target.read_rows() == [
+                {"id": 1, "name": "english"},
+                {"id": 2, "name": "中国人"},
+            ]
diff --git a/tests/formats/excel/parsers/test_xlsx.py b/tests/formats/excel/parsers/test_xlsx.py
index 2deb051e7f..61f2b520ed 100644
--- a/tests/formats/excel/parsers/test_xlsx.py
+++ b/tests/formats/excel/parsers/test_xlsx.py
@@ -307,3 +307,16 @@ def test_xlsx_parser_cannot_read_resource_from_remote_package_issue_1504():
     resource = package.get_table_resource("excel")
     table = resource.read_table()
     assert len(table.rows) == 4
+
+
+def test_xlsx_parser_write_independent_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.xlsx")))
+        source.write(target)
+        with target:
+            assert target.header == ["id", "name"]
+            assert target.read_rows() == [
+                {"id": 1, "name": "english"},
+                {"id": 2, "name": "中国人"},
+            ]
diff --git a/tests/formats/gsheets/test_parser.py b/tests/formats/gsheets/test_parser.py
index 815167fb45..22f1dfbbff 100644
--- a/tests/formats/gsheets/test_parser.py
+++ b/tests/formats/gsheets/test_parser.py
@@ -52,10 +52,11 @@ def test_gsheets_parser_write(google_credentials_path):
     path = "https://docs.google.com/spreadsheets/d/1F2OiYmaf8e3x7jSc95_uNgfUyBlSXrcRg-4K_MFNZQI/edit"
     control = formats.GsheetsControl(credentials=google_credentials_path)
     source = TableResource(path="data/table.csv")
-    target = source.write(path=path, control=control)
-    with target:
-        assert target.header == ["id", "name"]
-        assert target.read_rows() == [
-            {"id": 1, "name": "english"},
-            {"id": 2, "name": "中国人"},
-        ]
+    with source:
+        target = source.write(path=path, control=control)
+        with target:
+            assert target.header == ["id", "name"]
+            assert target.read_rows() == [
+                {"id": 1, "name": "english"},
+                {"id": 2, "name": "中国人"},
+            ]
diff --git a/tests/formats/html/test_parser.py b/tests/formats/html/test_parser.py
index 225cd22180..382cf325f6 100644
--- a/tests/formats/html/test_parser.py
+++ b/tests/formats/html/test_parser.py
@@ -62,3 +62,17 @@ def test_html_parser_newline_in_cell_construction_file_issue_865(tmpdir):
     target = source.write(str(tmpdir.join("table.csv")))
     target.infer(stats=True)
     assert target.stats.rows == 226
+
+
+@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows")
+def test_html_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.html")))
+        source.write(target)
+        with target:
+            assert target.header == ["id", "name"]
+            assert target.read_rows() == [
+                {"id": 1, "name": "english"},
+                {"id": 2, "name": "中国人"},
+            ]
diff --git a/tests/formats/inline/test_parser.py b/tests/formats/inline/test_parser.py
index 829c695b2a..a5a060b920 100644
--- a/tests/formats/inline/test_parser.py
+++ b/tests/formats/inline/test_parser.py
@@ -139,3 +139,15 @@ def test_inline_parser_write_skip_header():
     with TableResource(path="data/table.csv") as resource:
         resource.write(target)
     assert target.data == [[1, "english"], [2, "中国人"]]
+
+
+@pytest.mark.skip
+def test_inline_parser_write_keyed_independent_bug_1622(tmpdir):
+    control = formats.InlineControl(keyed=True)
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = source.write(format="inline", control=control)
+        assert target.data == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/formats/json/parsers/test_json.py b/tests/formats/json/parsers/test_json.py
index 12a49af9a6..386b8df9b7 100644
--- a/tests/formats/json/parsers/test_json.py
+++ b/tests/formats/json/parsers/test_json.py
@@ -135,3 +135,20 @@ def test_json_parser_write_skip_header(tmpdir):
     with TableResource(path="data/table.csv") as resource:
         target = resource.write(target)
     assert target.read_data() == [[1, "english"], [2, "中国人"]]
+
+
+# Bugs
+
+
+def test_json_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.json")))
+        target = source.write(target)
+        assert target.normpath
+        with open(target.normpath) as file:
+            assert json.load(file) == [
+                ["id", "name"],
+                [1, "english"],
+                [2, "中国人"],
+            ]
diff --git a/tests/formats/json/parsers/test_jsonl.py b/tests/formats/json/parsers/test_jsonl.py
index b29cb9339d..6c55799a38 100644
--- a/tests/formats/json/parsers/test_jsonl.py
+++ b/tests/formats/json/parsers/test_jsonl.py
@@ -59,3 +59,18 @@ def test_jsonl_parser_write_skip_header(tmpdir):
         {"field1": 1, "field2": "english"},
         {"field1": 2, "field2": "中国人"},
     ]
+
+
+# Bugs
+
+
+def test_jsonl_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = source.write(path=str(tmpdir.join("table.jsonl")))
+        with target:
+            assert target.header == ["id", "name"]
+            assert target.read_rows() == [
+                {"id": 1, "name": "english"},
+                {"id": 2, "name": "中国人"},
+            ]
diff --git a/tests/formats/ods/test_parser.py b/tests/formats/ods/test_parser.py
index 1ab6d564a8..c8a491aa3d 100644
--- a/tests/formats/ods/test_parser.py
+++ b/tests/formats/ods/test_parser.py
@@ -139,3 +139,19 @@ def test_ods_parser_write_skip_header(tmpdir):
     resource.write_table(target)
     table = target.read_table()
     assert table.header == ["field1", "field2"]
+
+
+# Bugs
+
+
+def test_ods_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.ods")))
+        source.write(target)
+        with target:
+            assert target.header == ["id", "name"]
+            assert target.read_rows() == [
+                {"id": 1, "name": "english"},
+                {"id": 2, "name": "中国人"},
+            ]
diff --git a/tests/formats/pandas/test_parser.py b/tests/formats/pandas/test_parser.py
index cb60d791da..ce22960a13 100644
--- a/tests/formats/pandas/test_parser.py
+++ b/tests/formats/pandas/test_parser.py
@@ -324,3 +324,16 @@ def test_validate_package_with_in_code_resources_1245():
     datapackage.add_resource(resource)
     report = validate(datapackage)
     assert len(report.errors) == 0
+
+
+# Bugs
+
+
+def test_pandas_parser_write_independent_bug_1622():
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = source.write(format="pandas")
+        assert target.data.to_dict("records") == [  # type: ignore
+            {"id": 1, "name": "english"},
"name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/parquet/test_parser.py b/tests/formats/parquet/test_parser.py index 76b39efda0..142f257989 100644 --- a/tests/formats/parquet/test_parser.py +++ b/tests/formats/parquet/test_parser.py @@ -77,3 +77,20 @@ def test_parquet_parser_write_datetime_field_with_timezone(tmpdir): ) } ] + + +# Bugs + + +def test_parquet_parser_write_independent_bug_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = TableResource(path=str(tmpdir.join("table.parq"))) + source.write(target) + with target: + assert target.format == "parq" + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/spss/test_parser.py b/tests/formats/spss/test_parser.py index 7fe29d1571..e8e90991d0 100644 --- a/tests/formats/spss/test_parser.py +++ b/tests/formats/spss/test_parser.py @@ -128,3 +128,18 @@ def test_spss_parser_write_timezone(tmpdir): "time": time(18), }, ] + + +# Bugs + + +def test_spss_parser_write_independent_bug_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = source.write(str(tmpdir.join("table.sav"))) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/sql/databases/duckdb/test_parser.py b/tests/formats/sql/databases/duckdb/test_parser.py index edce113821..ca90c7db40 100644 --- a/tests/formats/sql/databases/duckdb/test_parser.py +++ b/tests/formats/sql/databases/duckdb/test_parser.py @@ -160,3 +160,16 @@ def test_sql_parser_describe_to_yaml_failing_issue_821(duckdb_url_data): resource = TableResource(path=duckdb_url_data, control=control) resource.infer() assert resource.to_yaml() + + +def test_sql_parser_write_independent_issue_1622(duckdb_url_data): + source = TableResource(path="data/table.csv") + with source: + control = formats.SqlControl(table="name", order_by="id") + target = source.write(path=duckdb_url_data, control=control) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/sql/databases/mysql/test_parser.py b/tests/formats/sql/databases/mysql/test_parser.py index c95b61b2fb..efd2d70c7b 100644 --- a/tests/formats/sql/databases/mysql/test_parser.py +++ b/tests/formats/sql/databases/mysql/test_parser.py @@ -55,3 +55,32 @@ def test_sql_parser_write_string_pk_issue_777_mysql(mysql_url): {"id": 1, "name": "english"}, {"id": 2, "name": "中国人"}, ] + + +@pytest.mark.skipif(platform.type == "darwin", reason="Skip SQL test in MacOS") +@pytest.mark.skipif(platform.type == "windows", reason="Skip SQL test in Windows") +def test_sql_parser_write_independent_bug_1622(mysql_url): + source = TableResource(path="data/timezone.csv") + with source: + control = formats.SqlControl(table="timezone") + target = source.write(path=mysql_url, control=control) + with target: + assert target.header == ["datetime", "time"] + assert target.read_rows() == [ + { + "datetime": datetime(2020, 1, 1, 15), + "time": time(15), + }, + { + "datetime": datetime(2020, 1, 1, 15), + "time": time(15), + }, + { + "datetime": datetime(2020, 1, 1, 12), + "time": time(12), + }, + { + "datetime": datetime(2020, 1, 1, 18), + "time": time(18), + }, + ] diff --git a/tests/formats/sql/databases/postgresql/test_parser.py 
index 6e8f7acc33..94d43378c2 100644
--- a/tests/formats/sql/databases/postgresql/test_parser.py
+++ b/tests/formats/sql/databases/postgresql/test_parser.py
@@ -62,3 +62,32 @@ def test_sql_parser_write_string_pk_issue_777_postgresql(postgresql_url):
         {"id": 1, "name": "english"},
         {"id": 2, "name": "中国人"},
     ]
+
+
+@pytest.mark.skipif(platform.type == "darwin", reason="Skip SQL test in MacOS")
+@pytest.mark.skipif(platform.type == "windows", reason="Skip SQL test in Windows")
+def test_sql_parser_write_independent_bug_1622(postgresql_url):
+    source = TableResource(path="data/timezone.csv")
+    with source:
+        control = formats.SqlControl(table="timezone")
+        target = source.write(postgresql_url, control=control)
+        with target:
+            assert target.header == ["datetime", "time"]
+            assert target.read_rows() == [
+                {
+                    "datetime": datetime(2020, 1, 1, 15),
+                    "time": time(15),
+                },
+                {
+                    "datetime": datetime(2020, 1, 1, 15),
+                    "time": time(15),
+                },
+                {
+                    "datetime": datetime(2020, 1, 1, 12),
+                    "time": time(12),
+                },
+                {
+                    "datetime": datetime(2020, 1, 1, 18),
+                    "time": time(18),
+                },
+            ]
diff --git a/tests/formats/sql/test_parser.py b/tests/formats/sql/test_parser.py
index 996fee9ffc..beef5df76c 100644
--- a/tests/formats/sql/test_parser.py
+++ b/tests/formats/sql/test_parser.py
@@ -151,3 +151,16 @@ def test_sql_parser_describe_to_yaml_failing_issue_821(sqlite_url_data):
     resource = TableResource(path=sqlite_url_data, control=control)
     resource.infer()
     assert resource.to_yaml()
+
+
+def test_sql_parser_write_independent_bug_1622(sqlite_url_data):
+    source = TableResource(path="data/table.csv")
+    with source:
+        control = formats.SqlControl(table="name", order_by="id")
+        target = source.write(path=sqlite_url_data, control=control)
+        with target:
+            assert target.header == ["id", "name"]
+            assert target.read_rows() == [
+                {"id": 1, "name": "english"},
+                {"id": 2, "name": "中国人"},
+            ]
diff --git a/tests/formats/yaml/test_parser.py b/tests/formats/yaml/test_parser.py
index 186eab9423..69bc7362fd 100644
--- a/tests/formats/yaml/test_parser.py
+++ b/tests/formats/yaml/test_parser.py
@@ -48,3 +48,20 @@ def test_yaml_parser_write_skip_header(tmpdir):
         {"field1": 1, "field2": "english"},
         {"field1": 2, "field2": "中国人"},
     ]
+
+
+# Bugs
+
+
+def test_yaml_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.yaml")))
+        source.write(target)
+        with target:
+            assert target.format == "yaml"
+            assert target.header == ["id", "name"]
+            assert target.read_rows() == [
+                {"id": 1, "name": "english"},
+                {"id": 2, "name": "中国人"},
+            ]
diff --git a/tests/indexer/test_resource.py b/tests/indexer/test_resource.py
index d542cce73b..b134ccee79 100644
--- a/tests/indexer/test_resource.py
+++ b/tests/indexer/test_resource.py
@@ -94,3 +94,18 @@ def test_resource_index_sqlite_on_progress(database_url, mocker):
     assert on_progress.call_count == 2
     on_progress.assert_any_call(control.table, "2 rows")
     on_progress.assert_any_call(control.table, "3 rows")
+
+
+# Bugs
+
+
+@pytest.mark.parametrize("database_url", database_urls)
+def test_resource_index_sqlite_independent_bug_1622(database_url):
+    assert control.table
+    resource = TableResource(path="data/table.csv")
+    with resource:
+        resource.index(database_url, name=control.table)
+        assert TableResource(path=database_url, control=control).read_rows() == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/steps/table/test_table_debug.py b/tests/steps/table/test_table_debug.py
new file mode 100644
index 0000000000..48019a3bc0
--- /dev/null
+++ b/tests/steps/table/test_table_debug.py
@@ -0,0 +1,33 @@
+from frictionless import Pipeline, steps
+from frictionless.resources import TableResource
+
+
+class Counter:
+    count = 0
+
+    def __call__(self, row):
+        self.count += 1
+
+
+def test_step_table_debug():
+    source = TableResource(path="data/transform.csv")
+    counter = Counter()
+
+    pipeline = Pipeline(
+        steps=[steps.table_debug(function=counter)],
+    )
+    target = source.transform(pipeline)
+    assert target.schema.to_descriptor() == {
+        "fields": [
+            {"name": "id", "type": "integer"},
+            {"name": "name", "type": "string"},
+            {"name": "population", "type": "integer"},
+        ]
+    }
+    assert target.read_rows() == [
+        {"id": 1, "name": "germany", "population": 83},
+        {"id": 2, "name": "france", "population": 66},
+        {"id": 3, "name": "spain", "population": 47},
+    ]
+
+    assert counter.count == 3
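---

Note for reviewers: every hunk above applies the same pattern — never re-enter a resource the caller may already have open; operate on a copy instead. A minimal sketch of the idea (illustrative names; `consume` stands in for a parser's `write_row_stream` or the indexer, and it assumes frictionless is installed and `data/table.csv` exists):

```python
from frictionless.resources import TableResource


def consume(source: TableResource):
    # Re-entering `with source:` on an already-open resource would close the
    # caller's handle on exit; working on a copy leaves the caller untouched.
    with source.to_copy() as copy:
        return [row.to_list() for row in copy.row_stream]


resource = TableResource(path="data/table.csv")
with resource:  # the caller opens the resource first
    first = consume(resource)   # safe: streams from a copy
    second = consume(resource)  # the original is still open and reusable
    assert first == second
```

This is why the writers and the indexer now call `to_copy()` (or, in `index()`, drop the `with` block entirely) rather than relying on nested contexts.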