diff --git a/frictionless/analyzer/analyzer.py b/frictionless/analyzer/analyzer.py
index 3ab1c89884..df24da5481 100644
--- a/frictionless/analyzer/analyzer.py
+++ b/frictionless/analyzer/analyzer.py
@@ -34,7 +34,8 @@ def analyze_table_resource(
     # Iterate rows
     columns_data: Dict[str, List[Any]] = {}
     numeric = ["integer", "numeric", "number"]
-    with resource:
+    # Use a copy of the resource to avoid side effects (see #1622)
+    with resource.to_copy() as resource:
         for row in resource.row_stream:
             null_columns = 0
             for field_name in row:
diff --git a/frictionless/formats/gsheets/parser.py b/frictionless/formats/gsheets/parser.py
index 7cf81b2777..523118132f 100644
--- a/frictionless/formats/gsheets/parser.py
+++ b/frictionless/formats/gsheets/parser.py
@@ -53,7 +53,8 @@ def write_row_stream(self, source: TableResource):
         sh = gc.open_by_key(key)
         wks = sh.worksheet_by_id(gid) if gid else sh[0]  # type: ignore
         data: List[Any] = []
-        with source:
+        # Use a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             data.append(source.schema.field_names)
             for row in source.row_stream:
                 data.append(row.to_list())
diff --git a/frictionless/formats/html/parser.py b/frictionless/formats/html/parser.py
index a304685ef3..a3d0a5934c 100644
--- a/frictionless/formats/html/parser.py
+++ b/frictionless/formats/html/parser.py
@@ -57,7 +57,8 @@ def read_cell_stream_create(self) -> types.ICellStream:
     # It will give us an ability to support HtmlDialect
     def write_row_stream(self, source: TableResource):
         html = "<html><body><table>\n"
-        with source:
+        # Use a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             html += "<tr>"
             for name in source.schema.field_names:
                 html += f"<td>{name}</td>"
diff --git a/frictionless/formats/pandas/parser.py b/frictionless/formats/pandas/parser.py
index ab7b3389a0..28c2bd4f6c 100644
--- a/frictionless/formats/pandas/parser.py
+++ b/frictionless/formats/pandas/parser.py
@@ -128,7 +128,8 @@ def write_row_stream(self, source: TableResource):
         data_rows: List[Tuple[Any]] = []
         index_rows: List[Tuple[Any]] = []
         fixed_types = {}
-        with source:
+        # Use a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             for row in source.row_stream:
                 data_values: List[Any] = []
                 index_values: List[Any] = []
diff --git a/frictionless/formats/qsv/adapter.py b/frictionless/formats/qsv/adapter.py
index eae77976f6..2b18a7b371 100644
--- a/frictionless/formats/qsv/adapter.py
+++ b/frictionless/formats/qsv/adapter.py
@@ -27,7 +27,8 @@ def read_schema(self, resource: Resource) -> Schema:
         command = [self.qsv_path, "stats", "--infer-dates", "--dates-whitelist", "all"]
         process = sp.Popen(command, stdout=sp.PIPE, stdin=sp.PIPE)
         # TODO: Use FileResource here (or future resource.stream_bytes())
-        with resource:
+        # Use a copy of the resource to avoid side effects (see #1622)
+        with resource.to_copy() as resource:
             while True:
                 chunk = resource.read_bytes(size=BLOCK_SIZE)
                 if not chunk:
diff --git a/frictionless/formats/spss/parser.py b/frictionless/formats/spss/parser.py
index 0b706bdf9f..9a40054fd0 100644
--- a/frictionless/formats/spss/parser.py
+++ b/frictionless/formats/spss/parser.py
@@ -99,7 +99,8 @@ def write_row_stream(self, source: TableResource):
 
         # Write rows
         with sav.SavWriter(self.resource.normpath, ioUtf8=True, **spss_schema) as writer:  # type: ignore
-            with source:
+            # Use a copy of the source to avoid side effects (see #1622)
+            with source.to_copy() as source:
                 for row in source.row_stream:  # type: ignore
                     cells: List[Any] = []
                     for field in source.schema.fields:  # type: ignore
@@ -130,7 +131,8 @@ def __write_convert_schema(self, source: TableResource):
             "varTypes": {},
             "formats": {},
         }
-        with source:
+        # Use a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             # Add fields
             sizes: Dict[str, int] = {}
             mapping = self.__write_convert_type()
diff --git a/frictionless/formats/sql/adapter.py b/frictionless/formats/sql/adapter.py
index 5f49b7b4b5..554798a358 100644
--- a/frictionless/formats/sql/adapter.py
+++ b/frictionless/formats/sql/adapter.py
@@ -109,7 +109,8 @@ def write_package(self, package: Package):
         for table in self.metadata.sorted_tables:
             if package.has_table_resource(table.name):
                 resource = package.get_table_resource(table.name)
-                with resource:
+                # Use a copy of the resource to avoid side effects (see #1622)
+                with resource.to_copy() as resource:
                     self.write_row_stream(resource.row_stream, table_name=table.name)
         return models.PublishResult(
             url=self.engine.url.render_as_string(hide_password=True),
diff --git a/frictionless/formats/sql/parser.py b/frictionless/formats/sql/parser.py
index d9475e53fc..3e1d68883e 100644
--- a/frictionless/formats/sql/parser.py
+++ b/frictionless/formats/sql/parser.py
@@ -51,6 +51,7 @@ def write_row_stream(self, source: TableResource):
         adapter = SqlAdapter(engine, control=control)
         if not adapter:
             raise FrictionlessException(f"Not supported source: {self.resource.normpath}")
-        with source:
+        # Use a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             adapter.write_schema(source.schema, table_name=control.table)
             adapter.write_row_stream(source.row_stream, table_name=control.table)
diff --git a/frictionless/indexer/indexer.py b/frictionless/indexer/indexer.py
index e689315d41..8277987ba8 100644
--- a/frictionless/indexer/indexer.py
+++ b/frictionless/indexer/indexer.py
@@ -45,20 +45,24 @@ def __attrs_post_init__(self):
 
     def index(self) -> Optional[Report]:
         self.prepare_resource()
-        with self.resource:
-            # Index is resouce-based operation not supporting FKs
-            if self.resource.schema.foreign_keys:
-                self.resource.schema.foreign_keys = []
-            self.create_table()
-            while True:
-                try:
-                    return self.populate_table()
-                except Exception:
-                    if self.fast and self.use_fallback:
-                        self.fast = False
-                        continue
-                    self.delete_table()
-                    raise
+
+        # Infer resource if needed
+        if self.resource.closed:
+            self.resource.infer()
+
+        # Index is a resource-based operation not supporting FKs
+        if self.resource.schema.foreign_keys:
+            self.resource.schema.foreign_keys = []
+        self.create_table()
+        while True:
+            try:
+                return self.populate_table()
+            except Exception:
+                if self.fast and self.use_fallback:
+                    self.fast = False
+                    continue
+                self.delete_table()
+                raise
 
     def prepare_resource(self):
         if self.qsv_path:
@@ -108,10 +112,12 @@ def populate_table_fast_sqlite(self):
         sql_command = f".import '|cat -' \"{self.table_name}\""
         command = ["sqlite3", "-csv", self.adapter.engine.url.database, sql_command]
         process = subprocess.Popen(command, stdin=PIPE, stdout=PIPE)
-        for line_number, line in enumerate(self.resource.byte_stream, start=1):
-            if line_number > 1:
-                process.stdin.write(line)  # type: ignore
-            self.report_progress(f"{self.resource.stats.bytes} bytes")
+        # Iterate over a copy of the resource to avoid side effects (see #1622)
+        with self.resource.to_copy() as resource:
+            for line_number, line in enumerate(resource.byte_stream, start=1):
+                if line_number > 1:
+                    process.stdin.write(line)  # type: ignore
+                self.report_progress(f"{self.resource.stats.bytes} bytes")
         process.stdin.close()  # type: ignore
         process.wait()
@@ -119,14 +125,16 @@ def populate_table_fast_postgresql(self):
         database_url = self.adapter.engine.url.render_as_string(hide_password=False)
         with platform.psycopg.connect(database_url) as connection:
             with connection.cursor() as cursor:
-                query = 'COPY "%s" FROM STDIN CSV HEADER' % self.table_name
-                with cursor.copy(query) as copy:  # type: ignore
-                    while True:
-                        chunk = self.resource.read_bytes(size=settings.BLOCK_SIZE)
-                        if not chunk:
-                            break
-                        copy.write(chunk)
-                        self.report_progress(f"{self.resource.stats.bytes} bytes")
+                # Iterate over a copy of the resource to avoid side effects (see #1622)
+                with self.resource.to_copy() as resource:
+                    query = 'COPY "%s" FROM STDIN CSV HEADER' % self.table_name
+                    with cursor.copy(query) as copy:  # type: ignore
+                        while True:
+                            chunk = resource.read_bytes(size=settings.BLOCK_SIZE)
+                            if not chunk:
+                                break
+                            copy.write(chunk)
+                            self.report_progress(f"{self.resource.stats.bytes} bytes")
 
     def delete_table(self):
         self.adapter.delete_resource(self.table_name)
diff --git a/frictionless/steps/table/table_debug.py b/frictionless/steps/table/table_debug.py
index b5175bfd9b..1810785368 100644
--- a/frictionless/steps/table/table_debug.py
+++ b/frictionless/steps/table/table_debug.py
@@ -33,8 +33,9 @@ def transform_resource(self, resource: Resource):
 
         # Data
         def data():  # type: ignore
-            with current:
-                for row in current.row_stream:  # type: ignore
+            # Use a copy of the source to avoid side effects (see #1622)
+            with current.to_copy() as current_copy:
+                for row in current_copy.row_stream:  # type: ignore
                     self.function(row)  # type: ignore
                     yield row
 
diff --git a/frictionless/steps/table/table_validate.py b/frictionless/steps/table/table_validate.py
index 1d17bd1afd..dba4d2ff92 100644
--- a/frictionless/steps/table/table_validate.py
+++ b/frictionless/steps/table/table_validate.py
@@ -29,11 +29,14 @@ def transform_resource(self, resource: Resource):
 
         # Data
         def data():  # type: ignore
-            with current:
-                if not current.header.valid:  # type: ignore
-                    raise FrictionlessException(error=current.header.errors[0])  # type: ignore
-                yield current.header  # type: ignore
-                for row in current.row_stream:  # type: ignore
+            # Use a copy of the source to avoid side effects (see #1622)
+            with current.to_copy() as current_copy:  # type: ignore
+                if not current_copy.header.valid:  # type: ignore
+                    raise FrictionlessException(
+                        error=current_copy.header.errors[0]  # type: ignore
+                    )  # type: ignore
+                yield current_copy.header  # type: ignore
+                for row in current_copy.row_stream:  # type: ignore
                     if not row.valid:  # type: ignore
                         raise FrictionlessException(error=row.errors[0])  # type: ignore
                     yield row
diff --git a/tests/analyzer/test_resource.py b/tests/analyzer/test_resource.py
index 53da72cc12..f572afb9e7 100644
--- a/tests/analyzer/test_resource.py
+++ b/tests/analyzer/test_resource.py
@@ -241,3 +241,29 @@ def test_analyze_resource_detailed_with_invalid_data():
     assert analysis["rowsWithNullValues"] == 3
     assert analysis["notNullRows"] == 1
     assert analysis["variableTypes"] == {"integer": 3, "string": 1}
+
+
+def test_analyze_resource_is_independent_bug_1622():
+    # Test that we can analyze a resource without side effects
+    resource = TableResource(path="data/analysis-data.csv")
+    with resource:
+        analysis = resource.analyze()
+    assert list(analysis.keys()) == [
+        "variableTypes",
+        "notNullRows",
+        "rowsWithNullValues",
+        "fieldStats",
+        "averageRecordSizeInBytes",
+        "timeTaken",
+        "md5",
+        "sha256",
+        "bytes",
+        "fields",
+        "rows",
+    ]
+    assert round(analysis["averageRecordSizeInBytes"]) == 85
+    assert analysis["fields"] == 11
+    assert analysis["rows"] == 9
+    assert analysis["rowsWithNullValues"] == 2
+    assert analysis["notNullRows"] == 7
+    assert analysis["variableTypes"] == {}
diff --git a/tests/formats/csv/test_parser.py b/tests/formats/csv/test_parser.py
index 2bf4bca368..6978352174 100644
--- a/tests/formats/csv/test_parser.py
+++ b/tests/formats/csv/test_parser.py
@@ -344,3 +344,17 @@ def test_csv_parser_proper_quote_issue_493():
     resource.infer()
     assert resource.dialect.to_descriptor() == {}
     assert len(resource.schema.fields) == 126
+
+
+@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows")
+def test_csv_parser_write_independent_issue_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.csv")))
+        source.write(target)
+    with target:
+        assert target.header == ["id", "name"]
+        assert target.read_rows() == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/formats/excel/parsers/test_xls.py b/tests/formats/excel/parsers/test_xls.py
index 73e5a02213..9668a32ee1 100644
--- a/tests/formats/excel/parsers/test_xls.py
+++ b/tests/formats/excel/parsers/test_xls.py
@@ -169,3 +169,16 @@ def test_xls_parser_cast_int_to_string_1251():
         {"A": "001", "B": "b", "C": "1", "D": "a", "E": 1},
         {"A": "002", "B": "c", "C": "1", "D": "1", "E": 1},
     ]
+
+
+def test_xls_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.xls")))
+        source.write(target)
+    with target:
+        assert target.header == ["id", "name"]
+        assert target.read_rows() == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/formats/excel/parsers/test_xlsx.py b/tests/formats/excel/parsers/test_xlsx.py
index 2deb051e7f..61f2b520ed 100644
--- a/tests/formats/excel/parsers/test_xlsx.py
+++ b/tests/formats/excel/parsers/test_xlsx.py
@@ -307,3 +307,16 @@ def test_xlsx_parser_cannot_read_resource_from_remote_package_issue_1504():
     resource = package.get_table_resource("excel")
     table = resource.read_table()
     assert len(table.rows) == 4
+
+
+def test_xlsx_parser_write_independent_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.xlsx")))
+        source.write(target)
+    with target:
+        assert target.header == ["id", "name"]
+        assert target.read_rows() == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/formats/gsheets/test_parser.py b/tests/formats/gsheets/test_parser.py
index 815167fb45..22f1dfbbff 100644
--- a/tests/formats/gsheets/test_parser.py
+++ b/tests/formats/gsheets/test_parser.py
@@ -52,10 +52,11 @@ def test_gsheets_parser_write(google_credentials_path):
     path = "https://docs.google.com/spreadsheets/d/1F2OiYmaf8e3x7jSc95_uNgfUyBlSXrcRg-4K_MFNZQI/edit"
     control = formats.GsheetsControl(credentials=google_credentials_path)
     source = TableResource(path="data/table.csv")
-    target = source.write(path=path, control=control)
-    with target:
-        assert target.header == ["id", "name"]
-        assert target.read_rows() == [
-            {"id": 1, "name": "english"},
-            {"id": 2, "name": "中国人"},
-        ]
+    with source:
+        target = source.write(path=path, control=control)
+        with target:
+            assert target.header == ["id", "name"]
+            assert target.read_rows() == [
+                {"id": 1, "name": "english"},
+                {"id": 2, "name": "中国人"},
+            ]
diff --git a/tests/formats/html/test_parser.py b/tests/formats/html/test_parser.py
index 225cd22180..382cf325f6 100644
--- a/tests/formats/html/test_parser.py
+++ b/tests/formats/html/test_parser.py
@@ -62,3 +62,17 @@ def test_html_parser_newline_in_cell_construction_file_issue_865(tmpdir):
     target = source.write(str(tmpdir.join("table.csv")))
     target.infer(stats=True)
     assert target.stats.rows == 226
+
+
+@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows")
+def test_html_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.html")))
+        source.write(target)
+    with target:
+        assert target.header == ["id", "name"]
+        assert target.read_rows() == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/formats/inline/test_parser.py b/tests/formats/inline/test_parser.py
index 829c695b2a..a5a060b920 100644
--- a/tests/formats/inline/test_parser.py
+++ b/tests/formats/inline/test_parser.py
@@ -139,3 +139,15 @@ def test_inline_parser_write_skip_header():
     with TableResource(path="data/table.csv") as resource:
         resource.write(target)
     assert target.data == [[1, "english"], [2, "中国人"]]
+
+
+@pytest.mark.skip
+def test_inline_parser_write_keyed_independent_bug_1622(tmpdir):
+    control = formats.InlineControl(keyed=True)
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = source.write(format="inline", control=control)
+        assert target.data == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/formats/json/parsers/test_json.py b/tests/formats/json/parsers/test_json.py
index 12a49af9a6..386b8df9b7 100644
--- a/tests/formats/json/parsers/test_json.py
+++ b/tests/formats/json/parsers/test_json.py
@@ -135,3 +135,20 @@ def test_json_parser_write_skip_header(tmpdir):
     with TableResource(path="data/table.csv") as resource:
         target = resource.write(target)
     assert target.read_data() == [[1, "english"], [2, "中国人"]]
+
+
+# Bugs
+
+
+def test_json_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.json")))
+        target = source.write(target)
+    assert target.normpath
+    with open(target.normpath) as file:
+        assert json.load(file) == [
+            ["id", "name"],
+            [1, "english"],
+            [2, "中国人"],
+        ]
diff --git a/tests/formats/json/parsers/test_jsonl.py b/tests/formats/json/parsers/test_jsonl.py
index b29cb9339d..6c55799a38 100644
--- a/tests/formats/json/parsers/test_jsonl.py
+++ b/tests/formats/json/parsers/test_jsonl.py
@@ -59,3 +59,18 @@ def test_jsonl_parser_write_skip_header(tmpdir):
         {"field1": 1, "field2": "english"},
         {"field1": 2, "field2": "中国人"},
     ]
+
+
+# Bugs
+
+
+def test_jsonl_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = source.write(path=str(tmpdir.join("table.jsonl")))
+    with target:
+        assert target.header == ["id", "name"]
+        assert target.read_rows() == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/formats/ods/test_parser.py b/tests/formats/ods/test_parser.py
index 1ab6d564a8..c8a491aa3d 100644
--- a/tests/formats/ods/test_parser.py
+++ b/tests/formats/ods/test_parser.py
@@ -139,3 +139,19 @@ def test_ods_parser_write_skip_header(tmpdir):
         resource.write_table(target)
     table = target.read_table()
     assert table.header == ["field1", "field2"]
+
+
+# Bugs
+
+
+def test_ods_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.ods")))
+        source.write(target)
+    with target:
+        assert target.header == ["id", "name"]
+        assert target.read_rows() == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/formats/pandas/test_parser.py b/tests/formats/pandas/test_parser.py
index cb60d791da..ce22960a13 100644
--- a/tests/formats/pandas/test_parser.py
+++ b/tests/formats/pandas/test_parser.py
@@ -324,3 +324,16 @@ def test_validate_package_with_in_code_resources_1245():
     datapackage.add_resource(resource)
     report = validate(datapackage)
     assert len(report.errors) == 0
+
+
+# Bugs
+
+
+def test_pandas_parser_write_independent_bug_1622():
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = source.write(format="pandas")
+    assert target.data.to_dict("records") == [  # type: ignore
+        {"id": 1, "name": "english"},
+        {"id": 2, "name": "中国人"},
+    ]
diff --git a/tests/formats/parquet/test_parser.py b/tests/formats/parquet/test_parser.py
index 76b39efda0..142f257989 100644
--- a/tests/formats/parquet/test_parser.py
+++ b/tests/formats/parquet/test_parser.py
@@ -77,3 +77,20 @@ def test_parquet_parser_write_datetime_field_with_timezone(tmpdir):
             )
         }
     ]
+
+
+# Bugs
+
+
+def test_parquet_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = TableResource(path=str(tmpdir.join("table.parq")))
+        source.write(target)
+    with target:
+        assert target.format == "parq"
+        assert target.header == ["id", "name"]
+        assert target.read_rows() == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/formats/spss/test_parser.py b/tests/formats/spss/test_parser.py
index 7fe29d1571..e8e90991d0 100644
--- a/tests/formats/spss/test_parser.py
+++ b/tests/formats/spss/test_parser.py
@@ -128,3 +128,18 @@ def test_spss_parser_write_timezone(tmpdir):
             "time": time(18),
         },
     ]
+
+
+# Bugs
+
+
+def test_spss_parser_write_independent_bug_1622(tmpdir):
+    source = TableResource(path="data/table.csv")
+    with source:
+        target = source.write(str(tmpdir.join("table.sav")))
+    with target:
+        assert target.header == ["id", "name"]
+        assert target.read_rows() == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/formats/sql/databases/duckdb/test_parser.py b/tests/formats/sql/databases/duckdb/test_parser.py
index edce113821..ca90c7db40 100644
--- a/tests/formats/sql/databases/duckdb/test_parser.py
+++ b/tests/formats/sql/databases/duckdb/test_parser.py
@@ -160,3 +160,16 @@ def test_sql_parser_describe_to_yaml_failing_issue_821(duckdb_url_data):
     resource = TableResource(path=duckdb_url_data, control=control)
     resource.infer()
     assert resource.to_yaml()
+
+
+def test_sql_parser_write_independent_issue_1622(duckdb_url_data):
+    source = TableResource(path="data/table.csv")
+    with source:
+        control = formats.SqlControl(table="name", order_by="id")
+        target = source.write(path=duckdb_url_data, control=control)
+    with target:
+        assert target.header == ["id", "name"]
+        assert target.read_rows() == [
+            {"id": 1, "name": "english"},
+            {"id": 2, "name": "中国人"},
+        ]
diff --git a/tests/formats/sql/databases/mysql/test_parser.py b/tests/formats/sql/databases/mysql/test_parser.py
index c95b61b2fb..efd2d70c7b 100644
--- a/tests/formats/sql/databases/mysql/test_parser.py
+++ b/tests/formats/sql/databases/mysql/test_parser.py
@@ -55,3 +55,32 @@ def test_sql_parser_write_string_pk_issue_777_mysql(mysql_url):
         {"id": 1, "name": "english"},
         {"id": 2, "name": "中国人"},
     ]
+
+
+@pytest.mark.skipif(platform.type == "darwin", reason="Skip SQL test in MacOS")
+@pytest.mark.skipif(platform.type == "windows", reason="Skip SQL test in Windows")
+def test_sql_parser_write_independent_bug_1622(mysql_url):
+    source = TableResource(path="data/timezone.csv")
+    with source:
+        control = formats.SqlControl(table="timezone")
+        target = source.write(path=mysql_url, control=control)
+    with target:
+        assert target.header == ["datetime", "time"]
+        assert target.read_rows() == [
+            {
+                "datetime": datetime(2020, 1, 1, 15),
+                "time": time(15),
+            },
+            {
+                "datetime": datetime(2020, 1, 1, 15),
+                "time": time(15),
+            },
+            {
+                "datetime": datetime(2020, 1, 1, 12),
+                "time": time(12),
+            },
+            {
+                "datetime": datetime(2020, 1, 1, 18),
+                "time": time(18),
+            },
+        ]
diff --git a/tests/formats/sql/databases/postgresql/test_parser.py b/tests/formats/sql/databases/postgresql/test_parser.py
index 6e8f7acc33..94d43378c2 100644
--- a/tests/formats/sql/databases/postgresql/test_parser.py
+++ b/tests/formats/sql/databases/postgresql/test_parser.py
@@ -62,3 +62,32 @@ def test_sql_parser_write_string_pk_issue_777_postgresql(postgresql_url):
         {"id": 1, "name": "english"},
         {"id": 2, "name": "中国人"},
     ]
+
+
+@pytest.mark.skipif(platform.type == "darwin", reason="Skip SQL test in MacOS")
+@pytest.mark.skipif(platform.type == "windows", reason="Skip SQL test in Windows")
+def test_sql_parser_write_independent_bug_1622(postgresql_url):
+    source = TableResource(path="data/timezone.csv")
+    with source:
+        control = formats.SqlControl(table="timezone")
+        target = source.write(postgresql_url, control=control)
+    with target:
+        assert target.header == ["datetime", "time"]
["datetime", "time"] + assert target.read_rows() == [ + { + "datetime": datetime(2020, 1, 1, 15), + "time": time(15), + }, + { + "datetime": datetime(2020, 1, 1, 15), + "time": time(15), + }, + { + "datetime": datetime(2020, 1, 1, 12), + "time": time(12), + }, + { + "datetime": datetime(2020, 1, 1, 18), + "time": time(18), + }, + ] diff --git a/tests/formats/sql/test_parser.py b/tests/formats/sql/test_parser.py index 996fee9ffc..beef5df76c 100644 --- a/tests/formats/sql/test_parser.py +++ b/tests/formats/sql/test_parser.py @@ -151,3 +151,16 @@ def test_sql_parser_describe_to_yaml_failing_issue_821(sqlite_url_data): resource = TableResource(path=sqlite_url_data, control=control) resource.infer() assert resource.to_yaml() + + +def test_sql_parser_write_independent_bug_1622(sqlite_url_data): + source = TableResource(path="data/table.csv") + with source: + control = formats.SqlControl(table="name", order_by="id") + target = source.write(path=sqlite_url_data, control=control) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/yaml/test_parser.py b/tests/formats/yaml/test_parser.py index 186eab9423..69bc7362fd 100644 --- a/tests/formats/yaml/test_parser.py +++ b/tests/formats/yaml/test_parser.py @@ -48,3 +48,20 @@ def test_yaml_parser_write_skip_header(tmpdir): {"field1": 1, "field2": "english"}, {"field1": 2, "field2": "中国人"}, ] + + +# Bugs + + +def test_yaml_parser_write_independent_bug_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = TableResource(path=str(tmpdir.join("table.yaml"))) + source.write(target) + with target: + assert target.format == "yaml" + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/indexer/test_resource.py b/tests/indexer/test_resource.py index d542cce73b..b134ccee79 100644 --- a/tests/indexer/test_resource.py +++ b/tests/indexer/test_resource.py @@ -94,3 +94,18 @@ def test_resource_index_sqlite_on_progress(database_url, mocker): assert on_progress.call_count == 2 on_progress.assert_any_call(control.table, "2 rows") on_progress.assert_any_call(control.table, "3 rows") + + +# Bugs + + +@pytest.mark.parametrize("database_url", database_urls) +def test_resource_index_sqlite_independent_bug_1622(database_url): + assert control.table + resource = TableResource(path="data/table.csv") + with resource: + resource.index(database_url, name=control.table) + assert TableResource(path=database_url, control=control).read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/steps/table/test_table_debug.py b/tests/steps/table/test_table_debug.py new file mode 100644 index 0000000000..48019a3bc0 --- /dev/null +++ b/tests/steps/table/test_table_debug.py @@ -0,0 +1,33 @@ +from frictionless import Pipeline, steps +from frictionless.resources import TableResource + + +class Counter: + count = 0 + + def __call__(self, row): + self.count += 1 + + +def test_step_table_debug(): + source = TableResource(path="data/transform.csv") + counter = Counter() + + pipeline = Pipeline( + steps=[steps.table_debug(function=counter)], + ) + target = source.transform(pipeline) + assert target.schema.to_descriptor() == { + "fields": [ + {"name": "id", "type": "integer"}, + {"name": "name", "type": "string"}, + {"name": "population", "type": "integer"}, + ] + } + assert target.read_rows() == [ + {"id": 
1, "name": "germany", "population": 83}, + {"id": 2, "name": "france", "population": 66}, + {"id": 3, "name": "spain", "population": 47}, + ] + + assert counter.count == 3