From 382c021f1b1bf5213584a462183daf55418ea211 Mon Sep 17 00:00:00 2001
From: Shivam Malhotra
Date: Mon, 6 Nov 2023 17:22:13 -0600
Subject: [PATCH] Added additional test

---
 py/server/tests/test_parquet.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/py/server/tests/test_parquet.py b/py/server/tests/test_parquet.py
index 5e6b41101e4..c921a7e02a4 100644
--- a/py/server/tests/test_parquet.py
+++ b/py/server/tests/test_parquet.py
@@ -391,6 +391,32 @@ def test_dates_and_time(self):
         self.assert_table_equals(dh_table.select(["someDateColumn", "someTimeColumn"]),
                                  from_disk_pandas.select(["someDateColumn", "someTimeColumn"]))
 
+    def test_time_with_different_units(self):
+        """ Test that we can write and read time columns with different units """
+        dh_table = empty_table(20000).update(formulas=[
+            "someTimeColumn = i % 10 == 0 ? null : java.time.LocalTime.of(i%24, i%60, (i+10)%60)"
+        ])
+        write(dh_table, "data_from_dh.parquet")
+        table = pyarrow.parquet.read_table('data_from_dh.parquet')
+
+        def time_test_helper(pa_table, new_schema, dest):
+            # Write the provided pyarrow table type-casted to the new schema, then read it back
+            pyarrow.parquet.write_table(pa_table.cast(new_schema), dest)
+            from_disk = read(dest)
+            df_from_disk = to_pandas(from_disk)
+            original_df = pa_table.to_pandas()
+            # Compare the dataframes as strings; assert (not print) so a mismatch fails the test
+            self.assertTrue((df_from_disk.astype(str) == original_df.astype(str)).all().values.all())
+
+        # Test for nanoseconds, microseconds, and milliseconds
+        schema_nsec = table.schema.set(0, pyarrow.field('someTimeColumn', pyarrow.time64('ns')))
+        time_test_helper(table, schema_nsec, "data_from_pq_nsec.parquet")
+
+        schema_usec = table.schema.set(0, pyarrow.field('someTimeColumn', pyarrow.time64('us')))
+        time_test_helper(table, schema_usec, "data_from_pq_usec.parquet")
+
+        schema_msec = table.schema.set(0, pyarrow.field('someTimeColumn', pyarrow.time32('ms')))
+        time_test_helper(table, schema_msec, "data_from_pq_msec.parquet")
 
 if __name__ == '__main__':
     unittest.main()