Skip to content

Commit

Permalink
fix: As an AB I can't skip rows based on “Line numbers to skip (0-ind…
Browse files Browse the repository at this point in the history
…exed)[TCTC-7097] (#638)

* fix: skiprows can be a given list

* feat: adapt tests for csv_meta changes

* feat: to keep the 0-indexed feature

* chore(doc): update the CHANGELOG

* feat: to keep column names, even with the provided 0-indexed values
  • Loading branch information
Sanix-Darker authored Nov 15, 2023
1 parent 9fb1039 commit f8efd95
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Fixed

- CSV: fix metadata retrieval for CSV files when `skiprows` is given as a (0-indexed) list in `DataSource`.
- FTP: retry connection on `SSHException` while opening a remote url.

## [0.12.0] - 2023-09-01
Expand Down
11 changes: 10 additions & 1 deletion peakina/readers/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ def read_csv(
"""
The read_csv method is able to make a preview by reading on chunks
"""

# NOTE: shift each list index by one so the header line is never skipped and column names are kept in the final result
if isinstance(kwargs.get("skiprows", None), list):
kwargs["skiprows"] = [x + 1 for x in kwargs["skiprows"]]

if preview_nrows is not None or preview_offset:
if (skipfooter := kwargs.pop("skipfooter", None)) is None:
skipfooter = 0
Expand Down Expand Up @@ -101,7 +106,11 @@ def csv_meta(
"df_rows": reader_kwargs["nrows"],
}

start = 0 + reader_kwargs.get("skiprows", 0)
skiprows = reader_kwargs.get("skiprows", 0)
if isinstance(skiprows, list):
skiprows = len(skiprows)

start = 0 + skiprows
end = total_rows - reader_kwargs.get("skipfooter", 0)

preview_offset = reader_kwargs.get("preview_offset", 0)
Expand Down
33 changes: 33 additions & 0 deletions tests/readers/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,27 @@ def test_simple_csv_preview(path):
assert ds.get_df().shape == (2, 2)
assert ds.get_df().equals(pd.DataFrame({"month": ["Mars-14", "Avr-14"], "value": [3.3, 3.1]}))

# with skiprows as list
ds = DataSource(
path("fixture-1.csv"),
reader_kwargs={"skiprows": [0, 2, 5, 7, 9, 11, 12]},
)
assert ds.get_df().equals(
pd.DataFrame(
{
"month": {
0: "Fev-14",
1: "Avr-14",
2: "Mai-14",
3: "Juil-14",
4: "Sept-14",
5: "Nov-14",
},
"value": {0: 3.2, 1: 3.1, 2: 3.9, 3: 3.1, 4: 3.4, 5: 3.7},
}
)
)


def test_csv_metadata(path):
"""
Expand All @@ -75,6 +96,7 @@ def test_csv_metadata(path):
"total_rows": 12,
}

# skiprows as integer
ds = DataSource(
path("fixture-1.csv"),
reader_kwargs={"skiprows": 3, "skipfooter": 4},
Expand All @@ -85,6 +107,17 @@ def test_csv_metadata(path):
"total_rows": 12,
}

# skiprows as list
ds = DataSource(
path("fixture-1.csv"),
reader_kwargs={"skiprows": [0, 2, 4], "skipfooter": 4},
)
assert ds.get_df().shape == (5, 2)
assert ds.get_metadata() == {
"df_rows": 5,
"total_rows": 12,
}

ds = DataSource(
path("fixture-1.csv"),
reader_kwargs={"nrows": 3},
Expand Down

0 comments on commit f8efd95

Please sign in to comment.