Skip to content

Commit

Permalink
Support dialect.skip_blank_rows (#1387)
Browse files Browse the repository at this point in the history
* Added `dialect.skip_blank_rows`

* Updated the docs

* Improved tests

* Updated docs
  • Loading branch information
roll authored Jan 17, 2023
1 parent 7128abf commit 481854a
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 2 deletions.
4 changes: 4 additions & 0 deletions data/blank-rows-no-fields.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
id,name,age
1101,John,30

1102,Julie,26
36 changes: 36 additions & 0 deletions docs/framework/dialect.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,42 @@ with Resource('capital-3.csv', dialect=dialect, schema=schema) as resource:
print(f'Valid: {resource.header.valid}') # without "header_case" it will have 2 errors
```

## Comment Char

Specifies the character used to comment out rows:

```python script tabs=Python
from frictionless import Resource, Dialect

# "#" is the comment character, so the "#row1" line should be left out of the rows
dialect = Dialect(comment_char="#")
with Resource(b'name\n#row1\nrow2', format="csv", dialect=dialect) as resource:
    print(resource.read_rows())
```

## Comment Rows

A list of row numbers to ignore:

```python script tabs=Python
from frictionless import Resource, Dialect

# Row number 2 is listed in comment_rows, so it should be left out of the rows
dialect = Dialect(comment_rows=[2])
with Resource(b'name\nrow1\nrow2', format="csv", dialect=dialect) as resource:
    print(resource.read_rows())
```

## Skip Blank Rows

Ignores rows if they are completely blank.

```python script tabs=Python
from frictionless import Resource, Dialect

# The empty line between the header and "row2" is completely blank, so it is ignored
dialect = Dialect(skip_blank_rows=True)
with Resource(b'name\n\nrow2', format="csv", dialect=dialect) as resource:
    print(resource.read_rows())
```

## Reference

```yaml reference
Expand Down
25 changes: 24 additions & 1 deletion frictionless/dialect/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,14 @@ class Dialect(Metadata):
A list of rows to ignore. For example: [1, 2]
"""

skip_blank_rows: bool = False
"""
Ignores rows if they are completely blank
"""

controls: List[Control] = attrs.field(factory=list)
"""
A list of controls which defines different aspects of reading data.
A list of controls which defines different aspects of reading data.
"""

# Describe
Expand Down Expand Up @@ -169,6 +174,7 @@ def read_fragment(self, sample):
def read_enumerated_content_stream(self, cell_stream):
first_content_row = self.create_first_content_row()
comment_filter = self.create_comment_filter()
blank_filter = self.create_blank_filter()

# Emit content stream
for row_number, cells in enumerate(cell_stream, start=1):
Expand All @@ -177,6 +183,9 @@ def read_enumerated_content_stream(self, cell_stream):
if comment_filter:
if not comment_filter(row_number, cells):
continue
if blank_filter:
if not blank_filter(cells):
continue
yield (row_number, cells)

# Filter
Expand All @@ -203,6 +212,19 @@ def comment_filter(row_number, cells):

return comment_filter

def create_blank_filter(self):
    """Build the predicate used to drop completely blank rows.

    Returns:
        None when ``self.skip_blank_rows`` is falsy, meaning no blank-row
        filtering is applied. Otherwise a function that takes a row's
        ``cells`` and returns True when the row should be kept, i.e. when
        at least one cell is neither None nor an empty string. A row with
        no cells at all is treated as blank.
    """
    if not self.skip_blank_rows:
        return None

    # Create filter
    def blank_filter(cells):
        # One non-empty cell is enough to keep the row
        return any(cell not in (None, "") for cell in cells)

    return blank_filter

# Metadata

metadata_type = "dialect"
Expand All @@ -219,6 +241,7 @@ def comment_filter(row_number, cells):
"headerCase": {"type": "boolean"},
"commentChar": {"type": "string"},
"commentRows": {"type": "array"},
"skipBlankRows": {"type": "boolean"},
},
}

Expand Down
22 changes: 21 additions & 1 deletion tests/dialect/test_general.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from frictionless import Dialect, FrictionlessException
from frictionless import Resource, Dialect, FrictionlessException


# General
Expand All @@ -21,3 +21,23 @@ def test_dialect_bad_property():
assert error.note == "descriptor is not valid"
assert reasons[0].type == "dialect-error"
assert reasons[0].note == "'bad' is not of type 'array' at property 'headerRows'"


# Blank Rows


@pytest.mark.parametrize(
    "path",
    [
        "data/blank-rows.csv",
        "data/blank-rows-multiple.csv",
        "data/blank-rows-no-fields.csv",
    ],
)
def test_dialect_skip_blank_rows(path):
    # Every fixture must yield the same two data rows once blank rows are skipped
    expected_rows = [
        {"id": 1101, "name": "John", "age": 30},
        {"id": 1102, "name": "Julie", "age": 26},
    ]
    with Resource(path, dialect=Dialect(skip_blank_rows=True)) as resource:
        assert resource.read_rows() == expected_rows

0 comments on commit 481854a

Please sign in to comment.