Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[tests/style] Fix tf test and formatting #1762

Merged
merged 1 commit into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v5.0.0
hooks:
- id: check-ast
- id: check-yaml
Expand All @@ -16,7 +16,7 @@ repos:
- id: no-commit-to-branch
args: ['--branch', 'main']
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.2
rev: v0.7.1
hooks:
- id: ruff
args: [ --fix ]
Expand Down
12 changes: 6 additions & 6 deletions doctr/datasets/datasets/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]:
assert "labels" in target, "Target should contain 'labels' key"
elif isinstance(target, tuple):
assert len(target) == 2
assert isinstance(target[0], str) or isinstance(
target[0], np.ndarray
), "first element of the tuple should be a string or a numpy array"
assert isinstance(target[0], str) or isinstance(target[0], np.ndarray), (
"first element of the tuple should be a string or a numpy array"
)
assert isinstance(target[1], list), "second element of the tuple should be a list"
else:
assert isinstance(target, str) or isinstance(
target, np.ndarray
), "Target should be a string or a numpy array"
assert isinstance(target, str) or isinstance(target, np.ndarray), (
"Target should be a string or a numpy array"
)

# Read image
img = (
Expand Down
12 changes: 6 additions & 6 deletions doctr/datasets/datasets/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]:
assert "labels" in target, "Target should contain 'labels' key"
elif isinstance(target, tuple):
assert len(target) == 2
assert isinstance(target[0], str) or isinstance(
target[0], np.ndarray
), "first element of the tuple should be a string or a numpy array"
assert isinstance(target[0], str) or isinstance(target[0], np.ndarray), (
"first element of the tuple should be a string or a numpy array"
)
assert isinstance(target[1], list), "second element of the tuple should be a list"
else:
assert isinstance(target, str) or isinstance(
target, np.ndarray
), "Target should be a string or a numpy array"
assert isinstance(target, str) or isinstance(target, np.ndarray), (
"Target should be a string or a numpy array"
)

# Read image
img = (
Expand Down
6 changes: 3 additions & 3 deletions doctr/models/predictor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ def forward(
# Forward again to get predictions on straight pages
loc_preds = self.det_predictor(pages, **kwargs)

assert all(
len(loc_pred) == 1 for loc_pred in loc_preds
), "Detection Model in ocr_predictor should output only one class"
assert all(len(loc_pred) == 1 for loc_pred in loc_preds), (
"Detection Model in ocr_predictor should output only one class"
)

loc_preds = [list(loc_pred.values())[0] for loc_pred in loc_preds]
# Detach objectness scores from loc_preds
Expand Down
6 changes: 3 additions & 3 deletions doctr/models/predictor/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ def __call__(
# forward again to get predictions on straight pages
loc_preds_dict = self.det_predictor(pages, **kwargs) # type: ignore[assignment]

assert all(
len(loc_pred) == 1 for loc_pred in loc_preds_dict
), "Detection Model in ocr_predictor should output only one class"
assert all(len(loc_pred) == 1 for loc_pred in loc_preds_dict), (
"Detection Model in ocr_predictor should output only one class"
)
loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict] # type: ignore[union-attr]
# Detach objectness scores from loc_preds
loc_preds, objectness_scores = detach_scores(loc_preds)
Expand Down
2 changes: 1 addition & 1 deletion doctr/transforms/functional/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def rotate_sample(
rotated_geoms[..., 0] = rotated_geoms[..., 0] / rotated_img.shape[1]
rotated_geoms[..., 1] = rotated_geoms[..., 1] / rotated_img.shape[0]

return rotated_img, np.clip(rotated_geoms, 0, 1)
return rotated_img, np.clip(np.around(rotated_geoms, decimals=15), 0, 1)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@odulcy-mindee I already added this to the PyTorch implementation (

return rotated_img, np.clip(np.around(rotated_geoms, decimals=15), 0, 1)
) but forgot to add it to the TF side 😅

With TF 2.18, NumPy 2.0 now gets installed (previously it was NumPy <2.0, with 2.0 compatibility), and that is why the test was failing.



def crop_detection(
Expand Down
2 changes: 1 addition & 1 deletion doctr/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@
except (urllib.error.URLError, IOError) as e:
if url[:5] == "https":
url = url.replace("https:", "http:")
print("Failed download. Trying https -> http instead." f" Downloading {url} to {file_path}")
print(f"Failed download. Trying https -> http instead. Downloading {url} to {file_path}")

Check warning on line 115 in doctr/utils/data.py

View check run for this annotation

Codecov / codecov/patch

doctr/utils/data.py#L115

Added line #L115 was not covered by tests
_urlretrieve(url, file_path)
else:
raise e
Expand Down
4 changes: 2 additions & 2 deletions doctr/utils/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def update(
"""
if gt_boxes.shape[0] != len(gt_labels) or pred_boxes.shape[0] != len(pred_labels):
raise AssertionError(
"there should be the same number of boxes and string both for the ground truth " "and the predictions"
"there should be the same number of boxes and string both for the ground truth and the predictions"
)

# Compute IoU
Expand Down Expand Up @@ -525,7 +525,7 @@ def update(
"""
if gt_boxes.shape[0] != gt_labels.shape[0] or pred_boxes.shape[0] != pred_labels.shape[0]:
raise AssertionError(
"there should be the same number of boxes and string both for the ground truth " "and the predictions"
"there should be the same number of boxes and string both for the ground truth and the predictions"
)

# Compute IoU
Expand Down
4 changes: 2 additions & 2 deletions references/classification/train_pytorch_character.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def main(args):
sampler=SequentialSampler(val_set),
pin_memory=torch.cuda.is_available(),
)
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")

batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

Expand Down Expand Up @@ -272,7 +272,7 @@ def main(args):
sampler=RandomSampler(train_set),
pin_memory=torch.cuda.is_available(),
)
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")

if args.show_samples:
x, target = next(iter(train_loader))
Expand Down
4 changes: 2 additions & 2 deletions references/classification/train_pytorch_orientation.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def main(args):
sampler=SequentialSampler(val_set),
pin_memory=torch.cuda.is_available(),
)
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")

batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

Expand Down Expand Up @@ -278,7 +278,7 @@ def main(args):
sampler=RandomSampler(train_set),
pin_memory=torch.cuda.is_available(),
)
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")

if args.show_samples:
x, target = next(iter(train_loader))
Expand Down
6 changes: 2 additions & 4 deletions references/classification/train_tensorflow_character.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,7 @@ def main(args):
collate_fn=collate_fn,
)
print(
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
f"{val_loader.num_batches} batches)"
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
)

# Load doctr model
Expand Down Expand Up @@ -226,8 +225,7 @@ def main(args):
collate_fn=collate_fn,
)
print(
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
f"{train_loader.num_batches} batches)"
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
)

if args.show_samples:
Expand Down
6 changes: 2 additions & 4 deletions references/classification/train_tensorflow_orientation.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,7 @@ def main(args):
collate_fn=collate_fn,
)
print(
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
f"{val_loader.num_batches} batches)"
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
)

# Load doctr model
Expand Down Expand Up @@ -236,8 +235,7 @@ def main(args):
collate_fn=collate_fn,
)
print(
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
f"{train_loader.num_batches} batches)"
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
)

if args.show_samples:
Expand Down
5 changes: 2 additions & 3 deletions references/detection/evaluate_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=ds.collate_fn,
)
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")

batch_transforms = Normalize(mean=mean, std=std)

Expand Down Expand Up @@ -143,8 +143,7 @@ def main(args):
print("Running evaluation")
val_loss, recall, precision, mean_iou = evaluate(model, test_loader, batch_transforms, metric, amp=args.amp)
print(
f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | "
f"Mean IoU: {mean_iou:.2%})"
f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})"
)


Expand Down
5 changes: 2 additions & 3 deletions references/detection/evaluate_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def main(args):
drop_last=False,
shuffle=False,
)
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")

batch_transforms = T.Normalize(mean=mean, std=std)

Expand All @@ -122,8 +122,7 @@ def main(args):
print("Running evaluation")
val_loss, recall, precision, mean_iou = evaluate(model, test_loader, batch_transforms, metric)
print(
f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | "
f"Mean IoU: {mean_iou:.2%})"
f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})"
)


Expand Down
4 changes: 2 additions & 2 deletions references/detection/train_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=val_set.collate_fn,
)
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")
with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
val_hash = hashlib.sha256(f.read()).hexdigest()

Expand Down Expand Up @@ -316,7 +316,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=train_set.collate_fn,
)
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")
with open(os.path.join(args.train_path, "labels.json"), "rb") as f:
train_hash = hashlib.sha256(f.read()).hexdigest()

Expand Down
6 changes: 2 additions & 4 deletions references/detection/train_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,7 @@ def main(args):
drop_last=False,
)
print(
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
f"{val_loader.num_batches} batches)"
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
)
with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
val_hash = hashlib.sha256(f.read()).hexdigest()
Expand Down Expand Up @@ -269,8 +268,7 @@ def main(args):
drop_last=True,
)
print(
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
f"{train_loader.num_batches} batches)"
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
)
with open(os.path.join(args.train_path, "labels.json"), "rb") as f:
train_hash = hashlib.sha256(f.read()).hexdigest()
Expand Down
2 changes: 1 addition & 1 deletion references/recognition/evaluate_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=ds.collate_fn,
)
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")

mean, std = model.cfg["mean"], model.cfg["std"]
batch_transforms = Normalize(mean=mean, std=std)
Expand Down
2 changes: 1 addition & 1 deletion references/recognition/evaluate_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def main(args):
drop_last=False,
shuffle=False,
)
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")

mean, std = model.cfg["mean"], model.cfg["std"]
batch_transforms = T.Normalize(mean=mean, std=std)
Expand Down
4 changes: 2 additions & 2 deletions references/recognition/train_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=val_set.collate_fn,
)
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")

batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

Expand Down Expand Up @@ -326,7 +326,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=train_set.collate_fn,
)
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")

if args.show_samples:
x, target = next(iter(train_loader))
Expand Down
6 changes: 2 additions & 4 deletions references/recognition/train_pytorch_ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,7 @@ def main(rank: int, world_size: int, args):
pin_memory=torch.cuda.is_available(),
collate_fn=val_set.collate_fn,
)
print(
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)"
)
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")

batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

Expand Down Expand Up @@ -266,7 +264,7 @@ def main(rank: int, world_size: int, args):
pin_memory=torch.cuda.is_available(),
collate_fn=train_set.collate_fn,
)
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")

if rank == 0 and args.show_samples:
x, target = next(iter(train_loader))
Expand Down
6 changes: 2 additions & 4 deletions references/recognition/train_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,7 @@ def main(args):
drop_last=False,
)
print(
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
f"{val_loader.num_batches} batches)"
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
)

# Load doctr model
Expand Down Expand Up @@ -274,8 +273,7 @@ def main(args):
drop_last=True,
)
print(
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
f"{train_loader.num_batches} batches)"
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
)

if args.show_samples:
Expand Down
3 changes: 1 addition & 2 deletions tests/pytorch/test_transforms_pt.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ def test_resize():
# Symetric padding
transfo = Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True)
assert repr(transfo) == (
f"Resize(output_size={output_size}, interpolation='bilinear', "
f"preserve_aspect_ratio=True, symmetric_pad=True)"
f"Resize(output_size={output_size}, interpolation='bilinear', preserve_aspect_ratio=True, symmetric_pad=True)"
)
out = transfo(input_t)
assert out.shape[-2:] == output_size
Expand Down
2 changes: 1 addition & 1 deletion tests/tensorflow/test_transforms_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_resize():
# Symetric padding
transfo = T.Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True)
assert repr(transfo) == (
f"Resize(output_size={output_size}, method='bilinear', " f"preserve_aspect_ratio=True, symmetric_pad=True)"
f"Resize(output_size={output_size}, method='bilinear', preserve_aspect_ratio=True, symmetric_pad=True)"
)
out = transfo(input_t)
# Asymetric padding
Expand Down
Loading