Skip to content

Commit

Permalink
Merge pull request #4246 from GSA-TTS/main
Browse files Browse the repository at this point in the history
  • Loading branch information
jadudm authored Aug 31, 2024
2 parents c122813 + 11db765 commit b7649d9
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 64 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ jobs:

- name: Run type checking
working-directory: ./backend
run: mypy .
run: |
mypy . --install-types --non-interactive
mypy .
- name: Run HTML template linting
working-directory: ./backend
Expand Down
Binary file not shown.
6 changes: 6 additions & 0 deletions backend/audit/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,9 +681,15 @@ def test_locked_pdf_file(self):
self.assertRaises(ValidationError, validate_pdf_file_integrity, file)

def test_scanned_pdf_file(self):
    """A PDF with too few parsable characters must be rejected as invalid"""
    with open("audit/fixtures/scanned.pdf", "rb") as scanned_pdf:
        # The validator is expected to raise for this fixture.
        with self.assertRaises(ValidationError):
            validate_pdf_file_integrity(scanned_pdf)

def test_not_enough_readable_pages_pdf_file(self):
    """A PDF whose percentage of readable pages is too low must be rejected"""
    fixture_path = "audit/fixtures/not-enough-readable-pages.pdf"
    with open(fixture_path, "rb") as sparse_pdf:
        # The validator is expected to raise for this fixture.
        with self.assertRaises(ValidationError):
            validate_pdf_file_integrity(sparse_pdf)

def test_valid_pdf_file(self):
    """The basic fixture PDF passes the integrity check without raising"""
    with open("audit/fixtures/basic.pdf", "rb") as valid_pdf:
        # No assertion needed: any exception raised here fails the test.
        validate_pdf_file_integrity(valid_pdf)
Expand Down
25 changes: 18 additions & 7 deletions backend/audit/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,7 @@ def validate_single_audit_report_file_extension(file):
def validate_pdf_file_integrity(file):
"""Files must be readable PDFs"""
MIN_CHARARACTERS_IN_PDF = 6000
MIN_PERCENT_READABLE_PAGES = 0.50

try:
reader = PdfReader(file)
Expand All @@ -672,17 +673,27 @@ def validate_pdf_file_integrity(file):
"We were unable to process the file you uploaded because it is encrypted."
)

text_length = 0
total_chars = 0
num_pages_with_text = 0

for page in reader.pages:
page_text = page.extract_text()
text_length += len(page_text)
# If we find enough characters, we're content.
if text_length >= MIN_CHARARACTERS_IN_PDF:
break
total_chars += len(page_text)
num_pages_with_text += 1 if len(page_text) else 0

percent_readable_pages = num_pages_with_text / len(reader.pages)

if text_length < MIN_CHARARACTERS_IN_PDF:
if total_chars == 0:
raise ValidationError(
"We were unable to process the file you uploaded because it contains no readable text."
)
elif total_chars < MIN_CHARARACTERS_IN_PDF:
raise ValidationError(
"We were unable to process the file you uploaded because it contains too little readable text."
)
elif percent_readable_pages < MIN_PERCENT_READABLE_PAGES:
raise ValidationError(
"We were unable to process the file you uploaded because it contains no readable text or too little text."
f"We were unable to process the file you uploaded because only {percent_readable_pages:.0%} of the pages contain readable text (minimum {MIN_PERCENT_READABLE_PAGES:.0%} required.)"
)

except ValidationError:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import re
from datetime import timedelta
import datetime
from dateutil.relativedelta import relativedelta

from django.conf import settings

Expand Down Expand Up @@ -310,14 +311,18 @@ def xform_auditee_fiscal_period_end(general_information):

def xform_auditee_fiscal_period_start(general_information):
"""Constructs the fiscal period start from the fiscal period end"""
# Transformation to be documented.
fiscal_start_date = xform_census_date_to_datetime(
# As of 8/30/2024 this logic has been adjusted to handle invalid start dates of the fiscal year.
# Previously, the fiscal start date was computed by subtracting 365 days from the end date.
# All migrations of historical census records had been completed prior to this change.
fiscal_end_date = xform_census_date_to_datetime(
general_information.get("auditee_fiscal_period_end")
) - timedelta(days=365)
)
fiscal_start_date = (
fiscal_end_date - relativedelta(years=1) + datetime.timedelta(days=1)
)
general_information["auditee_fiscal_period_start"] = fiscal_start_date.strftime(
"%Y-%m-%d"
)

return general_information


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import json
from unittest.mock import mock_open, patch
from django.conf import settings
Expand Down Expand Up @@ -190,12 +191,12 @@ def setUp(self):
def test_when_auditee_fiscal_period_end_is_valid(self):
"""Test that the function returns the correct results when the fiscal period end is valid."""
result = xform_auditee_fiscal_period_start(self.general_information)
fiscal_end = datetime.strptime(
self.general_information["auditee_fiscal_period_end"],
"%m/%d/%Y %H:%M:%S",
)
expected_date = (
datetime.strptime(
self.general_information["auditee_fiscal_period_end"],
"%m/%d/%Y %H:%M:%S",
)
- timedelta(days=365)
fiscal_end - relativedelta(years=1) + timedelta(days=1)
).strftime("%Y-%m-%d")
self.assertEqual(result["auditee_fiscal_period_start"], expected_date)

Expand Down
2 changes: 1 addition & 1 deletion backend/census_historical_migration/test_sac_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(
self.DOLLARTHRESHOLD = "750000"
self.EIN = "134278617"
self.ENTITY_TYPE = entity_type
self.FYENDDATE = "01/01/2022 00:00:00"
self.FYENDDATE = "06/30/2022 00:00:00"
self.GOINGCONCERN = "N"
self.LOWRISK = "N"
self.MATERIALNONCOMPLIANCE = "N"
Expand Down
15 changes: 8 additions & 7 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --allow-unsafe --generate-hashes --output-file=requirements.txt ./requirements/requirements.in
Expand Down Expand Up @@ -97,9 +97,7 @@ cffi==1.16.0 \
--hash=sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627 \
--hash=sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956 \
--hash=sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357
# via
# cryptography
# gevent
# via cryptography
charset-normalizer==3.3.2 \
--hash=sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027 \
--hash=sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087 \
Expand Down Expand Up @@ -897,6 +895,7 @@ python-dateutil==2.9.0.post0 \
--hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \
--hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
# via
# -r ./requirements/requirements.in
# botocore
# faker
# pandas
Expand Down Expand Up @@ -1163,6 +1162,10 @@ text-unidecode==1.3 \
--hash=sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8 \
--hash=sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93
# via python-slugify
types-python-dateutil==2.9.0.20240821 \
--hash=sha256:9649d1dcb6fef1046fb18bebe9ea2aa0028b160918518c34589a46045f6ebd98 \
--hash=sha256:f5889fcb4e63ed4aaa379b44f93c32593d50b9a94c9a60a0c854d8cc3511cd57
# via -r ./requirements/requirements.in
typing-extensions==4.11.0 \
--hash=sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0 \
--hash=sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a
Expand All @@ -1175,9 +1178,7 @@ typing-extensions==4.11.0 \
tzdata==2024.1 \
--hash=sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd \
--hash=sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252
# via
# django
# pandas
# via pandas
uritemplate==4.1.1 \
--hash=sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0 \
--hash=sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e
Expand Down
78 changes: 40 additions & 38 deletions backend/requirements/requirements.in
Original file line number Diff line number Diff line change
@@ -1,38 +1,40 @@
boto3
cfenv
cryptography>=42.0.5
django-cors-headers
django-csp
django-dbbackup
django-storages[boto3]
Django>=5.0.8
djangorestframework>=3.15.2
djangorestframework-simplejwt
django-fsm
environs[django]
faker
fs
greenlet>=3.0rc3
gunicorn[gevent]>=22.0.0
jsonpath-ng
jsonschema
newrelic>=9.11.0
oic
openpyxl
pandas
peewee
psycogreen
psycopg2-binary
pycryptodome>=3.19.1
pycryptodomex>=3.19.1
pydash
pyjwt
pypdf>=3.17.0
python-json-logger
python-slugify
pyyaml
requests>=2.32.3
sqlalchemy
sqlparse>=0.5.0
uritemplate
urllib3>=2.2.2
boto3
cfenv
cryptography>=42.0.5
django-cors-headers
django-csp
django-dbbackup
django-storages[boto3]
Django>=5.0.8
djangorestframework>=3.15.2
djangorestframework-simplejwt
django-fsm
environs[django]
faker
fs
greenlet>=3.0rc3
gunicorn[gevent]>=22.0.0
jsonpath-ng
jsonschema
newrelic>=9.11.0
oic
openpyxl
pandas
peewee
psycogreen
psycopg2-binary
pycryptodome>=3.19.1
pycryptodomex>=3.19.1
pydash
pyjwt
pypdf>=3.17.0
python-dateutil==2.9.0.post0
python-json-logger
python-slugify
pyyaml
requests>=2.32.3
sqlalchemy
sqlparse>=0.5.0
types-python-dateutil==2.9.0.20240821
uritemplate
urllib3>=2.2.2

0 comments on commit b7649d9

Please sign in to comment.