Add CIFuzz workflow; tolerate invalid pytz local times in _correct_for_time_frame.

NOTE(review): the line structure and indentation of this patch were
reconstructed from a whitespace-collapsed copy. Confirm the indentation of the
context lines against the tree, or apply with `git apply --ignore-whitespace`.

diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml
new file mode 100644
index 000000000..979b81116
--- /dev/null
+++ b/.github/workflows/cifuzz.yml
@@ -0,0 +1,37 @@
+# Runs the project's OSS-Fuzz fuzzers against every pull request and
+# uploads crash artifacts plus SARIF results for code scanning.
+name: CIFuzz
+on: [pull_request]
+permissions: {}
+jobs:
+  Fuzzing:
+    runs-on: ubuntu-latest
+    permissions:
+      security-events: write
+    steps:
+      - name: Build Fuzzers
+        id: build
+        uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+        with:
+          oss-fuzz-project-name: 'dateparser'
+          language: python
+      - name: Run Fuzzers
+        uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+        with:
+          oss-fuzz-project-name: 'dateparser'
+          language: python
+          fuzz-seconds: 600
+          output-sarif: true
+      - name: Upload Crash
+        uses: actions/upload-artifact@v4
+        if: failure() && steps.build.outcome == 'success'
+        with:
+          name: artifacts
+          path: ./out/artifacts
+      - name: Upload Sarif
+        if: always() && steps.build.outcome == 'success'
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          # Path to SARIF file relative to the root of the repository
+          sarif_file: cifuzz-sarif/results.sarif
+          checkout_path: cifuzz-sarif
diff --git a/dateparser/parser.py b/dateparser/parser.py
index dcd8a194d..16751188b 100644
--- a/dateparser/parser.py
+++ b/dateparser/parser.py
@@ -564,7 +564,9 @@ def _correct_for_time_frame(self, dateobj, tz):
             try:
                 tz = tz or get_timezone_from_tz_string(self.settings.TIMEZONE)
                 tz_offset = tz.utcoffset(dateobj)
-            except pytz.UnknownTimeZoneError:
+            except (pytz.UnknownTimeZoneError, pytz.InvalidTimeError):
+                # InvalidTimeError is the shared base of NonExistentTimeError and
+                # AmbiguousTimeError, both raised by pytz around DST transitions.
                 tz_offset = timedelta(hours=0)
 
             if "past" in self.settings.PREFER_DATES_FROM:
diff --git a/tests/test_clean_api.py b/tests/test_clean_api.py
index fc0f4f15c..c487ec07a 100644
--- a/tests/test_clean_api.py
+++ b/tests/test_clean_api.py
@@ -111,6 +111,69 @@ def test_dates_which_match_locales_are_parsed(
         self.when_date_is_parsed(date_string, locales=locales)
         self.then_parsed_date_is(expected_date)
 
+    @parameterized.expand(
+        [
+            param(
+                date_string="0:4",
+                locales=["fr-PF"],
+                languages=["en"],
+                region="",
+                date_formats=["%a", "%a", "%a", "%a"],
+                expected_date=datetime(1969, 12, 31, 14, 4),
+            )
+        ]
+    )
+    def test_dates_parse_utc_offset_does_not_throw(
+        self, date_string, locales, languages, region, date_formats, expected_date
+    ):
+        """
+        Bug discovered by OSSFuzz that caused an exception in pytz to halt parsing.
+        Regression test to ensure that this is not reintroduced.
+        """
+        self.when_date_is_parsed_with_args_and_settings(
+            date_string,
+            languages=languages,
+            locales=locales,
+            region=region,
+            date_formats=date_formats,
+            settings={
+                "CACHE_SIZE_LIMIT": 1000,
+                "DATE_ORDER": "YDM",
+                "DEFAULT_LANGUAGES": [
+                    "mzn",
+                    "as",
+                    "af",
+                    "fur",
+                    "sr-Cyrl",
+                    "kw",
+                    "ne",
+                    "en",
+                    "vi",
+                    "teo",
+                    "sr",
+                    "cgg",
+                ],
+                "LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD": 0.18823535008398845,
+                "NORMALIZE": True,
+                "PARSERS": ["custom-formats", "absolute-time"],
+                "PREFER_DATES_FROM": "past",
+                "PREFER_DAY_OF_MONTH": "first",
+                "PREFER_LOCALE_DATE_ORDER": True,
+                "PREFER_MONTH_OF_YEAR": "current",
+                "RELATIVE_BASE": datetime(
+                    year=1970, month=1, day=1, hour=0, minute=0, second=0
+                ),
+                "REQUIRE_PARTS": [],
+                "RETURN_AS_TIMEZONE_AWARE": False,
+                "RETURN_TIME_AS_PERIOD": False,
+                "SKIP_TOKENS": [],
+                "STRICT_PARSING": False,
+                "TIMEZONE": "America/Hermosillo",
+                "TO_TIMEZONE": "Asia/Almaty",
+            },
+        )
+        self.then_parsed_date_and_time_is(expected_date)
+
     @parameterized.expand(
         [
             param(date_string="January 24, 2014", locales=["pt-AO"]),
@@ -133,6 +196,24 @@ def when_date_is_parsed(self, date_string, languages=None, locales=None):
     def when_date_is_parsed_with_settings(self, date_string, settings=None):
         self.result = dateparser.parse(date_string, settings=settings)
 
+    def when_date_is_parsed_with_args_and_settings(
+        self,
+        date_string,
+        languages=None,
+        locales=None,
+        region=None,
+        date_formats=None,
+        settings=None,
+    ):
+        self.result = dateparser.parse(
+            date_string,
+            languages=languages,
+            locales=locales,
+            region=region,
+            date_formats=date_formats,
+            settings=settings,
+        )
+
     def then_parsed_date_is(self, expected_date):
         self.assertEqual(
             self.result, datetime.combine(expected_date, datetime.min.time())