Skip to content

Commit

Permalink
Merge branch 'scrapinghub:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
soksanichenko authored Nov 11, 2024
2 parents 257f50b + a4144c6 commit 7a4d21b
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 41 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[flake8]

max-line-length = 119
ignore = W503, E203, E501, E722, F401
ignore = W503, E203, E501, E722, F401, E701, E704

exclude =
docs/conf.py
46 changes: 27 additions & 19 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,9 @@ jobs:
fail-fast: false
matrix:
include:
- python-version: 3.8
- python-version: 3.9
env:
TOXENV: flake8
- python-version: 3.7
env:
TOXENV: py
- python-version: 3.8
env:
TOXENV: py
- python-version: 3.9
env:
TOXENV: py
Expand All @@ -38,30 +32,44 @@ jobs:
- python-version: "3.11"
env:
TOXENV: latest
- python-version: "3.11"
- python-version: "3.12"
env:
TOXENV: py
- python-version: "3.12"
env:
TOXENV: latest
- python-version: "3.13"
env:
TOXENV: py
- python-version: "3.13"
env:
TOXENV: latest
- python-version: "3.13"
env:
TOXENV: twinecheck
- python-version: "3.11" # Keep in sync with tox.ini
- python-version: "3.12" # Keep in sync with tox.ini
env:
TOXENV: docs
steps:
- uses: actions/checkout@v3
- name: 'Set up Python ${{ matrix.python-version }}'
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '${{ matrix.python-version }}'
python-version: ${{ matrix.python-version }}
- name: Install language-pack-fr
run: sudo apt-get update && sudo apt-get install language-pack-fr
- name: Install python dependencies
run: pip install tox
- name: tox
run: tox -e py
run: sudo apt-get update && sudo apt-get install -y language-pack-fr build-essential
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install tox
- name: Run tests
run: tox -e ${{ matrix.env.TOXENV }}
- name: Upload coverage.xml to codecov
if: ${{ matrix.python-version == '3.9' && matrix.env.TOXENV == 'latest'}}
if: ${{ matrix.python-version == '3.9' && matrix.env.TOXENV == 'latest' }}
uses: codecov/codecov-action@v3

pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: pre-commit/[email protected]
- uses: actions/checkout@v3
- uses: pre-commit/[email protected]
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.x"
- name: Install dependencies
Expand Down
10 changes: 5 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ For that reason it is important that the input is a valid date, otherwise it cou

To reduce the possibility of receiving false positives, make sure that:

- The input string it's a valid date and it doesn't contain any other words or numbers.
- If you know the language or languages beforehand you add them through the ``languages`` or ``locales`` properties.
- The input string is a valid date and doesn't contain any other words or numbers.
- If you know the language or languages beforehand, you add them through the ``languages`` or ``locales`` properties.


On the other hand, if you want to exclude any of the default parsers
Expand All @@ -156,7 +156,7 @@ are executed, you can do so through the
Installation
------------

Dateparser supports Python >= 3.7. You can install it by doing:
Dateparser supports Python >= 3.9. You can install it by doing:

::

Expand All @@ -172,7 +172,7 @@ If you want to use the jalali or hijri calendar, you need to install the
Common use cases
----------------

**dateparser** can be used with a really different number of purposes,
**dateparser** can be used for a wide variety of purposes,
but it stands out when it comes to:

Consuming data from different sources:
Expand All @@ -191,7 +191,7 @@ Offering natural interaction with users:

- **Tooling and CLI**: allow users to write “3 days ago” to retrieve
information.
- **Search engine**: allow people to search by date in an easiest /
- **Search engine**: allow people to search by date in an easy /
natural format.
- **Bots**: allow users to interact with a bot easily.

Expand Down
2 changes: 1 addition & 1 deletion dateparser/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def check_settings(settings):
setting_props = settings_values[setting_name]

# check type:
if not setting_type == setting_props["type"]:
if not isinstance(setting_value, setting_props["type"]):
raise SettingValidationError(
'"{}" must be "{}", not "{}".'.format(
setting_name, setting_props["type"].__name__, setting_type.__name__
Expand Down
13 changes: 9 additions & 4 deletions dateparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,10 +598,13 @@ def _correct_for_month(self, dateobj):
relative_base_month = (
relative_base.month if hasattr(relative_base, "month") else relative_base
)
if getattr(self, "_token_month", None) or relative_base_month:

if getattr(self, "_token_month", None):
return dateobj

dateobj = set_correct_month_from_settings(dateobj, self.settings)
dateobj = set_correct_month_from_settings(
dateobj, self.settings, relative_base_month
)
return dateobj

@classmethod
Expand All @@ -613,11 +616,13 @@ def parse(cls, datestring, settings, tz=None):
# correction for past, future if applicable
dateobj = po._correct_for_time_frame(dateobj, tz)

# correction for preference of month: beginning, current, end
# must happen before day so that day is derived from the correct month
dateobj = po._correct_for_month(dateobj)

# correction for preference of day: beginning, current, end
dateobj = po._correct_for_day(dateobj)

# correction for preference of month: beginning, current, end
dateobj = po._correct_for_month(dateobj)
period = po._get_period()

return dateobj, period
Expand Down
5 changes: 2 additions & 3 deletions dateparser/timezone_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,8 @@ def get_offset(tz_obj, regex, repl="", replw=""):
search_regex_parts.append(tz_obj[0])
yield get_offset(tz_obj, regex)

# alternate patterns
for replace, replacewith in tz_info.get("replace", []):
for tz_obj in tz_info["timezones"]:
# alternate patterns
for replace, replacewith in tz_info.get("replace", []):
search_regex_parts.append(re.sub(replace, replacewith, tz_obj[0]))
yield get_offset(tz_obj, regex, repl=replace, replw=replacewith)

Expand Down
8 changes: 8 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ ignore =
dateparser/freshness_date_parser.py E722
dateparser/parser.py E722
dateparser/docs/conf.py E402

# Additional ignored codes
E203
E501
E722
F401
E701
E704
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,18 @@
license="BSD",
zip_safe=False,
keywords="dateparser",
python_requires=">=3.7",
python_requires=">=3.8", # Python 3.8 is required for fuzzing
classifiers=[
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"License :: OSI Approved :: BSD License",
"Natural Language :: English",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation :: CPython",
],
)
2 changes: 1 addition & 1 deletion tests/test_clean_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def test_dates_which_match_locales_are_parsed(
languages=["en"],
region="",
date_formats=["%a", "%a", "%a", "%a"],
expected_date=datetime(1969, 12, 31, 14, 4),
expected_date=datetime(1969, 1, 31, 14, 4),
)
]
)
Expand Down
60 changes: 60 additions & 0 deletions tests/test_date_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1265,6 +1265,66 @@ def test_prefer_dates_from_with_timezone(
self.then_date_was_parsed_by_date_parser()
self.then_date_obj_exactly_is(expected)

# Year-only inputs ("2015", "2020") carry neither a month nor a day, so the
# expected dates below pin how the PREFER_MONTH_OF_YEAR and PREFER_DAY_OF_MONTH
# settings fill in the missing parts relative to RELATIVE_BASE (``today``).
@parameterized.expand(
[
# Month "current" + day "current": both come from the base date (Feb 10).
param(
"2015",
prefer_day="current",
prefer_month="current",
today=datetime(2010, 2, 10),
expected=datetime(2015, 2, 10),
),
# Month "current" + day "last": last day of February 2015 (non-leap year).
param(
"2015",
prefer_day="last",
prefer_month="current",
today=datetime(2010, 2, 10),
expected=datetime(2015, 2, 28),
),
# Month "current" + day "first": first day of the base month.
param(
"2015",
prefer_day="first",
prefer_month="current",
today=datetime(2010, 2, 10),
expected=datetime(2015, 2, 1),
),
# Month "last" + day "current": December, keeping the base date's day (10).
param(
"2015",
prefer_day="current",
prefer_month="last",
today=datetime(2010, 2, 10),
expected=datetime(2015, 12, 10),
),
# Month "last" + day "last": the day is the last day of December (31),
# not the last day of the base month (February).
param(
"2015",
prefer_day="last",
prefer_month="last",
today=datetime(2010, 2, 10),
expected=datetime(2015, 12, 31),
),
# Same as the second case but for a leap year: February 2020 ends on the 29th.
param(
"2020",  # Leap year last day test
prefer_day="last",
prefer_month="current",
today=datetime(2010, 2, 10),
expected=datetime(2020, 2, 29),
),
]
)
def test_dates_with_no_day_or_month(
self, date_string, prefer_day, prefer_month, today=None, expected=None
):
# Build the parser with the day/month preferences under test; ``today`` is
# passed through as RELATIVE_BASE.
self.given_parser(
settings={
"PREFER_DAY_OF_MONTH": prefer_day,
"PREFER_MONTH_OF_YEAR": prefer_month,
"RELATIVE_BASE": today,
}
)
# The given_/when_/then_ helpers are defined elsewhere in the test class
# (not visible here); the final check compares against ``expected``.
self.when_date_is_parsed(date_string)
self.then_date_was_parsed_by_date_parser()
self.then_date_obj_exactly_is(expected)

def given_local_tz_offset(self, offset):
self.add_patch(
patch.object(
Expand Down
12 changes: 9 additions & 3 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def test_search_date_string(self, shortname, datetime_string):
"Die UdSSR blieb gemäß dem Neutralitätspakt "
"vom 13. April 1941 gegenüber Japan vorerst neutral.",
[
("Die", datetime.datetime(1999, 12, 28, 0, 0)),
("Die", datetime.datetime(1999, 1, 28, 0, 0)),
("13. April 1941", datetime.datetime(1941, 4, 13, 0, 0)),
],
settings={"RELATIVE_BASE": datetime.datetime(2000, 1, 1)},
Expand Down Expand Up @@ -825,7 +825,10 @@ def test_splitting_of_not_parsed(self, shortname, string, expected, settings=Non
"бомбардировки срещу Япония, използувайки новозавладените острови като бази.",
),
# Chinese
param("zh", "不過大多數人仍多把第二次世界大戰的爆發定為1939年9月1日德國入侵波蘭開始,2015年04月08日10点05。"),
param(
"zh",
"不過大多數人仍多把第二次世界大戰的爆發定為1939年9月1日德國入侵波蘭開始,2015年04月08日10点05。",
),
# Czech
param(
"cs",
Expand Down Expand Up @@ -897,7 +900,10 @@ def test_splitting_of_not_parsed(self, shortname, string, expected, settings=Non
"d'Etiopia. Il 9 maggio 1936 venne proclamato l'Impero. ",
),
# Japanese
param("ja", "1933年(昭和8年)12月23日午前6時39分、宮城(現:皇居)内の産殿にて誕生。"),
param(
"ja",
"1933年(昭和8年)12月23日午前6時39分、宮城(現:皇居)内の産殿にて誕生。",
),
# Persian
param("fa", "نگ جهانی دوم جنگ جدی بین سپتامبر 1939 و 2 سپتامبر 1945 بود."),
# Polish
Expand Down

0 comments on commit 7a4d21b

Please sign in to comment.