Merge pull request #330 from cmc333333/37-42-fixes
Fixes for 37 CFR 42 history
cmc333333 authored Dec 2, 2016
2 parents c682688 + da0a3d4 commit e96412e
Showing 18 changed files with 173 additions and 111 deletions.
regparser/commands/fill_with_rules.py (15 additions & 2 deletions)

@@ -10,15 +10,28 @@
 logger = logging.getLogger(__name__)
 
 
+def drop_initial_orphans(versions_with_parents, existing):
+    """We can only build a version if there's a complete tree before it to
+    build from. As such, we need to drop any orphaned versions from the
+    beginning of our list"""
+    for idx, (version, parent) in enumerate(versions_with_parents):
+        if version.identifier in existing:
+            return versions_with_parents[idx:]
+        logger.warning("No previous annual edition to version %s; ignoring",
+                       version.identifier)
+    return []
+
+
 def dependencies(tree_dir, version_dir, versions_with_parents):
     """Set up the dependency graph for this regulation. First calculates
     "gaps" -- versions for which there is no existing tree. In this
     calculation, we ignore the first version, as we won't be able to build
     anything for it. Add dependencies for any gaps, tying the output tree to
     the preceding tree, the version info and the parsed rule"""
     existing_tree_ids = set(tree.path[-1] for tree in tree_dir.sub_entries())
-    versions_with_parents = versions_with_parents[1:]
-    gaps = [(version, parent) for (version, parent) in versions_with_parents
+    version_pairs = drop_initial_orphans(
+        versions_with_parents, existing_tree_ids)
+    gaps = [(version, parent) for (version, parent) in version_pairs
             if version.identifier not in existing_tree_ids]
 
     deps = dependency.Graph()
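To make the new helper concrete, here is a small self-contained sketch of its behavior (not part of the commit; SimpleVersion is a hypothetical stand-in for regparser's Version, of which only the identifier field matters here):

import logging
from collections import namedtuple

logger = logging.getLogger(__name__)
SimpleVersion = namedtuple('SimpleVersion', ['identifier'])


def drop_initial_orphans(versions_with_parents, existing):
    # Same logic as the new helper above: warn about versions at the head
    # of the list that have no tree to build from, keep the rest.
    for idx, (version, parent) in enumerate(versions_with_parents):
        if version.identifier in existing:
            return versions_with_parents[idx:]
        logger.warning("No previous annual edition to version %s; ignoring",
                       version.identifier)
    return []


pairs = [(SimpleVersion(vid), None) for vid in 'abcd']
# 'a' and 'b' are orphans (no existing tree); building can start at 'c'
kept = drop_initial_orphans(pairs, existing={'c'})
assert [v.identifier for v, _ in kept] == ['c', 'd']

Note that only orphans at the head of the list are dropped; gaps after the first buildable version remain, and those are exactly what dependencies() wires into the dependency graph.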
regparser/commands/import_notice.py (3 additions & 2 deletions)

@@ -28,8 +28,9 @@ def has_requirements(notice_xml):
         logger.error("Missing publish date (eregs-published-date attribute "
                      "on the DATES tag)")
     elif not notice_xml.fr_volume:
-        logger.error("Missing volume (eregs-fr-volume attribute on the first "
-                     "PRTPAGE tag)")
+        logger.error("Missing volume (fr-volume attribute on root)")
+    elif not notice_xml.start_page:
+        logger.error("Missing start page (fr-start-page attribute on root)")
     else:
         return True
 
regparser/commands/preprocess_notice.py (4 additions & 0 deletions)

@@ -50,16 +50,20 @@ def preprocess_notice(document_number):
         "effective_on",
         "cfr_references",
         "comments_close_on",
+        "end_page",
         "full_text_xml_url",
         "html_url",
         "publication_date",
         "regulation_id_numbers",
+        "start_page",
         "volume"
     ])
     notice_xmls = list(notice_xmls_for_url(meta['full_text_xml_url']))
     for notice_xml in notice_xmls:
         notice_xml.published = meta['publication_date']
         notice_xml.fr_volume = meta['volume']
+        notice_xml.start_page = meta['start_page']
+        notice_xml.end_page = meta['end_page']
         if meta.get('html_url'):
             notice_xml.fr_html_url = meta['html_url']
         if meta.get("comments_close_on"):
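For reference, the metadata dict consumed above comes from the federalregister.gov API; a plausible shape for the fields requested here (values are illustrative only, not from the commit):

# Illustrative only -- plausible values for the requested fields
meta = {
    "publication_date": "2016-12-02",
    "volume": 81,
    "start_page": 86952,
    "end_page": 86963,
    "full_text_xml_url": "https://www.federalregister.gov/example.xml",
}

The three page/volume fields land on the notice's root element as fr-volume, fr-start-page and fr-end-page attributes via the _root_property setters added in regparser/notice/xml.py below.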
regparser/commands/versions.py (7 additions & 12 deletions)

@@ -58,21 +58,16 @@ def generate_dependencies(version_dir, version_ids, delays_by_version):
     return deps
 
 
-class InvalidEffectiveDate(Exception):
-    def __init__(self, version_id):
-        self.version_id = version_id
-        super(InvalidEffectiveDate, self).__init__(
-            "No effective date for this rule: {}".format(version_id))
-
-
 def write_to_disk(xml, version_entry, delay=None):
     """Serialize a Version instance to disk"""
     effective = xml.effective if delay is None else delay.until
-    if not effective:
-        raise InvalidEffectiveDate(xml.version_id)
-    version = Version(identifier=xml.version_id, effective=effective,
-                      published=xml.published)
-    version_entry.write(version)
+    if effective:
+        version = Version(identifier=xml.version_id, effective=effective,
+                          published=xml.published)
+        version_entry.write(version)
+    else:
+        logger.warning("No effective date for this rule: %s. Skipping",
+                       xml.version_id)
 
 
 def write_if_needed(cfr_title, cfr_part, version_ids, xmls, delays_by_version):
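A quick sketch of the date resolution performed above (FakeXML here is a hypothetical stand-in, mirroring only the fields write_to_disk touches): an undelayed rule keeps its own effective date, a delayed rule takes the delaying rule's until date, and a rule with neither is now logged and skipped rather than raising:

from collections import namedtuple
from datetime import date

Delay = namedtuple('Delay', ['by', 'until'])
FakeXML = namedtuple('FakeXML', ['version_id', 'effective', 'published'])


def resolve_effective(xml, delay=None):
    # Mirrors the first line of write_to_disk above
    return xml.effective if delay is None else delay.until


final_rule = FakeXML('111', date(2002, 2, 2), date(2002, 1, 1))
proposed = FakeXML('222', None, date(2003, 1, 1))
delay = Delay(by='333', until=date(2004, 4, 4))

assert resolve_effective(final_rule) == date(2002, 2, 2)
assert resolve_effective(final_rule, delay) == date(2004, 4, 4)
assert resolve_effective(proposed) is None  # would now warn and skip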
regparser/notice/fake.py (2 additions & 1 deletion)

@@ -7,7 +7,6 @@
 def build(doc_number, effective_on, cfr_title, cfr_part):
     notice_xml = NoticeXML(etree.fromstring("""
     <ROOT>
-        <PRTPAGE P="1" />
         <AGENCY></AGENCY>
         <SUBJECT></SUBJECT>
     </ROOT>

@@ -16,5 +15,7 @@ def build(doc_number, effective_on, cfr_title, cfr_part):
     notice_xml.version_id = doc_number
     notice_xml.effective = effective_on
     notice_xml.published = effective_on
+    notice_xml.start_page = 0
+    notice_xml.end_page = 0
     notice_xml.cfr_refs = [TitlePartsRef(cfr_title, [cfr_part])]
     return notice_xml
regparser/notice/xml.py (10 additions & 22 deletions)

@@ -50,10 +50,14 @@ def add_children(el, children):
     return el
 
 
-def _root_property(attrib):
-    """We add multiple attributes to the NoticeXML's root element"""
+def _root_property(attrib, transform=None):
+    """We add multiple attributes to the NoticeXML's root element. Account for
+    data transforms (e.g. to an integer)"""
     def getter(self):
-        return self.xml.attrib.get(attrib)
+        value = self.xml.attrib.get(attrib)
+        if transform and value is not None:
+            return transform(value)
+        return value
 
     def setter(self, value):
         self.xml.attrib[attrib] = str(value)

@@ -330,25 +334,6 @@ def published(self):
     def published(self, value):
         self._set_date_attr('published', value)
 
-    @property
-    def fr_volume(self):
-        value = self.xpath(".//PRTPAGE")[0].attrib.get('eregs-fr-volume')
-        if value:
-            return int(value)
-
-    @fr_volume.setter
-    def fr_volume(self, value):
-        for prtpage in self.xpath(".//PRTPAGE"):
-            prtpage.attrib['eregs-fr-volume'] = str(value)
-
-    @property
-    def start_page(self):
-        return int(self.xpath(".//PRTPAGE")[0].attrib["P"]) - 1
-
-    @property
-    def end_page(self):
-        return int(self.xpath(".//PRTPAGE")[-1].attrib["P"])
-
     @cached_property  # rather expensive operation, so cache results
     def amendments(self):
         return fetch_amendments(self.xml)

@@ -399,6 +384,9 @@ def supporting_documents(self, value):
     fr_html_url = _root_property('fr-html-url')
     comment_doc_id = _root_property('eregs-comment-doc-id')
     primary_docket = _root_property('eregs-primary-docket')
+    fr_volume = _root_property('fr-volume', int)
+    start_page = _root_property('fr-start-page', int)
+    end_page = _root_property('fr-end-page', int)
 
     def as_dict(self):
         """We use JSON to represent notices in the API. This converts the
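The property factory above can be exercised standalone. A minimal sketch (the property(getter, setter) wiring is assumed, since the diff cuts off before the factory's return; MiniNotice is a hypothetical stand-in for NoticeXML):

from lxml import etree


def _root_property(attrib, transform=None):
    def getter(self):
        value = self.xml.attrib.get(attrib)
        if transform and value is not None:
            return transform(value)
        return value

    def setter(self, value):
        self.xml.attrib[attrib] = str(value)

    return property(getter, setter)  # assumed wiring, not shown in the diff


class MiniNotice(object):
    def __init__(self, xml):
        self.xml = xml

    fr_volume = _root_property('fr-volume', int)


notice = MiniNotice(etree.fromstring('<ROOT/>'))
notice.fr_volume = 124           # stored on the root as fr-volume="124"
assert notice.fr_volume == 124   # transformed back to an int on read

This is also why the old PRTPAGE-based fr_volume/start_page/end_page properties could be deleted: the values now live as plain root attributes, set during preprocessing.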
regparser/tree/depth/derive.py (0 additions & 1 deletion)

@@ -131,7 +131,6 @@ def derive_depths(original_markers, additional_constraints=None):
     # @todo: There's probably efficiency gains to making these rules over
     # prefixes (see above) rather than over the whole collection at once
     problem.addConstraint(rules.same_parent_same_type, all_vars)
-    problem.addConstraint(rules.stars_occupy_space, all_vars)
 
     for constraint in additional_constraints:
         constraint(problem.addConstraint, all_vars)
regparser/tree/depth/optional_rules.py (38 additions & 1 deletion)

@@ -7,7 +7,7 @@
 from constraint import InSetConstraint
 
 from regparser.tree.depth import markers
-from regparser.tree.depth.rules import ancestors
+from regparser.tree.depth.rules import ancestors, _level_and_children
 
 
 def depth_type_inverses(constrain, all_variables):

@@ -43,6 +43,43 @@ def inner(prev_typ, prev_depth, typ, depth):
         constrain(inner, [prev_typ, prev_depth, typ, depth])
 
 
+def stars_occupy_space(constrain, all_variables):
+    """Star markers can't be ignored in sequence, so 1, *, 2 doesn't make
+    sense for a single level, unless it's an inline star. In the inline
+    case, we can think of it as 1, intro-text-to-1, 2"""
+
+    def per_level(elements):
+        level, grouped_children = _level_and_children(elements)
+
+        if not level:
+            return True     # Base Case
+
+        last_idx, last_typ = -1, None
+        for typ, idx, _ in level:
+            if typ == markers.stars:
+                if idx == 0:    # STARS_TAG, not INLINE_STARS
+                    last_idx += 1
+            # sequences must be increasing. Exception for markerless
+            elif (last_idx >= idx and
+                    markers.markerless not in (last_typ, typ)):
+                return False
+            else:
+                last_idx = idx
+                last_typ = typ
+
+        for children in grouped_children:   # Recurse
+            if not per_level(children):
+                return False
+        return True
+
+    def inner(*all_vars):
+        elements = [tuple(all_vars[i:i + 3])
+                    for i in range(0, len(all_vars), 3)]
+        return per_level(elements)
+
+    constrain(inner, all_variables)
+
+
 def limit_paragraph_types(*p_types):
     """Constraint paragraphs to a limited set of paragraph types. This can
     reduce the search space if we know (for example) that the text comes from
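The core sequencing check can be illustrated standalone. In the solver, each paragraph is a (type, index, depth) triple, where index is the marker's position within its type ('1' is index 0, '2' is index 1, and so on). A simplified sketch follows (the real per_level also exempts markerless paragraphs and recurses into child levels; INLINE_STARS sitting at index 1 is an assumption implied by the idx == 0 check):

STARS, INTS = 'stars', 'ints'   # stand-ins for the real marker types


def sequence_ok(level):
    last_idx = -1
    for typ, idx in level:
        if typ == STARS:
            if idx == 0:        # STARS_TAG occupies the next slot...
                last_idx += 1
            # ...while INLINE_STARS is invisible to the sequence
        elif last_idx >= idx:   # otherwise marker indices must increase
            return False
        else:
            last_idx = idx
    return True


# "1", STARS_TAG, "3": the star stands in for the elided "2" -- accepted
assert sequence_ok([(INTS, 0), (STARS, 0), (INTS, 2)])
# "1", STARS_TAG, "2": the star already occupied "2"'s slot -- rejected
assert not sequence_ok([(INTS, 0), (STARS, 0), (INTS, 1)])
# "1", INLINE_STARS, "2": intro text to "1", so "2" may follow -- accepted
assert sequence_ok([(INTS, 0), (STARS, 1), (INTS, 1)])

Because derive.py above no longer applies this rule unconditionally, it becomes opt-in per processor; reg_text.py below opts in explicitly via additional_constraints.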
regparser/tree/depth/rules.py (0 additions & 33 deletions)

@@ -140,39 +140,6 @@ def per_level(elements, parent_type=None):
     return per_level(elements)
 
 
-def stars_occupy_space(*all_vars):
-    """Star markers can't be ignored in sequence, so 1, *, 2 doesn't make
-    sense for a single level, unless it's an inline star. In the inline
-    case, we can think of it as 1, intro-text-to-1, 2"""
-    elements = [tuple(all_vars[i:i + 3]) for i in range(0, len(all_vars), 3)]
-
-    def per_level(elements):
-        level, grouped_children = _level_and_children(elements)
-
-        if not level:
-            return True     # Base Case
-
-        last_idx, last_typ = -1, None
-        for typ, idx, _ in level:
-            if typ == markers.stars:
-                if idx == 0:    # STARS_TAG, not INLINE_STARS
-                    last_idx += 1
-            # sequences must be increasing. Exception for markerless
-            elif (last_idx >= idx and
-                    markers.markerless not in (last_typ, typ)):
-                return False
-            else:
-                last_idx = idx
-                last_typ = typ
-
-        for children in grouped_children:   # Recurse
-            if not per_level(children):
-                return False
-        return True
-
-    return per_level(elements)
-
-
 def depth_type_order(order):
     """Create a function which constrains paragraphs depths to a particular
     type sequence. For example, we know a priori what regtext and
regparser/tree/xml_parser/reg_text.py (2 additions & 1 deletion)

@@ -374,7 +374,8 @@ class RegtextParagraphProcessor(paragraph_processor.ParagraphProcessor):
     def additional_constraints(self):
        return [
            optional_rules.depth_type_inverses,
-           optional_rules.limit_sequence_gap(3)
+           optional_rules.limit_sequence_gap(3),
+           optional_rules.stars_occupy_space,
        ] + self.relaxed_constraints()
 
    def relaxed_constraints(self):
setup.py (1 addition & 1 deletion)

@@ -2,7 +2,7 @@
 
 setup(
     name="regparser",
-    version="4.0.0",
+    version="4.1.0",
     packages=find_packages(),
     classifiers=[
         'License :: Public Domain',
tests/commands_fill_with_rules_tests.py (11 additions & 0 deletions)

@@ -103,3 +103,14 @@ def test_process(self, Notice, compile_regulation):
         self.assertEqual(changes, {
             "1000-2-b": ["2b changes"], "1000-2-c": ["2c changes"],
             "1000-4-a": ["4a changes"]})
+
+
+def test_drop_initial_orphan_versions():
+    version_list = [Version(letter, None, None) for letter in 'abcdef']
+    version_pairs = list(zip(version_list, [None] + version_list[1:]))
+    existing = {'c', 'e'}
+
+    result = fill_with_rules.drop_initial_orphans(version_pairs, existing)
+    result = [pair[0].identifier for pair in result]
+
+    assert result == ['c', 'd', 'e', 'f']
tests/commands_import_notice_tests.py (25 additions & 19 deletions)

@@ -1,27 +1,33 @@
-from unittest import TestCase
-
 from regparser.commands import import_notice
 from regparser.notice.xml import NoticeXML
 from regparser.test_utils.xml_builder import XMLBuilder
 
 
-class CommandsImportNoticeTests(TestCase):
-    def test_has_requirments(self):
-        """Validate that certain attributes are required"""
-        with XMLBuilder("ROOT", **{"eregs-version-id": "vvv"}) as ctx:
-            ctx.PRTPAGE(P=44, **{"eregs-fr-volume": "124"})
-            ctx.DATES(**{"eregs-published-date": "2005-05-05"})
-        notice_xml = NoticeXML(ctx.xml_copy())
-        self.assertTrue(import_notice.has_requirements(notice_xml))
+def test_has_requirments():
+    """Validate that certain attributes are required"""
+    root_attrs = {
+        "eregs-version-id": "vvv",
+        "fr-volume": 124,
+        "fr-start-page": 44,
+        "fr-end-page": 55
+    }
+    with XMLBuilder("ROOT", **root_attrs) as ctx:
+        ctx.DATES(**{"eregs-published-date": "2005-05-05"})
+    notice_xml = NoticeXML(ctx.xml_copy())
+    assert import_notice.has_requirements(notice_xml)
 
-        notice_xml = NoticeXML(ctx.xml_copy())
-        del notice_xml.xml.attrib['eregs-version-id']
-        self.assertFalse(import_notice.has_requirements(notice_xml))
+    notice_xml = NoticeXML(ctx.xml_copy())
+    del notice_xml.xml.attrib['eregs-version-id']
+    assert not import_notice.has_requirements(notice_xml)
 
-        notice_xml = NoticeXML(ctx.xml_copy())
-        del notice_xml.xml.xpath('//PRTPAGE')[0].attrib['eregs-fr-volume']
-        self.assertFalse(import_notice.has_requirements(notice_xml))
+    notice_xml = NoticeXML(ctx.xml_copy())
+    del notice_xml.xml.attrib['fr-volume']
+    assert not import_notice.has_requirements(notice_xml)
+
+    notice_xml = NoticeXML(ctx.xml_copy())
+    del notice_xml.xml.attrib['fr-start-page']
+    assert not import_notice.has_requirements(notice_xml)
 
-        notice_xml = NoticeXML(ctx.xml_copy())
-        del notice_xml.xml.xpath('//DATES')[0].attrib['eregs-published-date']
-        self.assertFalse(import_notice.has_requirements(notice_xml))
+    notice_xml = NoticeXML(ctx.xml_copy())
+    del notice_xml.xml.xpath('//DATES')[0].attrib['eregs-published-date']
+    assert not import_notice.has_requirements(notice_xml)
tests/commands_preprocess_notice_tests.py (3 additions & 1 deletion)

@@ -31,7 +31,9 @@ def expect_common_json(self, **kwargs):
         params = {'effective_on': '2008-08-08',
                   'publication_date': '2007-07-07',
                   'full_text_xml_url': 'some://url',
-                  'volume': 45}
+                  'volume': 45,
+                  'start_page': 111,
+                  'end_page': 222}
         params.update(kwargs)
         self.expect_json_http(params, uri=re.compile('.*federalregister.*'))
         # No data from regs.gov
tests/commands_versions_tests.py (12 additions & 8 deletions)

@@ -102,14 +102,6 @@ def test_write_to_disk(self):
         self.assertEqual((path / '111').read().effective, date(2002, 2, 2))
         self.assertEqual((path / '222').read().effective, date(2004, 4, 4))
 
-    def test_write_to_disk_no_effective(self):
-        """If a version is somehow associated with a proposed rule (or a final
-        rule has been misparsed), we should get an exception"""
-        xml = Mock()
-        xml.effective = None
-        with self.assertRaises(versions.InvalidEffectiveDate):
-            versions.write_to_disk(xml, entry.Version('12', '1000', '11'))
-
     @patch('regparser.commands.versions.write_to_disk')
     def test_write_if_needed_raises_exception(self, write_to_disk):
         """If an input file is missing, this raises an exception"""

@@ -158,3 +150,15 @@ def test_write_if_needed_delays(self, write_to_disk):
             'title', 'part', ['111'], {'111': 'xml111'},
             {'111': versions.Delay('222', 'until-date')})
         self.assertTrue(write_to_disk.called)
+
+
+def test_write_to_disk_no_effective(monkeypatch):
+    """If a version is somehow associated with a proposed rule (or a final
+    rule has been misparsed), we should get a warning"""
+    xml = Mock(effective=None, version_id='vv123')
+    monkeypatch.setattr(versions, 'logger', Mock())
+
+    versions.write_to_disk(xml, entry.Version('12', '1000', '11'))
+
+    assert versions.logger.warning.called
+    assert 'vv123' in versions.logger.warning.call_args[0]
(Diffs for the remaining 3 of the 18 changed files are not shown.)
