Skip to content

Commit

Permalink
Merge pull request #49 from roclark/patch/fix-ncaaf-boxscore-parsing
Browse files Browse the repository at this point in the history
Update NCAAF Boxscore meta information parsing
  • Loading branch information
roclark authored Dec 29, 2018
2 parents 01e124a + aa51f33 commit 39e04b2
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 52 deletions.
64 changes: 24 additions & 40 deletions sportsreference/ncaaf/boxscore.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _retrieve_html_page(self, uri):
return None
return pq(utils._remove_html_comment_tags(url_data))

def _parse_game_date_and_location(self, field, boxscore):
def _parse_game_date_and_location(self, boxscore):
"""
Retrieve the game's date and location.
Expand All @@ -135,44 +135,31 @@ def _parse_game_date_and_location(self, field, boxscore):
Parameters
----------
field : string
The name of the attribute to parse
boxscore : PyQuery object
A PyQuery object containing all of the HTML data from the boxscore.
Returns
-------
string
Depending on the requested field, returns a text representation of
either the date or location of the game.
"""
scheme = BOXSCORE_SCHEME[field]
scheme = BOXSCORE_SCHEME['time']
items = [i.text() for i in boxscore(scheme).items()]
game_info = items[0].split('\n')
index = BOXSCORE_ELEMENT_INDEX[field]
# Bowl games and championship games use a different layout for the game
# information: the specific game title, such as the name of the bowl
# game, is the first line of text. For those games, the index of every
# field must be increased by 1 to skip over the title line.
for day in ['monday', 'tuesday', 'wednesday', 'thursday', 'friday',
'saturday', 'sunday']:
# The day info is generally the first line in text for non-special
# games.
if day in game_info[0].lower():
if index >= len(game_info):
return ''
if 'sports logos.net' in game_info[index].lower() or \
game_info[index] == '':
return ''
return game_info[index]
index += 1
if index >= len(game_info):
return ''
if 'sports logos.net' in game_info[index].lower() or \
game_info[index] == '':
return ''
return game_info[index]
time = ''
date = ''
stadium = ''
for line in game_info:
time_match = re.findall(r'(\d:\d\d|\d\d:\d\d)', line.lower())
if len(time_match) > 0:
time = line
for day in ['monday', 'tuesday', 'wednesday', 'thursday', 'friday',
'saturday', 'sunday']:
if day in line.lower():
date = line
# In general, locations are in the format 'Stadium Name - City,
# State'. Since the ' - ' characters seem to be unique to the
# location line, it should be safe to use this as a matcher.
if ' - ' in line:
stadium = line
setattr(self, '_time', time)
setattr(self, '_date', date)
setattr(self, '_stadium', stadium)

def _parse_name(self, field, boxscore):
"""
Expand Down Expand Up @@ -230,14 +217,10 @@ def _parse_game_data(self, uri):
short_field == 'winning_abbr' or \
short_field == 'losing_name' or \
short_field == 'losing_abbr' or \
short_field == 'uri':
continue
if short_field == 'date' or \
short_field == 'uri' or \
short_field == 'date' or \
short_field == 'time' or \
short_field == 'stadium':
value = self._parse_game_date_and_location(short_field,
boxscore)
setattr(self, field, value)
continue
if short_field == 'away_name' or \
short_field == 'home_name':
Expand All @@ -252,6 +235,7 @@ def _parse_game_data(self, uri):
short_field,
index)
setattr(self, field, value)
self._parse_game_date_and_location(boxscore)

@property
def dataframe(self):
Expand Down
24 changes: 12 additions & 12 deletions tests/unit/test_ncaaf_boxscore.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,9 +300,9 @@ def test_game_information_regular_game(self):
"""
m = MockBoxscoreData(MockField(mock_field))

self.boxscore._parse_game_date_and_location(m)
for field, value in fields.items():
result = self.boxscore._parse_game_date_and_location(field, m)
assert result == value
assert getattr(self.boxscore, field) == value

def test_game_information_championship_game(self):
fields = ['date', 'time', 'stadium']
Expand All @@ -320,9 +320,9 @@ def test_game_information_championship_game(self):
"""
m = MockBoxscoreData(MockField(mock_field))

self.boxscore._parse_game_date_and_location(m)
for field, value in fields.items():
result = self.boxscore._parse_game_date_and_location(field, m)
assert result == value
assert getattr(self.boxscore, field) == value

def test_somewhat_limited_game_information(self):
fields = ['date', 'time', 'stadium']
Expand All @@ -337,9 +337,9 @@ def test_somewhat_limited_game_information(self):
"""
m = MockBoxscoreData(MockField(mock_field))

self.boxscore._parse_game_date_and_location(m)
for field, value in fields.items():
result = self.boxscore._parse_game_date_and_location(field, m)
assert result == value
assert getattr(self.boxscore, field) == value

def test_limited_game_information(self):
fields = ['date', 'time', 'stadium']
Expand All @@ -352,9 +352,9 @@ def test_limited_game_information(self):
mock_field = 'Friday Nov 24, 2017'
m = MockBoxscoreData(MockField(mock_field))

self.boxscore._parse_game_date_and_location(m)
for field, value in fields.items():
result = self.boxscore._parse_game_date_and_location(field, m)
assert result == value
assert getattr(self.boxscore, field) == value

def test_limited_game_information_championship(self):
fields = ['date', 'time', 'stadium']
Expand All @@ -370,9 +370,9 @@ def test_limited_game_information_championship(self):
"""
m = MockBoxscoreData(MockField(mock_field))

self.boxscore._parse_game_date_and_location(m)
for field, value in fields.items():
result = self.boxscore._parse_game_date_and_location(field, m)
assert result == value
assert getattr(self.boxscore, field) == value

def test_no_game_information_championship(self):
fields = ['date', 'time', 'stadium']
Expand All @@ -387,9 +387,9 @@ def test_no_game_information_championship(self):
"""
m = MockBoxscoreData(MockField(mock_field))

self.boxscore._parse_game_date_and_location(m)
for field, value in fields.items():
result = self.boxscore._parse_game_date_and_location(field, m)
assert result == value
assert getattr(self.boxscore, field) == value

def test_empty_boxscore_class_returns_dataframe_of_none(self):
fake_points = PropertyMock(return_value=None)
Expand Down

0 comments on commit 39e04b2

Please sign in to comment.