Skip to content

Commit

Permalink
Merge pull request #4 from MGMCN/add-university
Browse files (browse the repository at this point in the history)
add university information
  • Loading branch information
MGMCN authored Mar 14, 2024
2 parents 4553d03 + e841a17 commit cde0373
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions crawler.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -81,6 +81,7 @@ def clean_text(element):
return element.get_text(strip=True).replace("\n", "").replace("\t", "")
return ""

university = clean_text(soup.find('p', class_='me-md-3'))
location = clean_text(soup.find('p', string=lambda x: x and '勤務地' in x))
research_field = clean_text(soup.find('p', string=lambda x: x and '研究分野' in x))
start_date = clean_text(soup.find('p', string=lambda x: x and '公開開始日' in x))
Expand Down Expand Up @@ -115,6 +116,7 @@ def clean_text(element):
if parent:
qualification += clean_text(parent)

# print(f'University: {university}')
# print(f'Location: {location}')
# print(f'Research Field: {research_field}')
# print(f'Start Date: {start_date}')
Expand All @@ -124,6 +126,7 @@ def clean_text(element):
# print(f'Qualification: {qualification}')

return {
"university": university,
"location": location,
"research_field": research_field,
"start_date": start_date,
Expand All @@ -134,6 +137,7 @@ def clean_text(element):
}
else:
return {
"university": None,
"location": None,
"research_field": None,
"start_date": None,
Expand Down

0 comments on commit cde0373

Please sign in to comment.