Skip to content

Commit

Permalink
add target and strat_name matchers
Browse files Browse the repository at this point in the history
  • Loading branch information
jonhusson committed Dec 17, 2016
1 parent ea84b2f commit 2a914d9
Showing 1 changed file with 40 additions and 7 deletions.
47 changes: 40 additions & 7 deletions notebooks/1_candidate_generation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,27 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import os\n",
"\n",
"# Database connection string for Snorkel. setdefault() keeps any SNORKELDB value\n",
"# already exported in the environment, so the notebook can run against another\n",
"# database without editing this cell; the literal below is only the fallback.\n",
"# NOTE(review): presumably snorkel reads SNORKELDB when it is imported, so keep\n",
"# this line above the snorkel import - confirm against the snorkel docs.\n",
"os.environ.setdefault('SNORKELDB', \"postgres://jhusson@localhost:5432/snorkel_strom\")\n",
"\n",
"from snorkel import SnorkelSession\n",
"session = SnorkelSession()"
]
Expand Down Expand Up @@ -74,7 +86,7 @@
"source": [
"from snorkel.models import candidate_subclass\n",
"\n",
"Spouse = candidate_subclass('Spouse', ['person1', 'person2'])"
"strom = candidate_subclass('strom', ['strom', 'strat_name'])"
]
},
{
Expand Down Expand Up @@ -123,9 +135,30 @@
},
"outputs": [],
"source": [
"from snorkel.matchers import PersonMatcher\n",
"from snorkel.matchers import RegexMatchSpan\n",
"\n",
"strom_matcher = RegexMatchSpan(rgx=\"stromatol|thrombol\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from snorkel.matchers import DictionaryMatch\n",
"from contextlib import closing\n",
"import urllib\n",
"import json\n",
"\n",
"# Download the stratigraphic name definitions from the Macrostrat API.\n",
"# The object returned by Python 2's urllib.urlopen is not a context manager,\n",
"# so closing() is used to make sure the connection is released even if\n",
"# reading or JSON decoding fails.\n",
"with closing(urllib.urlopen('https://macrostrat.org/api/v2/defs/strat_names?all')) as response:\n",
"    data = json.loads(response.read())\n",
"\n",
"# Note: despite its name, strat_dict is a set of the 'strat_name_long' values.\n",
"strat_dict = { r['strat_name_long'] for r in data['success']['data'] }\n",
"\n",
"\n",
"person_matcher = PersonMatcher(longest_match_only=True)"
"strat_matcher=DictionaryMatch(d=strat_dict,ignore_case=False,longest_match_only=True)"
]
},
{
Expand Down Expand Up @@ -704,7 +737,7 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
Expand All @@ -718,7 +751,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
"version": "2.7.11"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 2a914d9

Please sign in to comment.