Skip to content

Commit

Permalink
Fix (some) issues
Browse files (browse the repository at this point in the history)
  • Loading branch information
iross committed Dec 17, 2016
1 parent ea84b2f commit f38bcf2
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions remap_sentences.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,7 +40,7 @@
#IMPORT THE SENTENCES DUMP
cursor.execute("""
SELECT docid, sentid, words, poses, ners, lemmas, dep_paths, dep_parents
FROM %(my_app)s_sentences_%(my_product)s;
FROM %(my_app)s_sentences_%(my_product)s ORDER BY docid, sentid;
""", {
"my_app": AsIs(config['app_name']),
"my_product": AsIs(config['product'].lower())
Expand Down Expand Up @@ -70,17 +70,16 @@
parsed_sent["char_offsets"][wordidx] = sentence_running_count
sentence_running_count += len(parsed_sent["words"][wordidx]) + 1

sentence_start = doc_char_counts[sent[0]] if sent[0] in doc_char_counts else 0
# This will probably be off by one...
if sent[0] in doc_char_counts:
sentence_start = doc_char_counts[sent[0]] + 1
doc_char_counts[sent[0]] += sentence_running_count - 1
doc_char_counts[sent[0]] += sentence_running_count
else:
sentence_start = 0
doc_char_counts[sent[0]] = sentence_running_count - 1
doc_char_counts[sent[0]] = sentence_running_count

# keep this running count as the sentence-level offset stable_id
snorkel_cursor.execute("INSERT INTO context (id, type, stable_id) VALUES (nextval('seq'), 'sentence', %(stable_id)s)", {"stable_id": docid[0] + "::sentence:%s:%s" % (sentence_start, doc_char_counts[sent[0]])})
snorkel_cursor.execute("INSERT INTO context (id, type, stable_id) VALUES (nextval('seq'), 'sentence', %(stable_id)s)", {"stable_id": sent[0] + "::sentence:%s:%s" % (sentence_start, doc_char_counts[sent[0]])})

snorkel_connection.commit()
snorkel_cursor.execute(" \
Expand Down

0 comments on commit f38bcf2

Please sign in to comment.