From f38bcf26eaedd24f1e4f17690f6baaddb3a87826 Mon Sep 17 00:00:00 2001
From: Ian Ross <ian.ackerman.ross@gmail.com>
Date: Sat, 17 Dec 2016 09:54:33 -0800
Subject: [PATCH] Order sentences by docid/sentid and fix sentence stable_id offsets

---
 remap_sentences.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/remap_sentences.py b/remap_sentences.py
index 2ab8610..eb6681e 100644
--- a/remap_sentences.py
+++ b/remap_sentences.py
@@ -40,7 +40,7 @@
 #IMPORT THE SENTENCES DUMP
 cursor.execute("""
             SELECT docid, sentid, words, poses, ners, lemmas, dep_paths, dep_parents
-            FROM %(my_app)s_sentences_%(my_product)s;
+            FROM %(my_app)s_sentences_%(my_product)s ORDER BY docid, sentid;
             """, {
                 "my_app": AsIs(config['app_name']),
                     "my_product": AsIs(config['product'].lower())
@@ -70,17 +70,16 @@
         parsed_sent["char_offsets"][wordidx] = sentence_running_count
         sentence_running_count += len(parsed_sent["words"][wordidx]) + 1
 
-    sentence_start = doc_char_counts[sent[0]] if sent[0] in doc_char_counts else 0
     # This will probably be off by one...
     if sent[0] in doc_char_counts:
         sentence_start = doc_char_counts[sent[0]] + 1
-        doc_char_counts[sent[0]] += sentence_running_count - 1
+        doc_char_counts[sent[0]] += sentence_running_count
     else:
         sentence_start = 0
-        doc_char_counts[sent[0]] = sentence_running_count - 1
+        doc_char_counts[sent[0]] = sentence_running_count
 
     # keep this running count as the sentence-level offset stable_id
-    snorkel_cursor.execute("INSERT INTO context (id, type, stable_id) VALUES (nextval('seq'), 'sentence', %(stable_id)s)", {"stable_id": docid[0] + "::sentence:%s:%s" % (sentence_start, doc_char_counts[sent[0]])})
+    snorkel_cursor.execute("INSERT INTO context (id, type, stable_id) VALUES (nextval('seq'), 'sentence', %(stable_id)s)", {"stable_id": sent[0] + "::sentence:%s:%s" % (sentence_start, doc_char_counts[sent[0]])})
 
     snorkel_connection.commit()
     snorkel_cursor.execute(" \