Merge pull request #72 from tenex/lb/remove-pythonic-cobwebs
Remove old Python stuff
Liam committed Feb 23, 2016
2 parents b0687bb + 7227192 commit ca4a2b7
Showing 8 changed files with 57 additions and 173 deletions.
2 changes: 0 additions & 2 deletions app/config.py

This file was deleted.

88 changes: 3 additions & 85 deletions app/github_contributions.py
@@ -1,87 +1,5 @@
from flask import Flask, render_template
from flask.ext.pymongo import PyMongo, ASCENDING, DESCENDING
from tools import jsonify
from datetime import datetime, timezone
import dateutil.parser
import time
import math

from flask import Flask
from flask.ext.pymongo import PyMongo
app = Flask(__name__)
app.config.from_pyfile('config.py')
app.config['MONGO_DBNAME'] = 'contributions'
mongo = PyMongo(app)

PAGE_SIZE = 50

@app.route('/')
def index():
    return app.send_static_file('index.html')

@app.route('/stats')
def stats():
    c = mongo.db.contributions
    latest_event = c.find().sort('created_at', DESCENDING).limit(1)
    latest_event = [e['created_at'] for e in latest_event].pop()
    latest_event_dt = dateutil.parser.parse(latest_event)
    latest_event_age = datetime.now(timezone.utc) - latest_event_dt
    latest_event_age = int(latest_event_age.total_seconds())
    summary = {
        "eventCount": c.count(),
        "latestEvent": latest_event,
        "latestEventAge": latest_event_age
    }
    return jsonify(**summary)

@app.route('/error')
def error():
    time.sleep(3)
    raise RuntimeError('Here is an error, as you requested.')

@app.errorhandler(Exception)
def runtime_error_handler(err):
    err_data = {
        'error': str(err)
    }
    return jsonify(err_data), 500

@app.route('/user/<username>')
def user(username):
    collection = mongo.db.contributions
    criteria = {
        '_user_lower': username.lower(),
    }
    repos = collection.find(criteria)
    repos = repos.distinct('repo')
    repos.sort(key=str.lower)

    event_count = collection.find(criteria).count()

    summary = {
        "username": username,
        "eventCount": event_count,
        "repos": repos,
    }
    return jsonify(**summary)

@app.route('/user/<username>/events')
@app.route('/user/<username>/events/<int:page>')
def events(username, page=1):
    collection = mongo.db.contributions
    criteria = {
        '_user_lower': username.lower(),
    }

    skip = (page-1) * PAGE_SIZE
    #total_pages = math.ceil(float(total) / PAGE_SIZE)

    events = collection.find(criteria)
    events = events.sort("created_at", DESCENDING)
    events = events.skip(skip).limit(PAGE_SIZE)
    events = list(events)
    events = {
        "events": events,
        "start": skip+1,
        "end": skip+len(events),
        "currentPage": page,
        "size": len(events)
    }
    return jsonify(**events)
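With config.py deleted and all the routes gone, the hunk header @@ -1,87 +1,5 @@ implies the surviving module is just the five-line app/Mongo setup — roughly:

from flask import Flask
from flask.ext.pymongo import PyMongo
app = Flask(__name__)
app.config['MONGO_DBNAME'] = 'contributions'
mongo = PyMongo(app)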
29 changes: 0 additions & 29 deletions app/tools.py

This file was deleted.

26 changes: 0 additions & 26 deletions manage
@@ -15,38 +15,12 @@ def _make_context():
)

manager = Manager(app)
manager.add_command('runserver', Server(host='0.0.0.0', use_debugger=True))
manager.add_command('shell', Shell(
    make_context=_make_context,
    use_ipython=True,
    banner='GitHub Contributions Shell'
))

@manager.command
def usernames():
    """ generate list of distinct usernames from transformed data
    """
    import glob
    import gzip
    import json

    users = set()
    files = list(glob.glob(os.path.join(
        os.environ['GHC_TRANSFORMED_PATH'], '*.json.gz')))
    print("looking in", len(files), "files")
    for ix, path in enumerate(files):
        with gzip.open(path, 'rt', encoding='utf-8') as f:
            events = (json.loads(line) for line in f)
            users |= set((e.get('_user_lower') for e in events))
        print(ix, "\t", len(files))

    users = sorted(users)

    with open('users.txt', 'wt') as f:
        for u in users: print(u, file=f)

    print("there are", len(users), "users")

@manager.command
def ensure_indexes():
    """ ensure contributions are indexed
3 changes: 0 additions & 3 deletions requirements-w32.txt
@@ -2,13 +2,10 @@ filelock==1.0.3
Flask==0.10.1
Flask-PyMongo==0.4.0
Flask-Script==2.0.5
itsdangerous==0.24
Jinja2==2.7.3
MarkupSafe==0.23
pymongo==3.1.1
python-dateutil==2.4.2
requests==2.7.0
six==1.9.0
termcolor==1.1.0
Werkzeug==0.10.4
rollbar==0.11.2
5 changes: 0 additions & 5 deletions requirements.txt
@@ -2,15 +2,10 @@ filelock==1.0.3
Flask==0.10.1
Flask-PyMongo==0.4.0
Flask-Script==2.0.5
itsdangerous==0.24
Jinja2==2.7.3
MarkupSafe==0.23
pymongo==3.1.1
python-dateutil==2.4.2
python-rapidjson==0.0.6
requests==2.7.0
six==1.9.0
termcolor==1.1.0
uWSGI==2.0.11.2
Werkzeug==0.10.4
rollbar==0.11.2
46 changes: 23 additions & 23 deletions util/mongo/find-duplicate-events.js
@@ -1,31 +1,31 @@
// This shouldn't ever need to be run unless the existing data needs re-importing
// Takes 40 minutes to clear 10 records :(
db.contributions.aggregate(
    [
        {
            "$match": {
                "_event_id": { "$exists" : true },
            },
        },
        {
            "$group": {
                "_id": { "_event_id": "$_event_id" },
                "uniqueIds": { "$push": "$_id" },
                "count": { "$sum": 1 },
            },
        },
        {
            "$match": {
                "count": { "$gt": 1 },
            }
        },
        {
            $out : "duplicates",
        }
    ],
    [
        {
            "allowDiskUse": true,
            "$match": {
                "_event_id": { "$exists" : true }
            },
        },
        {
            "$group": {
                "_id": { "_event_id": "$_event_id" },
                "uniqueIds": { "$push": "$_id" },
                "count": { "$sum": 1 }
            },
        },
        {
            "$match": {
                "count": { "$gt": 1 }
            }
        },
        {
            $out : "duplicates"
        }
    ],
    {
        "allowDiskUse": true
    }
);
// .forEach(function(doc) {
// doc.uniqueIds.shift();
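If the cleanup ever needs re-running from Python rather than the mongo shell, a rough pymongo equivalent of the duplicate-finding aggregation above would be the following — an untested sketch, with the database name assumed from MONGO_DBNAME in the app code:

# Untested sketch: pymongo equivalent of the shell aggregation above.
from pymongo import MongoClient

db = MongoClient().contributions  # assumes the 'contributions' database used by the app

pipeline = [
    {"$match": {"_event_id": {"$exists": True}}},
    {"$group": {
        "_id": {"_event_id": "$_event_id"},
        "uniqueIds": {"$push": "$_id"},
        "count": {"$sum": 1},
    }},
    {"$match": {"count": {"$gt": 1}}},
    {"$out": "duplicates"},
]

# allowDiskUse mirrors the option passed in the shell version.
db.contributions.aggregate(pipeline, allowDiskUse=True)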
31 changes: 31 additions & 0 deletions util/usernames
@@ -0,0 +1,31 @@
#!/usr/bin/env python3
from __future__ import print_function
import os
import sys
import glob
import gzip
import json


def usernames():
    """ generate list of distinct usernames from transformed data
    """
    users = set()
    files = list(glob.glob(os.path.join(
        os.environ['GHC_TRANSFORMED_PATH'], '*.json.gz')))
    print("looking in", len(files), "files")
    for ix, path in enumerate(files):
        with gzip.open(path, 'rt', encoding='utf-8') as f:
            events = (json.loads(line) for line in f)
            users |= set((e.get('_user_lower') for e in events))
        print(ix, "\t", len(files))

    users = sorted(users)

    with open('users.txt', 'wt') as f:
        for u in users: print(u, file=f)

    print("there are", len(users), "users")


if __name__ == '__main__':
    usernames()
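The script's input format is only documented implicitly: gzipped JSON-lines files under $GHC_TRANSFORMED_PATH, one event per line, each carrying a _user_lower field. A minimal fixture for smoke-testing it might look like this (illustrative paths and field values):

# Illustrative only: build a one-event *.json.gz fixture for util/usernames.
import gzip
import json
import os

os.makedirs('transformed', exist_ok=True)
event = {'_user_lower': 'liam', 'repo': 'example/repo'}  # hypothetical event fields
with gzip.open('transformed/sample.json.gz', 'wt', encoding='utf-8') as f:
    f.write(json.dumps(event) + '\n')

# then, from the repo root:
#   GHC_TRANSFORMED_PATH=transformed util/usernames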
