Merge pull request #72 from tenex/lb/remove-pythonic-cobwebs
Remove old Python stuff
Liam committed Feb 23, 2016
2 parents b0687bb + 7227192 commit ca4a2b7
Showing 8 changed files with 57 additions and 173 deletions.
2 changes: 0 additions & 2 deletions app/config.py

This file was deleted.

88 changes: 3 additions & 85 deletions app/github_contributions.py
@@ -1,87 +1,5 @@
from flask import Flask, render_template
from flask.ext.pymongo import PyMongo, ASCENDING, DESCENDING
from tools import jsonify
from datetime import datetime, timezone
import dateutil.parser
import time
import math

from flask import Flask
from flask.ext.pymongo import PyMongo
app = Flask(__name__)
app.config.from_pyfile('config.py')
app.config['MONGO_DBNAME'] = 'contributions'
mongo = PyMongo(app)

PAGE_SIZE = 50

@app.route('/')
def index():
    return app.send_static_file('index.html')

@app.route('/stats')
def stats():
    c = mongo.db.contributions
    latest_event = c.find().sort('created_at', DESCENDING).limit(1)
    latest_event = [e['created_at'] for e in latest_event].pop()
    latest_event_dt = dateutil.parser.parse(latest_event)
    latest_event_age = datetime.now(timezone.utc) - latest_event_dt
    latest_event_age = int(latest_event_age.total_seconds())
    summary = {
        "eventCount": c.count(),
        "latestEvent": latest_event,
        "latestEventAge": latest_event_age
    }
    return jsonify(**summary)

@app.route('/error')
def error():
    time.sleep(3)
    raise RuntimeError('Here is an error, as you requested.')

@app.errorhandler(Exception)
def runtime_error_handler(err):
    err_data = {
        'error': str(err)
    }
    return jsonify(err_data), 500

@app.route('/user/<username>')
def user(username):
    collection = mongo.db.contributions
    criteria = {
        '_user_lower': username.lower(),
    }
    repos = collection.find(criteria)
    repos = repos.distinct('repo')
    repos.sort(key=str.lower)

    event_count = collection.find(criteria).count()

    summary = {
        "username": username,
        "eventCount": event_count,
        "repos": repos,
    }
    return jsonify(**summary)

@app.route('/user/<username>/events')
@app.route('/user/<username>/events/<int:page>')
def events(username, page=1):
    collection = mongo.db.contributions
    criteria = {
        '_user_lower': username.lower(),
    }

    skip = (page-1) * PAGE_SIZE
    #total_pages = math.ceil(float(total) / PAGE_SIZE)

    events = collection.find(criteria)
    events = events.sort("created_at", DESCENDING)
    events = events.skip(skip).limit(PAGE_SIZE)
    events = list(events)
    events = {
        "events": events,
        "start": skip+1,
        "end": skip+len(events),
        "currentPage": page,
        "size": len(events)
    }
    return jsonify(**events)
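With config.py deleted and all the routes gone, the hunk header @@ -1,87 +1,5 @@ implies the surviving module is just the five-line app/Mongo setup — roughly:

from flask import Flask
from flask.ext.pymongo import PyMongo
app = Flask(__name__)
app.config['MONGO_DBNAME'] = 'contributions'
mongo = PyMongo(app)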
29 changes: 0 additions & 29 deletions app/tools.py

This file was deleted.

26 changes: 0 additions & 26 deletions manage
@@ -15,38 +15,12 @@ def _make_context():
)

manager = Manager(app)
manager.add_command('runserver', Server(host='0.0.0.0', use_debugger=True))
manager.add_command('shell', Shell(
    make_context=_make_context,
    use_ipython=True,
    banner='GitHub Contributions Shell'
))

@manager.command
def usernames():
    """ generate list of distinct usernames from transformed data
    """
    import glob
    import gzip
    import json

    users = set()
    files = list(glob.glob(os.path.join(
        os.environ['GHC_TRANSFORMED_PATH'], '*.json.gz')))
    print("looking in", len(files), "files")
    for ix, path in enumerate(files):
        with gzip.open(path, 'rt', encoding='utf-8') as f:
            events = (json.loads(line) for line in f)
            users |= set((e.get('_user_lower') for e in events))
        print(ix, "\t", len(files))

    users = sorted(users)

    with open('users.txt', 'wt') as f:
        for u in users: print(u, file=f)

    print("there are", len(users), "users")

@manager.command
def ensure_indexes():
    """ ensure contributions are indexed
3 changes: 0 additions & 3 deletions requirements-w32.txt
@@ -2,13 +2,10 @@ filelock==1.0.3
Flask==0.10.1
Flask-PyMongo==0.4.0
Flask-Script==2.0.5
itsdangerous==0.24
Jinja2==2.7.3
MarkupSafe==0.23
pymongo==3.1.1
python-dateutil==2.4.2
requests==2.7.0
six==1.9.0
termcolor==1.1.0
Werkzeug==0.10.4
rollbar==0.11.2
5 changes: 0 additions & 5 deletions requirements.txt
@@ -2,15 +2,10 @@ filelock==1.0.3
Flask==0.10.1
Flask-PyMongo==0.4.0
Flask-Script==2.0.5
itsdangerous==0.24
Jinja2==2.7.3
MarkupSafe==0.23
pymongo==3.1.1
python-dateutil==2.4.2
python-rapidjson==0.0.6
requests==2.7.0
six==1.9.0
termcolor==1.1.0
uWSGI==2.0.11.2
Werkzeug==0.10.4
rollbar==0.11.2
46 changes: 23 additions & 23 deletions util/mongo/find-duplicate-events.js
@@ -1,31 +1,31 @@
// This shouldn't ever need to be run unless the existing data needs re-importing
// Takes 40 minutes to clear 10 records :(
db.contributions.aggregate(
    [
        {
            "$match": {
                "_event_id": { "$exists" : true },
            },
        },
        {
            "$group": {
                "_id": { "_event_id": "$_event_id" },
                "uniqueIds": { "$push": "$_id" },
                "count": { "$sum": 1 },
            },
        },
        {
            "$match": {
                "count": { "$gt": 1 },
            }
        },
        {
            $out : "duplicates",
        }
    ],
    [
        {
            "allowDiskUse": true,
            "$match": {
                "_event_id": { "$exists" : true }
            },
        },
        {
            "$group": {
                "_id": { "_event_id": "$_event_id" },
                "uniqueIds": { "$push": "$_id" },
                "count": { "$sum": 1 }
            },
        },
        {
            "$match": {
                "count": { "$gt": 1 }
            }
        },
        {
            $out : "duplicates"
        }
    ],
    {
        "allowDiskUse": true
    }
);
// .forEach(function(doc) {
// doc.uniqueIds.shift();
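If the cleanup ever needs re-running from Python rather than the mongo shell, a rough pymongo equivalent of the duplicate-finding aggregation above would be the following — an untested sketch, with the database name assumed from MONGO_DBNAME in the app code:

# Untested sketch: pymongo equivalent of the shell aggregation above.
from pymongo import MongoClient

db = MongoClient().contributions  # assumes the 'contributions' database used by the app

pipeline = [
    {"$match": {"_event_id": {"$exists": True}}},
    {"$group": {
        "_id": {"_event_id": "$_event_id"},
        "uniqueIds": {"$push": "$_id"},
        "count": {"$sum": 1},
    }},
    {"$match": {"count": {"$gt": 1}}},
    {"$out": "duplicates"},
]

# allowDiskUse mirrors the option passed in the shell version.
db.contributions.aggregate(pipeline, allowDiskUse=True)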
31 changes: 31 additions & 0 deletions util/usernames
@@ -0,0 +1,31 @@
#!/usr/bin/env python3
from __future__ import print_function
import os
import sys
import glob
import gzip
import json


def usernames():
    """ generate list of distinct usernames from transformed data
    """
    users = set()
    files = list(glob.glob(os.path.join(
        os.environ['GHC_TRANSFORMED_PATH'], '*.json.gz')))
    print("looking in", len(files), "files")
    for ix, path in enumerate(files):
        with gzip.open(path, 'rt', encoding='utf-8') as f:
            events = (json.loads(line) for line in f)
            users |= set((e.get('_user_lower') for e in events))
        print(ix, "\t", len(files))

    users = sorted(users)

    with open('users.txt', 'wt') as f:
        for u in users: print(u, file=f)

    print("there are", len(users), "users")


if __name__ == '__main__':
    usernames()
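The script's input format is only documented implicitly: gzipped JSON-lines files under $GHC_TRANSFORMED_PATH, one event per line, each carrying a _user_lower field. A minimal fixture for smoke-testing it might look like this (illustrative paths and field values):

# Illustrative only: build a one-event *.json.gz fixture for util/usernames.
import gzip
import json
import os

os.makedirs('transformed', exist_ok=True)
event = {'_user_lower': 'liam', 'repo': 'example/repo'}  # hypothetical event fields
with gzip.open('transformed/sample.json.gz', 'wt', encoding='utf-8') as f:
    f.write(json.dumps(event) + '\n')

# then, from the repo root:
#   GHC_TRANSFORMED_PATH=transformed util/usernames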
