Skip to content

Commit

Permalink
initial
Browse files Browse the repository at this point in the history
  • Loading branch information
pdaian committed Apr 10, 2019
0 parents commit c2af245
Show file tree
Hide file tree
Showing 150 changed files with 194,841 additions and 0 deletions.
107 changes: 107 additions & 0 deletions calculate_profit_from_logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import pygraphviz as pgv
import csv, csv_hack, os
import json
from exchanges import get_trade_data_from_log_item

# Edge colors, indexed by trade number so both edges of one trade share a
# color; the list is repeated 10x so up to 90 trades fit without IndexError.
COLORS = ["red", "blue", "green", "orange", "purple", "black", "yellow", "grey", "darkgreen"] * 10

# DEX event logs exported from BigQuery, one CSV row per log entry.
logsdict = csv.DictReader(open('data/all_logs_bigquery.csv'), delimiter=',',
                          quotechar='"', quoting=csv.QUOTE_MINIMAL)
# transaction_hash -> list of (address, data, topics) tuples for that tx.
logs = {}

def get_rate_label(token1, amount1, token2, amount2):
    """Format an exchange rate as '<rate> NUM/DEN' for a token pair.

    The pair is oriented so ETH/WETH (if present) is the denominator;
    otherwise the lexicographically larger token symbol is the numerator
    (an arbitrary but deterministic tiebreak). A zero denominator amount
    yields an '[INF]' label instead of raising.
    """
    token1_first = (token2 == "ETH" or token2 == "WETH") or \
        (token1 >= token2 and token1 != "ETH" and token1 != "WETH")
    if token1_first:
        num_amount, den_amount = amount1, amount2
        pair = token1 + '/' + token2
    else:
        num_amount, den_amount = amount2, amount1
        pair = token2 + '/' + token1
    try:
        return "%4g %s" % (num_amount / den_amount, pair)
    except ZeroDivisionError:
        return "[INF] %s" % pair


def get_profit_graph(logset, txhash):
    """Build the token-flow graph for one transaction's event logs.

    logset: list of (address, data, topics) tuples for a single tx.
    Returns (graph_edges, unknown, dot) where graph_edges is a list of
    (source_node, dest_node, amount) triples (exchange nodes prefixed "!"),
    unknown is True if any log failed to parse (profit picture incomplete),
    and dot is the pygraphviz graph for rendering.
    """
    dot = pgv.AGraph(label=txhash + ' Profit Flow', directed=True, strict=False, nodesep=1.0, ranksep=0.5, sep=0.0, labelfloat=False)
    unknown = False
    graph_edges = []
    logindex = 1
    tokens_involved = set()
    trades = []
    for logitem in logset:
        address = logitem[0]
        data = logitem[1]
        # topics come through the CSV with single quotes; JSON needs double.
        topicstext = logitem[2].replace('\'', '\"')
        topics = json.loads(topicstext)
        data = data[2:]  # strip 0x from hex
        trades_data = get_trade_data_from_log_item(topics, data, address)
        if trades_data is not None:
            for trade_data in trades_data:
                (tokenget_addr, tokenget_label, tokenget, amountget, tokengive_addr, tokengive_label, tokengive, amountgive, exchange) = trade_data
                # Two flow edges per trade: token in -> exchange -> token out.
                graph_edges.append((tokenget, "!" + exchange, amountget))  # (add "!" to mark special exchange node)
                graph_edges.append(("!" + exchange, tokengive, amountgive))

                rate_label = get_rate_label(tokenget, amountget, tokengive, amountgive)
                tradenode_label = "Trade #" + str(logindex) + " (" + exchange + ")\n" + rate_label
                dot.add_edge(tokenget_label, tradenode_label, label=("%4g" % amountget), color=COLORS[logindex])
                dot.add_edge(tradenode_label, tokengive_label, label=("%4g" % amountgive), color=COLORS[logindex])
                trades.append(tradenode_label)
                tokens_involved.add(tokenget_label)
                tokens_involved.add(tokengive_label)
                # NOTE(review): indentation was lost in transit; logindex is
                # assumed to advance per trade (unique "Trade #N" labels) —
                # confirm against upstream source.
                logindex += 1
        else:
            # some item in the logset failed to parse => we don't have complete profit picture
            unknown = True
    # Rank hints so tokens and trade nodes each sit on their own row.
    for token in list(tokens_involved):
        dot.add_subgraph(token, rank='same')
    dot.add_subgraph(trades, rank='same')
    # Invisible edges force trades into chronological left-to-right order.
    for i in range(0, len(trades) - 1):
        dot.add_edge(trades[i], trades[i+1], style="invis")
    return(graph_edges, unknown, dot)

def calculate_profit_for(profit_graph):
    """Net the flow through each node of a profit graph.

    profit_graph: iterable of (source, dest, amount) edges.
    Returns a dict mapping each node to (inflow - outflow); exchange
    nodes (prefixed "!") net to their fee take, token nodes to profit.
    """
    token_profits = {}
    for source, dest, amount in profit_graph:
        token_profits[source] = token_profits.get(source, 0) - amount
        token_profits[dest] = token_profits.get(dest, 0) + amount
    return token_profits

# Group raw log rows by transaction hash.
# NOTE: 'hash' shadows the builtin of the same name (pre-existing).
for log in logsdict:
    hash = log['transaction_hash']
    if not hash in logs:
        logs[hash] = []
    logs[hash].append((log['address'], log['data'], log['topics']))


# Output: one summary row per transaction with profit breakdown.
spamwriter = csv.writer(open('data/profits.csv', 'w'), delimiter=',',
                        quotechar='"', quoting=csv.QUOTE_MINIMAL)
spamwriter.writerow(["txhash","drawn","unknown","all_positive","eth_profit","profit_graph","profit_calcs"])

i = 0
total = len(logs)
for txhash in logs:
    i += 1
    print(txhash, i, "of", total)
    output_file_name = 'profit_graphs/' + txhash + '.png'
    drawn = False
    (profit_graph, unknown, dot) = get_profit_graph(logs[txhash], txhash)
    if unknown:
        # failed to process given entry because some exchange that's in dex_list.py is missing a log parser
        print("UNKNOWN!", txhash)
    # Only fully-parsed txs with more than one trade edge pair are recorded.
    if not unknown and len(profit_graph) > 2:
        if not os.path.exists(output_file_name):
            # Skip re-rendering graphs that already exist on disk.
            dot.draw(output_file_name, prog="dot")
            drawn = True
        profit_calcs = calculate_profit_for(profit_graph)
        all_positive = True
        for token in profit_calcs:
            # "!"-prefixed nodes are exchanges; only token balances count.
            if token[0] != "!":
                if profit_calcs[token] < 0:
                    all_positive = False
        profit_graph_data = json.dumps(profit_graph)
        profit_calcs_data = json.dumps(profit_calcs)
        spamwriter.writerow([txhash, drawn, unknown, all_positive, profit_calcs.get('ETH', 0), profit_graph_data, profit_calcs_data])
34 changes: 34 additions & 0 deletions calculate_slots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from statistics import mean
import csv
import numpy as np

import csv_hack

# sender address -> count of transactions matching the arbitrage heuristic.
arbitrageurs = {}
# slot index (tx position within block) -> list of gas prices paid for it.
slotprices = {}

def add_to_count(arbitrageurs, arbitrageur):
    """Increment arbitrageur's entry in the counts dict, creating it at 1."""
    previous = arbitrageurs.get(arbitrageur, 0)
    arbitrageurs[arbitrageur] = previous + 1

# FIX: the original first opened data/gas_slots_6207336_6146507.csv and then
# immediately rebound slotsdict to a second reader — a leaked file handle that
# was never read (and a crash if that stale file is absent). Only
# data/gas_slots.csv is actually used.
slotsdict = csv.DictReader(open('data/gas_slots.csv'))
for tx in slotsdict:
    slot = int(tx['tx_position'])
    # Heuristic for auction-style arbitrage bids: low gas usage relative to
    # the limit, a very high gas price (>310 gwei), more than one log topic
    # group, and not one of two known-excluded contract addresses.
    if (int(tx['gas_used']) < (int(tx['gas_limit']) * 0.6)
            and int(tx['gas_price']) > 310000000000
            and tx['log_topics'].count("~") > 1
            and not tx['to'].lower() in ["0xa62142888aba8370742be823c1782d17a0389da1",
                                         "0xdd9fd6b6f8f7ea932997992bbe67eabb3e316f3c"]):
        print(tx['hash'], tx['from'], tx['to'])
        add_to_count(arbitrageurs, tx['from'])
        if not slot in slotprices:
            slotprices[slot] = []
        slotprices[slot].append(int(tx['gas_price']))

for arbitrageur in arbitrageurs.keys():
    if arbitrageurs[arbitrageur] > 0:
        print("arber", "https://etherscan.io/address/" + arbitrageur, arbitrageurs[arbitrageur])

# Write the 10/50/75/90/99th percentile gas price (converted wei -> gwei) for
# slots 0-9, one CSV line per slot. Uses 'with' so the file is flushed/closed.
with open("data/slots_new.csv", "w") as slots_out:
    slots_out.write("\n".join([",".join([str(x/(10**9)) for x in [ np.percentile(slotprices[slot], 10), np.percentile(slotprices[slot], 50), np.percentile(slotprices[slot], 75), np.percentile(slotprices[slot], 90), np.percentile(slotprices[slot], 99)]]) for slot in range(0, 10)]))
for slot in slotprices:
    prices = slotprices[slot]
    print(slot, np.percentile(prices, 10), np.percentile(prices, 50), np.percentile(prices, 75), np.percentile(prices, 99))
23 changes: 23 additions & 0 deletions count_wins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import csv
def get_winner_dict():
    """Load data/slot_auction.csv keyed by transaction hash.

    Each row dict is annotated with an integer 'log_count': the number of
    "~"-separated log addresses (separator count, plus one if the field is
    non-empty).
    """
    winner_dict = {}
    for row in csv.DictReader(open('data/slot_auction.csv')):
        addrs = row['log_addrs']
        row['log_count'] = addrs.count("~") + min(1, len(addrs))
        winner_dict[row['hash']] = row
    return winner_dict


# sender address -> number of auction wins that emitted at least one log.
arbs = {}

winner_dict = get_winner_dict()
print(len(winner_dict))

for txhash in winner_dict:
    entry = winner_dict[txhash]
    # Rows with no logs are failed/no-op bids; only count effective wins.
    if entry['log_count'] > 0:
        sender = entry['from']
        arbs[sender] = arbs.get(sender, 0) + 1

print(arbs)
12 changes: 12 additions & 0 deletions csv_hack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# QUICK AND DIRTY HACK FROM https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
# Importing this module raises csv's field-size limit so oversized log/data
# columns do not abort parsing with "field larger than field limit (131072)".
import sys, csv

maxInt = sys.maxsize
# FIX: the original set a 'decrement' flag on OverflowError but never retried,
# leaving the limit unraised on platforms where sys.maxsize overflows a C
# long (e.g. 32-bit builds). Restore the canonical back-off-by-10x loop.
while True:
    try:
        csv.field_size_limit(maxInt)
        break
    except OverflowError:
        maxInt = int(maxInt / 10)
# END HACK

4 changes: 4 additions & 0 deletions csv_to_sqlite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import os

# Shell out to the sqlite3 CLI: import every exported CSV into a fresh
# arbitrage_new.db, then build the denormalized mergedprofitabletxs table
# joining failed-inclusive arb txs with profits, success rows, and block
# metadata, ordered by block number / tx index.
os.system('sqlite3 data/arbitrage_new.db ".mode csv" ".import data/block_fees.csv block_fees" ".import data/eth.csv eth_data" ".import data/all_logs_bigquery.csv logs" ".import data/block_data.csv blocks" ".import data/all_success_arb_txs_bigquery.csv success" ".import data/all_inclfail_arb_txs_bigquery.csv wfail" ".import data/auctions.csv auctions" ".import data/profits.csv profits" "CREATE TABLE mergedprofitabletxs AS SELECT *,substr(timestamp,0,11) as date FROM wfail LEFT JOIN profits on profits.txhash=wfail.transaction_hash LEFT JOIN success on success.transaction_hash=wfail.transaction_hash LEFT JOIN blocks on blocks.number=wfail.block_number GROUP BY wfail.transaction_hash ORDER BY CAST(wfail.block_number as INTEGER) DESC, CAST(wfail.transaction_index AS INTEGER) ASC;" ".quit" ""')
# Atomically swap the freshly built database into place.
os.system('mv data/arbitrage_new.db data/arbitrage.db')
1 change: 1 addition & 0 deletions data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Coming soon; check/star this space over the next day!
33 changes: 33 additions & 0 deletions etherdelta/readme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
The file contract_addr.txt contains all the contracts (including EtherDelta contract itself) with successful trade transactions sent to it within [3900000, 5550000)

for each file:

all_txs/all_txs-{begin_block}-{end_block}-1.txt

contains all the transactions sent to the addresses in contract_addr.txt in the block range [begin_block, end_block).

There are 5 columns in each file, each line represents one transaction:

BlockNumber TransactionHash From To GasPrice(Wei) GasUsed InputData


for each file:

succ_txs/succ_txs-{begin_block}-{end_block}-1.txt

contains all the transactions with one or more Etherdelta Trade Event in the block range [begin_block, end_block),

There are 6 columns in each file, each line represents one transaction:

BlockNumber TransactionHash Tag From To InputData


The Tag is one of { Trade, Arbitrage, Unknown}:

Trade means this transaction only generated one Trade Event, which means it is a normal trade transaction.

Arbitrage means this transaction generated exactly 2 Trade Events and the buy/sell tokens form a pair.

Otherwise, the transaction will be tagged as Unknown.


50 changes: 50 additions & 0 deletions etherdelta/scripts/find_all_txs.py

Large diffs are not rendered by default.

71 changes: 71 additions & 0 deletions etherdelta/scripts/find_succ_txs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import argparse
from _pysha3 import keccak_256
from web3 import Web3, HTTPProvider
import json

# Local node by default; the Infura endpoint below is a slower fallback.
web3 = Web3(HTTPProvider('http://localhost:8549'))
#web3 = Web3(HTTPProvider('https://mainnet.infura.io/Ky03pelFIxoZdAUsr82w'))

# EtherDelta main exchange contract address.
etherDeltaAddress = '0x8d12A197cB00D4747a1fe03395095ce2A5CC6819'
# 32-byte zero word (EtherDelta encodes ETH as the zero token address).
etherAddress = '0000000000000000000000000000000000000000000000000000000000000000'

# topic[0] of EtherDelta's Trade event: keccak256 of the event signature.
tradeAPI = '0x' + \
    keccak_256(
        b'Trade(address,uint256,address,uint256,address,address)'
    ).hexdigest()

parser = argparse.ArgumentParser(description='EtherDelta Arbitrage Bot.')
parser.add_argument('--st',dest='st' ,type=int, action='store', default='5000000')  # first block to scan
parser.add_argument('--len',dest='len' ,type=int, action='store', default='100')    # how many blocks to cover
parser.add_argument('--r',dest='r' ,type=int, action='store', default='20')         # stride between sampled blocks
args = parser.parse_args()

startBlock = args.st
endBlock = args.st + args.len
ratio = args.r
# NOTE: despite the name, this is the output *file* path, not a directory.
result_dir = '../results/succ_txs-{}-{}-{}.txt'.format(startBlock,endBlock,ratio)


import os

# Resume support: if a previous results file exists, restart from the block
# number recorded in its last line; otherwise scan from the beginning.
# FIX: the original left 'number' unassigned when the file existed but was
# empty, crashing later with NameError — default it to 0 up front.
number = 0
if os.path.isfile(result_dir):
    print('Previous file exists.')
    with open(result_dir, 'r') as f:
        lines = f.readlines()
    if len(lines) >= 1:
        print('last line:', lines[-1])
        # First whitespace-separated field of each result line is the block number.
        number = int(lines[-1].split()[0])
else:
    print('No previous file.')



# Scan every 'ratio'-th block from the resume point to endBlock, classify
# each transaction by its EtherDelta Trade events, and append tagged rows
# to the results file.
for idx in range(max(startBlock, number), endBlock + 1, ratio):
    block = web3.eth.getBlock(idx)
    transactions = block['transactions']
    print('block number:', idx)
    for txHash in transactions:
        tx = web3.eth.getTransaction(txHash)
        receipt = web3.eth.getTransactionReceipt(txHash)
        token_pair_list = []
        for log in receipt['logs']:
            if 'topics' in log and len(log['topics']):
                if log['topics'][0].hex() == tradeAPI:
                    # Slice the (tokenGet, tokenGive) addresses out of the ABI-encoded
                    # data: 20-byte addresses right-aligned in 32-byte words 0 and 2
                    # (offsets are into the '0x'-prefixed hex string).
                    token_pair_list.append((log['data'][24 + 2: 64 + 2],log['data'][24 + 128 + 2: 64 + 128+ 2]))

        num = len(token_pair_list)
        tag = None
        # Exactly two trades whose get/give tokens mirror each other => arbitrage.
        if num == 2 and token_pair_list[0][0] == token_pair_list[1][1] and token_pair_list[1][0] == token_pair_list[0][1]:
            tag = 'Arbitrage'
        elif num == 1:
            tag = 'Trade'
        elif num:
            tag = 'Unknown'
        if tag is not None:
            result = "{} {} {} {} {} {}\n".format(idx, txHash.hex(), tag, tx['from'], tx['to'], tx['input'])
            print(result)
            # Re-open in append mode per write so progress survives interruption.
            with open(result_dir,'a') as f:
                f.write(result)


19 changes: 19 additions & 0 deletions etherdelta/scripts/run_find_all_txs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Fan out find_all_txs.py over blocks [3900000, 5550000) in 10000-block
# chunks, one background process per chunk, then wait for all of them.

# On Ctrl-C, signal the background jobs and wait for them to exit.
# NOTE(review): "kill -TERM -0" as written targets process group 0; the
# usual idiom is "kill -TERM 0" — confirm intent before relying on it.
onexit() {
    kill -TERM -0
    wait
}
trap onexit INT

set -x  # echo each command as it runs

for (( i=3900000; i<5550000; i+=10000))
do
    python3 find_all_txs.py --st $i --len 10000 --r 1 &

done

wait

#python3 find_succ_txs.py --st 3900000 --len 100000 --r 20
21 changes: 21 additions & 0 deletions etherdelta/scripts/run_find_succ_txs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Fan out find_succ_txs.py over blocks [3900000, 5550000] in 50000-block
# chunks, one background process per chunk, then wait for all of them.

# On Ctrl-C, signal the background jobs and wait for them to exit.
# NOTE(review): "kill -TERM -0" as written targets process group 0; the
# usual idiom is "kill -TERM 0" — confirm intent before relying on it.
onexit() {
    kill -TERM -0
    wait
}
trap onexit INT

set -x  # echo each command as it runs

for (( i=3900000; i<=5550000; i+=50000))
do
    python3 find_succ_txs.py --st $i --len 50000 --r 1 &
#    cmd="python3 find_succ_txs.py --st $i --len 100000 --r 20 &"
#    echo "running $cmd"
#    eval $cmd
done

wait

#python3 find_succ_txs.py --st 3900000 --len 100000 --r 20
Loading

0 comments on commit c2af245

Please sign in to comment.