From edffd47e25590d2d7f248061d8110ec292240d56 Mon Sep 17 00:00:00 2001 From: Benjamin Smith Date: Mon, 20 Mar 2023 12:17:28 +0100 Subject: [PATCH] [CIP-20] Update Query & Test (#30) This PR finalizes the DB query based on the progress made in [rewards script](https://github.com/cowprotocol/solver-rewards) -- specifically that query which was finalized [here](https://github.com/cowprotocol/solver-rewards/pull/203) Its a bit gross that the orderbook query lives in two places (for the time being). At some point we will want to expose this as an API endpoint (on a constrained `block_range`) directly via the orderbook. However, until we have landed on the correct query (sufficient to reproduce solver rewards) we will maintain this externally - in two places. The tests introduced here are both for the DB extraction result and then the transformation phase to JSON before upload. Closes #31 --- src/fetch/orderbook.py | 23 +++- src/models/batch_rewards_schema.py | 11 +- src/sql/orderbook/batch_rewards.sql | 161 +++++++++++++--------- src/sync/order_rewards.py | 3 +- tests/integration/test_fetch_orderbook.py | 85 ++++++++++++ tests/unit/test_batch_rewards_schema.py | 100 ++++++++++++++ 6 files changed, 309 insertions(+), 74 deletions(-) create mode 100644 tests/unit/test_batch_rewards_schema.py diff --git a/src/fetch/orderbook.py b/src/fetch/orderbook.py index 1a70d313..a159f617 100644 --- a/src/fetch/orderbook.py +++ b/src/fetch/orderbook.py @@ -16,6 +16,15 @@ from src.utils import open_query REORG_THRESHOLD = 65 +NUMERICAL_COLUMNS = [ + "execution_cost", + "surplus", + "fee", + "uncapped_payment_eth", + "capped_payment", + "winning_score", + "reference_score", +] class OrderbookEnv(Enum): @@ -101,9 +110,19 @@ def get_batch_rewards(cls, block_range: BlockRange) -> DataFrame: .replace("{{start_block}}", str(block_range.block_from)) .replace("{{end_block}}", str(block_range.block_to)) ) - data_types = {"block_number": "int64"} + data_types = { + # According to this: https://stackoverflow.com/a/11548224 + # capitalized int64 means `Optional` and it appears to work. + "block_number": "Int64", + "block_deadline": "int64", + } barn, prod = cls._query_both_dbs(cow_reward_query, data_types) # Solvers do not appear in both environments! assert set(prod.solver).isdisjoint(set(barn.solver)), "solver overlap!" - return pd.concat([prod, barn]) + # Ensure numerical types. + combined_df = pd.concat([prod, barn]) + for number_col in NUMERICAL_COLUMNS: + combined_df[number_col] = pd.to_numeric(combined_df[number_col]) + + return combined_df diff --git a/src/models/batch_rewards_schema.py b/src/models/batch_rewards_schema.py index d134c7ce..8e253ea0 100644 --- a/src/models/batch_rewards_schema.py +++ b/src/models/batch_rewards_schema.py @@ -4,6 +4,7 @@ from dataclasses import dataclass from typing import Any +import pandas from pandas import DataFrame @@ -18,19 +19,21 @@ def from_pdf_to_dune_records(cls, rewards_df: DataFrame) -> list[dict[str, Any]] """Converts Pandas DataFrame into the expected stream type for Dune""" return [ { - "block_number": int(row["block_number"]), + "block_number": int(row["block_number"]) + if not pandas.isna(row["block_number"]) + else None, "tx_hash": row["tx_hash"], "solver": row["solver"], + "block_deadline": int(row["block_deadline"]), "data": { # All the following values are in WEI. - "reward_eth": str(row["reward_eth"]), + "uncapped_payment_eth": str(row["uncapped_payment_eth"]), + "capped_payment": str(row["capped_payment"]), "execution_cost": str(row["execution_cost"]), "surplus": str(row["surplus"]), "fee": str(row["fee"]), "winning_score": str(row["winning_score"]), "reference_score": str(row["reference_score"]), - # TODO - Not sure yet how to parse this bytea[] - # Will need to experiment with this. "participating_solvers": row["participating_solvers"], }, } diff --git a/src/sql/orderbook/batch_rewards.sql b/src/sql/orderbook/batch_rewards.sql index 4a748d61..a416a0fa 100644 --- a/src/sql/orderbook/batch_rewards.sql +++ b/src/sql/orderbook/batch_rewards.sql @@ -1,68 +1,97 @@ -WITH observed_settlements AS ( -SELECT - -- settlement - tx_hash, - solver, - -- settlement_observations - block_number, - effective_gas_price * gas_used AS execution_cost, - surplus, - fee, - -- auction_transaction - auction_id -FROM settlement_observations so -JOIN settlements s - ON s.block_number = so.block_number - AND s.log_index = so.log_index -JOIN auction_transaction at - ON s.tx_from = at.tx_from - AND s.tx_nonce = at.tx_nonce -WHERE block_number > {{start_block}} AND block_number <= {{end_block}} -), +WITH observed_settlements AS (SELECT + -- settlement + tx_hash, + solver, + s.block_number, + -- settlement_observations + effective_gas_price * gas_used AS execution_cost, + surplus, + fee, + -- auction_transaction + at.auction_id + FROM settlement_observations so + JOIN settlements s + ON s.block_number = so.block_number + AND s.log_index = so.log_index + JOIN auction_transaction at + ON s.tx_from = at.tx_from + AND s.tx_nonce = at.tx_nonce + JOIN settlement_scores ss + ON at.auction_id = ss.auction_id + WHERE ss.block_deadline > {{start_block}} + AND ss.block_deadline <= {{end_block}}), -reward_data AS ( - SELECT - -- observations - tx_hash, - coalesce( - solver, - -- This is the winning solver (i.e. last entry of participants array) - participants[array_length(participants, 1)] - ) as solver, - -- Right-hand terms in coalesces below represent the case when settlement - -- observations are unavailable (i.e. no settlement corresponds to reported scores). - -- In particular, this means that surplus, fee and execution cost are all zero. - -- When there is an absence of settlement block number, we fall back - -- on the block_deadline from the settlement_scores table. - coalesce(block_number, block_deadline) as block_number, - coalesce(execution_cost, 0) as execution_cost, - coalesce(surplus, 0) as surplus, - coalesce(fee, 0) as fee, - surplus + fee - reference_score AS payment, - -- scores - winning_score, - reference_score, - -- participation - participants -FROM settlement_scores ss --- If there are reported scores, --- there will always be a record of auction participants -JOIN auction_participants ap - ON os.auction_id = ap.auction_id - -- outer joins made in order to capture non-existent settlements. -LEFT OUTER JOIN observed_settlements os - ON os.auction_id = ss.auction_id -) + auction_participation as (SELECT ss.auction_id, + array_agg( + concat('0x', encode(participant, 'hex')) ORDER BY participant + ) as participating_solvers + FROM auction_participants + JOIN settlement_scores ss + ON auction_participants.auction_id = ss.auction_id + WHERE block_deadline > {{start_block}} + AND block_deadline <= {{end_block}} + GROUP BY ss.auction_id), + reward_data AS (SELECT + -- observations + tx_hash, + ss.auction_id, + -- TODO - Assuming that `solver == winner` when both not null + -- We will need to monitor that `solver == winner`! + coalesce(solver, winner) as solver, + block_number as settlement_block, + block_deadline, + case + when block_number is not null and block_number > block_deadline then 0 + else coalesce(execution_cost, 0) end as execution_cost, + case + when block_number is not null and block_number > block_deadline then 0 + else coalesce(surplus, 0) end as surplus, + case + when block_number is not null and block_number > block_deadline then 0 + else coalesce(fee, 0) end as fee, + -- scores + winning_score, + reference_score, + -- auction_participation + participating_solvers + FROM settlement_scores ss + -- If there are reported scores, + -- there will always be a record of auction participants + JOIN auction_participation ap + ON ss.auction_id = ap.auction_id + -- outer joins made in order to capture non-existent settlements. + LEFT OUTER JOIN observed_settlements os + ON os.auction_id = ss.auction_id), + reward_per_auction as (SELECT tx_hash, + settlement_block, + block_deadline, + solver, + execution_cost, + surplus, + fee, + surplus + fee - reference_score as uncapped_payment_eth, + -- Uncapped Reward = CLAMP_[-E, E + exec_cost](uncapped_payment_eth) + LEAST(GREATEST(-10000000000000000, surplus + fee - reference_score), + 10000000000000000 + execution_cost) as capped_payment, + winning_score, + reference_score, + participating_solvers + FROM reward_data) -SELECT - block_number, - concat('0x', encode(tx_hash, 'hex')) as tx_hash, - concat('0x', encode(solver, 'hex')) as solver, - execution_cost, - surplus, - fee, - surplus + fee - reference_score as reward_eth, - winning_score, - reference_score, - participants as participating_solvers -FROM reward_data + +SELECT settlement_block as block_number, + block_deadline, + case + when tx_hash is NULL then NULL + else concat('0x', encode(tx_hash, 'hex')) + end as tx_hash, + concat('0x', encode(solver, 'hex')) as solver, + execution_cost, + surplus, + fee, + uncapped_payment_eth, + capped_payment, + winning_score, + reference_score, + participating_solvers +FROM reward_per_auction diff --git a/src/sync/order_rewards.py b/src/sync/order_rewards.py index 8312fbf8..5c2e2596 100644 --- a/src/sync/order_rewards.py +++ b/src/sync/order_rewards.py @@ -87,8 +87,7 @@ def sync_batch_rewards( block_from=last_sync_block( aws, table=sync_table, - genesis_block=16691686, # First Recorded Batch Reward block - # TODO - use correct genesis block here. (note this is not actually determined yet) + genesis_block=16862919, # First Recorded Batch Reward block ), block_to=fetcher.get_latest_block(), ) diff --git a/tests/integration/test_fetch_orderbook.py b/tests/integration/test_fetch_orderbook.py index d2dee2c0..4177a492 100644 --- a/tests/integration/test_fetch_orderbook.py +++ b/tests/integration/test_fetch_orderbook.py @@ -40,6 +40,91 @@ def test_get_order_rewards(self): self.assertIsNone(pd.testing.assert_frame_equal(expected, rewards_df)) + def test_get_batch_rewards(self): + block_number = 16846500 + block_range = BlockRange(block_number, block_number + 25) + rewards_df = OrderbookFetcher.get_batch_rewards(block_range) + expected = pd.DataFrame( + { + "block_number": pd.Series([16846495, 16846502, pd.NA], dtype="Int64"), + "block_deadline": [16846509, 16846516, 16846524], + "tx_hash": [ + "0x2189c2994dcffcd40cc92245e216b0fda42e0f30573ce4b131341e8ac776ed75", + "0x8328fa642f47adb61f751363cf718d707dafcdc258898fa953945afd42aa020f", + None, + ], + "solver": [ + "0xb20b86c4e6deeb432a22d773a221898bbbd03036", + "0x55a37a2e5e5973510ac9d9c723aec213fa161919", + "0x55a37a2e5e5973510ac9d9c723aec213fa161919", + ], + "execution_cost": [ + 5417013431615490.0, + 14681404168612460.0, + 0.0, + ], + "surplus": [ + 5867838023808109.0, + 104011002982952096.0, + 0.0, + ], + "fee": [ + 7751978767036064.0, + 10350680045815652.0, + 0.0, + ], + "uncapped_payment_eth": [ + 7232682540629268.0, + 82825156151734416.0, + -3527106002507021.0, + ], + "capped_payment": [ + 7232682540629268.0, + 24681404168612460.0, + -3527106002507021.0, + ], + "winning_score": [ + 6537976145828389.0, + 95640781782532192.0, + 3527282436747751.0, + ], + "reference_score": [ + 6387134250214905.0, + 31536526877033328.0, + 3527106002507021.0, + ], + "participating_solvers": [ + [ + "0x398890be7c4fac5d766e1aeffde44b2ee99f38ef", + "0xb20b86c4e6deeb432a22d773a221898bbbd03036", + ], + [ + "0x55a37a2e5e5973510ac9d9c723aec213fa161919", + "0x97ec0a17432d71a3234ef7173c6b48a2c0940896", + "0xa21740833858985e4d801533a808786d3647fb83", + "0xb20b86c4e6deeb432a22d773a221898bbbd03036", + "0xbff9a1b539516f9e20c7b621163e676949959a66", + "0xc9ec550bea1c64d779124b23a26292cc223327b6", + "0xda869be4adea17ad39e1dfece1bc92c02491504f", + ], + [ + "0x149d0f9282333681ee41d30589824b2798e9fb47", + "0x3cee8c7d9b5c8f225a8c36e7d3514e1860309651", + "0x55a37a2e5e5973510ac9d9c723aec213fa161919", + "0x7a0a8890d71a4834285efdc1d18bb3828e765c6a", + "0x97ec0a17432d71a3234ef7173c6b48a2c0940896", + "0xa21740833858985e4d801533a808786d3647fb83", + "0xb20b86c4e6deeb432a22d773a221898bbbd03036", + "0xbff9a1b539516f9e20c7b621163e676949959a66", + "0xc9ec550bea1c64d779124b23a26292cc223327b6", + "0xda869be4adea17ad39e1dfece1bc92c02491504f", + "0xe9ae2d792f981c53ea7f6493a17abf5b2a45a86b", + ], + ], + }, + ) + self.assertIsNone(pd.testing.assert_frame_equal(expected, rewards_df)) + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_batch_rewards_schema.py b/tests/unit/test_batch_rewards_schema.py new file mode 100644 index 00000000..57e9026e --- /dev/null +++ b/tests/unit/test_batch_rewards_schema.py @@ -0,0 +1,100 @@ +import unittest + +import pandas +import pandas as pd + +from src.models.batch_rewards_schema import BatchRewards + +ONE_ETH = 1000000000000000000 + + +class TestModelBatchRewards(unittest.TestCase): + def test_order_rewards_transformation(self): + sample_df = pd.DataFrame( + { + "block_number": pd.Series([123, pandas.NA], dtype="Int64"), + "block_deadline": [789, 1011], + "tx_hash": [ + "0x71", + None, + ], + "solver": [ + "0x51", + "0x52", + ], + "execution_cost": [9999 * ONE_ETH, 1], + "surplus": [2 * ONE_ETH, 3 * ONE_ETH], + "fee": [ + 1000000000000000, + 0, + ], + "uncapped_payment_eth": [0, -10 * ONE_ETH], + "capped_payment": [-1000000000000000, -1000000000000000], + "winning_score": [123456 * ONE_ETH, 6789 * ONE_ETH], + "reference_score": [ONE_ETH, 2 * ONE_ETH], + "participating_solvers": [ + [ + "0x51", + "0x52", + "0x53", + ], + [ + "0x51", + "0x52", + "0x53", + "0x54", + "0x55", + "0x56", + ], + ], + } + ) + + self.assertEqual( + [ + { + "block_deadline": 789, + "block_number": 123, + "data": { + "capped_payment": "-1000000000000000", + "execution_cost": "9999000000000000000000", + "fee": "1000000000000000", + "participating_solvers": ["0x51", "0x52", "0x53"], + "reference_score": "1000000000000000000", + "surplus": "2000000000000000000", + "uncapped_payment_eth": "0", + "winning_score": "123456000000000000000000", + }, + "solver": "0x51", + "tx_hash": "0x71", + }, + { + "block_deadline": 1011, + "block_number": None, + "data": { + "capped_payment": "-1000000000000000", + "execution_cost": "1", + "fee": "0", + "participating_solvers": [ + "0x51", + "0x52", + "0x53", + "0x54", + "0x55", + "0x56", + ], + "reference_score": "2000000000000000000", + "surplus": "3000000000000000000", + "uncapped_payment_eth": "-10000000000000000000", + "winning_score": "6789000000000000000000", + }, + "solver": "0x52", + "tx_hash": None, + }, + ], + BatchRewards.from_pdf_to_dune_records(sample_df), + ) + + +if __name__ == "__main__": + unittest.main()