From edffd47e25590d2d7f248061d8110ec292240d56 Mon Sep 17 00:00:00 2001
From: Benjamin Smith <bh2smith@users.noreply.github.com>
Date: Mon, 20 Mar 2023 12:17:28 +0100
Subject: [PATCH] [CIP-20] Update Query & Test (#30)

This PR finalizes the DB query based on the progress made in [rewards script](https://github.com/cowprotocol/solver-rewards) -- specifically that query which was finalized [here](https://github.com/cowprotocol/solver-rewards/pull/203)

Its a bit gross that the orderbook query lives in two places (for the time being). At some point we will want to expose this as an API endpoint (on a constrained `block_range`) directly via the orderbook. However, until we have landed on the correct query (sufficient to reproduce solver rewards) we will maintain this externally - in two places.

The tests introduced here are both for the DB extraction result and then the transformation phase to JSON before upload.

Closes #31
---
 src/fetch/orderbook.py                    |  23 +++-
 src/models/batch_rewards_schema.py        |  11 +-
 src/sql/orderbook/batch_rewards.sql       | 161 +++++++++++++---------
 src/sync/order_rewards.py                 |   3 +-
 tests/integration/test_fetch_orderbook.py |  85 ++++++++++++
 tests/unit/test_batch_rewards_schema.py   | 100 ++++++++++++++
 6 files changed, 309 insertions(+), 74 deletions(-)
 create mode 100644 tests/unit/test_batch_rewards_schema.py
diff --git a/src/fetch/orderbook.py b/src/fetch/orderbook.py
index 1a70d313..a159f617 100644
--- a/src/fetch/orderbook.py
+++ b/src/fetch/orderbook.py
@@ -16,6 +16,15 @@
 from src.utils import open_query
 
 REORG_THRESHOLD = 65
+NUMERICAL_COLUMNS = [
+    "execution_cost",
+    "surplus",
+    "fee",
+    "uncapped_payment_eth",
+    "capped_payment",
+    "winning_score",
+    "reference_score",
+]
 
 
 class OrderbookEnv(Enum):
@@ -101,9 +110,19 @@ def get_batch_rewards(cls, block_range: BlockRange) -> DataFrame:
             .replace("{{start_block}}", str(block_range.block_from))
             .replace("{{end_block}}", str(block_range.block_to))
         )
-        data_types = {"block_number": "int64"}
+        data_types = {
+            # According to this: https://stackoverflow.com/a/11548224
+            # capitalized int64 means `Optional<Integer>` and it appears to work.
+            "block_number": "Int64",
+            "block_deadline": "int64",
+        }
         barn, prod = cls._query_both_dbs(cow_reward_query, data_types)
 
         # Solvers do not appear in both environments!
         assert set(prod.solver).isdisjoint(set(barn.solver)), "solver overlap!"
-        return pd.concat([prod, barn])
+        # Ensure numerical types.
+        combined_df = pd.concat([prod, barn])
+        for number_col in NUMERICAL_COLUMNS:
+            combined_df[number_col] = pd.to_numeric(combined_df[number_col])
+
+        return combined_df
diff --git a/src/models/batch_rewards_schema.py b/src/models/batch_rewards_schema.py
index d134c7ce..8e253ea0 100644
--- a/src/models/batch_rewards_schema.py
+++ b/src/models/batch_rewards_schema.py
@@ -4,6 +4,7 @@
 from dataclasses import dataclass
 from typing import Any
 
+import pandas
 from pandas import DataFrame
 
 
@@ -18,19 +19,21 @@ def from_pdf_to_dune_records(cls, rewards_df: DataFrame) -> list[dict[str, Any]]
         """Converts Pandas DataFrame into the expected stream type for Dune"""
         return [
             {
-                "block_number": int(row["block_number"]),
+                "block_number": int(row["block_number"])
+                if not pandas.isna(row["block_number"])
+                else None,
                 "tx_hash": row["tx_hash"],
                 "solver": row["solver"],
+                "block_deadline": int(row["block_deadline"]),
                 "data": {
                     # All the following values are in WEI.
-                    "reward_eth": str(row["reward_eth"]),
+                    "uncapped_payment_eth": str(row["uncapped_payment_eth"]),
+                    "capped_payment": str(row["capped_payment"]),
                     "execution_cost": str(row["execution_cost"]),
                     "surplus": str(row["surplus"]),
                     "fee": str(row["fee"]),
                     "winning_score": str(row["winning_score"]),
                     "reference_score": str(row["reference_score"]),
-                    # TODO - Not sure yet how to parse this bytea[]
-                    #  Will need to experiment with this.
                     "participating_solvers": row["participating_solvers"],
                 },
             }
diff --git a/src/sql/orderbook/batch_rewards.sql b/src/sql/orderbook/batch_rewards.sql
index 4a748d61..a416a0fa 100644
--- a/src/sql/orderbook/batch_rewards.sql
+++ b/src/sql/orderbook/batch_rewards.sql
@@ -1,68 +1,97 @@
-WITH observed_settlements AS (
-SELECT
-    -- settlement
-    tx_hash,
-    solver,
-    -- settlement_observations
-    block_number,
-    effective_gas_price * gas_used AS execution_cost,
-    surplus,
-    fee,
-    -- auction_transaction
-    auction_id
-FROM settlement_observations so
-JOIN settlements s
-  ON s.block_number = so.block_number
-  AND s.log_index = so.log_index
-JOIN auction_transaction at
-  ON s.tx_from = at.tx_from
-  AND s.tx_nonce = at.tx_nonce
-WHERE block_number > {{start_block}} AND block_number <= {{end_block}}
-),
+WITH observed_settlements AS (SELECT
+                                -- settlement
+                                tx_hash,
+                                solver,
+                                s.block_number,
+                                -- settlement_observations
+                                effective_gas_price * gas_used AS execution_cost,
+                                surplus,
+                                fee,
+                                -- auction_transaction
+                                at.auction_id
+                              FROM settlement_observations so
+                                     JOIN settlements s
+                                          ON s.block_number = so.block_number
+                                            AND s.log_index = so.log_index
+                                     JOIN auction_transaction at
+                                          ON s.tx_from = at.tx_from
+                                            AND s.tx_nonce = at.tx_nonce
+                                     JOIN settlement_scores ss
+                                          ON at.auction_id = ss.auction_id
+                              WHERE ss.block_deadline > {{start_block}}
+                                AND ss.block_deadline <= {{end_block}}),
 
-reward_data AS (
-  SELECT
-    -- observations
-    tx_hash,
-    coalesce(
-      solver,
-      -- This is the winning solver (i.e. last entry of participants array)
-      participants[array_length(participants, 1)]
-    ) as solver,
-    -- Right-hand terms in coalesces below represent the case when settlement
-    -- observations are unavailable (i.e. no settlement corresponds to reported scores).
-    -- In particular, this means that surplus, fee and execution cost are all zero.
-    -- When there is an absence of settlement block number, we fall back
-    -- on the block_deadline from the settlement_scores table.
-    coalesce(block_number, block_deadline) as block_number,
-    coalesce(execution_cost, 0) as execution_cost,
-    coalesce(surplus, 0) as surplus,
-    coalesce(fee, 0) as fee,
-    surplus + fee - reference_score AS payment,
-    -- scores
-    winning_score,
-    reference_score,
-    -- participation
-    participants
-FROM settlement_scores ss
--- If there are reported scores,
--- there will always be a record of auction participants
-JOIN auction_participants ap
-  ON os.auction_id = ap.auction_id
-  -- outer joins made in order to capture non-existent settlements.
-LEFT OUTER JOIN observed_settlements os
-  ON os.auction_id = ss.auction_id
-)
+     auction_participation as (SELECT ss.auction_id,
+                                      array_agg(
+                                              concat('0x', encode(participant, 'hex')) ORDER BY participant
+                                        ) as participating_solvers
+                               FROM auction_participants
+                                      JOIN settlement_scores ss
+                                           ON auction_participants.auction_id = ss.auction_id
+                               WHERE block_deadline > {{start_block}}
+                                 AND block_deadline <= {{end_block}}
+                               GROUP BY ss.auction_id),
+     reward_data AS (SELECT
+                       -- observations
+                       tx_hash,
+                       ss.auction_id,
+                       -- TODO - Assuming that `solver == winner` when both not null
+                       --  We will need to monitor that `solver == winner`!
+                       coalesce(solver, winner)               as solver,
+                       block_number                           as settlement_block,
+                       block_deadline,
+                       case
+                         when block_number is not null and block_number > block_deadline then 0
+                         else coalesce(execution_cost, 0) end as execution_cost,
+                       case
+                         when block_number is not null and block_number > block_deadline then 0
+                         else coalesce(surplus, 0) end        as surplus,
+                       case
+                         when block_number is not null and block_number > block_deadline then 0
+                         else coalesce(fee, 0) end            as fee,
+                       -- scores
+                       winning_score,
+                       reference_score,
+                       -- auction_participation
+                       participating_solvers
+                     FROM settlement_scores ss
+                            -- If there are reported scores,
+                            -- there will always be a record of auction participants
+                            JOIN auction_participation ap
+                                 ON ss.auction_id = ap.auction_id
+                       -- outer joins made in order to capture non-existent settlements.
+                            LEFT OUTER JOIN observed_settlements os
+                                            ON os.auction_id = ss.auction_id),
+     reward_per_auction as (SELECT tx_hash,
+                                   settlement_block,
+                                   block_deadline,
+                                   solver,
+                                   execution_cost,
+                                   surplus,
+                                   fee,
+                                   surplus + fee - reference_score           as uncapped_payment_eth,
+                                   -- Uncapped Reward = CLAMP_[-E, E + exec_cost](uncapped_payment_eth)
+                                   LEAST(GREATEST(-10000000000000000, surplus + fee - reference_score),
+                                         10000000000000000 + execution_cost) as capped_payment,
+                                   winning_score,
+                                   reference_score,
+                                   participating_solvers
+                            FROM reward_data)
 
-SELECT
-  block_number,
-  concat('0x', encode(tx_hash, 'hex'))  as tx_hash,
-  concat('0x', encode(solver, 'hex'))  as solver,
-  execution_cost,
-  surplus,
-  fee,
-  surplus + fee - reference_score as reward_eth,
-  winning_score,
-  reference_score,
-  participants as participating_solvers
-FROM reward_data
+
+SELECT settlement_block                    as block_number,
+       block_deadline,
+       case
+         when tx_hash is NULL then NULL
+         else concat('0x', encode(tx_hash, 'hex'))
+         end                               as tx_hash,
+       concat('0x', encode(solver, 'hex')) as solver,
+       execution_cost,
+       surplus,
+       fee,
+       uncapped_payment_eth,
+       capped_payment,
+       winning_score,
+       reference_score,
+       participating_solvers
+FROM reward_per_auction
diff --git a/src/sync/order_rewards.py b/src/sync/order_rewards.py
index 8312fbf8..5c2e2596 100644
--- a/src/sync/order_rewards.py
+++ b/src/sync/order_rewards.py
@@ -87,8 +87,7 @@ def sync_batch_rewards(
         block_from=last_sync_block(
             aws,
             table=sync_table,
-            genesis_block=16691686,  # First Recorded Batch Reward block
-            # TODO - use correct genesis block here. (note this is not actually determined yet)
+            genesis_block=16862919,  # First Recorded Batch Reward block
         ),
         block_to=fetcher.get_latest_block(),
     )
diff --git a/tests/integration/test_fetch_orderbook.py b/tests/integration/test_fetch_orderbook.py
index d2dee2c0..4177a492 100644
--- a/tests/integration/test_fetch_orderbook.py
+++ b/tests/integration/test_fetch_orderbook.py
@@ -40,6 +40,91 @@ def test_get_order_rewards(self):
 
         self.assertIsNone(pd.testing.assert_frame_equal(expected, rewards_df))
 
+    def test_get_batch_rewards(self):
+        block_number = 16846500
+        block_range = BlockRange(block_number, block_number + 25)
+        rewards_df = OrderbookFetcher.get_batch_rewards(block_range)
+        expected = pd.DataFrame(
+            {
+                "block_number": pd.Series([16846495, 16846502, pd.NA], dtype="Int64"),
+                "block_deadline": [16846509, 16846516, 16846524],
+                "tx_hash": [
+                    "0x2189c2994dcffcd40cc92245e216b0fda42e0f30573ce4b131341e8ac776ed75",
+                    "0x8328fa642f47adb61f751363cf718d707dafcdc258898fa953945afd42aa020f",
+                    None,
+                ],
+                "solver": [
+                    "0xb20b86c4e6deeb432a22d773a221898bbbd03036",
+                    "0x55a37a2e5e5973510ac9d9c723aec213fa161919",
+                    "0x55a37a2e5e5973510ac9d9c723aec213fa161919",
+                ],
+                "execution_cost": [
+                    5417013431615490.0,
+                    14681404168612460.0,
+                    0.0,
+                ],
+                "surplus": [
+                    5867838023808109.0,
+                    104011002982952096.0,
+                    0.0,
+                ],
+                "fee": [
+                    7751978767036064.0,
+                    10350680045815652.0,
+                    0.0,
+                ],
+                "uncapped_payment_eth": [
+                    7232682540629268.0,
+                    82825156151734416.0,
+                    -3527106002507021.0,
+                ],
+                "capped_payment": [
+                    7232682540629268.0,
+                    24681404168612460.0,
+                    -3527106002507021.0,
+                ],
+                "winning_score": [
+                    6537976145828389.0,
+                    95640781782532192.0,
+                    3527282436747751.0,
+                ],
+                "reference_score": [
+                    6387134250214905.0,
+                    31536526877033328.0,
+                    3527106002507021.0,
+                ],
+                "participating_solvers": [
+                    [
+                        "0x398890be7c4fac5d766e1aeffde44b2ee99f38ef",
+                        "0xb20b86c4e6deeb432a22d773a221898bbbd03036",
+                    ],
+                    [
+                        "0x55a37a2e5e5973510ac9d9c723aec213fa161919",
+                        "0x97ec0a17432d71a3234ef7173c6b48a2c0940896",
+                        "0xa21740833858985e4d801533a808786d3647fb83",
+                        "0xb20b86c4e6deeb432a22d773a221898bbbd03036",
+                        "0xbff9a1b539516f9e20c7b621163e676949959a66",
+                        "0xc9ec550bea1c64d779124b23a26292cc223327b6",
+                        "0xda869be4adea17ad39e1dfece1bc92c02491504f",
+                    ],
+                    [
+                        "0x149d0f9282333681ee41d30589824b2798e9fb47",
+                        "0x3cee8c7d9b5c8f225a8c36e7d3514e1860309651",
+                        "0x55a37a2e5e5973510ac9d9c723aec213fa161919",
+                        "0x7a0a8890d71a4834285efdc1d18bb3828e765c6a",
+                        "0x97ec0a17432d71a3234ef7173c6b48a2c0940896",
+                        "0xa21740833858985e4d801533a808786d3647fb83",
+                        "0xb20b86c4e6deeb432a22d773a221898bbbd03036",
+                        "0xbff9a1b539516f9e20c7b621163e676949959a66",
+                        "0xc9ec550bea1c64d779124b23a26292cc223327b6",
+                        "0xda869be4adea17ad39e1dfece1bc92c02491504f",
+                        "0xe9ae2d792f981c53ea7f6493a17abf5b2a45a86b",
+                    ],
+                ],
+            },
+        )
+        self.assertIsNone(pd.testing.assert_frame_equal(expected, rewards_df))
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/unit/test_batch_rewards_schema.py b/tests/unit/test_batch_rewards_schema.py
new file mode 100644
index 00000000..57e9026e
--- /dev/null
+++ b/tests/unit/test_batch_rewards_schema.py
@@ -0,0 +1,100 @@
+import unittest
+
+import pandas
+import pandas as pd
+
+from src.models.batch_rewards_schema import BatchRewards
+
+ONE_ETH = 1000000000000000000
+
+
+class TestModelBatchRewards(unittest.TestCase):
+    def test_order_rewards_transformation(self):
+        sample_df = pd.DataFrame(
+            {
+                "block_number": pd.Series([123, pandas.NA], dtype="Int64"),
+                "block_deadline": [789, 1011],
+                "tx_hash": [
+                    "0x71",
+                    None,
+                ],
+                "solver": [
+                    "0x51",
+                    "0x52",
+                ],
+                "execution_cost": [9999 * ONE_ETH, 1],
+                "surplus": [2 * ONE_ETH, 3 * ONE_ETH],
+                "fee": [
+                    1000000000000000,
+                    0,
+                ],
+                "uncapped_payment_eth": [0, -10 * ONE_ETH],
+                "capped_payment": [-1000000000000000, -1000000000000000],
+                "winning_score": [123456 * ONE_ETH, 6789 * ONE_ETH],
+                "reference_score": [ONE_ETH, 2 * ONE_ETH],
+                "participating_solvers": [
+                    [
+                        "0x51",
+                        "0x52",
+                        "0x53",
+                    ],
+                    [
+                        "0x51",
+                        "0x52",
+                        "0x53",
+                        "0x54",
+                        "0x55",
+                        "0x56",
+                    ],
+                ],
+            }
+        )
+
+        self.assertEqual(
+            [
+                {
+                    "block_deadline": 789,
+                    "block_number": 123,
+                    "data": {
+                        "capped_payment": "-1000000000000000",
+                        "execution_cost": "9999000000000000000000",
+                        "fee": "1000000000000000",
+                        "participating_solvers": ["0x51", "0x52", "0x53"],
+                        "reference_score": "1000000000000000000",
+                        "surplus": "2000000000000000000",
+                        "uncapped_payment_eth": "0",
+                        "winning_score": "123456000000000000000000",
+                    },
+                    "solver": "0x51",
+                    "tx_hash": "0x71",
+                },
+                {
+                    "block_deadline": 1011,
+                    "block_number": None,
+                    "data": {
+                        "capped_payment": "-1000000000000000",
+                        "execution_cost": "1",
+                        "fee": "0",
+                        "participating_solvers": [
+                            "0x51",
+                            "0x52",
+                            "0x53",
+                            "0x54",
+                            "0x55",
+                            "0x56",
+                        ],
+                        "reference_score": "2000000000000000000",
+                        "surplus": "3000000000000000000",
+                        "uncapped_payment_eth": "-10000000000000000000",
+                        "winning_score": "6789000000000000000000",
+                    },
+                    "solver": "0x52",
+                    "tx_hash": None,
+                },
+            ],
+            BatchRewards.from_pdf_to_dune_records(sample_df),
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()