Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V5 json file parsing from rescue mode #1

Merged
merged 20 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/rust-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ on:
- cron: "30 00 * * *"

jobs:
types:
all-tests:
timeout-minutes: 60
runs-on: ubuntu-latest
steps:
Expand Down
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ version = "0.0.1"

[dependencies]
anyhow = "^1.0"
bcs = { git = "https://github.com/aptos-labs/bcs.git", rev = "d31fab9d81748e2594be5cd5cdf845786a30562d" }
chrono = { version = "0.4.19", features = ["clock", "serde"] }
clap = { version = "4.3.5", features = ["derive", "unstable-styles"] }
diem-temppath = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
Expand All @@ -29,6 +30,9 @@ once_cell = "^1.2"
serde = { version = "^1.0", features = ["derive", "rc"] }
serde_json = { version = "^1", features = ["preserve_order"] }
tokio = { version = "1", features = ["full"] }
hex = "0.4.3"
tar = "0.4.43"
smooth-json = "0.2.7"

[dev-dependencies]
sqlx = { version = "0.8", features = [ "runtime-tokio", "tls-native-tls", "sqlite", "migrate", "macros", "derive", "postgres"] }
Expand Down
39 changes: 21 additions & 18 deletions src/cypher_templates.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//! organic free trade template literals for cypher queries
use anyhow::Result;
use anyhow::{Context, Result};

// TODO move this to a .CQL file so we can lint and debug
pub fn write_batch_tx_string(list_str: &str) -> String {
Expand All @@ -16,12 +16,16 @@ MERGE (from)-[rel:Tx {{tx_hash: tx.tx_hash}}]->(to)
ON CREATE SET rel.created_at = timestamp(), rel.modified_at = null
ON MATCH SET rel.modified_at = timestamp()
SET
rel += tx.args,
rel.block_datetime = tx.block_datetime,
rel.block_timestamp = tx.block_timestamp,
rel.relation = tx.relation,
rel.function = tx.function

// Conditionally add `tx.args` if it exists
FOREACH (_ IN CASE WHEN tx.args IS NOT NULL THEN [1] ELSE [] END |
SET rel += tx.args
)

WITH rel

RETURN
Expand Down Expand Up @@ -59,7 +63,7 @@ RETURN
)
}

use anyhow::bail;
use log::warn;
use serde::Serialize;
use serde_json::Value;

Expand All @@ -72,23 +76,24 @@ use serde_json::Value;
/// # Returns
/// A string in the format `{key: value, nested: {key2: value2}, array: [value3, value4]}` that can be used in Cypher queries.
/// Thanks Copilot ;)
pub fn to_cypher_object<T: Serialize>(object: &T, prefix: Option<&str>) -> Result<String> {
pub fn to_cypher_object<T: Serialize>(object: &T) -> Result<String> {
// Serialize the struct to a JSON value

let serialized_value = serde_json::to_value(object).expect("Failed to serialize");
// dbg!(&serialized_value);

// Convert the JSON value into a map for easy processing
let map = if let Value::Object(obj) = serialized_value {
obj
} else {
bail!("Expected the serialized value to be an object");
let flattener = smooth_json::Flattener {
separator: "_",
..Default::default()
};

// Convert the JSON value into a map for easy processing
let flat = flattener.flatten(&serialized_value);
let map = flat.as_object().context("cannot map on json object")?;
// Build properties part of the Cypher object
let properties: Vec<String> = map
.into_iter()
.map(|(mut key, value)| {
.map(|(key, value)| {
let formatted_value = match value {
Value::String(s) => format!("'{}'", s), // Wrap strings in single quotes
Value::Number(n) => n.to_string(), // Use numbers as-is
Expand All @@ -104,19 +109,17 @@ pub fn to_cypher_object<T: Serialize>(object: &T, prefix: Option<&str>) -> Resul
Value::Bool(b) => b.to_string(),
Value::Null => "null".to_string(),
Value::Object(_) => {
to_cypher_object(elem, None).unwrap_or("error".to_owned())
to_cypher_object(elem).unwrap_or("error".to_owned())
} // Recurse for nested objects in arrays
_ => panic!("Unsupported type in array for Cypher serialization"),
_ => "Unsupported type in array for Cypher serialization".to_string(),
})
.collect();
format!("[{}]", elements.join(", "))
}
Value::Object(_) => {
if let Some(p) = prefix {
key = format!("{}.{}", p, key);
}
to_cypher_object(&value, Some(&key)).unwrap_or("error".to_owned())
} // Recurse for nested objects
warn!("the json should have been flattened before this");
"recursive object error".to_string()
}
};
format!("{}: {}", key, formatted_value)
})
Expand Down Expand Up @@ -165,6 +168,6 @@ fn test_serialize_to_cypher_object() {
};

// Serialize to a Cypher object
let cypher_object = to_cypher_object(&person, None).unwrap();
let cypher_object = to_cypher_object(&person).unwrap();
println!("{}", cypher_object);
}
1 change: 0 additions & 1 deletion src/extract_transactions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ pub fn make_master_tx(
tx_hash,
expiration_timestamp: user_tx.expiration_timestamp_secs(),
sender: user_tx.sender(),
recipient: relation_label.get_recipient(),
epoch,
round,
block_timestamp,
Expand Down
227 changes: 227 additions & 0 deletions src/json_rescue_v5_extract.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
use crate::{
schema_transaction::{EntryFunctionArgs, RelationLabel, WarehouseEvent, WarehouseTxMaster},
unzip_temp::decompress_tar_archive,
};
use diem_crypto::HashValue;
use libra_backwards_compatibility::{
sdk::{
v5_0_0_genesis_transaction_script_builder::ScriptFunctionCall as ScriptFunctionCallGenesis,
v5_2_0_transaction_script_builder::ScriptFunctionCall as ScriptFunctionCallV520,
},
version_five::{
legacy_address_v5::LegacyAddressV5,
transaction_type_v5::{TransactionPayload, TransactionV5},
transaction_view_v5::{ScriptView, TransactionDataView, TransactionViewV5},
},
};

use anyhow::{anyhow, Context, Result};
use diem_temppath::TempPath;
use diem_types::account_address::AccountAddress;
use log::trace;
use std::path::{Path, PathBuf};
/// The canonical transaction archives for V5 were kept in a different format as in v6 and v7.
/// As of Nov 2024, there's a project to recover the V5 transaction archives to be in the same bytecode flat file format as v6 and v7.
/// Until then, we must parse the json files.
///
/// Parses one rescue-format JSON file into warehouse transaction records.
/// Only `UserTransaction` entries are converted; the events vector is a
/// placeholder and is currently always empty (see TODOs below).
pub fn extract_v5_json_rescue(
    one_json_file: &Path,
) -> Result<(Vec<WarehouseTxMaster>, Vec<WarehouseEvent>)> {
    let json = std::fs::read_to_string(one_json_file).context("could not read file")?;

    let txs: Vec<TransactionViewV5> = serde_json::from_str(&json)
        .map_err(|e| anyhow!("could not parse JSON to TransactionViewV5, {:?}", e))?;

    let mut tx_vec = vec![];
    let event_vec = vec![];

    for view in txs {
        if let TransactionDataView::UserTransaction { sender, script, .. } = &view.transaction {
            let mut wtx = WarehouseTxMaster::default();

            wtx.sender = cast_legacy_account(sender)?;

            // must cast from V5 Hashvalue buffer layout
            wtx.tx_hash = HashValue::from_slice(&view.hash.to_vec())?;

            wtx.function = make_function_name(script);
            trace!("function: {}", &wtx.function);

            decode_transaction_args(&mut wtx, &view.bytes)?;

            // TODO:
            // wtx.events
            // wtx.block_timestamp

            tx_vec.push(wtx);
        }
        // TODO: for TransactionDataView::BlockMetadata { timestamp_usecs } entries,
        // harvest epoch (NewEpoch) events from view.events.
    }

    Ok((tx_vec, event_vec))
}

/// BCS-decodes the raw transaction bytes and populates `wtx` with a
/// `RelationLabel` (and, for selected script functions, the decoded
/// entry-function args).
///
/// The payload is tried against both known V5 ABIs: first the genesis
/// (5.0.0) script-function set, then the 5.2.0 set.
/// NOTE(review): when BOTH decoders match, the 5.2.0 pass overwrites the
/// labels set by the genesis pass — confirm this precedence is intended.
///
/// # Errors
/// Returns an error if `tx_bytes` cannot be BCS-decoded into a `TransactionV5`.
pub fn decode_transaction_args(wtx: &mut WarehouseTxMaster, tx_bytes: &[u8]) -> Result<()> {
    // check we can bcs decode to the transaction object; propagate the error
    // instead of panicking, since this function already returns Result
    let t: TransactionV5 =
        bcs::from_bytes(tx_bytes).context("could not bcs-decode TransactionV5 bytes")?;

    if let TransactionV5::UserTransaction(u) = &t {
        if let TransactionPayload::ScriptFunction(_) = &u.raw_txn.payload {
            if let Some(sf) = &ScriptFunctionCallGenesis::decode(&u.raw_txn.payload) {
                // TODO: some script functions have very large payloads which clog the e.g. Miner. So those are only added for the catch-all txs which don't fall into categories we are interested in.
                match sf {
                    ScriptFunctionCallGenesis::BalanceTransfer { destination, .. } => {
                        wtx.relation_label =
                            RelationLabel::Transfer(cast_legacy_account(destination)?);

                        wtx.entry_function = Some(EntryFunctionArgs::V5(sf.to_owned()));
                    }
                    ScriptFunctionCallGenesis::CreateAccUser { .. } => {
                        // onboards self
                        wtx.relation_label = RelationLabel::Onboarding(wtx.sender);
                    }
                    ScriptFunctionCallGenesis::CreateAccVal { .. } => {
                        // onboards self
                        wtx.relation_label = RelationLabel::Onboarding(wtx.sender);
                    }

                    ScriptFunctionCallGenesis::CreateUserByCoinTx { account, .. } => {
                        wtx.relation_label =
                            RelationLabel::Onboarding(cast_legacy_account(account)?);
                    }
                    ScriptFunctionCallGenesis::CreateValidatorAccount {
                        sliding_nonce: _,
                        new_account_address,
                        ..
                    } => {
                        wtx.relation_label =
                            RelationLabel::Onboarding(cast_legacy_account(new_account_address)?);
                    }
                    ScriptFunctionCallGenesis::CreateValidatorOperatorAccount {
                        sliding_nonce: _,
                        new_account_address,
                        ..
                    } => {
                        wtx.relation_label =
                            RelationLabel::Onboarding(cast_legacy_account(new_account_address)?);
                    }

                    ScriptFunctionCallGenesis::MinerstateCommit { .. } => {
                        wtx.relation_label = RelationLabel::Miner;
                    }
                    ScriptFunctionCallGenesis::MinerstateCommitByOperator { .. } => {
                        wtx.relation_label = RelationLabel::Miner;
                    }
                    _ => {
                        // catch-all: keep the full decoded args only here
                        wtx.relation_label = RelationLabel::Configuration;

                        wtx.entry_function = Some(EntryFunctionArgs::V5(sf.to_owned()));
                    }
                }
            }

            if let Some(sf) = &ScriptFunctionCallV520::decode(&u.raw_txn.payload) {
                match sf {
                    ScriptFunctionCallV520::CreateAccUser { .. } => {
                        // onboards self
                        wtx.relation_label = RelationLabel::Onboarding(wtx.sender);
                    }
                    ScriptFunctionCallV520::CreateAccVal { .. } => {
                        // onboards self
                        wtx.relation_label = RelationLabel::Onboarding(wtx.sender);
                    }

                    ScriptFunctionCallV520::CreateValidatorAccount {
                        sliding_nonce: _,
                        new_account_address,
                        ..
                    } => {
                        wtx.relation_label =
                            RelationLabel::Onboarding(cast_legacy_account(new_account_address)?);
                    }
                    ScriptFunctionCallV520::CreateValidatorOperatorAccount {
                        sliding_nonce: _,
                        new_account_address,
                        ..
                    } => {
                        wtx.relation_label =
                            RelationLabel::Onboarding(cast_legacy_account(new_account_address)?);
                    }
                    ScriptFunctionCallV520::MinerstateCommit { .. } => {
                        wtx.relation_label = RelationLabel::Miner;
                    }
                    ScriptFunctionCallV520::MinerstateCommitByOperator { .. } => {
                        wtx.relation_label = RelationLabel::Miner;
                    }
                    _ => {
                        wtx.relation_label = RelationLabel::Configuration;
                        wtx.entry_function = Some(EntryFunctionArgs::V520(sf.to_owned()));
                    }
                }
            }
        }
    }
    Ok(())
}

/// Unpacks a tgz archive into a freshly created temporary directory.
/// NOTE: the `TempPath` handle is handed back to the caller, which must keep
/// it alive for as long as the files are needed — dropping it deletes the
/// directory and everything inside.
pub fn decompress_to_temppath(tgz_file: &Path) -> Result<TempPath> {
    let staging_dir = TempPath::new();
    staging_dir.create_as_dir()?;

    decompress_tar_archive(tgz_file, staging_dir.path())?;

    Ok(staging_dir)
}

/// Recursively lists all `.json` files under `search_dir`
/// (typically the temp dir the tgz was decompressed into).
///
/// # Errors
/// Fails if `search_dir` cannot be canonicalized, is not valid UTF-8,
/// or if any path yielded by the glob walk cannot be read.
pub fn list_all_json_files(search_dir: &Path) -> Result<Vec<PathBuf>> {
    let path = search_dir.canonicalize()?;

    let pattern = format!(
        "{}/**/*.json",
        path.to_str().context("cannot parse starting dir")?
    );

    // propagate glob iteration errors instead of panicking on unwrap
    let vec_pathbuf: Vec<PathBuf> = glob::glob(&pattern)?
        .collect::<Result<Vec<_>, _>>()
        .context("could not read a path matched by glob")?;
    Ok(vec_pathbuf)
}

/// Recursively lists all `.tgz` archive files under `search_dir`.
///
/// # Errors
/// Fails if `search_dir` cannot be canonicalized, is not valid UTF-8,
/// or if any path yielded by the glob walk cannot be read.
pub fn list_all_tgz_archives(search_dir: &Path) -> Result<Vec<PathBuf>> {
    let path = search_dir.canonicalize()?;

    let pattern = format!(
        "{}/**/*.tgz",
        path.to_str().context("cannot parse starting dir")?
    );

    // propagate glob iteration errors instead of panicking on unwrap
    let vec_pathbuf: Vec<PathBuf> = glob::glob(&pattern)?
        .collect::<Result<Vec<_>, _>>()
        .context("could not read a path matched by glob")?;
    Ok(vec_pathbuf)
}

/// Formats a V5 script's module/function pair as `0x::module::function`,
/// substituting "none" for any missing name.
fn make_function_name(script: &ScriptView) -> String {
    // as_deref avoids the per-call String allocation the old
    // unwrap_or(&"none".to_string()) incurred (resolves the "gross borrows" TODO)
    format!(
        "0x::{}::{}",
        script.module_name.as_deref().unwrap_or("none"),
        script.function_name.as_deref().unwrap_or("none")
    )
}

/// Converts a legacy V5 address into the current `AccountAddress` type by
/// round-tripping through its hex-literal representation.
fn cast_legacy_account(legacy: &LegacyAddressV5) -> Result<AccountAddress> {
    let hex_literal = legacy.to_hex_literal();
    let address = AccountAddress::from_hex_literal(&hex_literal)?;
    Ok(address)
}
Loading
Loading