Skip to content

Commit

Permalink
feat: anonymity calculations (#11)
Browse files Browse the repository at this point in the history
* feat: heuristics in python now

* fix: installation

* chore: ignore pngs and pdfs

* feat: notebooks for clustering and thesis graphs

* feat: anonymity calculations

* feat: config for regtest

* feat: analysis and wrapup

---------

This PR adds functionality for calculating the anonymity scores of coinjoins over a configurable number of days.

It also adds regtest support to the configuration (CoinjoinConfiguration) and includes some minor installation fixes.

Co-authored-by: Stanislav Boboň <[email protected]>
  • Loading branch information
mmstanone and Stanislav Boboň authored Dec 6, 2024
1 parent e5ee488 commit 8475b11
Show file tree
Hide file tree
Showing 18 changed files with 1,241 additions and 403 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@ venv
cmake-build-debug
.vscode
*.ipynb_checkpoints

*.png
*.pdf
10 changes: 5 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ ADD . /blocksci


RUN curl -LsSf https://astral.sh/uv/install.sh | sh
RUN /root/.cargo/bin/uv python install 3.8.20
RUN /root/.cargo/bin/uv python pin 3.8.20
RUN /root/.local/bin/uv python install 3.8.20
RUN /root/.local/bin/uv python pin 3.8.20

RUN /root/.cargo/bin/uv run which pip3
RUN /root/.local/bin/uv run which pip3

RUN mkdir -p /usr/lib/python3.8/site-packages/

RUN cd /blocksci && \
/root/.cargo/bin/uv venv && CC=gcc-7 CXX=g++ /root/.cargo/bin/uv run pip3 install -r /blocksci/pip-all-requirements.txt
/root/.local/bin/uv venv && CC=gcc-7 CXX=g++ /root/.local/bin/uv run pip3 install -r /blocksci/pip-all-requirements.txt

# Build BlockSci
RUN cd blocksci && \
Expand All @@ -41,7 +41,7 @@ RUN cd blocksci && \
# Install BlockSci Python bindings

RUN cd blocksci && rm -rf blockscipy/build && \
/root/.cargo/bin/uv venv && CC=gcc-7 CXX=g++-7 /root/.cargo/bin/uv run pip3 install -e blockscipy
/root/.local/bin/uv venv && CC=gcc-7 CXX=g++-7 /root/.local/bin/uv run pip3 install -e blockscipy

# remove the build folder for blockscipy, as we will rebuild again anyway

Expand Down
903 changes: 756 additions & 147 deletions Notebooks/Clusters.ipynb

Large diffs are not rendered by default.

317 changes: 156 additions & 161 deletions Notebooks/thesis.ipynb

Large diffs are not rendered by default.

147 changes: 145 additions & 2 deletions blockscipy/src/chain/coinjoin_module_py.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,9 @@ void init_coinjoin_module(py::class_<Blockchain> &cl) {
pointingToTransactions[spendingTx].push_back(output.getValue());
}

for (const auto &[spendingTx, values] : pointingToTransactions) {
for (const auto &[key, values] : pointingToTransactions) {
(void)key;

if (values.size() < 2) continue;
for (const auto &value : values) {
anonymitySets[value]--;
Expand All @@ -400,6 +402,7 @@ void init_coinjoin_module(py::class_<Blockchain> &cl) {
// compute log(a1! * a2! * a3!...) where a_i is the size of the ith anonymity set using lgamma
double resultValue = 0;
for (const auto &[key, value] : anonymitySets) {
(void)key;
resultValue += lgamma(value + 1) / log(2);
}

Expand All @@ -423,5 +426,145 @@ void init_coinjoin_module(py::class_<Blockchain> &cl) {
reduce_func);
},
"Compute anonymity degradation in coinjoins", pybind11::arg("start"), pybind11::arg("stop"),
pybind11::arg("coinjoinType"), pybind11::arg("daysToConsider"));
pybind11::arg("coinjoinType"), pybind11::arg("daysToConsider"))
.def(
    "real_anonymity_degradation",
    // Computes a per-coinjoin anonymity score over the block range chain[{start, stop}].
    //
    // Parameters (Python keyword names are given in the pybind11::arg list below):
    //   start, stop    - block range that is scanned, both for coinjoins and for spending transactions.
    //   daysToConsider - if > 0, outputs that are spent together within this many days are removed
    //                    from the anonymity sets; if <= 0, no degradation is applied.
    //   coinjoinType   - "ww2", "ww1" or "wp"; any other value matches no transaction, so the result
    //                    is empty.
    //
    // Returns a map from each matching coinjoin transaction to
    //   sum over output values v of log2(count(v)!),
    // where count(v) is the number of outputs with value v that remain after degradation.
    [](Blockchain &chain, BlockHeight start, BlockHeight stop, int daysToConsider, std::string coinjoinType) {
        // output value -> number of outputs with that value
        using ValueCounts = std::unordered_map<int64_t, int64_t>;
        // CJTX and its anonymity sets
        using AnonymitySetsFuncType = std::unordered_map<Transaction, ValueCounts>;
        // For txes which consolidate inputs from multiple cjtxes. CJTX = Transaction, map = <value, count>
        // value = the anonymity set, count = how many times it appears in the consolidation tx (how much it
        // degrades the anonymity set)
        using PointingToTransactionsType = std::unordered_map<Transaction, ValueCounts>;

        // Length of the observation window in seconds. Computed once in 64 bits so that a large
        // daysToConsider cannot overflow `int` (24 * 60 * 60 * days exceeds INT_MAX above ~24855 days).
        const int64_t windowSeconds = static_cast<int64_t>(daysToConsider) * 24 * 60 * 60;

        // Dispatches to the heuristic selected by coinjoinType; unknown names match nothing.
        auto isRequestedCoinJoin = [&coinjoinType](const Transaction &tx) -> bool {
            if (coinjoinType == "ww2") return blocksci::heuristics::isWasabi2CoinJoin(tx);
            if (coinjoinType == "ww1") return blocksci::heuristics::isWasabi1CoinJoin(tx);
            if (coinjoinType == "wp") return blocksci::heuristics::isWhirlpoolCoinJoin(tx);
            return false;
        };

        // Map step 1: for a matching coinjoin, count its outputs per value (the initial anonymity sets).
        auto mapFunc = [&](const Transaction &tx) -> AnonymitySetsFuncType {
            AnonymitySetsFuncType result;
            if (!isRequestedCoinJoin(tx)) {
                return result;
            }

            // Build the counts in place instead of filling a temporary map and copying it.
            auto &valueCounts = result[tx];
            for (const auto &output : tx.outputs()) {
                ++valueCounts[output.getValue()];
            }
            return result;
        };

        // Reduce step shared by both passes (both result types are the same map type):
        // merges `from` into `into`, adding the counts of values present in both.
        auto mergeCounts = [](AnonymitySetsFuncType &into,
                              AnonymitySetsFuncType &from) -> AnonymitySetsFuncType & {
            for (const auto &[tx, valueCounts] : from) {
                auto [slot, inserted] = into.try_emplace(tx, valueCounts);
                if (inserted) {
                    continue;
                }
                for (const auto &[value, count] : valueCounts) {
                    slot->second[value] += count;
                }
            }
            return into;
        };

        auto initialAnonymitySets =
            chain[{start, stop}].mapReduce<AnonymitySetsFuncType, decltype(mapFunc), decltype(mergeCounts)>(
                mapFunc, mergeCounts);

        // Map step 2: for a non-coinjoin transaction, collect the tracked coinjoin outputs it spends
        // within the window. Only transactions spending at least two such outputs are reported.
        auto mapFunc2 = [&](const Transaction &tx) -> PointingToTransactionsType {
            PointingToTransactionsType result;
            if (blocksci::heuristics::getCoinjoinTag(tx) != blocksci::heuristics::CoinJoinType::None) {
                return result;
            }

            const auto spendTime = tx.block().timestamp();
            int64_t matchedInputs = 0;
            for (const auto &input : tx.inputs()) {
                auto inputTx = input.getSpentTx();
                // Spent too long after the coinjoin to be considered.
                if (spendTime - inputTx.block().timestamp() > windowSeconds) {
                    continue;
                }
                // The funding transaction is not one of the coinjoins found in pass 1.
                if (initialAnonymitySets.find(inputTx) == initialAnonymitySets.end()) {
                    continue;
                }

                ++result[inputTx][input.getValue()];
                ++matchedInputs;
            }

            if (matchedInputs > 1) {
                return result;
            }
            return {};
        };

        if (daysToConsider > 0) {
            auto pointingToTransactions =
                chain[{start, stop}]
                    .mapReduce<PointingToTransactionsType, decltype(mapFunc2), decltype(mergeCounts)>(
                        mapFunc2, mergeCounts);

            // Remove the co-spent outputs from the anonymity sets of the coinjoins they came from.
            for (const auto &[tx, spentCounts] : pointingToTransactions) {
                auto &remaining = initialAnonymitySets[tx];
                for (const auto &[value, count] : spentCounts) {
                    remaining[value] -= count;
                }
            }
        }

        // compute log(a1! * a2! * a3!...) where a_i is the size of the ith anonymity set using lgamma:
        // lgamma(a + 1) == ln(a!), and dividing by ln(2) converts the result to bits.
        const double lnTwo = log(2);
        std::unordered_map<Transaction, double> result;
        for (const auto &[tx, anonymitySets] : initialAnonymitySets) {
            double resultValue = 0;
            for (const auto &[value, count] : anonymitySets) {
                (void)value;
                resultValue += lgamma(count + 1) / lnTwo;
            }
            result[tx] = resultValue;
        }

        return result;
    },
    "Compute real anonymity degradation in coinjoins", pybind11::arg("start"), pybind11::arg("stop"),
    pybind11::arg("daysToConsider"), pybind11::arg("coinjoinType"));
;
}
16 changes: 3 additions & 13 deletions blockscipy/src/cluster/cluster/cluster_py.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <blocksci/cluster/coinjoin_cluster_manager.hpp>
#include <blocksci/cluster/coinjoin_clustering_heuristics.hpp>
#include <blocksci/heuristics/change_address.hpp>
#include <blocksci/heuristics/tx_identification.hpp>
#include <range/v3/range_for.hpp>

#include "caster_py.hpp"
Expand Down Expand Up @@ -105,31 +106,20 @@ void init_coinjoin_cluster_manager(pybind11::module &s) {
"scripts")
.def_static(
"create_clustering",
[](Blockchain &chain, BlockHeight start, BlockHeight stop, const std::vector<std::string> &heuristics,
[](Blockchain &chain, BlockHeight start, BlockHeight stop, blocksci::coinjoin_heuristics::ClusteringHeuristic heuristicFunc,
const std::string &outputPath, bool overwrite, std::string coinjoinType) {
py::scoped_ostream_redirect stream(std::cout, py::module::import("sys").attr("stdout"));

auto heuristicFunc = blocksci::coinjoin_heuristics::getClusteringHeuristic("None");
for (const auto &heuristic : heuristics) {
heuristicFunc = heuristicFunc & blocksci::coinjoin_heuristics::getClusteringHeuristic(heuristic);
}

if (stop == -1) {
stop = chain.size();
}
auto range = chain[{start, stop}];
return CoinjoinClusterManager::createClustering(range, heuristicFunc, outputPath, overwrite,
coinjoinType);
},
py::arg("chain"), py::arg("start"), py::arg("stop"), py::arg("heuristics"), py::arg("output_path"),
py::arg("chain"), py::arg("start"), py::arg("stop"), py::arg("heuristicFunc"), py::arg("output_path"),
py::arg("overwrite") = false, py::arg("coinjoin_type") = "None",
"Creates a clustering of the blockchain using the given heuristic and saves it to the given output path.\n"
"Possible heuristics are:\n"
"- 'None': No heuristic is used\n"
"- 'OneOutputConsolidation': Clusters are created based on transactions with multiple inputs and one "
"output\n"
"- 'OneInputConsolidation': Clusters are created based on transactions with one input and multiple "
"outputs\n\n"
"Possible coinjoin types are:\n"
"- 'none': No coinjoin type is used\n"
"- 'wasabi2': Wasabi CoinJoin type 2\n"
Expand Down
50 changes: 49 additions & 1 deletion blockscipy/src/heuristics_py.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <blocksci/chain/blockchain.hpp>
#include <blocksci/chain/transaction.hpp>
#include <blocksci/heuristics.hpp>
#include <blocksci/cluster/coinjoin_clustering_heuristics.hpp>

#include "caster_py.hpp"
#include "proxy.hpp"
Expand All @@ -17,17 +18,19 @@
namespace py = pybind11;
using namespace blocksci;
using namespace blocksci::heuristics;
using namespace blocksci::coinjoin_heuristics;

struct Heuristics {};
struct Change {};
struct CoinJoinHeuristic {};

void init_heuristics(py::module &m) {
py::enum_<heuristics::CoinJoinType>(m, "CoinJoinType")
.value("WW2zkSNACKs", heuristics::CoinJoinType::WW2zkSNACKs)
.value("WW2PostzkSNACKs", heuristics::CoinJoinType::WW2PostzkSNACKs)
.value("WW1", heuristics::CoinJoinType::WW1)
.value("Whirlpool", heuristics::CoinJoinType::Whirlpool)
.value("None", heuristics::CoinJoinType::None);
.value("NoCJ", heuristics::CoinJoinType::None);

py::enum_<heuristics::CoinJoinResult>(m, "CoinJoinResult")
.value("True", heuristics::CoinJoinResult::True)
Expand Down Expand Up @@ -192,4 +195,49 @@ void init_heuristics(py::module &m) {
"Return a ChangeHeuristic object that selects spent outputs. Useful in combination with heuristics that "
"select unspent outputs as candidates.");

py::class_<CoinJoinHeuristic> s3(cl, "coinjoin");

py::class_<ClusteringHeuristic>(s3, "c", "Class representing a CoinJoin clustering heuristic")
// .def(py::init([](Proxy<void> &heuristic) {
// std::function<void(const Transaction& tx, const std::unordered_set<Transaction>& coinjoinTransactions,
// AddressDisjointSets& ds, std::unordered_map<Address, uint32_t>& collectedAddresses)> clusteringFunc =
// [heuristic](const Transaction& tx, const std::unordered_set<Transaction>& coinjoinTransactions,
// AddressDisjointSets& ds, std::unordered_map<Address, uint32_t>& collectedAddresses) { return heuristic(tx, coinjoinTransactions, ds, collectedAddresses); };
// return ClusteringHeuristic(clusteringFunc);
// }))
.def("__and__", [](ClusteringHeuristic &this_heuristic, ClusteringHeuristic &other_heuristic) { return this_heuristic & other_heuristic; }, py::arg("other_heuristic"),
"Return a new heuristic that clusters based on _both_ heuristics.");
// .def_property_readonly(
// "__call__",
// [](ClusteringHeuristic &ch) -> Proxy<void> {
// return lift(makeSimpleProxy<Transaction>(), [ch](const Transaction &tx) { return ch(tx); });
// },
// "Return all outputs matching the change heuristic")


s3.def_property_readonly_static(
"input_one_hop", [](pybind11::object &) { return blocksci::coinjoin_heuristics::ClusteringHeuristic{blocksci::coinjoin_heuristics::OneInputConsolidation{}}; },
"Return a ClusteringHeuristic object implementing the clustering over consolidations of inputs happening one hop before the coinjoin tx.")
.def_property_readonly_static(
"output_one_hop", [](pybind11::object &) { return blocksci::coinjoin_heuristics::ClusteringHeuristic{blocksci::coinjoin_heuristics::OneOutputConsolidation{}}; },
"Return a ClusteringHeuristic object implementing the clustering over consolidations of outputs happening one hop after the coinjoin tx and the consolidation transactions have ONLY 1 OUTPUT.")
.def_property_readonly_static(
"output_one_hop_threshold", [](pybind11::object &) { return blocksci::coinjoin_heuristics::ClusteringHeuristic{blocksci::coinjoin_heuristics::OneHopOutputThresholdConsolidation{}}; },
"Return a ClusteringHeuristic object implementing the clustering over consolidations of outputs happening one hop after the coinjoin tx.")
.def_property_readonly_static(
"output_two_hop_threshold", [](pybind11::object &) { return blocksci::coinjoin_heuristics::ClusteringHeuristic{blocksci::coinjoin_heuristics::TwoHopOutputThresholdConsolidation{}}; },
"Return a ClusteringHeuristic object implementing the clustering over consolidations of outputs happening two hops after the coinjoin tx.")
.def_property_readonly_static(
"output_three_hop_threshold", [](pybind11::object &) { return blocksci::coinjoin_heuristics::ClusteringHeuristic{blocksci::coinjoin_heuristics::ThreeHopOutputThresholdConsolidation{}}; },
"Return a ClusteringHeuristic object implementing the clustering over consolidations of outputs happening three hops after the coinjoin tx.")
.def_property_readonly_static(
"none", [](pybind11::object &) { return blocksci::coinjoin_heuristics::ClusteringHeuristic{blocksci::coinjoin_heuristics::NoClustering{}}; },
"Return a ClusteringHeuristic object implementing no clustering heuristic: This effectively does nothing.")
.def_property_readonly_static(
"output_one_hop_with_change", [](pybind11::object &) { return blocksci::coinjoin_heuristics::ClusteringHeuristic{blocksci::coinjoin_heuristics::OneOutputConsolidationWithChange{}}; },
"Return a ClusteringHeuristic object implementing the clustering over consolidations of outputs happening one hop after the coinjoin tx and the consolidation transactions have 2 outputs - main and change.")
;



}
4 changes: 1 addition & 3 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ jupyter contrib nbextension install --user

CC=gcc-7 CXX=g++-7 pip3 install -e blockscipy || exit 1

source .venv/bin/activate

cd Notebooks

jupyter notebook --ip=0.0.0.0 --allow-root || exit 1
jupyter notebook --ip="0.0.0.0" --allow-root || exit 1

4 changes: 3 additions & 1 deletion external/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,7 @@ add_library(rocksdb_headers INTERFACE)
target_include_directories(rocksdb_headers SYSTEM INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/include)

set(CMAKE_CXX_FLAGS "${saved_flags}")

set(saved_flags2 "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
add_subdirectory(bitcoin-api-cpp EXCLUDE_FROM_ALL)
set(CMAKE_CXX_FLAGS "${saved_flags2}")
1 change: 1 addition & 0 deletions include/blocksci/cluster/coinjoin_cluster_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#define coinjoin_cluster_manager_hpp

#include <blocksci/blocksci_export.h>
#include <blocksci/heuristics/tx_identification.hpp>

#include <functional>
#include <iostream>
Expand Down
5 changes: 3 additions & 2 deletions include/blocksci/cluster/coinjoin_clustering_heuristics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ namespace blocksci {
struct BLOCKSCI_EXPORT ClusteringHeuristicsType {
enum Enum {
OneOutputConsolidation,
OneOutputConsolidationWithChange,
OneInputConsolidation,
OneHopOutputThresholdConsolidation,
TwoHopOutputThresholdConsolidation,
Expand Down Expand Up @@ -62,6 +63,8 @@ namespace blocksci {
};

using OneOutputConsolidation = ClusteringHeuristicImpl<ClusteringHeuristicsType::OneOutputConsolidation>;
using OneOutputConsolidationWithChange =
ClusteringHeuristicImpl<ClusteringHeuristicsType::OneOutputConsolidationWithChange>;
using OneInputConsolidation = ClusteringHeuristicImpl<ClusteringHeuristicsType::OneInputConsolidation>;
using OneHopOutputThresholdConsolidation =
ClusteringHeuristicImpl<ClusteringHeuristicsType::OneHopOutputThresholdConsolidation>;
Expand All @@ -72,8 +75,6 @@ namespace blocksci {
ClusteringHeuristicImpl<ClusteringHeuristicsType::ThreeHopOutputThresholdConsolidation>;
using NoClustering = ClusteringHeuristicImpl<ClusteringHeuristicsType::None>;

ClusteringHeuristic BLOCKSCI_EXPORT getClusteringHeuristic(const std::string& heuristicName);

} // namespace coinjoin_heuristics
} // namespace blocksci

Expand Down
1 change: 0 additions & 1 deletion pip-all-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ backports.zoneinfo==0.2.1
beautifulsoup4==4.12.3
bech32==1.2.0
bleach==6.1.0
-e [email protected]:mmstanone/blocksci.git@6ac64d128a474c2623a5d353492b9d853921b3b7#egg=blocksci&subdirectory=blockscipy
certifi==2019.11.28
cffi==1.17.1
chardet==3.0.4
Expand Down
Loading

0 comments on commit 8475b11

Please sign in to comment.