Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow event producer to specify which values to anonymize #55

Merged
merged 1 commit into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions rs/canister/api/can.did
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
type AnonymizationInitConfig = record {
users : opt bool;
exclusions : opt vec text;
sources : opt bool;
};
type Anonymizable = variant { Anonymize : text; Public : text };
type EventsArgs = record { start : nat64; length : nat64 };
type EventsResponse = record {
events : vec IndexedEvent;
latest_event_index : opt nat64;
};
type IdempotentEvent = record {
source : opt text;
source : opt Anonymizable;
name : text;
user : opt text;
user : opt Anonymizable;
timestamp : nat64;
payload : vec nat8;
idempotency_key : nat;
Expand All @@ -26,7 +22,6 @@ type IndexedEvent = record {
};
type InitArgs = record {
push_events_whitelist : vec principal;
anonymization_config : opt AnonymizationInitConfig;
read_events_whitelist : vec principal;
};
type PushEventsArgs = record { events : vec IdempotentEvent };
Expand Down
20 changes: 18 additions & 2 deletions rs/canister/api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ pub struct IdempotentEvent {
pub idempotency_key: u128,
pub name: String,
pub timestamp: TimestampMillis,
pub user: Option<String>,
pub source: Option<String>,
pub user: Option<Anonymizable>,
pub source: Option<Anonymizable>,
#[serde(with = "serde_bytes")]
pub payload: Vec<u8>,
}
Expand All @@ -32,3 +32,19 @@ pub struct IndexedEvent {
#[serde(with = "serde_bytes")]
pub payload: Vec<u8>,
}

/// A string value tagged by the event producer with whether it should be
/// anonymized before it is stored.
///
/// Used for the `user` and `source` fields of `IdempotentEvent`.
#[derive(CandidType, Serialize, Deserialize, Clone, Debug)]
pub enum Anonymizable {
/// The value is kept exactly as provided.
Public(String),
/// The value is replaced by a salted hash before it is stored.
Anonymize(String),
}

impl Anonymizable {
pub fn new(value: String, anonymize: bool) -> Anonymizable {
if anonymize {
Anonymizable::Anonymize(value)
} else {
Anonymizable::Public(value)
}
}
}
8 changes: 0 additions & 8 deletions rs/canister/api/src/lifecycle/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,4 @@ use serde::Serialize;
pub struct InitArgs {
pub push_events_whitelist: Vec<Principal>,
pub read_events_whitelist: Vec<Principal>,
pub anonymization_config: Option<AnonymizationInitConfig>,
}

#[derive(CandidType, Serialize, Deserialize, Clone, Debug, Default)]
pub struct AnonymizationInitConfig {
pub users: Option<bool>,
pub sources: Option<bool>,
pub exclusions: Option<Vec<String>>,
}
1 change: 0 additions & 1 deletion rs/canister/impl/src/lifecycle/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ fn init(args: InitArgs) {
state::init(State::new(
args.push_events_whitelist.into_iter().collect(),
args.read_events_whitelist.into_iter().collect(),
args.anonymization_config.unwrap_or_default(),
));

ic_cdk_timers::set_timer(Duration::ZERO, || {
Expand Down
42 changes: 37 additions & 5 deletions rs/canister/impl/src/model/events.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
use crate::memory::{get_events_data_memory, get_events_index_memory, Memory};
use crate::model::string_to_num_map::StringToNumMap;
use candid::Deserialize;
use event_store_canister::{IdempotentEvent, IndexedEvent, TimestampMillis};
use event_store_canister::{Anonymizable, IdempotentEvent, IndexedEvent, TimestampMillis};
use ic_stable_structures::storable::Bound;
use ic_stable_structures::{StableLog, Storable};
use serde::Serialize;
use sha2::Digest;
use std::borrow::Cow;
use std::fmt::Write;

pub struct Events {
events: StableLog<StorableEvent, Memory, Memory>,
Expand All @@ -22,8 +24,8 @@ impl Events {
.collect()
}

pub fn push(&mut self, event: IdempotentEvent) {
let storable = self.convert_to_storable(event, self.events.len());
pub fn push(&mut self, event: IdempotentEvent, salt: [u8; 32]) {
let storable = self.convert_to_storable(event, self.events.len(), salt);

self.events.append(&storable).unwrap();
}
Expand All @@ -34,14 +36,23 @@ impl Events {
}
}

fn convert_to_storable(&mut self, event: IdempotentEvent, index: u64) -> StorableEvent {
fn convert_to_storable(
&mut self,
event: IdempotentEvent,
index: u64,
salt: [u8; 32],
) -> StorableEvent {
StorableEvent {
index,
name: self.string_to_num_map.convert_to_num(event.name),
timestamp: event.timestamp,
user: event.user.map(|u| self.string_to_num_map.convert_to_num(u)),
user: event
.user
.map(|u| to_maybe_anonymized_string(u, salt))
.map(|u| self.string_to_num_map.convert_to_num(u)),
source: event
.source
.map(|s| to_maybe_anonymized_string(s, salt))
.map(|s| self.string_to_num_map.convert_to_num(s)),
payload: event.payload,
}
Expand Down Expand Up @@ -119,3 +130,24 @@ impl Storable for StorableEvent {
/// Returns `true` when the slice contains no elements.
fn is_empty_slice<T>(vec: &[T]) -> bool {
    // An empty slice is the only one that matches the `[]` pattern.
    matches!(vec, [])
}

/// Resolves an [`Anonymizable`] into the string that will be stored.
///
/// `Public` values pass through unchanged; `Anonymize` values are replaced by
/// the result of `anonymize` computed with `salt`.
fn to_maybe_anonymized_string(value: Anonymizable, salt: [u8; 32]) -> String {
    // Public values need no processing, so hand them back immediately.
    let raw = match value {
        Anonymizable::Public(plain) => return plain,
        Anonymizable::Anonymize(raw) => raw,
    };
    anonymize(&raw, salt)
}

/// Produces the anonymized form of `value`.
///
/// The result is the first 16 bytes of SHA-256 over `value` followed by
/// `salt`, rendered as 32 lowercase hexadecimal characters.
fn anonymize(value: &str, salt: [u8; 32]) -> String {
    let mut sha = sha2::Sha256::new();
    sha.update(value.as_bytes());
    sha.update(salt);
    let digest: [u8; 32] = sha.finalize().into();

    // Only the leading half of the digest is kept: 16 bytes -> 32 hex characters.
    digest
        .iter()
        .take(16)
        .fold(String::with_capacity(32), |mut hex, byte| {
            write!(hex, "{byte:02x}").unwrap();
            hex
        })
}
68 changes: 4 additions & 64 deletions rs/canister/impl/src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,11 @@ use crate::env;
use crate::model::events::Events;
use crate::model::salt::Salt;
use candid::Principal;
use event_store_canister::{
AnonymizationInitConfig, IdempotentEvent, TimestampMillis, WhitelistedPrincipals,
};
use event_store_canister::{IdempotentEvent, TimestampMillis, WhitelistedPrincipals};
use event_store_utils::EventDeduper;
use serde::{Deserialize, Serialize};
use sha2::Digest;
use std::cell::RefCell;
use std::collections::HashSet;
use std::fmt::Write;

thread_local! {
static STATE: RefCell<Option<State>> = RefCell::default();
Expand All @@ -24,7 +20,6 @@ pub struct State {
events: Events,
event_deduper: EventDeduper,
salt: Salt,
anonymization_config: AnonymizationConfig,
}

const STATE_ALREADY_INITIALIZED: &str = "State has already been initialized";
Expand Down Expand Up @@ -56,14 +51,12 @@ impl State {
pub fn new(
push_events_whitelist: HashSet<Principal>,
read_events_whitelist: HashSet<Principal>,
anonymization_config: AnonymizationInitConfig,
) -> State {
State {
push_events_whitelist,
read_events_whitelist,
events: Events::default(),
event_deduper: EventDeduper::default(),
anonymization_config: anonymization_config.into(),
salt: Salt::default(),
}
}
Expand Down Expand Up @@ -93,62 +86,9 @@ impl State {
self.salt.set(salt);
}

pub fn push_event(&mut self, mut event: IdempotentEvent, now: TimestampMillis) {
if !self.event_deduper.try_push(event.idempotency_key, now) {
return;
}

if self.anonymization_config.users {
if let Some(user) = event
.user
.as_mut()
.filter(|u| !self.anonymization_config.exclusions.contains(*u))
{
*user = self.anonymize(user);
}
}

if self.anonymization_config.sources {
if let Some(source) = event
.source
.as_mut()
.filter(|s| !self.anonymization_config.exclusions.contains(*s))
{
*source = self.anonymize(source);
}
}

self.events.push(event);
}

fn anonymize(&self, value: &str) -> String {
// Generates a 32 character string from the input value + the salt
let mut hasher = sha2::Sha256::new();
hasher.update(value.as_bytes());
hasher.update(self.salt.get());
let hash: [u8; 32] = hasher.finalize().into();

let mut string = String::with_capacity(32);
for byte in &hash[0..16] {
write!(string, "{byte:02x}").unwrap();
}
string
}
}

#[derive(Serialize, Deserialize, Default)]
struct AnonymizationConfig {
users: bool,
sources: bool,
exclusions: HashSet<String>,
}

impl From<AnonymizationInitConfig> for AnonymizationConfig {
fn from(value: AnonymizationInitConfig) -> Self {
AnonymizationConfig {
users: value.users.unwrap_or_default(),
sources: value.sources.unwrap_or_default(),
exclusions: value.exclusions.unwrap_or_default().into_iter().collect(),
/// Appends `event` to the event log unless the deduper rejects it.
///
/// The event's idempotency key and `now` are offered to the deduper first;
/// only when it accepts them is the event pushed, together with the current
/// salt so that values marked for anonymization can be hashed.
pub fn push_event(&mut self, event: IdempotentEvent, now: TimestampMillis) {
    let accepted = self.event_deduper.try_push(event.idempotency_key, now);
    if !accepted {
        // Presumably this key was already seen; nothing is stored.
        return;
    }
    self.events.push(event, self.salt.get());
}
}
14 changes: 3 additions & 11 deletions rs/integration_tests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
use crate::rng::{random, random_bytes, random_principal, random_string};
use crate::setup::setup_new_env;
use candid::Principal;
use event_store_canister::{
AnonymizationInitConfig, EventsArgs, IdempotentEvent, InitArgs, PushEventsArgs,
};
use event_store_canister::{Anonymizable, EventsArgs, IdempotentEvent, InitArgs, PushEventsArgs};
use pocket_ic::PocketIc;
use std::fs::File;
use std::io::Read;
Expand Down Expand Up @@ -81,11 +79,6 @@ fn users_and_source_can_be_anonymized(users: bool, sources: bool) {
} = install_canister(Some(InitArgs {
push_events_whitelist: vec![random_principal()],
read_events_whitelist: vec![random_principal()],
anonymization_config: Some(AnonymizationInitConfig {
users: Some(users),
sources: Some(sources),
exclusions: None,
}),
}));

let user = random_string();
Expand All @@ -100,8 +93,8 @@ fn users_and_source_can_be_anonymized(users: bool, sources: bool) {
idempotency_key: random(),
name: random_string(),
timestamp: 1000,
user: Some(user.clone()),
source: Some(source.clone()),
user: Some(Anonymizable::new(user.clone(), users)),
source: Some(Anonymizable::new(source.clone(), sources)),
payload: Vec::new(),
}],
},
Expand Down Expand Up @@ -143,7 +136,6 @@ fn install_canister(init_args: Option<InitArgs>) -> TestEnv {
let init_args = init_args.unwrap_or_else(|| InitArgs {
push_events_whitelist: vec![random_principal()],
read_events_whitelist: vec![random_principal()],
anonymization_config: None,
});

let canister_id = env.create_canister_with_settings(Some(controller), None);
Expand Down
26 changes: 13 additions & 13 deletions rs/producer/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use event_store_canister::{IdempotentEvent, TimestampMillis};
use event_store_canister::{Anonymizable, IdempotentEvent, TimestampMillis};
use ic_principal::Principal;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::sync::{Arc, Mutex, MutexGuard};
Expand Down Expand Up @@ -41,16 +41,16 @@ struct ClientInner<R> {
pub struct Event {
name: String,
timestamp: TimestampMillis,
user: Option<String>,
source: Option<String>,
user: Option<Anonymizable>,
source: Option<Anonymizable>,
payload: Vec<u8>,
}

pub struct EventBuilder {
name: String,
timestamp: TimestampMillis,
user: Option<String>,
source: Option<String>,
user: Option<Anonymizable>,
source: Option<Anonymizable>,
payload: Vec<u8>,
}

Expand All @@ -65,23 +65,23 @@ impl EventBuilder {
}
}

pub fn with_user(mut self, user: impl Into<String>) -> Self {
self.user = Some(user.into());
pub fn with_user(mut self, user: impl Into<String>, anonymize: bool) -> Self {
self.user = Some(Anonymizable::new(user.into(), anonymize));
self
}

pub fn with_maybe_user(mut self, user: Option<impl Into<String>>) -> Self {
self.user = user.map(|u| u.into());
pub fn with_maybe_user(mut self, user: Option<impl Into<String>>, anonymize: bool) -> Self {
self.user = user.map(|u| Anonymizable::new(u.into(), anonymize));
self
}

pub fn with_source(mut self, source: impl Into<String>) -> Self {
self.source = Some(source.into());
pub fn with_source(mut self, source: impl Into<String>, anonymize: bool) -> Self {
self.source = Some(Anonymizable::new(source.into(), anonymize));
self
}

pub fn with_maybe_source(mut self, source: Option<impl Into<String>>) -> Self {
self.source = source.map(|u| u.into());
pub fn with_maybe_source(mut self, source: Option<impl Into<String>>, anonymize: bool) -> Self {
self.source = source.map(|u| Anonymizable::new(u.into(), anonymize));
self
}

Expand Down
Loading