-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_ours_blinkdist_cora.R
executable file
·54 lines (44 loc) · 2.04 KB
/
run_ours_blinkdist_cora.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# ---- Setup ----
# Parallel backend: experiments are dispatched to 4 multisession workers.
library(furrr)
plan(multisession, workers = 4)
library(exchanger)
library(comparator)
library(tidyverse)

# Project helpers; presumably these provide transform_dist_fn() and run_ours(),
# which are used further down -- confirm against util.R / run_ours.R.
source("util.R")
source("run_ours.R")

# Load the Cora data set. `chdir = TRUE` evaluates load_cora.R with datasets/
# as the working directory (so relative paths inside it keep resolving) and
# restores the previous working directory afterwards, even if the script
# fails part-way through. The rest of this file expects it to define
# `records`, `n_records` and `true_membership`.
source("datasets/load_cora.R", chdir = TRUE)
# ---- Clustering priors under comparison ----
# Hyper-parameters for the shifted negative-binomial distribution used by the
# "coupon" prior, derived from the number of records.
# NOTE(review): under the usual (size, prob) negative-binomial parametrisation
# with a shift of 1 these values give mean n_records and variance n_records^2;
# confirm against exchanger's ShiftedNegBinomRV before relying on that.
snbinom_var <- n_records^2
snbinom_size <- (n_records - 1)^2 / (snbinom_var - n_records + 1)
snbinom_prob <- (n_records - 1) / snbinom_var

# One entry per experiment: `name` is used to label the run and `clust_prior`
# is the prior object handed to exchanger().
expt_configs <- list(
  list(
    name = "coupon",
    clust_prior = GeneralizedCouponRP(
      ShiftedNegBinomRV(snbinom_size, snbinom_prob),
      GammaRV(1, 1/100)
    )
  ),
  list(
    name = "py",
    clust_prior = PitmanYorRP(GammaRV(1, 1/100), BetaRV(1, 1))
  ),
  list(
    name = "ewens",
    clust_prior = EwensRP(GammaRV(1, 1/100))
  ),
  list(
    name = "blinkcoupon",
    clust_prior = GeneralizedCouponRP(nrow(records), Inf)
  )
)
# ---- Run every experiment in parallel ----
# For each entry of `expt_configs`, build the attribute specification, construct
# the exchanger model with that entry's clustering prior and hand it to
# run_ours(). `e` is one config: a list with `name` and `clust_prior`.
#
# `seed = TRUE` asks furrr to give every element its own parallel-safe RNG
# stream, so the runs use statistically sound random numbers and can be
# reproduced by calling set.seed() before this statement.
future_map(expt_configs, function(e) {
  # Prior on the distortion probability, shared by all four attributes.
  distort_prior <- BetaRV(1, 4)

  # Distance between strings: split both inputs on runs of whitespace, then
  # compare the resulting token lists with FuzzyTokenSet, using Abbreviation()
  # as the inner token comparator and deletion = 0.5.
  dist_2 <- function(x, y) {
    x <- strsplit(x, '\\s+')
    y <- strsplit(y, '\\s+')
    FuzzyTokenSet(Abbreviation(), deletion = 0.5)(x, y)
  }

  # Build one Attribute from a distance function and the second positional
  # argument of transform_dist_fn(); everything else is common to all
  # attributes in this experiment.
  make_attr <- function(dist_fn, threshold) {
    Attribute(
      transform_dist_fn(dist_fn, threshold, scaling_factor = 10.0),
      distort_prob_prior = distort_prior,
      exclude_entity_value = FALSE
    )
  }

  attr_params <- c(
    "authors" = make_attr(dist_2, 3.0),
    "title" = make_attr(dist_2, 3.0),
    "venue" = make_attr(dist_2, 5.0),
    "year" = make_attr(Levenshtein(normalize = TRUE), 5.0)
  )

  model <- exchanger(records, attr_params, e$clust_prior)

  # Run label: prior name plus the current date/time with spaces and colons
  # replaced by underscores.
  expt_name <- paste0(
    "cora_ours_blinkdist_", e$name, "_", gsub("[ :]", "_", date())
  )
  run_ours(
    expt_name, model, true_membership,
    n_samples = 10000, burnin_interval = 100000
  )
}, .options = furrr_options(
  packages = c("comparator", "exchanger", "clevr"),
  seed = TRUE
))