-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_blink_cora.R
executable file
·38 lines (31 loc) · 1.36 KB
/
run_blink_cora.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
library(exchanger)
library(tidyverse)
# Load helper scripts from the current working directory. `transform_dist_fn()`
# and `run_ours()` are used further down but are not defined in this file, so
# they presumably come from these two scripts -- TODO confirm.
source("util.R")
source("run_ours.R")
# Load the Cora data set. `chdir = TRUE` makes source() switch into
# "datasets/" only while the loader is evaluated (so relative paths inside the
# loader resolve exactly as before) and restores the previous working
# directory afterwards, even when the loader stops with an error. This avoids
# the setwd()/setwd("../") pair, which left the session stranded in
# "datasets/" on failure.
# NOTE(review): the loader is expected to define `records`, `n_records` and
# `true_membership`, which the rest of this script reads -- confirm.
source("datasets/load_cora.R", chdir = TRUE)
# Object later passed to exchanger() as its third argument; built by
# GeneralizedCouponRP() from the record count and `Inf`.
# NOTE(review): `n_records` is not defined in this file; presumably it is set
# by the data-set loader -- confirm.
clust_prior <- GeneralizedCouponRP(
  n_records,
  Inf
)
# Object later supplied as `distort_prob_prior` for the attributes: a BetaRV
# whose two arguments are 0.001 and 0.1 times the record count.
distort_prior <- BetaRV(
  0.001 * n_records,
  0.1 * n_records
)
# Compare two character vectors as collections of whitespace-separated tokens.
#
# x, y: character vectors; every element is split on runs of whitespace.
# Returns whatever the measure built by
# FuzzyTokenSet(Abbreviation(), deletion = 0.5) yields for the two token lists.
dist_2 <- function(x, y) {
  tokens_x <- strsplit(x, "\\s+")
  tokens_y <- strsplit(y, "\\s+")
  # Build the token-set measure first, then apply it to the token lists.
  token_set_dist <- FuzzyTokenSet(Abbreviation(), deletion = 0.5)
  token_set_dist(tokens_x, tokens_y)
}
# Per-attribute settings, named "authors", "title", "venue" and "year".
# Every entry uses the same `distort_prior`, the same `scaling_factor` (10)
# and `exclude_entity_value = FALSE`; the entries differ only in the base
# distance function and in the second argument given to transform_dist_fn().
attr_params <- local({
  # Builds one Attribute from a base distance function and the value passed
  # as transform_dist_fn()'s second positional argument.
  # NOTE(review): the meaning of that second argument is defined by
  # transform_dist_fn(), which is not part of this file -- confirm there.
  make_attribute <- function(base_dist, dist_arg) {
    Attribute(
      transform_dist_fn(base_dist, dist_arg, scaling_factor = 10.0),
      distort_prob_prior = distort_prior,
      exclude_entity_value = FALSE
    )
  }

  c(
    authors = make_attribute(dist_2, 3.0),
    title = make_attribute(dist_2, 3.0),
    venue = make_attribute(dist_2, 5.0),
    year = make_attribute(Levenshtein(normalize = TRUE), 5.0)
  )
})
# Build the model object from the records, the per-attribute settings and
# `clust_prior`.
model <- exchanger(records, attr_params, clust_prior)
# Experiment name: "cora_blink_" followed by the current date() string with
# every space and colon replaced by an underscore.
expt_name <- paste0("cora_blink_", chartr(" :", "__", date()))
# Hand everything to run_ours(), which is not defined in this file.
run_ours(
  expt_name,
  model,
  true_membership,
  n_samples = 10000,
  burnin_interval = 100000
)