-
Notifications
You must be signed in to change notification settings - Fork 0
/
tst.320_33-128.r
102 lines (90 loc) · 2.99 KB
/
tst.320_33-128.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
suppressMessages(library(pbdMPI,quietly = TRUE))
suppressMessages(library(pbdDMAT,quietly = TRUE))
suppressMessages(library(pbdIO,quietly = TRUE))
.libPaths('./R/x86_64-pc-linux-gnu-library/3.3')
suppressMessages(library('data.table', quietly = TRUE))
suppressMessages(library('RecordLinkage', lib.loc="./R/x86_64-pc-linux-gnu-library/3.3", quietly = TRUE))
# Offset range handled by this script: FR selects the self-comparison branch
# (FR == 0) and, together with TO, bounds the exchange loop further down.
FR <- 33
TO <- 128

init()
# Rprof(append = TRUE)

# Load the ";"-separated, header-less input found under "auth1" and name its
# six columns.
x <- comm.fread("auth1", pattern = "*", quote = "", sep = ";", header = FALSE)
names(x) <- c("un", "n", "fn", "ln", "e", "a")

# Build the two tables that get linked.  Both share the first five columns
# and "a"; the sixth column is a second copy of "fn" in x and of "ln" in x1.
# x1 is taken from the already re-ordered x, so the order of these two
# statements matters.
x <- x[, c("n", "e", "ln", "fn", "un", "fn", "a")]
x1 <- x[, c("n", "e", "ln", "fn", "un", "ln", "a")]
# Both tables end up with the same column names; the sixth is called "ifn".
names(x1) <- names(x) <- c("n", "e", "ln", "fn", "un", "ifn", "a")

# Debug helpers kept for reference:
# dx = dim(x);
# comm.print(dx, all.rank=TRUE)
barrier()
comm.print("read all")
# xf <- do.call('rbind',allgather(x))
# dxf = dim(xf);
# comm.print(dxf, all.rank=TRUE)
# Looks like a sample input record (six comma-separated fields) -- confirm:
# tandem.webdev,Agence-Tandem,Agence-Tandem,Agence-Tandem,[email protected],Agence-Tandem <[email protected]>

# Each rank appends its matches to its own file, suffixed with the rank.
myrank <- comm.rank()
fnamev <- paste("320_33-128/outV", myrank, sep = ".")
# Self-comparison pass: link the local x against the local x1.  Only runs
# when the offset range of this script starts at 0.
if (FR == 0) {
  pairs <- compare.linkage(
    x, x1,
    exclude = c(7),
    strcmp = c(1:6),
    strcmpfun = jarowinkler
  )
  barrier()
  comm.print("Computed self pairs")

  # Predict and write out matches: a pair is kept when its best similarity
  # over the six compared fields exceeds 0.8 and the two ids differ.
  best <- apply(
    pairs$pairs[, c("n", "e", "ln", "fn", "un", "ifn")],
    1, max,
    na.rm = TRUE
  )
  MM <- best > .8 & pairs$pairs$id1 != pairs$pairs$id2

  # `val` is not used by the active code; it is created here and removed
  # again after a successful write, exactly as before.
  val <- c()
  ll <- sum(MM)
  if (ll > 0) {
    # Column 9 is dropped before writing (presumably is_match -- confirm).
    p <- pairs$pairs[MM, -9]
    comm.print(c(ll, dim(p)))
    # Both rank columns carry this rank: the pair comes from local data only.
    a <- rep(myrank, ll)
    b <- a
    p$a <- a
    p$b <- b
    # val = data.frame(cbind(a, b, p));
    fwrite(p, file = fnamev, sep = ";", quote = FALSE, append = TRUE)
    rm(val)
  }
}
# Send the seven linkage columns of the global table `x1` to the rank that
# sits `off` positions ahead of this one (wrapping around the communicator).
#
# off: rank offset of the destination, default 1.
# Returns whatever isend() returns.
message.pass <- function(off=1) {
  dest <- (comm.rank() + off) %% comm.size()
  # comm.print(paste("passed to ",c(myrank,otherrank)),all.rank=TRUE)
  payload <- x1[, c("n", "e", "ln", "fn", "un", "ifn", "a")]
  isend(payload, rank.dest = dest)
}
# Receive the object sent by the rank that sits `off` positions behind this
# one (wrapping around the communicator).
#
# off: rank offset of the sender, default 1.
# Returns the value of irecv(); the caller uses it as the remote table.
message.get <- function(off=1) {
  me <- comm.rank()
  src <- (me - off) %% comm.size()
  # Announce the pending receive before calling irecv().
  comm.print(paste("about to rcv ", paste(me, src)))
  irecv(rank.source = src)
}
# Pairwise exchange across ranks.  For each offset i every rank sends its own
# x1 to rank (myrank + i), receives the table sent by rank (myrank - i),
# links the local x against it and appends the matches to fnamev.
ncom <- comm.size()
nc <- ceiling(comm.size() / 2)
first_off <- max(1, FR)
last_off <- min(TO, nc)
# Guard the range: `first_off:last_off` counts DOWNWARDS when first_off is
# larger than last_off (i.e. nc < FR), which would run unintended offsets
# instead of skipping the loop.
offsets <- if (first_off <= last_off) first_off:last_off else integer(0)
for (i in offsets) {
  message.pass(i)
  # message.pass() sends the global x1, so the received table must not be
  # stored in x1: doing so would forward another rank's rows at the next
  # offset, and the rank recorded in column b would no longer be the real
  # origin of the compared data.
  x_remote <- message.get(i)
  pairs <- compare.linkage(
    x, x_remote,
    exclude = c(7),
    strcmp = c(1:6),
    strcmpfun = jarowinkler
  )
  # A pair is kept when its best similarity over the six compared fields
  # exceeds 0.8.
  MM <- apply(
    pairs$pairs[, c("n", "e", "ln", "fn", "un", "ifn")],
    1, max,
    na.rm = TRUE
  ) > .8
  ll <- sum(MM)
  # Column 9 is dropped before writing (presumably is_match -- confirm).
  p <- pairs$pairs[MM, -9]
  comm.print(c(ll, dim(p)))
  if (ll > 0) {
    # a = this rank, b = rank whose table was received at this offset.
    orank <- (myrank - i) %% ncom
    a <- rep(myrank, ll)
    b <- rep(orank, ll)
    # val0 = cbind (a, b, p);
    p$a <- a
    p$b <- b
    # val = rbind(val, val0);
    fwrite(p, file = fnamev, sep = ";", quote = FALSE, append = TRUE)
  }
}
# comm.print(lbl[1:10,], all.rank=TRUE)
barrier()
comm.print("Finished computing")
## fnamel=paste("outL",myrank,sep=".");
## fwrite(data.frame(lbl),file=fnamel, sep=";",quote=FALSE);
# fnamev=paste("outV",myrank,sep=".");
# fwrite(data.frame(val),file=fnamev, sep=";",quote=FALSE);
barrier()
# NOTE(review): finalize() is left disabled as in the original -- confirm
# that the launcher does not require it.
#finalize();