-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 94c02d8
Showing
81 changed files
with
7,867 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
""" | ||
Merge the split (re-clustered) groups back into a single aggregate file.
""" | ||
|
||
import os | ||
#os.chdir(r"E:\graph") | ||
|
||
def _ensure_newline(line):
    """Return *line* unchanged if it ends with a newline, else with one added."""
    return line if line.endswith('\n') else line + '\n'


def agg_file(group: list, source='graph02_10k_agg', dest="graph02_10k_agg"):
    """Splice re-clustered sub-files into an aggregate group file.

    ``source`` is read as consecutive pairs of lines: a header made of three
    tab-separated fields followed by a node line.  The pair at position ``k``
    (0-based) is group ``k``.

    * If ``k`` is listed in ``group``, the pair is replaced by the complete
      contents of the file ``graph<k>_agg`` in the current working directory.
    * Otherwise the header is re-emitted as ``"<a>\\t<b>\\t<c>\\n"`` and the
      node line is copied through.

    The result is written to ``dest``.  ``source`` is fully read before
    ``dest`` is opened, so both may name the same file.

    Args:
        group: indices of the groups to replace by their ``graph<k>_agg`` file.
        source: path of the aggregate file to read.
        dest: path of the aggregate file to write.

    Raises:
        ValueError: a header line does not consist of exactly three
            tab-separated fields.
        IndexError: a group that must be copied has a header but no node line.
        OSError: ``source``, ``dest`` or a needed sub-file cannot be opened.
    """
    selected = set(group)  # O(1) membership instead of scanning the list

    with open(source, 'r') as fin:
        lines = fin.readlines()

    merged = []
    for start in range(0, len(lines), 2):
        # Every header is parsed, even for replaced groups, so a malformed
        # file is reported instead of being silently passed through.
        n, s, f = lines[start].strip().split('\t')
        no = start // 2
        if no in selected:
            with open('graph' + str(no) + '_agg') as sub:
                # A sub-file whose last line lacks a newline must not fuse
                # with the header of the following group.
                merged.extend(_ensure_newline(line) for line in sub)
        else:
            if start + 1 >= len(lines):
                raise IndexError(
                    "group %d in %r has a header line but no node line"
                    % (no, source)
                )
            merged.append("%s\t%s\t%s\n" % (n, s, f))
            merged.append(_ensure_newline(lines[start + 1]))

    with open(dest, 'w') as fout:
        fout.writelines(merged)
|
||
if __name__ == "__main__":
    # Splice the sub-file of group 0 into the trace-23 / "10k" aggregate file.
    # Source and destination are the same path, so the file is rewritten
    # in place.
    traceno = 23
    fnum = "10k"
    agg_path = 'graph%02d_%s_agg' % (traceno, fnum)
    agg_file([0], agg_path, agg_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
|
||
""" | ||
Generate the merged graph file.
""" | ||
import os | ||
# All relative paths below are resolved inside this directory.
os.chdir(r"/home/flnan/group_divided")

# Script parameters.  NOTE(review): nnode is not read anywhere in the code
# visible here -- confirm whether it is still needed.
level, nnode, group_no = 1, 19, 16

# Name of the input file, built from the group number and level
# ("graph34_16_l1" for the values above).
group_file = "".join([r"graph34_", str(group_no), "_l", str(level)])
|
||
|
||
class cgroup:
    """Plain record describing one group, with ``+`` defined as a merge.

    Attributes (stored exactly as passed to the constructor):
        num:   a count; the script in this file passes the number of nodes.
        size:  an additive size figure for the group.
        freq:  an additive frequency figure for the group.
        nodes: list of node ids belonging to the group.
    """

    def __init__(self, num, size, freq, nodes):
        self.num = num
        self.size = size
        self.freq = freq
        self.nodes = nodes

    def __add__(self, other):
        """Return a new group that is the union of ``self`` and ``other``.

        The numeric fields are summed and the node lists concatenated into a
        fresh list; neither operand is modified.  For operands that are not
        ``cgroup`` instances ``NotImplemented`` is returned, so Python raises
        the usual ``TypeError`` instead of an unrelated ``AttributeError``.
        """
        if not isinstance(other, cgroup):
            return NotImplemented
        return cgroup(
            self.num + other.num,
            self.size + other.size,
            self.freq + other.freq,
            self.nodes + other.nodes,
        )

    def __repr__(self):
        return "cgroup(num=%r, size=%r, freq=%r, nodes=%r)" % (
            self.num, self.size, self.freq, self.nodes,
        )
|
||
|
||
# ---------------------------------------------------------------------------
# Load the input: consecutive line pairs, a tab-separated header
# "<no> <n> <s> <f>" followed by a tab-separated line of node ids.
# ---------------------------------------------------------------------------
with open(group_file, 'r') as fin:
    cnts = fin.readlines()

groups = []
tfreq = 0
for header_at in range(0, len(cnts), 2):
    no, n, s, f = cnts[header_at].strip().split('\t')
    tfreq += int(f)
    nodes = cnts[header_at + 1].split('\t')
    # Only the first n fields of the node line are converted; anything after
    # them (e.g. the trailing newline field) is ignored.
    nodesn = [int(nodes[k]) for k in range(int(n))]
    groups.append(cgroup(int(n), int(s), int(f), nodesn))

cnts = []  # the raw lines are no longer needed
avgfreq = tfreq / 20
print(tfreq, avgfreq)

# ---------------------------------------------------------------------------
# Greedily merge consecutive light groups until the running total passes
# avgfreq; groups that are heavier than avgfreq on their own are kept as is.
# ngnum records, per flush, the indices of the groups folded into tmpg.
# ---------------------------------------------------------------------------
ngroups = []
ngnum = []
nn = []
tmpg = cgroup(0, 0, 0, [])
for idx, grp in enumerate(groups):
    if tmpg.freq < avgfreq and grp.freq < avgfreq and grp.num < 1000:
        tmpg = tmpg + grp
        nn.append(idx)
    if tmpg.freq > avgfreq:
        ngroups.append(tmpg)
        ngnum.append(nn)
        tmpg = cgroup(0, 0, 0, [])
        nn = []
    if grp.freq > avgfreq:
        # NOTE(review): nodes is always a list here, so this test is always
        # true and a possibly empty accumulator is flushed as well.
        if tmpg.nodes is not None:
            ngroups.append(tmpg)
        ngroups.append(grp)
        ngnum.append(nn)
        tmpg = cgroup(0, 0, 0, [])
        nn = []
    # NOTE(review): a group with freq == avgfreq, or with freq < avgfreq and
    # num >= 1000, matches none of the branches above and is dropped --
    # confirm that this is intended.

# Flush whatever is left in the accumulator (it may be empty).
ngroups.append(tmpg)
ngnum.append(nn)

# ---------------------------------------------------------------------------
# Write the result: first the number of groups, then for every group a
# "<num> <size> <freq>" header and a line of tab-terminated node ids.
# ngnum is not written; only a disabled dump used to consume it.
# ---------------------------------------------------------------------------
out_name = "graph" + str(group_no) + "l" + str(level) + "_agg"
with open(out_name, 'w') as fout:
    fout.write(str(len(ngroups)) + '\n')
    for g in ngroups:
        print(g.num, g.freq)
        print(g.nodes)
        fout.write('\t'.join([str(g.num), str(g.size), str(g.freq)]) + '\n')
        fout.write(''.join(str(node) + '\t' for node in g.nodes))
        fout.write('\n')
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Copy to "louvain_data_34" every row of "louvain_node_34" whose third
# tab-separated field is a non-zero integer; all other rows are dropped.
with open("louvain_node_34", 'r') as fin, open('louvain_data_34', 'w') as fout:
    for line in fin:
        third_field = int(line.strip().split('\t')[2])
        if third_field != 0:
            fout.write(line)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import os | ||
|
||
# Relative file names below are resolved inside this directory.
os.chdir(r"E:\graph")

# Rewrite "graph_16.data" as "graph16.d": the first two tab-separated columns
# are wrapped in double quotes, the third is copied as is, and any further
# columns are discarded.
with open("graph_16.data", 'r') as fin, open("graph16.d", 'w') as fout:
    for row in fin:
        fields = row.strip().split('\t')
        fout.write('"%s"\t"%s"\t%s\n' % (fields[0], fields[1], fields[2]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
""" | ||
Find the groups that exceed the average and write each of them to its own file.
""" | ||
|
||
import os | ||
import merge_discrete_file | ||
import agg | ||
import time | ||
|
||
# All relative paths below are resolved inside this directory.
os.chdir(r"/home/flnan/group_divided")

# Run parameters.
node_num = 6
traceno = 23
fnum = "10k"

# Input files derived from the trace number and size label.
graph_file = "graph%02d_%s_agg" % (traceno, fnum)
freqkeys_file = "freqkeys%02d_%s" % (traceno, fnum)
stat_file = "../twitter/stat%02d" % traceno

# freqkeys[i] is the key stored on line i of the freqkeys file, with
# surrounding whitespace removed.
with open(freqkeys_file, 'r') as fin:
    freqkeys = [raw_key.strip() for raw_key in fin]

with open(stat_file, 'r') as fin:
    keystat = fin.readlines()

# key_info maps key -> (size, freq), one "<key>\t<size>\t<freq>" record per
# line of the stat file; tsize / tfreq accumulate the totals over all records.
key_info = {}
tsize = 0
tfreq = 0
for record in keystat:
    key, size_field, freq_field = record.strip().split("\t")
    entry = (int(size_field), int(freq_field))
    key_info[key] = entry
    tsize += entry[0]
    tfreq += entry[1]
|
||
|
||
def split(source, g, favg, keys=None, info=None):
    """Cut group ``g`` of ``source`` into chunks whose frequency stays below ``favg``.

    ``source`` is read as consecutive line pairs (header line, tab-separated
    node line); pair number ``g`` (0-based) is the group to split.  Its items
    are walked in file order and packed greedily: an item joins the current
    chunk while the chunk's accumulated frequency plus the item's frequency
    is still ``< favg``; otherwise the chunk is closed and the item starts a
    new one.  A single item that reaches ``favg`` on its own therefore forms
    a chunk by itself.

    The chunks are then written back to ``source`` as
    ``"<count>\\t<size>\\t<freq>\\n"`` followed by the tab-terminated items.

    NOTE(review): ``source`` is overwritten with the chunks of group ``g``
    only; the other groups of the file are not written back.  This matches
    the previous behaviour -- confirm that it is intended.

    Args:
        source: path of the group file; read first, then rewritten.
        g: 0-based index of the group to split.
        favg: frequency bound for a chunk.
        keys: sequence mapping an item index to its key; defaults to the
            module-level ``freqkeys``.
        info: mapping ``key -> (size, freq)``; defaults to the module-level
            ``key_info``.

    If ``source`` holds no group ``g``, or the group has no items, the file
    is left untouched (previously an empty ``0 0 0`` group was written or a
    ``ValueError`` was raised, respectively).
    """
    if keys is None:
        keys = freqkeys
    if info is None:
        info = key_info

    with open(source, 'r') as fin:
        cnts = fin.readlines()

    # Locate the node line of group g; integer arithmetic avoids comparing
    # against a float index.
    members = None
    for start in range(0, len(cnts) - 1, 2):
        if start // 2 == g:
            members = [tok for tok in cnts[start + 1].strip().split('\t') if tok]
            break
    if not members:
        return

    sgroups = []
    sgsize = []
    sgfreq = []

    chunk, chunk_size, chunk_freq = [], 0, 0
    for item in members:
        key = keys[int(item)]
        isize = info[key][0]
        ifreq = info[key][1]
        # Close the running chunk when this item would reach the bound --
        # but never emit an empty chunk (first item already >= favg).
        if chunk and not (chunk_freq + ifreq < favg):
            sgroups.append(chunk)
            sgsize.append(chunk_size)
            sgfreq.append(chunk_freq)
            chunk, chunk_size, chunk_freq = [], 0, 0
        chunk.append(item)
        chunk_size += isize
        chunk_freq += ifreq

    sgroups.append(chunk)
    sgsize.append(chunk_size)
    sgfreq.append(chunk_freq)

    # Progress output kept from the original script.
    print(sgsize)
    print(sgfreq)
    print(sgroups)

    with open(source, 'w') as fout:
        for items, size, freq in zip(sgroups, sgsize, sgfreq):
            fout.write("%d\t%d\t%d\n" % (len(items), size, freq))
            fout.write(''.join(str(item) + '\t' for item in items))
            fout.write('\n')
|
||
|
||
|
||
#while(True): | ||
# Load the aggregate graph file: consecutive line pairs, a three-field header
# followed by a tab-separated line of item indices.
with open(graph_file, 'r') as fin:
    cnts = fin.readlines()

group_num = len(cnts) // 2

# groups[k] holds the item indices of group k as a sorted list of ints.
groups = []
for header_at in range(0, len(cnts), 2):
    n, s, f = cnts[header_at].strip().split('\t')  # header is only validated
    tmp = cnts[header_at + 1].strip().split('\t')
    groups.append(sorted(int(token) for token in tmp))

# Per-group totals looked up through freqkeys (index -> key) and
# key_info (key -> (size, freq)).
size_stat = [
    sum(key_info[freqkeys[node]][0] for node in members) for members in groups
]
freq_stat = [
    sum(key_info[freqkeys[node]][1] for node in members) for members in groups
]
group_len = [len(members) for members in groups]
total_size = sum(size_stat)
total_freq = sum(freq_stat)

# Frequency budget per node: groups above it are re-clustered further down.
Favg = total_freq/node_num
print("Favg =", Favg)
print("Total items =", sum(group_len))
|
||
# NOTE(review): the indentation of this section was lost in the source this
# was recovered from.  It is assumed here that every step below belongs to
# the body of the five-round loop -- confirm against the original file.
for j in range(5):
    # Step 1: pick the groups whose frequency exceeds Favg and dump, for each
    # of them, the rows of the louvain file whose two endpoints both belong
    # to the group into "graph_<i>".
    divided_group = []
    for i in range(group_num):
        if not (freq_stat[i] > Favg):
            continue
        print(i, freq_stat[i])
        divided_group.append(i)
        g = set(groups[i])
        with open("graph_"+str(i), 'w') as fout, open("louvain%02d_%s" % (traceno, fnum), 'r') as fin:
            for line in fin:
                first, second, tmp = line.strip().split('\t')
                if int(first) in g and int(second) in g:
                    fout.write(line)

    # Step 2: run the external louvain tool chain on every dumped sub-graph,
    # pausing one second after each command.  Exit statuses are not checked.
    for g in divided_group:
        gid = str(g)
        commands = (
            "louvain-convert -i graph_" + gid + " -o graph" + gid + ".bin -w graph" + gid + ".w",
            "louvain-louvain graph" + gid + ".bin -l -1 -q id_qual -w graph" + gid + ".w > graph" + gid + ".tree",
            "louvain-hierarchy graph" + gid + ".tree -m > graph" + gid,
        )
        for command in commands:
            status = os.system(command)
            time.sleep(1)

    # Step 3: post-process each re-clustered group, then splice the results
    # into the aggregate file in place.
    for g in divided_group:
        merge_discrete_file.merge_group(traceno, fnum, g)

    agg_name = 'graph%02d_%s_agg' % (traceno, fnum)
    agg.agg_file(divided_group, agg_name, agg_name)

    # Step 4 ("stubborn part"): split the selected groups by frequency.
    for g in divided_group:
        split('graph%02d_%s_agg' % (traceno, fnum), g, Favg)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Run the louvain tool chain (convert -> louvain -> hierarchy) for trace 25
# on each of the listed input sizes.
traceno=25
for i in 10 50 100 300 500
do
    base="graph${traceno}_${i}"
    # NOTE(review): the label printed here ends in "k" while the input file
    # name below ends in "t" -- confirm which suffix is intended.
    echo "louvain${traceno}_${i}k"
    /usr/bin/louvain-convert -i "../louvain${traceno}_${i}t" -o "${base}.b" -w "${base}.w"
    /usr/bin/louvain-louvain "${base}.b" -l -1 -q id_qual -w "${base}.w" > "${base}.t"
    /usr/bin/louvain-hierarchy "${base}.t" -m > "${base}"
done
Oops, something went wrong.