From 94c02d858addb796668a34e3decea5e99d157100 Mon Sep 17 00:00:00 2001 From: alphared Date: Tue, 26 Nov 2024 15:52:47 +0800 Subject: [PATCH] create ACCache --- GroupDivision/agg.py | 41 + GroupDivision/agg_groups.py | 92 ++ GroupDivision/clean.py | 7 + GroupDivision/convert.py | 10 + GroupDivision/divided_graph.py | 171 ++++ GroupDivision/initial.sh | 8 + GroupDivision/merge_discrete_file.py | 108 +++ Preproccess/split_in_days.py | 32 + Preproccess/stats.py | 23 + Preproccess/thread_split.py | 19 + README.md | 142 +++ plot/Exp01-SelectedClusters/data_iops.csv | 6 + plot/Exp01-SelectedClusters/data_l95.csv | 6 + plot/Exp01-SelectedClusters/iops.py | 67 ++ plot/Exp01-SelectedClusters/meta.csv | 40 + plot/Exp01-SelectedClusters/tail.py | 64 ++ plot/Exp02-LoadBalance/data.csv | 6 + plot/Exp02-LoadBalance/load-balance.py | 68 ++ plot/Exp02-LoadBalance/storage-balance.py | 80 ++ plot/Exp02-LoadBalance/storagebalance.csv | 6 + plot/Exp02-LoadBalance/store-balance-sub.py | 71 ++ .../MarkerDefine.py | 101 ++ plot/Exp03-MemCostTimeConsuming/data_mem.csv | 3 + plot/Exp03-MemCostTimeConsuming/data_time.csv | 3 + plot/Exp03-MemCostTimeConsuming/mem-sub-2.py | 90 ++ plot/Exp03-MemCostTimeConsuming/mem-sub.py | 90 ++ plot/Exp03-MemCostTimeConsuming/mem.py | 102 ++ plot/Exp03-MemCostTimeConsuming/time.py | 90 ++ plot/Exp04-NodeNum/data_n15.csv | 76 ++ plot/Exp04-NodeNum/iops.py | 70 ++ plot/Exp04-NodeNum/tail.py | 70 ++ plot/Exp05-NetworkSpeed/data_new.csv | 16 + plot/Exp05-NetworkSpeed/iops - 1G.py | 78 ++ plot/Exp05-NetworkSpeed/iops - 5G.py | 78 ++ plot/Exp05-NetworkSpeed/iops.py | 77 ++ plot/Exp05-NetworkSpeed/tail.py | 79 ++ plot/Exp06-WindowSize/MarkerDefine.py | 101 ++ plot/Exp06-WindowSize/data.csv | 21 + plot/Exp06-WindowSize/data_iops.csv | 5 + plot/Exp06-WindowSize/data_l95.csv | 5 + plot/Exp06-WindowSize/data_l99.csv | 5 + plot/Exp06-WindowSize/iops.py | 89 ++ plot/Exp06-WindowSize/tail.py | 93 ++ .../MarkerDefine.py | 101 ++ plot/Exp07-CorrelationThreshold/data.csv | 
16 + plot/Exp07-CorrelationThreshold/data_iops.csv | 5 + plot/Exp07-CorrelationThreshold/data_l95.csv | 5 + plot/Exp07-CorrelationThreshold/iops.py | 91 ++ plot/Exp07-CorrelationThreshold/tail.py | 88 ++ src/.gitignore | 7 + src/CMSketch.cpp | 395 ++++++++ src/CMSketch.h | 49 + src/CMakeLists.txt | 61 ++ src/ErasureCode/ErasureCode.cpp | 233 +++++ src/ErasureCode/ErasureCode.h | 61 ++ src/FreqList.cpp | 74 ++ src/FreqList.h | 30 + src/FreqTable.cpp | 345 +++++++ src/FreqTable.h | 41 + src/ListNode.cpp | 60 ++ src/ListNode.h | 27 + src/MemcachedClient.cpp | 323 +++++++ src/MemcachedClient.h | 35 + src/OurScheme.cpp | 882 ++++++++++++++++++ src/OurScheme.h | 41 + src/Random.cpp | 274 ++++++ src/Random.h | 12 + src/SPCache.cpp | 273 ++++++ src/SPCache.h | 17 + src/config.h | 310 ++++++ src/config.json | 115 +++ src/eccache.cpp | 561 +++++++++++ src/eccache.h | 29 + src/main.cpp | 310 ++++++ src/main_correlation.cpp | 18 + src/parameter.h | 20 + src/replicas.cpp | 5 + src/replicas.h | 25 + src/stats.py | 83 ++ src/toolbox.h | 166 ++++ src/twitter_trace.h | 270 ++++++ 81 files changed, 7867 insertions(+) create mode 100644 GroupDivision/agg.py create mode 100644 GroupDivision/agg_groups.py create mode 100644 GroupDivision/clean.py create mode 100644 GroupDivision/convert.py create mode 100644 GroupDivision/divided_graph.py create mode 100644 GroupDivision/initial.sh create mode 100644 GroupDivision/merge_discrete_file.py create mode 100644 Preproccess/split_in_days.py create mode 100644 Preproccess/stats.py create mode 100644 Preproccess/thread_split.py create mode 100644 README.md create mode 100644 plot/Exp01-SelectedClusters/data_iops.csv create mode 100644 plot/Exp01-SelectedClusters/data_l95.csv create mode 100644 plot/Exp01-SelectedClusters/iops.py create mode 100644 plot/Exp01-SelectedClusters/meta.csv create mode 100644 plot/Exp01-SelectedClusters/tail.py create mode 100644 plot/Exp02-LoadBalance/data.csv create mode 100644 plot/Exp02-LoadBalance/load-balance.py 
create mode 100644 plot/Exp02-LoadBalance/storage-balance.py create mode 100644 plot/Exp02-LoadBalance/storagebalance.csv create mode 100644 plot/Exp02-LoadBalance/store-balance-sub.py create mode 100644 plot/Exp03-MemCostTimeConsuming/MarkerDefine.py create mode 100644 plot/Exp03-MemCostTimeConsuming/data_mem.csv create mode 100644 plot/Exp03-MemCostTimeConsuming/data_time.csv create mode 100644 plot/Exp03-MemCostTimeConsuming/mem-sub-2.py create mode 100644 plot/Exp03-MemCostTimeConsuming/mem-sub.py create mode 100644 plot/Exp03-MemCostTimeConsuming/mem.py create mode 100644 plot/Exp03-MemCostTimeConsuming/time.py create mode 100644 plot/Exp04-NodeNum/data_n15.csv create mode 100644 plot/Exp04-NodeNum/iops.py create mode 100644 plot/Exp04-NodeNum/tail.py create mode 100644 plot/Exp05-NetworkSpeed/data_new.csv create mode 100644 plot/Exp05-NetworkSpeed/iops - 1G.py create mode 100644 plot/Exp05-NetworkSpeed/iops - 5G.py create mode 100644 plot/Exp05-NetworkSpeed/iops.py create mode 100644 plot/Exp05-NetworkSpeed/tail.py create mode 100644 plot/Exp06-WindowSize/MarkerDefine.py create mode 100644 plot/Exp06-WindowSize/data.csv create mode 100644 plot/Exp06-WindowSize/data_iops.csv create mode 100644 plot/Exp06-WindowSize/data_l95.csv create mode 100644 plot/Exp06-WindowSize/data_l99.csv create mode 100644 plot/Exp06-WindowSize/iops.py create mode 100644 plot/Exp06-WindowSize/tail.py create mode 100644 plot/Exp07-CorrelationThreshold/MarkerDefine.py create mode 100644 plot/Exp07-CorrelationThreshold/data.csv create mode 100644 plot/Exp07-CorrelationThreshold/data_iops.csv create mode 100644 plot/Exp07-CorrelationThreshold/data_l95.csv create mode 100644 plot/Exp07-CorrelationThreshold/iops.py create mode 100644 plot/Exp07-CorrelationThreshold/tail.py create mode 100644 src/.gitignore create mode 100644 src/CMSketch.cpp create mode 100644 src/CMSketch.h create mode 100644 src/CMakeLists.txt create mode 100644 src/ErasureCode/ErasureCode.cpp create mode 100644 
src/ErasureCode/ErasureCode.h create mode 100644 src/FreqList.cpp create mode 100644 src/FreqList.h create mode 100644 src/FreqTable.cpp create mode 100644 src/FreqTable.h create mode 100644 src/ListNode.cpp create mode 100644 src/ListNode.h create mode 100644 src/MemcachedClient.cpp create mode 100644 src/MemcachedClient.h create mode 100644 src/OurScheme.cpp create mode 100644 src/OurScheme.h create mode 100644 src/Random.cpp create mode 100644 src/Random.h create mode 100644 src/SPCache.cpp create mode 100644 src/SPCache.h create mode 100644 src/config.h create mode 100644 src/config.json create mode 100644 src/eccache.cpp create mode 100644 src/eccache.h create mode 100644 src/main.cpp create mode 100644 src/main_correlation.cpp create mode 100644 src/parameter.h create mode 100644 src/replicas.cpp create mode 100644 src/replicas.h create mode 100644 src/stats.py create mode 100644 src/toolbox.h create mode 100644 src/twitter_trace.h diff --git a/GroupDivision/agg.py b/GroupDivision/agg.py new file mode 100644 index 0000000..2894033 --- /dev/null +++ b/GroupDivision/agg.py @@ -0,0 +1,41 @@ +""" +把分group合并为一个 +""" + +import os +#os.chdir(r"E:\graph") + +def agg_file(group: list, source = 'graph02_10k_agg', dest="graph02_10k_agg"): + tgroupf = source + + with open(tgroupf, 'r') as fin: + cnts = fin.readlines() + + groups = list() + for i in range(0, len(cnts), 2): + n, s, f = cnts[i].strip().split('\t') + no = int(i/2) + if no in group: + #del cnts[i] + #del cnts[i+1] + with open('graph' + str(int(no)) + '_agg') as fin: + #cntst = fin.readline() + cntss = fin.readlines() + #print(cntss) + groups += cntss + else: + stri = "%s\t%s\t%s\n" % (n, s, f) + #print(stri) + groups.append(stri) + i = i + 1 + groups.append(cnts[i]) + + with open(dest, 'w') as fout: + for line in groups: + #print(line) + fout.write(line) + +if __name__ == "__main__": + traceno = 23 + fnum = "10k" + agg_file([0,], 'graph%02d_%s_agg' % (traceno, fnum), 'graph%02d_%s_agg' % (traceno, fnum)) \ No 
newline at end of file diff --git a/GroupDivision/agg_groups.py b/GroupDivision/agg_groups.py new file mode 100644 index 0000000..7a890f6 --- /dev/null +++ b/GroupDivision/agg_groups.py @@ -0,0 +1,92 @@ + +""" +生成合并的graph file +""" +import os +os.chdir(r"/home/flnan/group_divided") +level = 1 +nnode = 19 +group_no = 16 + +group_file = r"graph34_"+str(group_no)+"_l" + str(level) + + +class cgroup: + def __init__(self, num, size, freq, nodes): + self.num = num + self.size = size + self.freq = freq + self.nodes = nodes + + def __add__(self, __o): + num = self.num + __o.num + size = self.size + __o.size + freq = self.freq + __o.freq + nodes = self.nodes + __o.nodes + return cgroup(num, size, freq, nodes) + + +with open(group_file, 'r') as fin: + cnts = fin.readlines() + +groups = list() +tfreq = 0 +for i in range(0, len(cnts), 2): + #print(i,cnts[i]) + no, n, s, f = cnts[i].strip().split('\t') + tfreq += int(f) + i = i + 1 + #print(i) + nodes = cnts[i].split('\t') + nodesn = list() + for i in range(int(n)): + nodesn.append(int(nodes[i])) + tmpg = cgroup(int(n), int(s), int(f), nodesn) + groups.append(tmpg) + +cnts = list() +avgfreq = tfreq / 20 +print(tfreq, avgfreq) + +ngroups = list() +ngnum = list() +nn = list() +tmpg = cgroup(0, 0, 0, list()) +for i in range(len(groups)): + #print(type(tmpg)) + if tmpg.freq < avgfreq and groups[i].freq < avgfreq and groups[i].num < 1000: + tmpg = tmpg + groups[i] + nn.append(i) + if tmpg.freq > avgfreq: + ngroups.append(tmpg) + tmpg = cgroup(0, 0, 0, list()) + ngnum.append(nn) + nn = list() + if groups[i].freq > avgfreq: + if (tmpg.nodes != None): + ngroups.append(tmpg) + ngroups.append(groups[i]) + tmpg = cgroup(0, 0, 0, list()) + ngnum.append(nn) + nn = list() +ngroups.append(tmpg) +ngnum.append(nn) + +with open("graph"+str(group_no)+"l" + str(level) + "_agg", 'w') as fout: + fout.write(str(len(ngroups))) + fout.write('\n') + for g in ngroups: + pass + print(g.num, g.freq) + print(g.nodes) + fout.write(str(g.num) + '\t' + 
str(g.size) + '\t' + str(g.freq) + '\n') + for i in g.nodes: + fout.write(str(i) + '\t') + fout.write('\n') +# with open("graph"+str(group_no)+"l" + str(level) + "_agg", 'w') as fout: +# for n in ngnum: +# for i in n: +# fout.write(str(i) + '\t') +# fout.write('\n') + + diff --git a/GroupDivision/clean.py b/GroupDivision/clean.py new file mode 100644 index 0000000..ab0bae4 --- /dev/null +++ b/GroupDivision/clean.py @@ -0,0 +1,7 @@ +with open("louvain_node_34", 'r') as fin, open('louvain_data_34', 'w') as fout: + line = fin.readline() + while(line != ""): + cnts = int(line.strip().split('\t')[2]) + if(cnts != 0): + fout.write(line) + line = fin.readline() diff --git a/GroupDivision/convert.py b/GroupDivision/convert.py new file mode 100644 index 0000000..531d5cf --- /dev/null +++ b/GroupDivision/convert.py @@ -0,0 +1,10 @@ +import os + +os.chdir(r"E:\graph") + +with open("graph_16.data", 'r') as fin, open("graph16.d", 'w') as fout: + line = fin.readline() + while (line != ""): + cnts = line.strip().split('\t') + fout.write('"%s"\t"%s"\t%s\n' % (cnts[0], cnts[1], cnts[2])) + line = fin.readline() \ No newline at end of file diff --git a/GroupDivision/divided_graph.py b/GroupDivision/divided_graph.py new file mode 100644 index 0000000..de77523 --- /dev/null +++ b/GroupDivision/divided_graph.py @@ -0,0 +1,171 @@ +""" +找出超过平均值的group,生成单独的文件 +""" + +import os +import merge_discrete_file +import agg +import time + +os.chdir(r"/home/flnan/group_divided") +node_num = 6 + +traceno = 23 +fnum = "10k" + +graph_file = "graph%02d_%s_agg" % (traceno, fnum) #"graph_n19l" + str(level) + +freqkeys_file = "freqkeys%02d_%s" % (traceno, fnum) +stat_file = "../twitter/stat%02d" % traceno + +with open(freqkeys_file, 'r') as fin: + freqkeys = fin.readlines() + +with open(stat_file, 'r') as fin: + keystat = fin.readlines() + +key_info = dict() +tsize = 0 +tfreq = 0 +for info in keystat: + key, size, freq = info.strip().split("\t") + key_info[key] = (int(size), int(freq)) + tsize += 
int(size) + tfreq += int(freq) + +for i in range(len(freqkeys)): + freqkeys[i] = freqkeys[i].strip() + + +def split(source, g, favg): + with open(source, 'r') as fin: + cnts = fin.readlines() + + sgroups = list() + sgsize = list() + sgfreq = list() + + dgroups = list() + for i in range(0, len(cnts), 2): + if g == i/2: + n, s, f = cnts[i].strip().split('\t') + dgroups = cnts[i + 1].strip().split('\t') + + tmp = list() + stsize = 0 + stfreq = 0 + for item in dgroups: + key = freqkeys[int(item)] + isize = key_info[key][0] + ifreq = key_info[key][1] + if(stfreq + ifreq < favg): + stfreq += ifreq + stsize += isize + tmp.append(item) + else: + sgroups.append(tmp) + sgsize.append(stsize) + sgfreq.append(stfreq) + tmp = list() + stsize = 0 + stfreq = 0 + stfreq += ifreq + stsize += isize + tmp.append(item) + + sgroups.append(tmp) + sgsize.append(stsize) + sgfreq.append(stfreq) + print(sgsize) + print(sgfreq) + print(sgroups) + + with open(source, 'w') as fout: + for i in range(len(sgroups)): + fout.write("%d\t%d\t%d\n" % (len(sgroups[i]), sgsize[i], sgfreq[i])) + for item in sgroups[i]: + fout.write(str(item)+'\t') + fout.write('\n') + + + +#while(True): +with open(graph_file, 'r') as fin: + cnts = fin.readlines() + +group_num = int(len(cnts)/2) +#print(group_num) + +groups = list() #[list() for i in range(group_num)] + +for i in range(0, len(cnts), 2): + n, s, f = cnts[i].strip().split('\t') + tmp = cnts[i+1].strip().split('\t') + groups.append([int(i) for i in tmp]) + +for i in range(group_num): + groups[i].sort() + #print(i, groups[i]) + +total_size = 0 +total_freq = 0 +size_stat = list() +freq_stat = list() +group_len = list() +#with open(group_file, 'w') as fout: +for i in range(group_num): + size = 0 + freq = 0 + for node in groups[i]: + key = freqkeys[node] + size += key_info[key][0] + freq += key_info[key][1] + total_size += size + total_freq += freq + size_stat.append(size) + freq_stat.append(freq) + group_len.append(len(groups[i])) + +Favg = total_freq/node_num 
+print("Favg =", Favg) +print("Total items =", sum(group_len)) + +for j in range(5): + divided_group = list() + for i in range(group_num): + if freq_stat[i] > Favg: + print(i, freq_stat[i]) + divided_group.append(i) + g = set(groups[i]) + with open("graph_"+str(i), 'w') as fout, open("louvain%02d_%s" % (traceno, fnum), 'r') as fin: + line = fin.readline() + while line != "": + first, second, tmp = line.strip().split('\t') + if int(first) in g and int(second) in g: + fout.write(line) + line = fin.readline() + else: + continue + + for g in divided_group: + status = os.system("louvain-convert -i graph_"+ str(g) +" -o graph"+ str(g) +".bin -w graph"+ str(g) +".w") + time.sleep(1) + status = os.system("louvain-louvain graph"+ str(g) +".bin -l -1 -q id_qual -w graph"+ str(g) +".w > graph"+ str(g) +".tree") + time.sleep(1) + status = os.system("louvain-hierarchy graph"+ str(g) +".tree -m > graph"+ str(g)) + time.sleep(1) + + #status = os.popen("rm *.bin *.w *.tree graph_*") + + for g in divided_group: + merge_discrete_file.merge_group(traceno, fnum, g) + + agg.agg_file(divided_group, 'graph%02d_%s_agg' % (traceno, fnum), 'graph%02d_%s_agg' % (traceno, fnum)) + + +# 顽固部分 +for g in divided_group: + split('graph%02d_%s_agg' % (traceno, fnum), g, Favg) + +# agg.agg_file(divided_group) +#print(divided_group) diff --git a/GroupDivision/initial.sh b/GroupDivision/initial.sh new file mode 100644 index 0000000..64c20fc --- /dev/null +++ b/GroupDivision/initial.sh @@ -0,0 +1,8 @@ +traceno=25 +for i in 10 50 100 300 500 +do + echo "louvain"$traceno"_"$i"k" + /usr/bin/louvain-convert -i ../"louvain"$traceno"_"$i't' -o "graph"$traceno"_"$i.b -w "graph"$traceno"_"$i.w + /usr/bin/louvain-louvain "graph"$traceno"_"$i.b -l -1 -q id_qual -w "graph"$traceno"_"$i.w > "graph"$traceno"_"$i.t + /usr/bin/louvain-hierarchy "graph"$traceno"_"$i.t -m > "graph"$traceno"_"$i +done diff --git a/GroupDivision/merge_discrete_file.py b/GroupDivision/merge_discrete_file.py new file mode 100644 index 
0000000..9442aa0 --- /dev/null +++ b/GroupDivision/merge_discrete_file.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 + +import os +os.chdir(r"/home/flnan/group_divided") + +#group_num = 49177 +def merge_group(traceno, fnum, subno: int, is_sub = True): + is_subgraph = is_sub + subno = subno + parent_graph = "graph%02d_%s_agg" % (traceno, fnum) + + if is_subgraph: + gprefix = "graph" + str(subno) + else: + gprefix = "graph%02d_%s" % (traceno, fnum) + group_file = gprefix #+ r"_"+str(group_num) + + freqkeys_file = "freqkeys%02d_%s" % (traceno, 10) + stat_file = "../twitter/stat%02d" % traceno + + if is_subgraph: + with open(parent_graph, 'r') as fin: + pcnts = fin.readlines() + + pgroups = list() + for i in range(0, len(pcnts), 2): + l, s, f = pcnts[i].strip().split('\t') + tmp = pcnts[i + 1].strip().split('\t') + for j in range(len(tmp)): + tmp[j] = int(tmp[j]) + pgroups.append(tmp) + + with open(group_file, 'r') as fin: + cnts = fin.readlines() + + with open(freqkeys_file, 'r') as fin: + freqkeys = fin.readlines() + + with open(stat_file, 'r') as fin: + keystat = fin.readlines() + + key_info = dict() + tsize = 0 + tfreq = 0 + for info in keystat: + key, size, freq = info.strip().split("\t") + key_info[key] = (int(size), int(freq)) + tsize += int(size) + tfreq += int(freq) + + for i in range(len(freqkeys)): + freqkeys[i] = freqkeys[i].strip() + + dgroups = dict() #[list() for i in range(group_num)] + gsize = list() + gfreq = list() + glen = list() + + for line in cnts: + kn, n = line.strip().split() + if is_subgraph: + if int(kn) not in pgroups[subno]: + continue + if n not in dgroups: + dgroups[n] = list() + dgroups[n].append(int(kn)) + + groups = list() + for d in dgroups: + groups.append(dgroups[d]) + + total_size = 0 + total_freq = 0 + + for i in range(len(groups)): + groups[i].sort() + size = 0 + freq = 0 + for node in groups[i]: + key = freqkeys[node] + size += key_info[key][0] + freq += key_info[key][1] + total_size += size + total_freq += freq + 
gsize.append(size) + gfreq.append(freq) + glen.append(len(groups[i])) + + print(sum(glen)) + #print(len(pgroups[subno])) + + with open(gprefix + '_agg', 'w') as fout: + for i in range(len(groups)): + if(glen[i] == 0): continue + fout.write("%d\t%d\t%d\n" % (glen[i], gsize[i], gfreq[i])) + for node in groups[i]: + fout.write(str(node)+'\t') + fout.write('\n') + +if __name__ == "__main__": + #for i in [10, 80, 126, 170]: + # merge_group(i, True) + # traceno = 23 + # # fnum = "50" + # fnum = [10, 50, 100, 300, 500] + # for i in fnum: + # merge_group(traceno, str(i), 1, False) + merge_group(23, 10, 1, False) \ No newline at end of file diff --git a/Preproccess/split_in_days.py b/Preproccess/split_in_days.py new file mode 100644 index 0000000..145c01b --- /dev/null +++ b/Preproccess/split_in_days.py @@ -0,0 +1,32 @@ +import os + +fname = "cluster25." +fprefix = "workload25" +tstart = 0 +dlong = 60 * 60 * 24 +day = 0 + +for f in range(0, 5): + with open(fname+("%03d" % f), 'r') as fin: + print("day =",day) + fout = open(fprefix+str(day),'a') + line = fin.readline() + while(line != ''): + cnts = line.strip().split(',') + ts = int(cnts[0]) + #if ts % 3600 == 0 and ts >= 3600: + # print(ts) + if ts < (day+1) * dlong: + fout.write(line) + else: + fout.close() + day += 1 + fout = open(fprefix+str(day),'a') + print("day =",day) + continue + if day > 10: + break + line = fin.readline() + + fout.close() + os.remove(fname+("%03d" %f)) diff --git a/Preproccess/stats.py b/Preproccess/stats.py new file mode 100644 index 0000000..39fae0e --- /dev/null +++ b/Preproccess/stats.py @@ -0,0 +1,23 @@ + +day = 0 +stat = dict() +th = 0 +lnum = 0 + +with open("workload23_" + str(day), "r") as f: + line = f.readline() + while(line != ""): + cnts = line.split(",") + if(cnts[1] in stat): + stat[cnts[1]][0] +=1 + else: + attr = list() + attr.append(1) + attr.append(cnts[3]) + stat[cnts[1]] = attr + line = f.readline() +print("read complete\n") + +with open("stat23", "w") as f: + for item in 
stat.items(): + f.write("%s\t%d\t%d\n" % (item[0], int(item[1][1]), int(item[1][0]))) diff --git a/Preproccess/thread_split.py b/Preproccess/thread_split.py new file mode 100644 index 0000000..c42a496 --- /dev/null +++ b/Preproccess/thread_split.py @@ -0,0 +1,19 @@ +traceno = 25 +day = 0 +nthreads = 128 +inprefix = "workload" + +count = -1 +for count,line in enumerate(open(inprefix+("%02d_%d" % (traceno, day)),'r')): + count += 1 + +nlines = int(count / nthreads) + 1 + +with open(inprefix+("%02d_%d" % (traceno, day)),'r') as fin: + for t in range(nthreads): + outfile = "t%dd%dt%dp%04d" % (traceno, day, nthreads, t) + fout = open(outfile, 'w'); + for lnum in range(nlines): + cnt = fin.readline() + fout.write(cnt) + fout.close() diff --git a/README.md b/README.md new file mode 100644 index 0000000..6afb627 --- /dev/null +++ b/README.md @@ -0,0 +1,142 @@ +# AC-Cache: A Memory-Efficient Caching System for Small Objects via Exploiting Access Correlations +AC-Cache consists of two main processes: correlation analysis and KV object distribution. +Accordingly, the source code is structured into two execution phases. +Data synchronization between these phases must be manually managed by the tester. +Due to the extended duration of the test process and the inability to automate it via shell scripts, we provide a detailed description of how to test AC-Cache. +The tester is required to manually modify and update certain scripts and source files during the process. + +## Requirement +* platform: Linux +* build tools: `cmake (>=3.20)` +* compiler: `gcc (>=4.8)` +* python: `python3 (==3.10)` +* library: `jsoncpp`, `libisal`, `libmemcached`, `libfmt`, `python-prtpy` + +## Build +```shell +git clone https://github.com/nankeys/AC-Cache.git +cd CorrelationAnalysis && mkdir _build && cd _build +cmake .. 
+make -j +``` + +## Trace process +### Download +* Twitter: Refer to [Twitter cache trace](https://github.com/twitter/cache-trace) + + `https://ftp.pdl.cmu.edu/pub/datasets/twemcacheWorkload/open_source/` +* Meta: Refer to [Running cachebench with the trace workload](https://cachelib.org/docs/Cache_Library_User_Guides/Cachebench_FB_HW_eval) + + `kvcache/202206`: `aws s3 cp --no-sign-request --recursive s3://cachelib-workload-sharing/pub/kvcache/202206/ ./` + + `kvcache/202401`: `aws s3 cp --no-sign-request --recursive s3://cachelib-workload-sharing/pub/kvcache/202401/ ./` + +### Preprocess +1. Uncompress the trace, for example +```shell +zstd -d cluster2.sort.zst +``` +2. Split the trace in days +```shell +# change the fname in split_in_days.py +python3 split_in_days.py +``` +3. Generate the stat file +```shell +# change the workload name and stat name in stats.py +python3 stats.py +``` +4. Split the traces in threads +```shell +# change the traceno in thread_split.py +python3 thread_split.py +``` +5. For each trace, extract the position of hot objects +```shell +# change the information in advance +python3 FreqExtraction.py +``` +6. Put the information into `parameter.h`. Put the variations into `flimit`. +7. Change the information in `config.json`. + +## Correlation Analysis +1. Change the information in main_correlation.cpp +2. Change the dir and rebuild +```shell +cd _build +make -j +``` +3. Run `correlation` to generate the correlation graph. +**Note**: the generation of the correlation graph could take a long time. It will generate a file whose name is `louvain_node_{trace_no}_{flimit}` +```shell +./correlation +``` + +## Graph partition +1. Download and compile the `louvain`. + +```shell +wget https://master.dl.sourceforge.net/project/louvain/louvain-generic.tar.gz?viasf=1 +tar -zvxf louvain-generic.tar.gz +make +``` + +2. Generate initial groups +```shell +bash initial.sh +``` + +3. 
Merge the groups +```shell +# change the information of the trace +python3 merge_discrete_file.py +``` + +4. Execute Algorithm 1: Partition correlation graph +```shell +python3 divided_graph.py +``` + +5. Then we get the graph file of the CGroups. + +## Objects Distribution +1. Put the generated information of CGroups to `parameter.h`. +2. Set up the experiments you want to test and change the variation in `cache.h`. +3. Set up the `Memcached` nodes. +4. Record the information of the `Memcached` nodes in `config.json`. +```json +"server_info": [ + { + "ip": "172.18.96.10", + "port": 11211 + },{ + "ip": "172.18.96.11", + "port": 11211 + } +] +``` +5. Change the dir and rebuild +```shell +cd _build +make -j +``` +6. Run the executable file +``` shell +./CorAna +``` +7. The result will be written to `result.txt`. + +## Various evaluations +To adapt to a new evaluation, one should change the file `config.h` to get the parameter from the self-defined source file but not the `config.json`. + +## Plot +All the scripts for plotting the graphs are under the `plot` directory. +The testers need to record the results in the format depicted in `*.csv`. +Then the testers can run the python scripts. + +## Notes +1. All the paths in the scripts and source code should be carefully checked. +2. Preprocessing is important and takes a long time. +3. The process of correlation analysis takes a long time. 
\ No newline at end of file diff --git a/plot/Exp01-SelectedClusters/data_iops.csv b/plot/Exp01-SelectedClusters/data_iops.csv new file mode 100644 index 0000000..6be4443 --- /dev/null +++ b/plot/Exp01-SelectedClusters/data_iops.csv @@ -0,0 +1,6 @@ +scheme,1,2,23,25,202206,202401 +AC-Cache,3415960,2818050,1700000,4150000,3738996.667,4029440 +EC-Cache,957000,919000,389000,653744,1153318,1165332 +SP-Cache,783000,933000,401000,895206,1164113.333,1177710 +Baseline,982000,956000,407000,859485,1206200,1.21E+06 +Replication,1.26E+06,1.25E+06,1.19E+06,1.13E+06,1207300,1212943.333 diff --git a/plot/Exp01-SelectedClusters/data_l95.csv b/plot/Exp01-SelectedClusters/data_l95.csv new file mode 100644 index 0000000..7cae0bb --- /dev/null +++ b/plot/Exp01-SelectedClusters/data_l95.csv @@ -0,0 +1,6 @@ +scheme,1,2,23,25,202206,202401 +AC-Cache,0.000131,0.000131,0.000138,0.000133,0.000129333,0.000127667 +EC-Cache,0.000200333,0.000199667,0.000698,0.000556,0.000141233,0.000141 +SP-Cache,0.000237,0.000196,0.0005985,0.000326,1.41E-04,0.000141333 +Baseline,0.000192,0.000189,0.000593,0.000386,0.000141333,0.000141333 +Replication,0.000138,0.000138,0.000153,0.000182,0.000141,0.000141667 diff --git a/plot/Exp01-SelectedClusters/iops.py b/plot/Exp01-SelectedClusters/iops.py new file mode 100644 index 0000000..abd60da --- /dev/null +++ b/plot/Exp01-SelectedClusters/iops.py @@ -0,0 +1,67 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.colors import ListedColormap +import seaborn as sns +import numpy as np +import pandas as pd +import os + +#sns.set() +#sns.axes_style('white') +colors =["#ee4000", "#5f9ea0", "#9acd32", "#ffa54f", "#a56cc1"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 5 +mpl.rcParams['pdf.fonttype'] = 42 + +# Convert the data to a DataFrame +df = pd.read_csv("data.csv", header=0) +df['ops'] /= 
1000000 + +scheme_order = ['AC-Cache','EC-Cache', 'SP-Cache', 'Baseline', 'Replication'] + +# Reorder the 'Scheme' column based on the defined order +df['Scheme'] = pd.Categorical(df['Scheme'], categories=scheme_order, ordered=True) + +# Group by 'Scheme' and 'NodeNum' and calculate mean and std thruput +grouped = df.groupby(['Scheme', 'ClusterNum'])['ops'].agg(['mean', 'std']) + +# Reshape the DataFrame for plotting +grouped = grouped.unstack(level='Scheme') + +# Plotting with error bars for each scheme +ax = grouped['mean'].plot(kind='bar', yerr=grouped['std'], capsize=5, rot=0, legend=False, edgecolor='black', lw=2, colormap=mycmap, width = 0.8, zorder = 100) + + +ax.set_ylim(0, 5.5) +ax.set_yticks(np.arange(0, 5.1, 1)) + +minor_yticks = np.arange(0.5, 5, 1) +ax.set_yticks(minor_yticks, minor=True) +plt.grid(True, zorder=0) +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + +ax.set_xticks([0, 1, 2, 3, 4, 5], labels=["cluster01", "cluster02", "cluster23", "cluster25", '202206', '202401'], rotation=30) + +#ax.set_xticks(["""1""","""2""","""23""","""34"""],labels=[1, 2, 23, 34]) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel(r"Selected clusters", fontsize=42) +ax.set_ylabel("Throughput (Mops)", fontsize=42) + +# Legend style +ax.legend(loc='center left', ncols= 2, bbox_to_anchor=(0.05, 0.83), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=0.8, handleheight=1, handletextpad=0.3) + +#plt.show() +plt.gcf().set_size_inches(10, 8) +plt.savefig("iops.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp01-SelectedClusters/meta.csv b/plot/Exp01-SelectedClusters/meta.csv new file mode 
100644 index 0000000..c2eb489 --- /dev/null +++ b/plot/Exp01-SelectedClusters/meta.csv @@ -0,0 +1,40 @@ +scheme,tnum,ttime,tops,ops,tsize,size,p95,p99,p9999 +Random,202206,174.429,207409712,1.21E+06,1.30E+11,738615,0.000141,0.000163,0.000647 +Random,202206,174.49,207409712,1.21E+06,1.30E+11,740268,0.000141,0.000162,0.000546 +Random,202206,175,207409712,1.20E+06,1.30E+11,737792,0.000142,0.000164,0.000582 +Random,202401,129.407,154604153,1.22E+06,1.20E+11,921139,0.000141,0.000163,0.000617 +Random,202401,129.686,154604153,1.21E+06,1.20E+11,919465,0.000141,0.000164,0.000583 +Random,202401,130.103,154604153,1.21E+06,1.20E+11,918105,0.000142,0.000164,0.000533 +Replica,202206,173.818,207409712,1.21E+06,1.30E+11,740233,0.000141,0.000162,0.000605 +Replica,202206,174.258,207409712,1.21E+06,1.30E+11,738876,0.000141,0.000162,0.000557 +Replica,202206,174.89,207409712,1.21E+06,1.30E+11,739576,0.000141,0.000162,0.000547 +Replica,202401,129.531,154604153,1.21E+06,1.20E+11,919191,0.000142,0.000164,0.000549 +Replica,202401,129.831,154604153,1.21E+06,1.20E+11,919514,0.000141,0.000163,0.000606 +Replica,202401,130.272,154604153,1.21E+06,1.20E+11,918155,0.000142,0.000164,0.000591 +SPCache,202206,179.722,207409712,1.17E+06,1.30E+11,718123,0.000141,0.000157,0.000543 +SPCache,202206,179.809,207409712,1.17E+06,1.31E+11,718712,0.000141,0.000158,0.00051 +SPCache,202206,182.555,207409712,1.15E+06,1.30E+11,707312,0.000142,0.000162,0.002443 +SPCache,202401,132,154604153,1.19E+06,1.20E+11,901869,0.00014,0.000157,0.000573 +SPCache,202401,133.983,154604153,1.17E+06,1.20E+11,885625,0.000142,0.000159,0.000521 +SPCache,202401,133.403,154604153,1.17E+06,1.20E+11,887068,0.000142,0.000159,0.000532 +Clime,202206,63.478,207409712,3.49E+06,1.30E+11,2.13E+06,0.00013,0.000151,0.000427 +Clime,202206,57.5585,207409712,3.87E+06,1.20E+11,2.18E+06,0.000129,0.000151,0.000413 +Clime,202206,57.5023,207409712,3.86E+06,1.20E+11,2.17E+06,0.000129,0.000151,0.000435 
+Clime,202401,40.5298,154604153,4.03E+06,1.20E+11,3.05E+06,0.000128,0.000149,0.000418 +Clime,202401,40.439,154604153,4.03E+06,1.20E+11,3.05E+06,0.000128,0.000149,0.000427 +Clime,202401,40.7253,154604153,4.04E+06,1.20E+11,3.06E+06,0.000127,0.000149,0.000413 +Replica,1,768.383,946782610,1.26E+06,3.12123E+11,406323,0.000136,0.00015,0.00046 +Replica,1,766.171,946782610,1.26E+06,3.12123E+11,406702,0.000136,0.00015,0.000457 +Replica,1,765.805,946782610,1.26E+06,3.12123E+11,406490,0.000136,0.00015,0.000444 +Replica,2,756.224,925150952,1.25E+06,1.00382E+11,132154,0.000138,0.000154,0.000477 +Replica,2,751.773,925150952,1.26E+06,1.00382E+11,133157,0.000137,0.000151,0.000452 +Replica,2,750.566,925150952,1.26E+06,1.00382E+11,133503,0.000136,0.000151,0.00044 +Replica,23,618.958,717324320,1.19E+06,43298200234,69824.1,0.000153,0.000189,0.00049 +Replica,23,618.955,717324320,1.19E+06,43336866602,69936.7,0.000153,0.00019,0.000486 +Replica,23,620.947,717324320,1.18E+06,43759908053,70128.9,0.000155,0.000207,0.000543 +Replica,25,1414.55,1525189585,1.13E+06,65696140905,47390.7,0.000182,0.000245,0.00057 +Replica,25,1412.37,1525189585,1.13E+06,65696140905,47434.4,0.000182,0.000238,0.000557 +Replica,25,1425.05,1525189585,1.11E+06,65696140905,46816.6,0.000188,0.00029,0.000579 +Replica,99,24.768,30000000,1.24E+06,30000000,1207.11,0.000139,0.000154,0.000444 +Replica,99,24.5938,30000000,1.24E+06,30000000,1213.02,0.000138,0.000154,0.000452 +Replica,99,24.4954,30000000,1.24E+06,30000000,1214.55,0.000138,0.000153,0.000442 diff --git a/plot/Exp01-SelectedClusters/tail.py b/plot/Exp01-SelectedClusters/tail.py new file mode 100644 index 0000000..5f7b415 --- /dev/null +++ b/plot/Exp01-SelectedClusters/tail.py @@ -0,0 +1,64 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.colors import ListedColormap +import seaborn as sns +import numpy as np +import pandas as pd +import os + +#sns.set() +#sns.axes_style('white') +colors =["#ee4000", "#5f9ea0", "#9acd32", "#ffa54f", 
"#a56cc1"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 5 +mpl.rcParams['pdf.fonttype'] = 42 + +# Convert the data to a DataFrame +df = pd.read_csv("data.csv", header=0) +df['ops'] /= 1000000 + +scheme_order = ['AC-Cache','EC-Cache', 'SP-Cache', 'Baseline', 'Replication'] + +# Reorder the 'Scheme' column based on the defined order +df['Scheme'] = pd.Categorical(df['Scheme'], categories=scheme_order, ordered=True) + +# Group by 'Scheme' and 'NodeNum' and calculate mean and std thruput +grouped = df.groupby(['Scheme', 'tNum'])['p95'].agg(['mean', 'std']) + +# Reshape the DataFrame for plotting +grouped = grouped.unstack(level='Scheme') + +# Plotting with error bars for each scheme +ax = grouped['mean'].plot(kind='bar', yerr=grouped['std'], capsize=5, rot=0, legend=False, edgecolor='black', lw=2, colormap=mycmap, width = 0.8, zorder = 100) + +ax.set_ylim(0, 1.09) +ax.set_yticks(np.arange(0,1.1,0.2),labels = ['0', '0.2', '0.4', '0.6', '0.8', '1']) + +minor_yticks = np.arange(0.1, 1, 0.2) +ax.set_yticks(minor_yticks, minor=True) +plt.grid(True, zorder=0) +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + +ax.set_xticks([0, 1, 2, 3, 4, 5], labels=["cluster01", "cluster02", "cluster23", "cluster25", '202206', '202401'], rotation=30) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel(r"Selected clusters", fontsize=42) +ax.set_ylabel("95th latency (ms)", fontsize=42) + +# Legend style +ax.legend(loc='center left', ncols= 2, bbox_to_anchor=(0.05, 0.83), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, 
handlelength=0.8, handleheight=1, handletextpad=0.3) + +#plt.show() +plt.gcf().set_size_inches(10, 8.5) +plt.savefig("tail-latency.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp02-LoadBalance/data.csv b/plot/Exp02-LoadBalance/data.csv new file mode 100644 index 0000000..ba3e5d5 --- /dev/null +++ b/plot/Exp02-LoadBalance/data.csv @@ -0,0 +1,6 @@ +scheme,1,2,3,4,5,6,7 +AC-Cache,0.080122934,0.070832816,0.080562279,0.084640642,0.087430529,0.085797109,0.084084838 +EC-Cache,0.101134106,0.10790791,0.109183882,0.113076435,0.113076435,0.113076435,0.118163709 +SP-Cache,0.128841918,0.126057195,0.125218337,0.125058883,0.125021744,0.125232202,0.124850309 +Baseline,0.125213203,0.126448887,0.125809988,0.126105502,0.126888145,0.127178064,0.126828341 +Replication,0.112403875,0.113443118,0.11280601,0.112090737,0.113267404,0.114044222,0.114580028 diff --git a/plot/Exp02-LoadBalance/load-balance.py b/plot/Exp02-LoadBalance/load-balance.py new file mode 100644 index 0000000..15f4a67 --- /dev/null +++ b/plot/Exp02-LoadBalance/load-balance.py @@ -0,0 +1,68 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.colors import ListedColormap +import seaborn as sns +import numpy as np +import pandas as pd +import os + +#sns.set() +#sns.axes_style('white') +colors =["#ee4000", "#5f9ea0", "#9acd32", "#ffa54f", "#a56cc1"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +df = pd.read_csv("data.csv",header=0) +print(df) +df = df.T +df[1:] = df[1:] * 100 +df.columns = df.iloc[0] +df = df.drop(index="scheme") +#df2 = df2.sort_values(by="scheme") + +error_params=dict(elinewidth=2,ecolor='black',capsize=3) + +fig = plt.figure() +plt.grid(True, zorder=0) +ax = df.plot(kind='bar', rot=0, legend=False, edgecolor='black', lw=2, colormap=mycmap, width 
= 0.8, zorder=100) #, yerr=0.1, error_kw=error_params) +plt.gcf().set_size_inches(10, 8) + +# Hatches in bar plot +patterns =('-', '+', 'x','/','//','O','o','\\','\\\\') +patterns = ['//', '\\\\', "x"] +hatches = [p for p in patterns for i in range(len(df))] +#bars = fig.patches + +#for bar, hatch in zip(bars, hatches): +# bar.set_hatch(hatch) + +ax.set_ylim(0, 18) +ax.set_yticks(np.arange(0,18,5)) + +minor_yticks = np.arange(2.5, 18, 5) +ax.set_yticks(minor_yticks, minor=True) +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel("#-th day", fontsize=42) +ax.set_ylabel("Percent imbalance of \n the access load (%)", fontsize=38, y=0.45) + +# Legend style +l = ax.legend(loc='center left', ncols= 5, bbox_to_anchor=(0.05, 0.93), fontsize=36, reverse=False, labelspacing=0, columnspacing=0.5, frameon=False, handlelength=0.8, handleheight=1, handletextpad=0.3) +for t in l.get_texts(): t.set_position((0, 0)) + +#plt.show() +plt.gcf().set_size_inches(22, 7) +plt.savefig("load-balance.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp02-LoadBalance/storage-balance.py b/plot/Exp02-LoadBalance/storage-balance.py new file mode 100644 index 0000000..4f9f81a --- /dev/null +++ b/plot/Exp02-LoadBalance/storage-balance.py @@ -0,0 +1,80 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.colors import ListedColormap +import seaborn as sns +import numpy as np +import pandas as pd +import os + +#sns.set() +#sns.axes_style('white') +colors =["#ee4000", "#5f9ea0", "#9acd32", "#ffa54f", "#a56cc1"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + 
+plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +df = pd.read_csv("storagebalance2.csv",header=0) +print(df) +df = df.T +df[1:] = df[1:] * 100 + +df.columns = df.iloc[0] +df = df.drop(index="scheme") +# df = df[df['scheme' != 'YCSB']] +print(df) +#df2 = df2.sort_values(by="scheme") + +error_params=dict(elinewidth=2,ecolor='black',capsize=3) + +fig = plt.figure() +plt.grid(True, zorder=0) +ax = df.plot(kind='bar', rot=0, legend=False, edgecolor='black', lw=2, colormap=mycmap, width = 0.8, zorder=100) #, yerr=0.1, error_kw=error_params) +plt.gcf().set_size_inches(10, 8) + + +# y_axis=df +# for ya in y_axis: +# for x,y in zip(range(len(df)), df[ya]): +# if(x != 2): continue +# print(x,y) +# if(ya == "FastCache"): +# x = x - 0.43 +# elif(ya == "EC-Cache"): +# x = x - 0.15 +# elif(ya == "SP-Cache"): +# x = x + 0.05 +# elif(ya == "Random"): +# x = x + 0.25 +# plt.text(x, y+0.5, round(y,2), fontsize=25, rotation=45) + + +# Hatches in bar plot +patterns =('-', '+', 'x','/','//','O','o','\\','\\\\') +patterns = ['//', '\\\\', "x"] +hatches = [p for p in patterns for i in range(len(df))] + +ax.set_ylim(0, 28) +ax.set_yticks(np.arange(0, 28, 5)) +minor_yticks = np.arange(5, 28, 5) +ax.set_yticks(minor_yticks, minor=True) + +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel("Selected clusters", fontsize=42) +ax.set_ylabel("Percent imbalance of \n memory overhead (%)", fontsize=38) + +# Legend style +l = ax.legend(loc='center left', ncols= 5, bbox_to_anchor=(0.05, 0.93), fontsize=36, reverse=False, labelspacing=0, columnspacing=0.5, frameon=False, handlelength=0.8, handleheight=1, handletextpad=0.3) +for t in 
l.get_texts(): t.set_position((0, 0)) + +#plt.show() +plt.gcf().set_size_inches(22, 7) +plt.savefig("storage-balance.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp02-LoadBalance/storagebalance.csv b/plot/Exp02-LoadBalance/storagebalance.csv new file mode 100644 index 0000000..d216698 --- /dev/null +++ b/plot/Exp02-LoadBalance/storagebalance.csv @@ -0,0 +1,6 @@ +scheme,Cluster01,Cluster02,Cluster23,Cluster25,YCSB,kv202206,kv202401 +AC-Cache,0.024385,0.012978866,0.000504775,0.043884437,0.056431738,0.01240455,0.06356075 +EC-Cache,0.226435865,0.138054467,0.000335671,0.135682498,0.138027869,0.062605,0.106011 +SP-Cache,0.146110874,0.154736986,0.000396703,0.140308984,0.143744386,0.097036184,0.100522618 +Baseline,0.118480226,0.142211736,0.000250235,0.143687564,0.136907164,0.062749261,0.10603206 +Replication,0.040695877,0.193109772,0.062782595,0.135595446,0.091074224,0.062749257,0.106032054 diff --git a/plot/Exp02-LoadBalance/store-balance-sub.py b/plot/Exp02-LoadBalance/store-balance-sub.py new file mode 100644 index 0000000..ed4e83d --- /dev/null +++ b/plot/Exp02-LoadBalance/store-balance-sub.py @@ -0,0 +1,71 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.colors import ListedColormap +import seaborn as sns +import numpy as np +import pandas as pd +import os + +#sns.set() +#sns.axes_style('white') +colors =["#ee4000", "#5f9ea0", "#9acd32", "#ffa54f"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +df = pd.read_csv("storagebalance.csv",header=0) +df = df[:4] +print(df) +df = df[['scheme','Cluster23']] +df = df.T +df[1:] = df[1:] * 100 +df.columns = df.iloc[0] +df = df.drop(index="scheme") +#df2 = df2.sort_values(by="scheme") + +error_params=dict(elinewidth=2,ecolor='black',capsize=3) + +fig = plt.figure() 
+plt.grid(True, zorder=0) +ax = df.plot(kind='bar', rot=0, legend=False, edgecolor='black', lw=2, colormap=mycmap, width = 0.8, zorder=100) #, yerr=0.1, error_kw=error_params) +plt.gcf().set_size_inches(10, 8) +minor_yticks = np.arange(0.015, 0.06, 0.03) +ax.set_yticks(minor_yticks, minor=True) + +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + + +# Hatches in bar plot +patterns =('-', '+', 'x','/','//','O','o','\\','\\\\') +patterns = ['//', '\\\\', "x"] +hatches = [p for p in patterns for i in range(len(df))] +#bars = fig.patches + +#for bar, hatch in zip(bars, hatches): +# bar.set_hatch(hatch) + +ax.set_ylim(0, 0.062) +ax.set_yticks(np.arange(0, 0.061, 0.03)) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2) +ax.tick_params(width=2, labelsize = 34) + +# ax.set_xlabel("#-th day", fontsize=42) +# ax.set_ylabel("Percent imbalance ($\lambda$)", fontsize=42) + +# Legend style +# l = ax.legend(loc='center left', ncols= 4, bbox_to_anchor=(0.1, 0.9), fontsize=36, reverse=False, labelspacing=0, columnspacing=0.5, frameon=False, handlelength=0.8, handleheight=1, handletextpad=0.3) +# for t in l.get_texts(): t.set_position((0, 0)) + +#plt.show() +plt.gcf().set_size_inches(5, 4) +plt.savefig("storage-balance-sub.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp03-MemCostTimeConsuming/MarkerDefine.py b/plot/Exp03-MemCostTimeConsuming/MarkerDefine.py new file mode 100644 index 0000000..c7dd4cf --- /dev/null +++ b/plot/Exp03-MemCostTimeConsuming/MarkerDefine.py @@ -0,0 +1,101 @@ +import matplotlib.path as mpath +import matplotlib.pyplot as plt +import numpy as np + +text_style = dict(horizontalalignment='right', verticalalignment='center', + fontsize=12, fontfamily='monospace') +marker_style = 
dict(linestyle=':', color='0.8', markersize=10, + markerfacecolor="none", markeredgecolor="tab:red") + + +def format_axes(ax): + ax.margins(0.2) + ax.set_axis_off() + ax.invert_yaxis() + + +def split_list(a_list): + i_half = len(a_list) // 2 + return a_list[:i_half], a_list[i_half:] + +sverts = [ + (-1., 1.), + (1.,1.), + (1.,-1.), + (-1.,-1.), + (-1., 1.), +] +rverts = [ + (0,1), + (1,0), + (0,-1), + (-1,0), + (0,1), +] +qcodes = [ + mpath.Path.MOVETO, + mpath.Path.LINETO, + mpath.Path.LINETO, + mpath.Path.LINETO, + mpath.Path.CLOSEPOLY, +] + +cross = mpath.Path.unit_regular_asterisk(4) +circle = mpath.Path.unit_circle() + +square = mpath.Path(sverts, qcodes) +rhombus = mpath.Path(rverts, qcodes) + +ocross = mpath.Path( + vertices=np.concatenate([circle.vertices, cross.vertices[::-1, ...]]), + codes=np.concatenate([circle.codes, cross.codes])) + +csquare = mpath.Path( + vertices=np.concatenate([square.vertices, cross.vertices[::-1, ...]]), + codes=np.concatenate([square.codes, cross.codes])) + +crhombus = mpath.Path( + vertices=np.concatenate([rhombus.vertices, cross.vertices[::-1, ...]]), + codes=np.concatenate([rhombus.codes, cross.codes])) + +ploygon = mpath.Path.unit_regular_polygon(6) +#hexagram = mpath.Path.unit_regular_star(6) + +#print(ploygon.vertices, hexagram.vertices) + +tverts = [ + [-8.66025404e-01,5.00000000e-01], + [ 8.66025404e-01,5.00000000e-01], + [-1.83697020e-16,-1.00000000e+00], + [-8.66025404e-01,5.00000000e-01], + [-8.66025404e-01,-5.00000000e-01], + [ 8.66025404e-01, -5.00000000e-01], + [ 6.12323400e-17, 1.00000000e+00], + [-8.66025404e-01, -5.00000000e-01], +] + +tcodes = [ + mpath.Path.MOVETO, + mpath.Path.LINETO, + mpath.Path.LINETO, + mpath.Path.CLOSEPOLY, + mpath.Path.MOVETO, + mpath.Path.LINETO, + mpath.Path.LINETO, + mpath.Path.CLOSEPOLY, +] + +hexagram = mpath.Path(tverts, tcodes) + +markers = {'square':square, "rhombus": rhombus, 'csquare':csquare, 'crhombus':crhombus, 'ploygon': ploygon,'hexagram':hexagram} + +if __name__ == 
"__main__": + fig, ax = plt.subplots() + fig.suptitle('Path markers', fontsize=14) + fig.subplots_adjust(left=0.4) + for y, (name, marker) in enumerate(markers.items()): + ax.text(-0.5, y, name, **text_style) + ax.plot([y] * 3, marker=marker, **marker_style) + format_axes(ax) + + plt.show() \ No newline at end of file diff --git a/plot/Exp03-MemCostTimeConsuming/data_mem.csv b/plot/Exp03-MemCostTimeConsuming/data_mem.csv new file mode 100644 index 0000000..aee71f4 --- /dev/null +++ b/plot/Exp03-MemCostTimeConsuming/data_mem.csv @@ -0,0 +1,3 @@ +scheme,Cluster01,Cluster02,Cluster23,Cluster25 +with CM-Sketch,556.9696777,13376.51024,23888.00008,1204.888894 +w/o CM-Sketch,582.2483234,94878.15668,172218.177,5325.171582 diff --git a/plot/Exp03-MemCostTimeConsuming/data_time.csv b/plot/Exp03-MemCostTimeConsuming/data_time.csv new file mode 100644 index 0000000..a3a5036 --- /dev/null +++ b/plot/Exp03-MemCostTimeConsuming/data_time.csv @@ -0,0 +1,3 @@ +scheme,1,3,5,7,9 +with CM-Sketch,108.979,341.661,580.369,819.633,1048.07 +w/o CM-Sketch,101.004,315.677,535.044,754.833,965.633 diff --git a/plot/Exp03-MemCostTimeConsuming/mem-sub-2.py b/plot/Exp03-MemCostTimeConsuming/mem-sub-2.py new file mode 100644 index 0000000..12dc0b7 --- /dev/null +++ b/plot/Exp03-MemCostTimeConsuming/mem-sub-2.py @@ -0,0 +1,90 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.colors import ListedColormap +import seaborn as sns +import numpy as np +import pandas as pd +import os + +import MarkerDefine + +#sns.set() +#sns.axes_style('white') +colors =["#DC143C", "#9acd32", "#ffa54f", "#87CEFA"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +df = pd.read_csv("data_mem.csv",header=0) +print(df) +df = df.T +#df = df.dropna() +df[1:] = df[1:] / 1024 +df.columns = df.iloc[0] +df = 
df.drop(index="scheme") +df = df[3:4] +#df2 = df2.sort_values(by="scheme") + +markers = ["h", "s", MarkerDefine.hexagram, MarkerDefine.rhombus] +dashes = [(1, 0, 1, 0), (2, 2, 2, 2), (5, 2, 5, 2), (5, 1, 2, 1)] + +# df_error=np.array([[[0,0,3333.33333333326,3333.33333333326,0], [0,0,6666.66666666674,6666.66666666674,0]], +# [[430.666666666628,430.666666666628,430.666666666628,430.666666666628,430.666666666628], [845.333333333372,845.333333333372,845.333333333372,845.333333333372,845.333333333372]], +# [[639.666666666628,639.666666666628,639.666666666628,639.666666666628,639.666666666628], [735.333333333372,735.333333333372,735.333333333372,735.333333333372,735.333333333372]], +# [[971.333333333372,971.333333333372,971.333333333372,971.333333333372,971.333333333372], [740.666666666628,740.666666666628,740.666666666628,740.666666666628,740.666666666628]]]) + + +fig = plt.figure(figsize=(10, 8.5)) +# ax = df.plot(yerr = df_error/1000000, fmt='.', color="black", elinewidth=2, ecolor='black', capsize=5, legend = False, zorder=150) +#reset color cycle so that the marker colors match +#ax.set_prop_cycle(None) +#plot the markers +ax = df.plot(kind='bar', rot=0, lw=6, colormap=mycmap, legend=False) #, markerfacecolor='none', markersize = 30, mew = 5) +plt.gcf().set_size_inches(10, 6) + + +ax.set_ylim(0, 5.5) +# ax.set_yticks(np.arange(0, 1)) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) + +# minor_yticks = np.arange(25, 201, 50) +# ax.set_yticks(minor_yticks, minor=True) +# ax.set_xticks(np.arange(0, 5, 1), labels=['10','30', '50','70', '100']) +# ax.set_xticks([0], labels=["Cluster01"], rotation=20) + +# y_axis=df +# for ya in y_axis: +# for x,y in zip(range(len(df)), df[ya]): +# # if(x != 2): continue +# print(x, y) +# if(ya == "CMSketch"): +# x = x - 0.5 +# elif(ya == "w/o CMSketch"): +# x = x - 0.03 +# plt.text(x, y+0.05, round(y,2), fontsize=20, rotation=0) + +plt.grid(True) +plt.grid(color="b", linestyle="-", 
linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + +# ax.set_xlabel("Selected clusters", fontsize=42) +# ax.set_ylabel("Memory overhead (GB)", fontsize=38) + +# Legend style +# a = ax.legend().get_texts() +# label = [x.get_text() for x in a] +# l = ax.legend(labels= label[:4], loc='center left', ncols= 1, bbox_to_anchor=(0.00, 0.85), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=0.8, handleheight=1, markerscale=0.8, handletextpad=0.3) +# for t in l.get_texts(): t.set_position((0, 2)) + +plt.gcf().set_size_inches(8, 5) +plt.savefig("memory-cost-sub.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp03-MemCostTimeConsuming/mem-sub.py b/plot/Exp03-MemCostTimeConsuming/mem-sub.py new file mode 100644 index 0000000..51522a2 --- /dev/null +++ b/plot/Exp03-MemCostTimeConsuming/mem-sub.py @@ -0,0 +1,90 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.colors import ListedColormap +import seaborn as sns +import numpy as np +import pandas as pd +import os + +import MarkerDefine + +#sns.set() +#sns.axes_style('white') +colors =["#DC143C", "#9acd32", "#ffa54f", "#87CEFA"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +df = pd.read_csv("data_mem.csv",header=0) +print(df) +df = df.T +#df = df.dropna() +df[1:] = df[1:] / 1024 +df.columns = df.iloc[0] +df = df.drop(index="scheme") +df = df[:1] +#df2 = df2.sort_values(by="scheme") + +markers = ["h", "s", MarkerDefine.hexagram, MarkerDefine.rhombus] +dashes = [(1, 0, 1, 0), (2, 2, 2, 2), (5, 2, 5, 2), (5, 1, 2, 1)] + +# 
df_error=np.array([[[0,0,3333.33333333326,3333.33333333326,0], [0,0,6666.66666666674,6666.66666666674,0]], +# [[430.666666666628,430.666666666628,430.666666666628,430.666666666628,430.666666666628], [845.333333333372,845.333333333372,845.333333333372,845.333333333372,845.333333333372]], +# [[639.666666666628,639.666666666628,639.666666666628,639.666666666628,639.666666666628], [735.333333333372,735.333333333372,735.333333333372,735.333333333372,735.333333333372]], +# [[971.333333333372,971.333333333372,971.333333333372,971.333333333372,971.333333333372], [740.666666666628,740.666666666628,740.666666666628,740.666666666628,740.666666666628]]]) + + +fig = plt.figure(figsize=(10, 8.5)) +# ax = df.plot(yerr = df_error/1000000, fmt='.', color="black", elinewidth=2, ecolor='black', capsize=5, legend = False, zorder=150) +#reset color cycle so that the marker colors match +#ax.set_prop_cycle(None) +#plot the markers +ax = df.plot(kind='bar', rot=0, lw=6, colormap=mycmap, legend=False) #, markerfacecolor='none', markersize = 30, mew = 5) +plt.gcf().set_size_inches(10, 6) + + +ax.set_ylim(0, 0.6) +# ax.set_yticks(np.arange(0, 1)) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) + +# minor_yticks = np.arange(25, 201, 50) +# ax.set_yticks(minor_yticks, minor=True) +# ax.set_xticks(np.arange(0, 5, 1), labels=['10','30', '50','70', '100']) +# ax.set_xticks([0], labels=["Cluster01"], rotation=20) + +# y_axis=df +# for ya in y_axis: +# for x,y in zip(range(len(df)), df[ya]): +# # if(x != 2): continue +# print(x, y) +# if(ya == "CMSketch"): +# x = x - 0.5 +# elif(ya == "w/o CMSketch"): +# x = x - 0.03 +# plt.text(x, y+0.05, round(y,2), fontsize=20, rotation=0) + +plt.grid(True) +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) 
+ax.tick_params(width=2, labelsize = 34) + +# ax.set_xlabel("Selected clusters", fontsize=42) +# ax.set_ylabel("Memory overhead (GB)", fontsize=38) + +# Legend style +# a = ax.legend().get_texts() +# label = [x.get_text() for x in a] +# l = ax.legend(labels= label[:4], loc='center left', ncols= 1, bbox_to_anchor=(0.00, 0.85), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=0.8, handleheight=1, markerscale=0.8, handletextpad=0.3) +# for t in l.get_texts(): t.set_position((0, 2)) + +plt.gcf().set_size_inches(8, 5) +plt.savefig("memory-cost-sub.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp03-MemCostTimeConsuming/mem.py b/plot/Exp03-MemCostTimeConsuming/mem.py new file mode 100644 index 0000000..f395473 --- /dev/null +++ b/plot/Exp03-MemCostTimeConsuming/mem.py @@ -0,0 +1,102 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.colors import ListedColormap +import seaborn as sns +import numpy as np +import pandas as pd +import os + +import MarkerDefine + +#sns.set() +#sns.axes_style('white') +colors =["#DC143C", "#9acd32", "#ffa54f", "#87CEFA"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +df = pd.read_csv("data_mem.csv",header=0) +print(df) +df = df.T +#df = df.dropna() +df[1:] = df[1:] / 1024 +df.columns = df.iloc[0] +df = df.drop(index="scheme") +#df2 = df2.sort_values(by="scheme") + +markers = ["h", "s", MarkerDefine.hexagram, MarkerDefine.rhombus] +dashes = [(1, 0, 1, 0), (2, 2, 2, 2), (5, 2, 5, 2), (5, 1, 2, 1)] + +# df_error=np.array([[[0,0,3333.33333333326,3333.33333333326,0], [0,0,6666.66666666674,6666.66666666674,0]], +# [[430.666666666628,430.666666666628,430.666666666628,430.666666666628,430.666666666628], 
[845.333333333372,845.333333333372,845.333333333372,845.333333333372,845.333333333372]], +# [[639.666666666628,639.666666666628,639.666666666628,639.666666666628,639.666666666628], [735.333333333372,735.333333333372,735.333333333372,735.333333333372,735.333333333372]], +# [[971.333333333372,971.333333333372,971.333333333372,971.333333333372,971.333333333372], [740.666666666628,740.666666666628,740.666666666628,740.666666666628,740.666666666628]]]) + + +fig = plt.figure(figsize=(10, 8.5)) +# ax = df.plot(yerr = df_error/1000000, fmt='.', color="black", elinewidth=2, ecolor='black', capsize=5, legend = False, zorder=150) +#reset color cycle so that the marker colors match +#ax.set_prop_cycle(None) +#plot the markers +ax = df.plot(kind='bar', rot=0, lw=6, colormap=mycmap) #, markerfacecolor='none', markersize = 30, mew = 5) +plt.gcf().set_size_inches(10, 8) + +# for i, line in enumerate(ax.get_lines()): +# line.set_marker(markers[i%4]) + #line.set_dashes(dashes[i%4]) + + +# Hatches in bar plot +patterns =('-', '+', 'x','/','//','O','o','\\','\\\\') +patterns = ['//', '\\\\', "x"] +hatches = [p for p in patterns for i in range(len(df))] +bars = ax.patches + +#for bar, hatch in zip(bars, markers): +# bar.set_hatch(hatch) + +ax.set_ylim(0, 200) +ax.set_yticks(np.arange(0, 201, 50)) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) + +minor_yticks = np.arange(25, 201, 50) +ax.set_yticks(minor_yticks, minor=True) +# ax.set_xticks(np.arange(0, 5, 1), labels=['10','30', '50','70', '100']) +ax.set_xticks([0, 1, 2, 3], labels=["cluster01", "cluster02", "cluster23", "cluster25"], rotation=20) + +# y_axis=df +# for ya in y_axis: +# for x,y in zip(range(len(df)), df[ya]): +# # if(x != 2): continue +# print(x,y) +# if(ya == "CMSketch"): +# x = x - 0.32 +# elif(ya == "w/o CMSketch"): +# x = x - 0.03 +# plt.text(x, y+3, round(y,2), fontsize=20, rotation=0) + +plt.grid(True) +plt.grid(color="b", linestyle="-", linewidth=0.1, 
alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel("Selected clusters", fontsize=42) +ax.set_ylabel("Memory overhead (GB)", fontsize=38) + +# Legend style +a = ax.legend().get_texts() +label = [x.get_text() for x in a] +l = ax.legend(labels= label[:4], loc='center left', ncols= 1, bbox_to_anchor=(0.00, 0.85), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=0.8, handleheight=1, markerscale=0.8, handletextpad=0.3) +for t in l.get_texts(): t.set_position((0, 2)) + +plt.gcf().set_size_inches(10, 8.4) +plt.savefig("memory-cost.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp03-MemCostTimeConsuming/time.py b/plot/Exp03-MemCostTimeConsuming/time.py new file mode 100644 index 0000000..8d12600 --- /dev/null +++ b/plot/Exp03-MemCostTimeConsuming/time.py @@ -0,0 +1,90 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.colors import ListedColormap +import seaborn as sns +import numpy as np +import pandas as pd +import os + +import MarkerDefine + +#sns.set() +#sns.axes_style('white') +colors =["#DC143C", "#9acd32", "#ffa54f", "#87CEFA"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +df = pd.read_csv("data_time.csv",header=0) +print(df) +df = df.T +#df = df.dropna() +# df[1:] = df[1:] / 1024 +df.columns = df.iloc[0] +df = df.drop(index="scheme") +#df2 = df2.sort_values(by="scheme") + +markers = ["h", "s", MarkerDefine.hexagram, MarkerDefine.rhombus] +dashes = [(1, 0, 1, 0), (2, 2, 2, 2), (5, 2, 5, 2), (5, 1, 2, 1)] + +# df_error=np.array([[[0,0,3333.33333333326,3333.33333333326,0], 
[0,0,6666.66666666674,6666.66666666674,0]], +# [[430.666666666628,430.666666666628,430.666666666628,430.666666666628,430.666666666628], [845.333333333372,845.333333333372,845.333333333372,845.333333333372,845.333333333372]], +# [[639.666666666628,639.666666666628,639.666666666628,639.666666666628,639.666666666628], [735.333333333372,735.333333333372,735.333333333372,735.333333333372,735.333333333372]], +# [[971.333333333372,971.333333333372,971.333333333372,971.333333333372,971.333333333372], [740.666666666628,740.666666666628,740.666666666628,740.666666666628,740.666666666628]]]) + + +fig = plt.figure(figsize=(10, 8.5)) +# ax = df.plot(yerr = df_error/1000000, fmt='.', color="black", elinewidth=2, ecolor='black', capsize=5, legend = False, zorder=150) +#reset color cycle so that the marker colors match +#ax.set_prop_cycle(None) +#plot the markers +ax = df.plot(kind='line', rot=0, lw=6, colormap=mycmap, markerfacecolor='none', markersize = 30, mew = 5) +plt.gcf().set_size_inches(10, 8) + +for i, line in enumerate(ax.get_lines()): + line.set_marker(markers[i%4]) + #line.set_dashes(dashes[i%4]) + + +# Hatches in bar plot +patterns =('-', '+', 'x','/','//','O','o','\\','\\\\') +patterns = ['//', '\\\\', "x"] +hatches = [p for p in patterns for i in range(len(df))] +bars = ax.patches + +#for bar, hatch in zip(bars, markers): +# bar.set_hatch(hatch) + +ax.set_ylim(0, 1200) +ax.set_yticks(np.arange(0, 1201, 300)) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) + +minor_yticks = np.arange(150, 1200, 300) +ax.set_yticks(minor_yticks, minor=True) +ax.set_xticks(np.arange(0, 5, 1), labels=['1','3', '5', '7', '9']) + +plt.grid(True) +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + 
+ax.set_xlabel(r"# of records ($10^8$)", fontsize=42) +ax.set_ylabel("Time consuming (s)", fontsize=38) + +# Legend style +a = ax.legend().get_texts() +label = [x.get_text() for x in a] +l = ax.legend(labels= label[:4], loc='center left', ncols= 1, bbox_to_anchor=(0.0, 0.86), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=0.8, handleheight=1, markerscale=0.8, handletextpad=0.3) +for t in l.get_texts(): t.set_position((0, 2)) + +plt.gcf().set_size_inches(10, 9.3) +plt.savefig("time-cost.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp04-NodeNum/data_n15.csv b/plot/Exp04-NodeNum/data_n15.csv new file mode 100644 index 0000000..611fcab --- /dev/null +++ b/plot/Exp04-NodeNum/data_n15.csv @@ -0,0 +1,76 @@ +Scheme,NodeNum,ttime,tops,ops,tsize,thruput,p95,p99,p9999 +AC-Cache,3,292.856,925150951,3359480,1.00382E+11,358674,0.00021,0.00036,0.000744 +AC-Cache,3,167.339,925150951,6000760,91816865720,580799,0.000141,0.000317,0.00075 +AC-Cache,3,167.539,925150951,6100350,91816865720,590601,0.00014,0.000326,0.000865 +AC-Cache,6,335.341,925150951,2917240,79224412041,245264,0.000131,0.000151,0.000214 +AC-Cache,6,274.96,925150951,3626580,64705096119,248521,0.000126,0.000146,0.000217 +AC-Cache,6,278.407,925150951,3591500,64705096119,246133,0.000127,0.000148,0.000206 +AC-Cache,9,342.536,925150951,2840680,71464555321,215221,0.000128,0.000147,0.000201 +AC-Cache,9,322.064,925150951,3045060,66249754562,213864,0.000127,0.000147,0.000207 +AC-Cache,9,319.324,925150951,3086780,66249754562,216633,0.000125,0.000144,0.000197 +AC-Cache,12,353.755,925150951,2777870,67662626697,199076,0.000127,0.000147,0.000204 +AC-Cache,12,328.871,925150951,2990180,62742171813,198594,0.000124,0.000143,0.000197 +AC-Cache,12,331.944,925150951,2970180,62742171813,197282,0.000125,0.000144,0.000199 +AC-Cache,15,364.018,925150951,2647350,66736976920,187276,0.000127,0.000148,0.000211 
+AC-Cache,15,361.296,925150951,2673740,66736976920,189276,0.000125,0.000145,0.000203 +AC-Cache,15,362.865,925150951,2659840,66736976920,188190,0.000125,0.000146,0.000205 +EC-Cache,3,1158.13,925150951,812320,1.00382E+11,86107,0.000322,0.000429,0.001739 +EC-Cache,3,1222.55,925150951,771477,1.00382E+11,81779.9,0.000405,0.000638,0.00128 +EC-Cache,3,1134.16,925150951,829637,1.00382E+11,87915.3,0.000313,0.000468,0.001108 +EC-Cache,6,808.465,925150951,1152270,1.00382E+11,122099,0.000145,0.000165,0.000262 +EC-Cache,6,814.211,925150951,1144830,1.00382E+11,121314,0.000147,0.000169,0.000316 +EC-Cache,6,813.972,925150951,1145790,1.00382E+11,121424,0.000147,0.000169,0.000292 +EC-Cache,9,774.828,925150951,1201530,1.00382E+11,127329,0.00014,0.000159,0.000221 +EC-Cache,9,778.733,925150951,1198620,1.00382E+11,127012,0.00014,0.000159,0.000225 +EC-Cache,9,779.905,925150951,1195180,1.00382E+11,126657,0.000141,0.000161,0.000228 +EC-Cache,12,764.854,925150951,1215650,1.00382E+11,128809,0.000139,0.000158,0.000229 +EC-Cache,12,767.609,925150951,1214110,1.00382E+11,128652,0.000139,0.000158,0.000225 +EC-Cache,12,765.426,925150951,1216090,1.00382E+11,128871,0.000138,0.000158,0.000221 +EC-Cache,15,752.866,925150951,1235770,1.00382E+11,130955,0.000137,0.000157,0.000225 +EC-Cache,15,752.158,925150951,1236920,1.00382E+11,131070,0.000137,0.000157,0.000239 +EC-Cache,15,749.161,925150951,1240780,1.00382E+11,131481,0.000137,0.000157,0.000223 +Baseline,3,1157.03,925150951,808738,1.00382E+11,85710.5,0.000332,0.000445,0.001841 +Baseline,3,1229.77,925150951,769301,1.00382E+11,81535.4,0.000419,0.000585,0.001194 +Baseline,3,1204.61,925150951,790894,1.00382E+11,83823.7,0.000385,0.000589,0.001127 +Baseline,6,784.797,925150951,1186910,1.00382E+11,125759,0.00014,0.000158,0.000261 +Baseline,6,787.907,925150951,1181750,1.00382E+11,125231,0.000142,0.000161,0.000306 +Baseline,6,791.141,925150951,1178090,1.00382E+11,124829,0.000143,0.000165,0.000288 
+Baseline,9,755.103,925150951,1236740,1.00382E+11,131042,0.000135,0.000152,0.00021 +Baseline,9,754.052,925150951,1236340,1.00382E+11,131001,0.000135,0.000152,0.000233 +Baseline,9,756.163,925150951,1235740,1.00382E+11,130950,0.000136,0.000153,0.000211 +Baseline,12,745.001,925150951,1250350,1.00382E+11,132485,0.000134,0.000151,0.00021 +Baseline,12,741.553,925150951,1255900,1.00382E+11,133078,0.000133,0.00015,0.000209 +Baseline,12,740.762,925150951,1258780,1.00382E+11,133381,0.000133,0.00015,0.000205 +Baseline,15,726.494,925150951,1280480,1.00382E+11,135678,0.000132,0.000149,0.000227 +Baseline,15,724.363,925150951,1284300,1.00382E+11,136089,0.000131,0.000148,0.00021 +Baseline,15,722.149,925150951,1288740,1.00382E+11,136546,0.000131,0.000148,0.000207 +SP-Cache,3,1144.7,925150951,818010,1.00375E+11,86696.9,0.000323,0.00048,0.001616 +SP-Cache,3,1193.31,925150951,793185,1.00376E+11,84055.2,0.000377,0.000539,0.000967 +SP-Cache,3,1093.09,925150951,858947,1.00374E+11,91036.9,0.000285,0.000367,0.001034 +SP-Cache,6,800.039,925150951,1165370,1.00352E+11,123486,0.000143,0.000161,0.000249 +SP-Cache,6,801.671,925150951,1161720,1.00137E+11,122831,0.000144,0.000163,0.000274 +SP-Cache,6,805.431,925150951,1157690,1.00364E+11,122678,0.000146,0.000167,0.000295 +SP-Cache,9,770.094,925150951,1211360,1.00338E+11,128366,0.000139,0.000157,0.000221 +SP-Cache,9,769.029,925150951,1212610,1.00248E+11,128378,0.000139,0.000157,0.000221 +SP-Cache,9,771.399,925150951,1209720,1.00345E+11,128214,0.000139,0.000158,0.000223 +SP-Cache,12,759.892,925150951,1225930,1.00349E+11,129944,0.000137,0.000156,0.000216 +SP-Cache,12,758.187,925150951,1228870,1.00354E+11,130268,0.000137,0.000156,0.000217 +SP-Cache,12,756.236,925150951,1231940,1.0035E+11,130589,0.000137,0.000155,0.000215 +SP-Cache,15,745.558,925150951,1251400,1.00342E+11,132650,0.000135,0.000155,0.000217 +SP-Cache,15,743.483,925150951,1254080,1.00341E+11,132948,0.000135,0.000154,0.000215 
+SP-Cache,15,739.719,925150951,1259180,1.00344E+11,133483,0.000135,0.000154,0.000213 +Replication,3,895.352,925150952,1.05E+06,1.00382E+11,111318,0.000187,0.000279,0.000794 +Replication,3,899.652,925150952,1.05E+06,1.00382E+11,111205,0.000187,0.000287,0.000755 +Replication,3,902.835,925150952,1.04E+06,1.00382E+11,110617,0.000189,0.000284,0.000744 +Replication,6,730.116,925150952,1.29E+06,1.00382E+11,136575,0.000146,0.000173,0.001007 +Replication,6,729.576,925150952,1.29E+06,1.00382E+11,136975,0.000145,0.000171,0.000433 +Replication,6,726.644,925150952,1.30E+06,1.00382E+11,137333,0.000145,0.000169,0.000434 +Replication,9,695.555,925150952,1.36E+06,1.00382E+11,143622,0.000137,0.000156,0.000372 +Replication,9,691.979,925150952,1.36E+06,1.00382E+11,144276,0.000137,0.000157,0.000385 +Replication,9,694.502,925150952,1.36E+06,1.00382E+11,144102,0.000137,0.000157,0.000386 +Replication,12,686.439,925150952,1.37E+06,1.00382E+11,145528,0.000135,0.000153,0.000372 +Replication,12,682.159,925150952,1.38E+06,1.00382E+11,145901,0.000135,0.000153,0.000368 +Replication,12,685.249,925150952,1.38E+06,1.00382E+11,145896,0.000135,0.000153,0.000354 +Replication,15,755.368,925150952,1.26E+06,98402183010,130484,0.000134,0.000151,0.000373 +Replication,15,688.781,925150952,1.36E+06,1.00382E+11,144623,0.000134,0.000152,0.000341 +Replication,15,689.885,925150952,1.36E+06,1.00382E+11,144535,0.000134,0.000152,0.000359 \ No newline at end of file diff --git a/plot/Exp04-NodeNum/iops.py b/plot/Exp04-NodeNum/iops.py new file mode 100644 index 0000000..ec2dd9e --- /dev/null +++ b/plot/Exp04-NodeNum/iops.py @@ -0,0 +1,70 @@ +import pandas as pd +import seaborn as sns +import matplotlib as mpl +import matplotlib.pyplot as plt +from matplotlib.colors import ListedColormap +import numpy as np +import os + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +colors =["#ee4000", 
"#5f9ea0", "#9acd32", "#ffa54f", "#a56cc1"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +# Convert the data to a DataFrame +df = pd.read_csv("data_n15.csv", header=0) +df['ops'] /= 1000000 + +# Plotting +# plt.figure(figsize=(12, 8)) + +# Define the order of the Scheme categories +scheme_order = ['AC-Cache','EC-Cache', 'SP-Cache', 'Baseline', 'Replication'] + +# Reorder the 'Scheme' column based on the defined order +df['Scheme'] = pd.Categorical(df['Scheme'], categories=scheme_order, ordered=True) + +# Group by 'Scheme' and 'NodeNum' and calculate mean and std thruput +grouped = df.groupby(['Scheme', 'NodeNum'])['ops'].agg(['mean', 'std']) + +# Reshape the DataFrame for plotting +grouped = grouped.unstack(level='Scheme') + +# Plotting with error bars for each scheme +ax = grouped['mean'].plot(kind='bar', yerr=grouped['std'], capsize=5, rot=0, legend=False, edgecolor='black', lw=2, colormap=mycmap, width = 0.8, zorder = 100) + +minor_yticks = np.arange(0.5, 7, 0.5) + +ax.set_ylim(0, 7) +ax.set_yticks(np.arange(0, 7.1, 1)) +ax.set_yticks(minor_yticks, minor=True) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) +plt.grid(True) + +#axes.grid(which = 'minor', alp) + +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel("# of caching nodes", fontsize=42) +ax.set_ylabel("Throughput (Mops)", fontsize=42) + +# Legend style +a = ax.legend().get_texts() +label = [x.get_text() for x in a] +l = ax.legend(labels= label[:5], loc='center left', ncols= 2, bbox_to_anchor=(0.1, 0.82), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=1, handleheight=1.3, markerscale=0.8, handletextpad=0.3) +for t in l.get_texts(): 
t.set_position((0, 6)) + +plt.gcf().set_size_inches(10, 8) +plt.savefig("iops.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp04-NodeNum/tail.py b/plot/Exp04-NodeNum/tail.py new file mode 100644 index 0000000..490193d --- /dev/null +++ b/plot/Exp04-NodeNum/tail.py @@ -0,0 +1,70 @@ +import pandas as pd +import seaborn as sns +import matplotlib as mpl +import matplotlib.pyplot as plt +from matplotlib.colors import ListedColormap +import numpy as np +import os + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +colors =["#ee4000", "#5f9ea0", "#9acd32", "#ffa54f", "#a56cc1"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +# Convert the data to a DataFrame +df = pd.read_csv("data_n15.csv", header=0) +df['p95'] *= 1000 + +# Plotting +# plt.figure(figsize=(12, 8)) + +# Define the order of the Scheme categories +scheme_order = ['AC-Cache','EC-Cache', 'SP-Cache', 'Baseline', 'Replication'] + +# Reorder the 'Scheme' column based on the defined order +df['Scheme'] = pd.Categorical(df['Scheme'], categories=scheme_order, ordered=True) + +# Group by 'Scheme' and 'NodeNum' and calculate mean and std thruput +grouped = df.groupby(['Scheme', 'NodeNum'])['p95'].agg(['mean', 'std']) + +# Reshape the DataFrame for plotting +grouped = grouped.unstack(level='Scheme') + +# Plotting with error bars for each scheme +ax = grouped['mean'].plot(kind='bar', yerr=grouped['std'], capsize=5, rot=0, legend=False, edgecolor='black', lw=2, colormap=mycmap, width = 0.8, zorder = 100) + +minor_yticks = np.arange(0.05, 0.4, 0.1) + +ax.set_ylim(0, 0.43) +ax.set_yticks(np.arange(0, 0.41, 0.1)) +ax.set_yticks(minor_yticks, minor=True) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) +plt.grid(True) + +#axes.grid(which = 'minor', alp) + +plt.grid(color="b", 
linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel("# of caching nodes", fontsize=42) +ax.set_ylabel("Tail latency (ms)", fontsize=42) + +# Legend style +a = ax.legend().get_texts() +label = [x.get_text() for x in a] +l = ax.legend(labels= label[:5],loc='center left', ncols= 2, bbox_to_anchor=(0.12, 0.83), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=1, handleheight=1.3, markerscale=0.8, handletextpad=0.3) +for t in l.get_texts(): t.set_position((0, 6)) + +plt.gcf().set_size_inches(10, 8.5) +plt.savefig("tail-latency.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp05-NetworkSpeed/data_new.csv b/plot/Exp05-NetworkSpeed/data_new.csv new file mode 100644 index 0000000..2f144b5 --- /dev/null +++ b/plot/Exp05-NetworkSpeed/data_new.csv @@ -0,0 +1,16 @@ +scheme,netband,ttime,tops,ops,tsize,size,p95,p99,p9999 +AC-Cache,5,458.473,30000000,67289.8,1.23E+11,269159,0.000142,0.018304,0.236189 +AC-Cache,10,25.6739,30000000,1.27E+06,1.23E+11,5.06E+06,0.000124,0.00016,0.037192 +AC-Cache,1,1084.1,30000000,28334.7,1.23E+11,113339,0.00019,0.207247,0.681209 +EC-Cache,5,580.216,30000000,52455.9,1.23E+11,209824,0.000157,0.204546,0.212853 +EC-Cache,10,31.5582,30000000,1.00E+06,1.23E+11,4.00E+06,0.000149,0.000179,0.013461 +EC-Cache,1,1326.41,30000000,22797.9,1.23E+11,91191.6,0.000206,0.207777,0.638175 +Baseline,5,583.341,30000000,52329.3,1.23E+11,209317,0.000156,0.204565,0.21463 +Baseline,10,30.7288,30000000,1.01E+06,1.23E+11,4.05E+06,0.000149,0.000185,0.012562 +Baseline,1,1342.92,30000000,22579.1,1.23E+11,90316.6,0.000206,0.207791,0.650296 +Replication,5,584.164,30000000,52282.6,1.23E+11,209130,0.000155,0.204578,0.214897 
+Replication,10,31.4261,30000000,1.02E+06,1.23E+11,4.09E+06,0.000148,0.000177,0.013139 +Replication,1,1337.06,30000000,22698.8,1.23E+11,90795.3,0.000206,0.207728,0.642557 +SP-Cache,5,463.936,30000000,66118.7,1.23E+11,264416,0.000157,0.017311,0.215331 +SP-Cache,10,29.3515,30000000,1.03E+06,1.23E+11,4.12E+06,0.000132,0.000177,0.000873 +SP-Cache,1,1447.67,30000000,20899.2,1.23E+11,83596.2,0.000495,0.208294,0.427894 diff --git a/plot/Exp05-NetworkSpeed/iops - 1G.py b/plot/Exp05-NetworkSpeed/iops - 1G.py new file mode 100644 index 0000000..b153d65 --- /dev/null +++ b/plot/Exp05-NetworkSpeed/iops - 1G.py @@ -0,0 +1,78 @@ +import pandas as pd +import seaborn as sns +import matplotlib as mpl +import matplotlib.pyplot as plt +from matplotlib.colors import ListedColormap +import numpy as np +import os + +os.chdir(os.path.dirname(os.path.abspath(__file__)))  # data files live next to this script + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +colors =["#ee4000", "#5f9ea0", "#9acd32", "#ffa54f", "#a56cc1"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +# Convert the data to a DataFrame +df = pd.read_csv("data_new.csv", header=0) +df = df[df['netband']==1] +df['ops'] /= 1000000 +print(df) + +df2 = pd.read_csv("data.csv", header=0)  # NOTE(review): data.csv does not appear to be added by this patch; confirm it exists next to this script +df2['ops'] /= 1000000 +grouped2 = df2.groupby(['scheme', 'netband'])['ops'].agg(['mean', 'std']) +grouped2 = grouped2.unstack(level='scheme') + +# Plotting +# plt.figure(figsize=(12, 8)) + +# Define the order of the Scheme categories +scheme_order = ['AC-Cache','EC-Cache', 'SP-Cache', 'Baseline', "Replication"] + +# Reorder the 'Scheme' column based on the defined order +df['scheme'] = pd.Categorical(df['scheme'], categories=scheme_order, ordered=True) + +# Group by 'scheme' and 'netband' and calculate mean and std of ops +grouped = df.groupby(['scheme', 'netband'])['ops'].agg(['mean', 'std']) + +# Reshape the DataFrame for plotting +grouped =
grouped.unstack(level='scheme') + +print(grouped) + +# Plotting with error bars for each scheme +ax = grouped['mean'].plot(kind='bar', yerr=grouped2['std'], capsize=5, rot=0, legend=False, edgecolor='black', lw=2, colormap=mycmap, width = 0.8, zorder = 100) + +minor_yticks = np.arange(0.005, 0.03, 0.01) + +ax.set_ylim(0, 0.03) +ax.set_yticks(np.arange(0, 0.031, 0.01)) +ax.set_yticks(minor_yticks, minor=True) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) +plt.grid(True) +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + + +# linewidth of axes, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + +# ax.set_xlabel("Network bandwidth (Gbps)", fontsize=42) +# ax.set_ylabel("Throughput (Mops)", fontsize=42) + +# Legend style +# a = ax.legend().get_texts() +# label = [x.get_text() for x in a] +# l = ax.legend(labels= label[:5], loc='center left', ncols= 2, bbox_to_anchor=(0.05, 0.83), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=1, handleheight=1.3, markerscale=0.8, handletextpad=0.3) +# for t in l.get_texts(): t.set_position((0, 6)) + +plt.gcf().set_size_inches(10, 8.5) +plt.savefig("iops-1G.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp05-NetworkSpeed/iops - 5G.py b/plot/Exp05-NetworkSpeed/iops - 5G.py new file mode 100644 index 0000000..d3190ed --- /dev/null +++ b/plot/Exp05-NetworkSpeed/iops - 5G.py @@ -0,0 +1,78 @@ +import pandas as pd +import seaborn as sns +import matplotlib as mpl +import matplotlib.pyplot as plt +from matplotlib.colors import ListedColormap +import numpy as np +import os + +os.chdir(os.path.dirname(os.path.abspath(__file__)))  # data files live next to this script + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2
+mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +colors =["#ee4000", "#5f9ea0", "#9acd32", "#ffa54f", "#a56cc1"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +# Convert the data to a DataFrame +df = pd.read_csv("data_new.csv", header=0) +df = df[df['netband']==5] +df['ops'] /= 1000000 +print(df) + +df2 = pd.read_csv("data.csv", header=0) +df2['ops'] /= 1000000 +grouped2 = df2.groupby(['scheme', 'netband'])['ops'].agg(['mean', 'std']) +grouped2 = grouped2.unstack(level='scheme') + +# Plotting +# plt.figure(figsize=(12, 8)) + +# Define the order of the Scheme categories +scheme_order = ['AC-Cache','EC-Cache', 'SP-Cache', 'Baseline', "Replication"] + +# Reorder the 'Scheme' column based on the defined order +df['scheme'] = pd.Categorical(df['scheme'], categories=scheme_order, ordered=True) + +# Group by 'Scheme' and 'NodeNum' and calculate mean and std thruput +grouped = df.groupby(['scheme', 'netband'])['ops'].agg(['mean', 'std']) + +# Reshape the DataFrame for plotting +grouped = grouped.unstack(level='scheme') + +print(grouped) + +# Plotting with error bars for each scheme +ax = grouped['mean'].plot(kind='bar', yerr=grouped2['std'], capsize=5, rot=0, legend=False, edgecolor='black', lw=2, colormap=mycmap, width = 0.8, zorder = 100) + +minor_yticks = np.arange(0.01, 0.08, 0.02) + +ax.set_ylim(0, 0.08) +ax.set_yticks(np.arange(0, 0.081, 0.02)) +ax.set_yticks(minor_yticks, minor=True) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) +plt.grid(True) +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + +# ax.set_xlabel("Network bandwidth (Gbps)", fontsize=42) +# ax.set_ylabel("Throughput (Mops)", fontsize=42) + +# Legend style +# a = 
ax.legend().get_texts() +# label = [x.get_text() for x in a] +# l = ax.legend(labels= label[:5], loc='center left', ncols= 2, bbox_to_anchor=(0.05, 0.83), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=1, handleheight=1.3, markerscale=0.8, handletextpad=0.3) +# for t in l.get_texts(): t.set_position((0, 6)) + +plt.gcf().set_size_inches(10, 8.5) +plt.savefig("iops-5G.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp05-NetworkSpeed/iops.py b/plot/Exp05-NetworkSpeed/iops.py new file mode 100644 index 0000000..aa2abd4 --- /dev/null +++ b/plot/Exp05-NetworkSpeed/iops.py @@ -0,0 +1,77 @@ +import pandas as pd +import seaborn as sns +import matplotlib as mpl +import matplotlib.pyplot as plt +from matplotlib.colors import ListedColormap +import numpy as np +import os + +os.chdir(os.path.dirname(os.path.abspath(__file__)))  # data files live next to this script + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +colors =["#ee4000", "#5f9ea0", "#9acd32", "#ffa54f", "#a56cc1"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +# Convert the data to a DataFrame +df = pd.read_csv("data_new.csv", header=0) +df['ops'] /= 1000000 +print(df) + +df2 = pd.read_csv("data.csv", header=0)  # NOTE(review): data.csv does not appear to be added by this patch; confirm it exists next to this script +df2['ops'] /= 1000000 +grouped2 = df2.groupby(['scheme', 'netband'])['ops'].agg(['mean', 'std']) +grouped2 = grouped2.unstack(level='scheme') + +# Plotting +# plt.figure(figsize=(12, 8)) + +# Define the order of the Scheme categories +scheme_order = ['AC-Cache','EC-Cache', 'SP-Cache', 'Baseline', "Replication"] + +# Reorder the 'Scheme' column based on the defined order +df['scheme'] = pd.Categorical(df['scheme'], categories=scheme_order, ordered=True) + +# Group by 'scheme' and 'netband' and calculate mean and std of ops +grouped = df.groupby(['scheme', 'netband'])['ops'].agg(['mean', 'std']) + +# Reshape the DataFrame for
plotting +grouped = grouped.unstack(level='scheme') + +print(grouped) + +# Plotting with error bars for each scheme +ax = grouped['mean'].plot(kind='bar', yerr=grouped2['std'], capsize=5, rot=0, legend=False, edgecolor='black', lw=2, colormap=mycmap, width = 0.8, zorder = 100) + +minor_yticks = np.arange(0.15, 1.5, 0.3) + +ax.set_ylim(0, 1.8) +ax.set_yticks(np.arange(0, 1.8, 0.3)) +ax.set_yticks(minor_yticks, minor=True) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) +plt.grid(True) +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + + +# linewidth of axes, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel("Network bandwidth (Gbps)", fontsize=42) +ax.set_ylabel("Throughput (Mops)", fontsize=42) + +# Legend style +a = ax.legend().get_texts() +label = [x.get_text() for x in a] +l = ax.legend(labels= label[:5], loc='center left', ncols= 2, bbox_to_anchor=(0.05, 0.83), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=1, handleheight=1.3, markerscale=0.8, handletextpad=0.3) +for t in l.get_texts(): t.set_position((0, 6)) + +plt.gcf().set_size_inches(10, 8.5) +plt.savefig("iops.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp05-NetworkSpeed/tail.py b/plot/Exp05-NetworkSpeed/tail.py new file mode 100644 index 0000000..70f9262 --- /dev/null +++ b/plot/Exp05-NetworkSpeed/tail.py @@ -0,0 +1,79 @@ +import pandas as pd +import seaborn as sns +import matplotlib as mpl +import matplotlib.pyplot as plt +from matplotlib.colors import ListedColormap +import numpy as np +import os + +os.chdir(os.path.dirname(os.path.abspath(__file__)))  # data files live next to this script + + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1
+mpl.rcParams['pdf.fonttype'] = 42 + +colors =["#ee4000", "#5f9ea0", "#9acd32", "#ffa54f", "#a56cc1"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +# Convert the data to a DataFrame +df = pd.read_csv("data_new.csv", header=0) +df['p95'] *= 1000 + +df2 = pd.read_csv("data.csv", header=0) +df2['p95'] *= 1000 +grouped2 = df2.groupby(['scheme', 'netband'])['p95'].agg(['mean', 'std']) +grouped2 = grouped2.unstack(level='scheme') + +# Plotting +# plt.figure(figsize=(12, 8)) + +# Define the order of the Scheme categories +scheme_order = ['AC-Cache','EC-Cache', 'SP-Cache', 'Baseline', "Replication"] + +# Reorder the 'Scheme' column based on the defined order +df['scheme'] = pd.Categorical(df['scheme'], categories=scheme_order, ordered=True) + +# Group by 'Scheme' and 'NodeNum' and calculate mean and std thruput +grouped = df.groupby(['scheme', 'netband'])['p95'].agg(['mean', 'std']) + +# Reshape the DataFrame for plotting +grouped = grouped.unstack(level='scheme') + +# Plotting with error bars for each scheme +ax = grouped['mean'].plot(kind='bar', yerr=grouped2['std'], capsize=5, rot=0, legend=False, edgecolor='black', lw=2, colormap=mycmap, width = 0.8, zorder = 100) + + + +ax.set_ylim(0, 0.75) +ax.set_yticks(np.arange(0, 0.75, 0.1)) +minor_yticks = np.arange(0.05, 0.75, 0.1) +ax.set_yticks(minor_yticks, minor=True) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) +plt.grid(True) + +#axes.grid(which = 'minor', alp) + +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel("Network bandwidth (Gbps)", fontsize=42) +ax.set_ylabel("Tail latency (ms)", fontsize=42) + +# Legend style +a = ax.legend().get_texts() +label = [x.get_text() for x in a] +l = 
ax.legend(labels= label[:5],loc='center left', ncols= 2, bbox_to_anchor=(0.05, 0.83), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=1, handleheight=1.3, markerscale=0.8, handletextpad=0.3) +for t in l.get_texts(): t.set_position((0, 6)) + +plt.gcf().set_size_inches(10, 8.5) +plt.savefig("tail-latency.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp06-WindowSize/MarkerDefine.py b/plot/Exp06-WindowSize/MarkerDefine.py new file mode 100644 index 0000000..c7dd4cf --- /dev/null +++ b/plot/Exp06-WindowSize/MarkerDefine.py @@ -0,0 +1,101 @@ +import matplotlib.path as mpath +import matplotlib.pyplot as plt +import numpy as np + +text_style = dict(horizontalalignment='right', verticalalignment='center', + fontsize=12, fontfamily='monospace') +marker_style = dict(linestyle=':', color='0.8', markersize=10, + markerfacecolor="none", markeredgecolor="tab:red") + + +def format_axes(ax): + ax.margins(0.2) + ax.set_axis_off() + ax.invert_yaxis() + + +def split_list(a_list): + i_half = len(a_list) // 2 + return a_list[:i_half], a_list[i_half:] + +sverts = [ + (-1., 1.), + (1.,1.), + (1.,-1.), + (-1.,-1.), + (-1., 1.), +] +rverts = [ + (0,1), + (1,0), + (0,-1), + (-1,0), + (0,1), +] +qcodes = [ + mpath.Path.MOVETO, + mpath.Path.LINETO, + mpath.Path.LINETO, + mpath.Path.LINETO, + mpath.Path.CLOSEPOLY, +] + +cross = mpath.Path.unit_regular_asterisk(4) +circle = mpath.Path.unit_circle() + +square = mpath.Path(sverts, qcodes) +rhombus = mpath.Path(rverts, qcodes) + +ocross = mpath.Path( + vertices=np.concatenate([circle.vertices, cross.vertices[::-1, ...]]), + codes=np.concatenate([circle.codes, cross.codes])) + +csquare = mpath.Path( + vertices=np.concatenate([square.vertices, cross.vertices[::-1, ...]]), + codes=np.concatenate([square.codes, cross.codes])) + +crhombus = mpath.Path( + vertices=np.concatenate([rhombus.vertices, cross.vertices[::-1, ...]]), + codes=np.concatenate([rhombus.codes, 
cross.codes])) + +ploygon = mpath.Path.unit_regular_polygon(6) +#hexagram = mpath.Path.unit_regular_star(6) + +#print(ploygon.vertices, hexagram.vertices) + +tverts = [ + [-8.66025404e-01,5.00000000e-01], + [ 8.66025404e-01,5.00000000e-01], + [-1.83697020e-16,-1.00000000e+00], + [-8.66025404e-01,5.00000000e-01], + [-8.66025404e-01,-5.00000000e-01], + [ 8.66025404e-01, -5.00000000e-01], + [ 6.12323400e-17, 1.00000000e+00], + [-8.66025404e-01, -5.00000000e-01], +] + +tcodes = [ + mpath.Path.MOVETO, + mpath.Path.LINETO, + mpath.Path.LINETO, + mpath.Path.CLOSEPOLY, + mpath.Path.MOVETO, + mpath.Path.LINETO, + mpath.Path.LINETO, + mpath.Path.CLOSEPOLY, +] + +hexagram = mpath.Path(tverts, tcodes) + +markers = {'square':square, "rhombus": rhombus, 'csquare':csquare, 'crhombus':crhombus, 'ploygon': ploygon,'hexagram':hexagram} + +if __name__ == "__main__": + fig, ax = plt.subplots() + fig.suptitle('Path markers', fontsize=14) + fig.subplots_adjust(left=0.4) + for y, (name, marker) in enumerate(markers.items()): + ax.text(-0.5, y, name, **text_style) + ax.plot([y] * 3, marker=marker, **marker_style) + format_axes(ax) + + plt.show() \ No newline at end of file diff --git a/plot/Exp06-WindowSize/data.csv b/plot/Exp06-WindowSize/data.csv new file mode 100644 index 0000000..da7ff8b --- /dev/null +++ b/plot/Exp06-WindowSize/data.csv @@ -0,0 +1,21 @@ +Scheme,traceno,windowsize,time,ops,iops,size,thrupt,l95,l99,l9999 +FastCache,1,10,304.239,946782609,3415960,121635401376,428537,0.000143,205,0.000431 +FastCache,1,50,303.122,946782609,3411180,156076524132,549592,0.000144,185,0.000382 +FastCache,1,100,304.698,946782609,3454210,121465284699,430582,0.000142,189,0.000403 +FastCache,1,300,298.275,946782609,3411050,102652882047,362771,0.000143,199,0.000388 +FastCache,1,500,296.28,946782609,3481460,124618375550,448886,0.00014,177,0.00034 +FastCache,2,10,348.999,925150951,2818050,54076534361,161655,0.000131,152,0.000226 
+FastCache,2,50,348.556,925150951,2807750,61806746550,183944,0.000131,155,0.000261 +FastCache,2,100,347.687,925150951,2819140,59326642682,177538,0.000131,152,0.000252 +FastCache,2,300,348.618,925150951,2818130,59092910978,176779,0.000131,152,0.000215 +FastCache,2,500,348.555,925150951,2811570,58633868092,174967,0.000131,153,0.000304 +FastCache,23,10,425.284,717324319,1700000,45666370684,105863,0.000138,159,0.000258 +FastCache,23,50,418.996,717324319,1730000,45400000000,107004,0.000137,157,0.000214 +FastCache,23,100,419.999,717324319,1720000,45235017568,106217,0.000137,156,0.000246 +FastCache,23,300,418.37,717324319,1730500,45144449488,106349,0.000135,154,0.00025 +FastCache,23,500,419.93,717324319,1723630,44939157827,105444,0.000136,155,0.000214 +FastCache,25,10,435.436,1525189585,4150000,40105791489,106532,0.000133,179,0.00096 +FastCache,25,50,440.714,1525189585,4100000,11836224268,30985.4,0.000133,177,0.00124 +FastCache,25,100,420.273,1525189585,4250000,52953491389,143983,0.000132,170,0.000335 +FastCache,25,300,434.774,1525189585,4210000,41212908300,111159,0.000133,178,0.000334 +FastCache,25,500,475.752,1525189585,4040000,21102138566,54556.6,0.000136,235,0.000448 diff --git a/plot/Exp06-WindowSize/data_iops.csv b/plot/Exp06-WindowSize/data_iops.csv new file mode 100644 index 0000000..69f1e4d --- /dev/null +++ b/plot/Exp06-WindowSize/data_iops.csv @@ -0,0 +1,5 @@ +scheme,10,50,100,300,500 +cluster01,3415960,3411180,3454210,3411050,3481460 +cluster02,2818050,2807750,2819140,2818130,2811570 +cluster23,1700000,1730000,1720000,1730500,1723630 +cluster25,4151240,4096090,4247870,4213790,4037730 diff --git a/plot/Exp06-WindowSize/data_l95.csv b/plot/Exp06-WindowSize/data_l95.csv new file mode 100644 index 0000000..3964fc1 --- /dev/null +++ b/plot/Exp06-WindowSize/data_l95.csv @@ -0,0 +1,5 @@ +scheme,10,50,100,300,500 +cluster01,143,144,142,143,140 +cluster02,131,131,131,131,131 +cluster23,138,137,137,135,136 +cluster25,133,133,132,133,136 diff --git 
a/plot/Exp06-WindowSize/data_l99.csv b/plot/Exp06-WindowSize/data_l99.csv new file mode 100644 index 0000000..7b08d46 --- /dev/null +++ b/plot/Exp06-WindowSize/data_l99.csv @@ -0,0 +1,5 @@ +scheme,10,50,100,300,500 +cluster01,205,185,189,199,177 +cluster02,152,155,152,152,153 +cluster23,159,157,156,154,155 +cluster25,179,177,170,178,235 diff --git a/plot/Exp06-WindowSize/iops.py b/plot/Exp06-WindowSize/iops.py new file mode 100644 index 0000000..8b42bf5 --- /dev/null +++ b/plot/Exp06-WindowSize/iops.py @@ -0,0 +1,89 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.colors import ListedColormap +import seaborn as sns +import numpy as np +import pandas as pd +import os + +import MarkerDefine + +#sns.set() +#sns.axes_style('white') +colors =["#EB455F", "#F2921D", "#068FFF", "#609966"] +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +df = pd.read_csv("data_iops.csv",header=0) +print(df) +df = df.T +#df = df.dropna() +df[1:] = df[1:] / 1000000 +df.columns = df.iloc[0] +df = df.drop(index="scheme") +#df2 = df2.sort_values(by="scheme") + +markers = ["h", "s", MarkerDefine.hexagram, MarkerDefine.rhombus] +dashes = [(1, 0, 1, 0), (2, 2, 2, 2), (5, 2, 5, 2), (5, 1, 2, 1)] + +# df_error=np.array([[[0,0,3333.33333333326,3333.33333333326,0], [0,0,6666.66666666674,6666.66666666674,0]], +# [[430.666666666628,430.666666666628,430.666666666628,430.666666666628,430.666666666628], [845.333333333372,845.333333333372,845.333333333372,845.333333333372,845.333333333372]], +# [[639.666666666628,639.666666666628,639.666666666628,639.666666666628,639.666666666628], [735.333333333372,735.333333333372,735.333333333372,735.333333333372,735.333333333372]], +# [[971.333333333372,971.333333333372,971.333333333372,971.333333333372,971.333333333372], 
[740.666666666628,740.666666666628,740.666666666628,740.666666666628,740.666666666628]]]) + + +fig = plt.figure() +# ax = df.plot(yerr = df_error/1000000, fmt='.', color="black", elinewidth=2, ecolor='black', capsize=5, legend = False, zorder=150) +#reset color cycle so that the marker colors match +#ax.set_prop_cycle(None) +#plot the markers +ax = df.plot(kind='line', rot=0, lw=6, colormap=mycmap, markerfacecolor='none', markersize = 30, mew = 5) +plt.gcf().set_size_inches(10, 8) + +for i, line in enumerate(ax.get_lines()): + line.set_marker(markers[i%4]) + #line.set_dashes(dashes[i%4]) + + +# Hatches in bar plot +patterns =('-', '+', 'x','/','//','O','o','\\','\\\\') +patterns = ['//', '\\\\', "x"] +hatches = [p for p in patterns for i in range(len(df))] +bars = ax.patches + +#for bar, hatch in zip(bars, markers): +# bar.set_hatch(hatch) + +ax.set_ylim(0, 6) +ax.set_yticks(np.arange(0, 5.1, 1)) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) + +minor_yticks = np.arange(0.5, 5, 1) +ax.set_yticks(minor_yticks, minor=True) + +plt.grid(True) +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel("Window size", fontsize=42) +ax.set_ylabel("Throughput (Mops)", fontsize=38) + +# Legend style +a = ax.legend().get_texts() +label = [x.get_text() for x in a] +l = ax.legend(labels= label[:4], loc='center left', ncols= 2, bbox_to_anchor=(0.09, 0.90), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=0.8, handleheight=1, markerscale=0.8, handletextpad=0.3) +for t in l.get_texts(): t.set_position((0, 2)) + +plt.gcf().set_size_inches(10, 8) +plt.savefig("iops.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git 
a/plot/Exp06-WindowSize/tail.py b/plot/Exp06-WindowSize/tail.py new file mode 100644 index 0000000..9d08985 --- /dev/null +++ b/plot/Exp06-WindowSize/tail.py @@ -0,0 +1,93 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.colors import ListedColormap +import seaborn as sns +import numpy as np +import pandas as pd +import os + +import MarkerDefine + +os.chdir(os.path.dirname(os.path.abspath(__file__)))  # data_l95.csv lives next to this script + +#sns.set() +#sns.axes_style('white') +# colors =["#ee4000", "#5f9ea0", "#9acd32", "#ffa54f"] +colors =["#EB455F", "#F2921D", "#068FFF", "#609966"] + +mycmap = ListedColormap(sns.color_palette(colors).as_hex()) + +plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica'] +mpl.rcParams['hatch.linewidth'] = 2 +mpl.rcParams["legend.markerscale"] = 1 +mpl.rcParams['pdf.fonttype'] = 42 + +df = pd.read_csv("data_l95.csv",header=0) +print(df) +df = df.T +# df[1:] = df[1:] * 1000 +df.columns = df.iloc[0] +df = df.drop(index="scheme") +#df2 = df2.sort_values(by="scheme") + +markers = ["h", "s", MarkerDefine.hexagram, MarkerDefine.rhombus] +dashes = [(1, 0, 1, 0), (2, 2, 2, 2), (5, 2, 5, 2), (5, 1, 2, 1)] + +# df_error=np.array([[[1.33E-06,6.67E-07,6.67E-07,6.67E-07,3.33E-07], [6.67E-07,1.33E-06,3.33E-07,3.33E-07,6.67E-07]], +# [[6.67E-07,6.67E-07,6.67E-07,6.67E-07,6.67E-07], [3.33E-07,3.33E-07,3.33E-07,3.33E-07,3.33E-07]], +# [[3.33E-07,3.33E-07,3.33E-07,3.33E-07,3.33E-07], [6.67E-07,6.67E-07,6.67E-07,6.67E-07,6.67E-07]], +# [[1.67E-06,1.67E-06,1.67E-06,1.67E-06,1.67E-06], [1.33E-06,1.33E-06,1.33E-06,1.33E-06,1.33E-06]]]) + +fig = plt.figure() +# ax = df.plot(yerr = df_error/1000000, fmt='.', color="black", elinewidth=2, ecolor='black', capsize=5, legend = False, zorder=150) +#reset color cycle so that the marker colors match +# ax.set_prop_cycle(None) +#plot the markers +ax = df.plot(kind='line', rot=0, lw=6, colormap=mycmap, markerfacecolor='none', markersize = 30, mew = 5) +plt.gcf().set_size_inches(10, 8) + +for i, line in enumerate(ax.get_lines()): + #print(i) +
#if(11< i < 16): + line.set_marker(markers[i%4]) + # line.set_dashes(dashes[i%4]) + + +# Hatches in bar plot +patterns =('-', '+', 'x','/','//','O','o','\\','\\\\') +patterns = ['//', '\\\\', "x"] +hatches = [p for p in patterns for i in range(len(df))] +bars = ax.patches + +#for bar, hatch in zip(bars, markers): +# bar.set_hatch(hatch) + +ax.set_ylim(125, 150) +ax.set_yticks(np.arange(125, 151, 5)) + +minor_yticks = np.arange(130, 150, 10) +ax.set_yticks(minor_yticks, minor=True) + +plt.grid(True) +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel("Window size", fontsize=42) +ax.set_ylabel(r"Tail latency ($\mu$s)", fontsize=42) + +# Legend style +a = ax.legend().get_texts() +label = [x.get_text() for x in a] +l = ax.legend(labels= label[:4], loc='center left', ncols= 2, bbox_to_anchor=(0.1, 0.90), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=0.8, handleheight=1, markerscale=0.8, handletextpad=0.3) +for t in l.get_texts(): t.set_position((0, 2)) + +plt.gcf().set_size_inches(10, 8) +plt.savefig("tail-latency.pdf", bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/plot/Exp07-CorrelationThreshold/MarkerDefine.py b/plot/Exp07-CorrelationThreshold/MarkerDefine.py new file mode 100644 index 0000000..c7dd4cf --- /dev/null +++ b/plot/Exp07-CorrelationThreshold/MarkerDefine.py @@ -0,0 +1,101 @@ +import matplotlib.path as mpath +import matplotlib.pyplot as plt +import numpy as np + +text_style = dict(horizontalalignment='right', verticalalignment='center', + fontsize=12, fontfamily='monospace') +marker_style = dict(linestyle=':', color='0.8', 
def format_axes(ax):
    """Prepare ``ax`` for the marker showcase: pad it, hide the frame, flip y."""
    setup_steps = (
        (ax.margins, (0.2,)),
        (ax.set_axis_off, ()),
        (ax.invert_yaxis, ()),
    )
    for step, arguments in setup_steps:
        step(*arguments)


def split_list(a_list):
    """Return ``(front, back)`` halves of ``a_list``.

    When the length is odd the extra element lands in ``back``.
    """
    midpoint = len(a_list) >> 1
    front, back = a_list[:midpoint], a_list[midpoint:]
    return front, back
plt.subplots() + fig.suptitle('Path markers', fontsize=14) + fig.subplots_adjust(left=0.4) + for y, (name, marker) in enumerate(markers.items()): + ax.text(-0.5, y, name, **text_style) + ax.plot([y] * 3, marker=marker, **marker_style) + format_axes(ax) + + plt.show() \ No newline at end of file diff --git a/plot/Exp07-CorrelationThreshold/data.csv b/plot/Exp07-CorrelationThreshold/data.csv new file mode 100644 index 0000000..f667479 --- /dev/null +++ b/plot/Exp07-CorrelationThreshold/data.csv @@ -0,0 +1,16 @@ +traceno,thresohold,time,ops,iops,size,thruput,l95,l99,l9999 +1,10,148.7975,946782609,7.36E+06,118499022129,890962.5,118.5,0.000171,0.0002995 +1,50,143.0125,946782609,7.59E+06,76647467474,602062,119,0.0001505,0.0002575 +1,100,142.4025,946782609,7.59E+06,78310195000,615867,118.5,0.0001555,0.0002935 +1,300,130.0335,946782609,8.51E+06,92461303112.5,857966.5,113,0.000158,0.000337 +1,500,124.9915,946782609,8.99E+06,64896323569,604125,111,0.0001525,0.000284 +2,10,261.587,925150951,3.70E+06,65952081712.5,256975.5,127.5,0.0001575,0.0002945 +2,50,263.019,925150951,3.69E+06,41869089058.5,162749,128,0.0001605,0.00029 +2,100,262.7185,925150951,3.70E+06,66118203651,257590.5,127.5,0.0001555,0.0002785 +2,300,243.1935,925150951,3.98E+06,41419238069,173864.5,126.5,0.0001595,0.000282 +2,500,242.7265,925150951,4.00E+06,40145065982,169351,125,0.000156,0.0002865 +25,10,772.5975,1525189585,2.65E+06,65508504990,111362.5,132.5,0.000159,0.0002945 +25,50,772.839333333333,1525189585,2.65E+06,60403178453.6667,102966.666666667,132.333333333333,0.000157,0.000286666666666667 +25,100,769.648,1525189585,2.66E+06,55283260956,94466.2666666667,132.333333333333,0.000156333333333333,0.000279666666666667 +25,300,763.585666666667,1525189585,2.66E+06,65484220977.6667,111868.333333333,132,0.000156666666666667,0.000277333333333333 +25,500,773.126666666667,1525189585,2.63E+06,60241202502,101853.666666667,133.666666666667,0.000167333333333333,0.000360666666666667 diff --git 
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import ListedColormap
import seaborn as sns
import numpy as np
import pandas as pd
import os

import MarkerDefine

# Plot the throughput series stored in data_iops.csv (one CSV row per trace)
# against the correlation threshold and save the figure as iops.pdf.

colors = ["#EB455F", "#F2921D", "#068FFF", "#609966"]
mycmap = ListedColormap(sns.color_palette(colors).as_hex())

plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica']
mpl.rcParams['hatch.linewidth'] = 2
mpl.rcParams["legend.markerscale"] = 1
mpl.rcParams['pdf.fonttype'] = 42

# Transpose so that every CSV row becomes one plotted line.  Row 0 of the
# transposed frame holds the trace names; the remaining rows are operation
# counts, scaled here to millions to match the "Mops" axis label.
df = pd.read_csv("data_iops.csv", header=0)
print(df)
df = df.T
df[1:] = df[1:] / 1000000
df.columns = df.iloc[0]
df = df.drop(index="traceno")

markers = ["h", "s", MarkerDefine.hexagram, MarkerDefine.rhombus]

# Draw on an explicitly created axes so that no empty extra figure is left
# behind and the figure size is set exactly once.
fig, ax = plt.subplots(figsize=(10, 8))
df.plot(ax=ax, kind='line', rot=0, lw=6, colormap=mycmap,
        markerfacecolor='none', markersize=30, mew=5)

# Give every line its own marker shape, cycling through the marker list.
for i, line in enumerate(ax.get_lines()):
    line.set_marker(markers[i % len(markers)])

ax.set_ylim(0, 11)
ax.set_yticks(np.arange(0, 11.1, 2))
ax.set_yticks(np.arange(1, 11, 2), minor=True)

plt.grid(True)
plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1)
plt.grid(which='minor', color="b", linestyle="-", linewidth=0.1, alpha=0.1)

# Line width of the axes frame and font size of the tick labels.
plt.setp(ax.spines.values(), linewidth=2.5)
ax.tick_params(width=2, labelsize=34)

ax.set_xlabel(r"Correlation threshold", fontsize=42)
ax.set_ylabel("Throughput (Mops)", fontsize=42)

# Build a default legend only to read back the automatic labels, then replace
# it with the styled two-column legend limited to the first four entries.
auto_labels = [text.get_text() for text in ax.legend().get_texts()]
legend = ax.legend(labels=auto_labels[:4], loc='center left', ncols=2,
                   bbox_to_anchor=(0.05, 0.9), fontsize=36, reverse=False,
                   labelspacing=0.3, columnspacing=0.5, frameon=False,
                   handlelength=0.8, handleheight=1, markerscale=0.8,
                   handletextpad=0.3)
for text in legend.get_texts():
    text.set_position((0, 2))

plt.savefig("iops.pdf", bbox_inches='tight', pad_inches=0)
[[1.67E-06,1.67E-06,1.67E-06,1.67E-06,1.67E-06], [1.33E-06,1.33E-06,1.33E-06,1.33E-06,1.33E-06]]]) + +fig = plt.figure() +# ax = df.plot(yerr = df_error/1000000, fmt='.', color="black", elinewidth=2, ecolor='black', capsize=5, legend = False, zorder=150) +#reset color cycle so that the marker colors match +# ax.set_prop_cycle(None) +#plot the markers +ax = df.plot(kind='line', rot=0, lw=6, colormap=mycmap, markerfacecolor='none', markersize = 30, mew = 5) +plt.gcf().set_size_inches(10, 8) + +for i, line in enumerate(ax.get_lines()): + #print(i) + # if(11< i < 16): + line.set_marker(markers[i%4]) + # line.set_dashes(dashes[i%4]) + +# Hatches in bar plot +patterns =('-', '+', 'x','/','//','O','o','\\','\\\\') +patterns = ['//', '\\\\', "x"] +hatches = [p for p in patterns for i in range(len(df))] +bars = ax.patches + +#for bar, hatch in zip(bars, markers): +# bar.set_hatch(hatch) + +ax.set_ylim(100, 150) +# ax.set_yticks([0, 0.05, 0.1, 0.15, 0.2]) + +minor_yticks = np.arange(100, 151, 10) +ax.set_yticks(minor_yticks, minor=True) + +plt.grid(True) +plt.grid(color="b", linestyle="-", linewidth=0.1, alpha=0.1) +plt.grid(which = 'minor',color="b", linestyle="-", linewidth=0.1, alpha=0.1) + +# Hide the top and right axis +# for spine in ['top', 'right']: +# ax.spines[spine].set_visible(False) + +# linewidth of axises, and the fontsize of ticks +plt.setp(ax.spines.values(), linewidth=2.5) +ax.tick_params(width=2, labelsize = 34) + +ax.set_xlabel(r"Correlation threshold", fontsize=42) +ax.set_ylabel(r"Tail latency ($\mu$s)", fontsize=42) + +# Legend style +a = ax.legend().get_texts() +label = [x.get_text() for x in a] +l = ax.legend(labels= label[:4], loc='center left', ncols= 2, bbox_to_anchor=(0.05, 0.9), fontsize=36, reverse=False, labelspacing=0.3, columnspacing=0.5, frameon=False, handlelength=0.8, handleheight=1, markerscale=0.8, handletextpad=0.3) +for t in l.get_texts(): t.set_position((0, 2)) + +plt.gcf().set_size_inches(10, 8.3) +plt.savefig("tail-latency.pdf", 
bbox_inches = 'tight', pad_inches = 0) \ No newline at end of file diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..94950c3 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,7 @@ +*.log +*.cmake +*.d +*.o +*.make +*.xml +*.cbp diff --git a/src/CMSketch.cpp b/src/CMSketch.cpp new file mode 100644 index 0000000..30c4a11 --- /dev/null +++ b/src/CMSketch.cpp @@ -0,0 +1,395 @@ +// +// Created by Alfred on 2022/7/26. +// + +#include "CMSketch.h" +#include "toolbox.h" +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +CMSketch::CMSketch(const long long& n, const int& deviation, const double& confidence) { + this->n = n; + long long number = n * n; + cout << "deviation = " << deviation << endl; + this->epsilon = (long double) deviation / number; + cout << "epsilon = " << epsilon << endl; + this->m = size_t(ceil(M_E / this->epsilon)); + this->delta = confidence; + this->k = int(ceil(log(1.0/confidence))); + + cout << "CMSketch k =" << this->k << endl; + cout << "CMSketch m =" << this->m << endl; + + this->a = new int[this->k]; + this->b = new int[this->k]; + this->c = new int[this->k]; + + this->bucket = vector >( this->k); + for(int i = 0; i < this->k; i ++) { + this->bucket[i] = vector(this->m); + this->bucket[i].resize(this->m); + } + + static default_random_engine e{random_device{}()}; + static uniform_int_distribution u; + + for(int i = 0; i < this->k; i ++) { + this->a[i] = u(e, decltype(u)::param_type(0, INT32_MAX)); + this->b[i] = u(e, decltype(u)::param_type(0, INT32_MAX)); + this->c[i] = u(e, decltype(u)::param_type(0, INT32_MAX)); + } +} + +CMSketch::CMSketch(CMSketch &cms) { + this->epsilon = cms.epsilon; + this->m = cms.m; + this->delta = cms.delta; + this->k = cms.k; + + this->a = new int[this->k]; + this->b = new int[this->k]; + this->c = new int[this->k]; + + this->bucket = vector >( this->k); + for(int i = 0; i < this->k; i ++) { + this->bucket[i] = vector(this->m); + 
this->bucket[i].resize(this->m); + } + + for(int i = 0; i < this->k; i ++) { + this->a[i] = cms.a[i]; + this->b[i] = cms.b[i]; + this->c[i] = cms.c[i]; + } +} + +int CMSketch::hash(const int& a, const int& b, int& i) { + unsigned int first, second; + if(a < b) { + first = b; + second = a; + } else { + first = a; + second = b; + } + int value = (this->a[i] * first + this->b[i] * second) % INT32_MAX + this->c[i]; + return value % this->m; +} + +void CMSketch::add(const int& a, const int& b) { + int loc; + for(int i = 0; i < this->k; i ++) { + loc = this->hash(a, b, i); + this->bucket[i][loc] ++; + } +} + +int CMSketch::estimateFrequency(const int& a, const int& b) { + int minimum = INT32_MAX; + int loc; + for(int i = 0; i < this->k; i ++) { + loc = this->hash(a, b, i); + minimum = min(minimum, this->bucket[i][loc]); + } + return minimum; +} + +int CMSketch::find(const int& a, const int& b) { + return estimateFrequency(a, b); +} + +CMSketch::~CMSketch() { + for(int i = 0; i < this->k; i ++) { + this->bucket[i].clear(); + } + //delete [] this->a; + //delete [] this->b; + //delete [] this->c; +} + +void CMSketch::formalized() { + for(int i = 0; i < this->k; i ++) { + int tmp = *min_element(bucket[i].begin(), bucket[i].end()); + for(int j = 0; j < this->m; j ++) { + bucket[i][j] = (bucket[i][j] - tmp) * (bucket[i][j] - tmp); + } + } +} + +bool CMSketch::cutGraph() { + vector> correlation_group; + bool visited[n]; + bool flag = false; + timeit tm; + + + int max_connect = 0; + int max_item = INT32_MAX; + //vector cgroup; + + //BFS + int head; + int connect_count; + for(int i = 0; i < n; i ++) { + visited[i] = false; + } + //sq.push(max_item); + //visited[max_item] = true; + + size_t all_connection = 0; + /*tm.start(); + + for(int i = 0; i < n; i ++) { + for(int j = i + 1; j < n; j ++) { + all_connection += this->find(i, j); + } + } + tm.end(); + cout << "Sum using " << tm.passedtime() << endl;*/ + all_connection = 62332412114572; + cout << "Sum connection = " << 
all_connection << endl; + + cout << "Graph cutting initialized." << endl; + + while(true) { + int count = 0; + + for(int i = 0; i < n; i ++) { + //flag = flag && visited[i]; + if(!visited[i]) flag = true; + } + + if(!flag) break; + + vector cgroup; + queue sq; + max_item = INT32_MAX; + max_connect = 0; + // get the maximum frequent initial node + tm.start(); + for(int i = 0; i < n; i ++) { + if(visited[i]) continue; + int num = 0; + for(int j = 0; j < n; j ++) { + if(i == j || visited[j]) continue; + //if(i > j) { + num += this->find(i, j); + //} else { + // num += this->find(j, i); + //} + } + if (num > max_connect) { + tm.end(); + max_connect = num; + max_item = i; + cout << "Now max_item = " << i << endl; + cout << "Now max_connection = " << max_connect << endl; + cout << "Now time elapse = " << tm.passedtime() << endl; + count ++; + //if(count == 10) break; + } + } + cgroup.push_back(max_item); + + sq.push(max_item); + visited[max_item] = true; + + cout << "initial node is " << max_item << ", frequency = " << max_connect << endl; + head = sq.front(); + sq.pop(); + count = 0; + for(int loci = 0; loci < n; loci ++) { + if(count == 0) tm.start(); + if(!visited[loci] && this->find(head, loci) != 0 ) { + vector tmp = cgroup; + //tmp.push_back(loci); + CMSketch::group_insert(&tmp, loci); + connect_count = this->getConnection(tmp, n, visited); + if(connect_count <= max_connect) { + cout << "loci = " << loci << endl; + max_connect = connect_count; + //int cinc = this->getInnerConnection(tmp); + vector comp = *CMSketch::complement(tmp, n); + //int cing = this->getInnerConnection(comp); + size_t cing = all_connection - connect_count - this->getInnerConnection(tmp); + //if(cinc < max_connect || cing < max_connect || comp.size() <= 2) { + if( cing < max_connect || comp.size() <= 2) { + //continue; + for(auto &pr : comp){ + CMSketch::group_insert(&cgroup, pr); + visited[pr] = true; + } + } + CMSketch::group_insert(&cgroup, loci); + + visited[loci] = true; + sq.push(loci); + 
auto *single_node = this->get_single_node(comp, visited); + if(!single_node->empty()) { + for(int & value : *single_node) { + CMSketch::group_insert(&cgroup, value); + visited[value] = true; + } + all_connection = all_connection - this->getInnerConnection(cgroup) - connect_count; + } else { + all_connection = cing; + } + } + } + count ++; + if(count == 0 || count % 1000 == 0) { + tm.end(); + cout << "Time elapse : " << tm.passedtime() << endl; + } + } + cout << "Group is " << endl; + for(auto &pr : cgroup) { + cout << pr << "\t"; + } + cout << endl; + correlation_group.push_back(cgroup); + flag = false; + } + cout << "Sum groups = " << correlation_group.size() << endl; + + return false; +} + +/* + * Private + */ + +int CMSketch::getConnection(const vector &group, const int& n, bool *visited) { + int num = 0; + for(int value: group) { + for(int i = 0; i < n; i ++) { + if(visited[i]) continue; + bool it = ::binary_search(group.begin(), group.end(), i); + if(it) continue; + num += this->find(value, i); + } + } + return num; +} + +int CMSketch::getInnerConnection(const vector &group) { + int num = 0; + for(int pri : group) { + for(int prj : group) { + if(prj == pri) continue; + num += this->estimateFrequency(pri, prj); + } + } + return num; +} + +// sorted: selective insertion +void CMSketch::group_insert(std::vector *group, const int& value) { + for(auto pr = group->begin(); pr != group->end(); pr ++) { + if(*pr >= value) { + group->emplace(pr, value); + return; + } + } + group->emplace(group->end(), value); +} + +std::vector* CMSketch::complement(std::vector group, const int& n) { + auto* rst = new vector(); + auto pr = group.begin(); + for(int i = 0; i < n; i ++) { + if(pr != group.end() && i != *pr) { + rst->push_back(i); + }else if(pr != group.end()) { + pr ++; + } else { + rst->push_back(i); + } + } + return rst; +} + +vector* CMSketch::get_single_node(std::vector group, const bool *visited) { + auto* rst = new vector(); + for(auto pr = group.begin(); pr != 
group.end(); pr ++) { + int num = 0; + if(visited[*pr]) continue; + for(auto prj = group.begin(); prj != group.end(); prj ++) { + if(pr == prj) continue; + if(visited[*prj]) continue; + num += this->estimateFrequency(*pr, *prj); + } + if(num == 0) rst->push_back(*pr); + } + return rst; +} + +void CMSketch::write2File(const std::string& filename) { + + ofstream fout; + fout.open(filename,std::ofstream::out); + + if(!fout.is_open()) { + cout << "Error opening files!" << endl; + exit(-1); + } + + fout << this->k << "\t" << this->m << endl; + + for(int i = 0; i < this->k; i ++) { + fout << a[i] << "\t" + << b[i] << "\t" + << c[i] << endl; + } + + for(auto &pri : bucket) { + for(int & prj : pri) { + fout << prj << " "; + } + fout << endl; + } + fout.close(); +} + +void CMSketch::load(const string &filename) { + ifstream fin(filename); + if(!fin.is_open()) { + cout << "Error opening files!" << endl; + exit(-1); + } + + int tmp_k; + size_t tmp_m; + int tmp_a, tmp_b, tmp_c; + fin >> tmp_k >> tmp_m; + + if(tmp_k != this->k || tmp_m != this->m) { + cout << "Parameter Error" << endl; + exit(-1); + } + + for(int i = 0; i < this->k; i ++) { + fin >> tmp_a >> tmp_b >> tmp_c; + a[i] = tmp_a; + b[i] = tmp_b; + c[i] = tmp_c; + } + + int tmp; + for(auto & pri : bucket) { + for(int & prj : pri) { + fin >> tmp; + prj = tmp; + } + } + fin.close(); + + cout << "CMSketch load complete." << endl; +} \ No newline at end of file diff --git a/src/CMSketch.h b/src/CMSketch.h new file mode 100644 index 0000000..d3d577e --- /dev/null +++ b/src/CMSketch.h @@ -0,0 +1,49 @@ +// +// Created by Alfred on 2022/7/26. 
/**
 * Sketch of pair frequencies: k rows of m integer counters plus one set of
 * hash coefficients (a, b, c) per row.
 *
 * All scalar members and the coefficient pointers carry default initializers
 * so that a default-constructed object has a defined (empty) state instead of
 * indeterminate values.
 */
class CMSketch {
public:
    size_t m = 0;              // counters per row
    int k = 0;                 // number of rows / hash functions
    long double epsilon = 0;   // error bound derived by the main constructor
    double delta = 0;          // confidence argument of the main constructor
    std::vector<std::vector<int> > bucket;
    long long n = 0;           // value of the constructor argument n

    CMSketch(const long long& n, const int& deviation, const double& confidence = 0.01);
    // NOTE(review): takes a non-const reference, so const objects and
    // temporaries cannot be used as the source.
    CMSketch(CMSketch& cms);
    CMSketch() {}
    void add(const int& a, const int& b);
    int estimateFrequency(const int& a, const int& b);
    void formalized();
    int find(const int& a, const int& b);
    bool cutGraph();
    void write2File(const std::string& filename);
    void load(const std::string& filename);
    ~CMSketch();

private:
    // Per-row hash coefficients; each array has k entries once allocated.
    int *a = nullptr;
    int *b = nullptr;
    int *c = nullptr;
    int hash(const int& a, const int& b, int& i);

    static void group_insert(std::vector<int>* group, const int& value);
    int getConnection(const std::vector<int> &group, const int& n, bool *visited);
    int getInnerConnection(const std::vector<int> &group);
    static std::vector<int>* complement(std::vector<int> group, const int& n);
    std::vector<int>* get_single_node(std::vector<int> group, const bool visited[]);
};
toolbox.h twitter_trace.h config.h eccache.cpp eccache.h ErasureCode/ErasureCode.cpp ErasureCode/ErasureCode.h) +#add_executable(CorAna main.cpp ErasureCode/ErasureCode.cpp ErasureCode/ErasureCode.h toolbox.h eccache.h eccache.cpp replicas.cpp replicas.h MemcachedClient.cpp MemcachedClient.h CMSketch.cpp CMSketch.h OurScheme.cpp OurScheme.h) +add_executable(CorAna main.cpp + ${CORRELATION} + ${OUR_SCHEME} + ${SCHEMES} + ${OTHERS} + ) +add_executable(correlation main_correlation.cpp + ${CORRELATION} + ${OUR_SCHEME} + ${OTHERS} +) + +target_link_libraries(CorAna isal memcached jsoncpp pthread python3.10 fmt) +target_include_directories(CorAna + PRIVATE + /usr/include/python3.10 +) + +target_link_libraries(correlation isal memcached jsoncpp pthread python3.10 fmt) +target_include_directories(correlation + PRIVATE + /usr/include/python3.10 +) diff --git a/src/ErasureCode/ErasureCode.cpp b/src/ErasureCode/ErasureCode.cpp new file mode 100644 index 0000000..6df008c --- /dev/null +++ b/src/ErasureCode/ErasureCode.cpp @@ -0,0 +1,233 @@ +// +// Created by Alfred on 2022/9/12. +// + +#include "ErasureCode.h" +#include +#include +#include + +prealloc_encode::prealloc_encode(int n, int k) : encode_matrix(n * k), table(32 * k * (n - k)) +{ +} + +prealloc_recover::prealloc_recover(int n, int k, size_t errors_count, size_t len) + : errors_matrix(n * k), invert_matrix(n * k), decode_matrix(n * k), table(32 * k * (n - k)) +{ + decoding = (uint8_t**)malloc(errors_count * sizeof(uint8_t*)); + for (int i = 0; i < errors_count; ++i) + { + decoding[i] = (uint8_t*)malloc(len * sizeof(uint8_t)); + } + //printf("decoding len = %d\n", len); +} + +ErasureCode::ErasureCode(const int& n, const int& k) + :k(k),n(n) +{ + /*//this->preallocEncode = new prealloc_encode(this->n, this->k); + size_t line_size = source.length() % this->k == 0 ? 
source.length() / this->k : size_t(source.length() / this->k)+1; + //printf("linesize = %d\n", line_size); + this->len = line_size * k; + //printf("len = %d\n", this->len); + this->filling_len = this->len - source.length(); + + this->source = ErasureCode::string2array(source, k, n);*/ + //this->set_source(source); +} + + +uint8_t** ErasureCode::string2array(const string& source, int* filling_len) { + size_t line_size; + string s; + uint8_t** data; + if(source.length() % k == 0) { + line_size = source.length() / k; + s = source; + *filling_len = 0; + //cout << "source len = " << source.length() <source[i][j]); + } + cout << endl; + }*/ +} + +void ErasureCode::recover_data(const vector &errors, const int & len, uint8_t **dest, uint8_t **erroneous_data, prealloc_encode& preallocEncode, prealloc_recover &preallocRecover) { + //cout << "recover:1111111111111111111111111111111111" << endl; + //this->preallocRecover = new prealloc_recover(this->n, this->k, errors.size(), this->len/this->k); + //cout << "recover:2222222222222222222222222222222222" << endl; + for (int i = 0, r = 0; i < this->k; ++i, ++r) { + while (find(errors.cbegin(), errors.cend(), r) != errors.cend()) + ++r; + for (int j = 0; j < this->k; j++) { + preallocRecover.errors_matrix[k * i + j] = preallocEncode.encode_matrix[this->k * r + j]; + } + } + + gf_invert_matrix(preallocRecover.errors_matrix.data(), preallocRecover.invert_matrix.data(), this->k); + + for (int e = 0; e < errors.size(); ++e) { + int idx = errors[e]; + + // We lost one of the buffers containing the data + if (idx < this->k) { + for (int j = 0; j < this->k; j++) { + preallocRecover.decode_matrix[this->k * e + j] = preallocRecover.invert_matrix[this->k * idx + j]; + } + } else { + // We lost one of the buffer containing the error correction codes + for (int i = 0; i < this->k; i++) { + uint8_t s = 0; + for (int j = 0; j < this->k; j++) + s ^= gf_mul(preallocRecover.invert_matrix[j * this->k + i], preallocEncode.encode_matrix[this->k * idx 
+ j]); + preallocRecover.decode_matrix[this->k * e + i] = s; + } + } + } + + ec_init_tables(this->k, this->n - this->k, preallocRecover.decode_matrix.data(), preallocRecover.table.data()); + ec_encode_data(len, this->k, (this->n - this->k), preallocRecover.table.data(), erroneous_data, preallocRecover.decoding); + + + bool success = false; + + for (int i = 0; i < errors.size(); ++i) { + int ret = memcmp(dest[errors[i]], preallocRecover.decoding[i], len); + success = (ret == 0); + assert((success == true)); + //cout << i << " " <<"3:555555555555555555555555555555555555" << endl; + } +} + + +string ErasureCode::get_source(uint8_t **source, const int& len) { + //int len; + //GET_ARRAY_LEN(source[0], len); + //cout << "len = " << len << endl; + char tmp[len + 1]; + string rst; + for(int i = 0; i < k; i ++) { + for(size_t j = 0; j < len; j ++) { + tmp[j] = source[i][j]; + } + tmp[len] = '\0'; + rst += tmp; + //cout << strlen(tmp) << endl; + } + //rst = rst.substr(0, this->len - this->filling_len); + return rst; +} + +string ErasureCode::get_line(uint8_t **source, const int& len, const int& line_num) +{ + //assert(line_num >= n); + char tmp[len + 1]; + string rst; + + for(int i = 0; i < len; i ++) { + tmp[i] = source[line_num][i]; + } + tmp[len] = '\0'; + rst = tmp; + + return rst; +} + +/*string* ErasureCode::get_parity() { + char tmp[this->len / k + 1]; + auto *rst = new string[this->n - this->k]; + + cout << "444444444444444444444" << endl; + for(int i = this->k; i < this->n; i ++) { + for(size_t j = 0; j < this->len / this->k; j ++) { + tmp[i] = this->source[i][j]; + cout << "test tmp[i] = " << tmp[i] << endl; + } + tmp[this->len / k] = '\0'; + cout << "55555555555555555" << endl; + cout << "tmp = " << tmp << endl; + cout << "77777777777777777" << endl; + rst[i] = tmp; + cout << "666666666666666666" << endl; + } + + return rst; +}*/ + +ErasureCode::~ErasureCode() { + //delete this->preallocEncode; + //delete this->preallocRecover; + /*for(int i = 0 ; i < this->k; i 
++) { + delete [] this->source[i]; + } + delete [] this->source;*/ +} + +uint8_t **ErasureCode::create_erroneous_data(uint8_t **source_data, std::vector errors) { + uint8_t** erroneous_data; + erroneous_data = (uint8_t**)malloc(this->k * sizeof(uint8_t*)); + + for (int i = 0, r = 0; i < this->k; ++i, ++r) + { + while (std::find(errors.cbegin(), errors.cend(), r) != errors.cend()) + ++r; + for (int j = 0; j < this->k; j++) + { + erroneous_data[i] = source_data[r]; + } + } + return erroneous_data; +} + +int ErasureCode::getK() const { + return k; +} + +int ErasureCode::getN() const { + return n; +} \ No newline at end of file diff --git a/src/ErasureCode/ErasureCode.h b/src/ErasureCode/ErasureCode.h new file mode 100644 index 0000000..5f2f9e1 --- /dev/null +++ b/src/ErasureCode/ErasureCode.h @@ -0,0 +1,61 @@ +// +// Created by Alfred on 2022/9/12. +// + +#ifndef CORANA_ERASURECODE_H +#define CORANA_ERASURECODE_H + +#include +#include +#include +using namespace std; + +#define GET_ARRAY_LEN(arr,len) {len = (sizeof(arr) / sizeof(arr[0]));} + +struct prealloc_encode +{ + prealloc_encode(int n, int k); + + prealloc_encode(const prealloc_encode&) = delete; + prealloc_encode(prealloc_encode&&) = default; + + vector encode_matrix; + vector table; +}; + +struct prealloc_recover +{ + prealloc_recover(int n, int k, size_t errors_count, size_t len); + + prealloc_recover(const prealloc_recover&) = delete; + prealloc_recover(prealloc_recover&&) = default; + + vector errors_matrix; + vector invert_matrix; + vector decode_matrix; + vector table; + uint8_t** decoding; +}; + +class ErasureCode { +private: + int k; + int n; + +public: + ErasureCode(const int& n, const int& k); + void encode_data(prealloc_encode& preallocEncode, uint8_t **source, const int & len); + void recover_data(const vector &errors, const int & len, uint8_t **dest, uint8_t **erroneous_data, prealloc_encode& preallocEncode, prealloc_recover &preallocRecover); + uint8_t** create_erroneous_data(uint8_t** 
source_data, std::vector errors); + string get_source(uint8_t **source, const int& len); + string get_line(uint8_t **source, const int& len, const int& line_num); + //string* get_parity(); + int getK() const; + int getN() const; + uint8_t** string2array(const string& source, int* filling_len); + ~ErasureCode(); + +}; + + +#endif //CORANA_ERASURECODE_H diff --git a/src/FreqList.cpp b/src/FreqList.cpp new file mode 100644 index 0000000..914bfc4 --- /dev/null +++ b/src/FreqList.cpp @@ -0,0 +1,74 @@ +// +// Created by Alfred on 2022/7/22. +// + +#include +#include +#include "FreqList.h" +//#include "toolbox.h" +using namespace chrono; + +FreqList::FreqList(const int& lenlim, const int& freqlim) { + this->lengthLimit = lenlim; + this->freqLimit = freqlim; + this->list_len = 0; + this->distList= list(); +} + +int FreqList::len() const { + return list_len; +} + +list::iterator FreqList::index(const ListNode &ln) { + //cout << "List size = " << this->distList.size() << endl; + for(auto it = this->distList.begin(); it != this->distList.end(); it ++) { + if(it->samepair(ln)) { + return it; + } + } + return this->distList.end(); +} + +bool FreqList::isHot(list::iterator pr) const { + return pr->freq >= this->freqLimit; +} + +list::iterator FreqList::insert(const ListNode &ln) { + time_point s, e; + auto it = this->index(ln); + if(it != this->distList.end()) { + //*it = *it + ln; + it->freq ++; + return it; + } else { + s = system_clock::now(); + this->distList.emplace_front(ln); + this->list_len ++; + if (this->list_len > this->lengthLimit) { + this->distList.erase(--it); + this->list_len --; + } + e = system_clock::now(); + auto duration = duration_cast(e - s); + double elapsed = double(duration.count()) * microseconds::period::num / microseconds::period::den; + //cout << "Insert time = " << elapsed << endl; + } + return this->distList.begin(); +} + +void FreqList::del(list::iterator pr) { + this->distList.erase(pr); + this->list_len --; +} + +void FreqList::clear() { + 
this->distList.clear(); +} + +FreqList::~FreqList() { + this->clear(); +} + + + + diff --git a/src/FreqList.h b/src/FreqList.h new file mode 100644 index 0000000..77eff8b --- /dev/null +++ b/src/FreqList.h @@ -0,0 +1,30 @@ +// +// Created by Alfred on 2022/7/22. +// + +#ifndef CORANA_FREQLIST_H +#define CORANA_FREQLIST_H + + +#include +#include "ListNode.h" + +class FreqList { +public: + int lengthLimit; + int freqLimit; + int list_len; + list distList; + + FreqList(const int& lenlim, const int& freqlim); + list::iterator insert(const ListNode& ln); + int len() const; + list::iterator index(const ListNode& ln); + bool isHot(list::iterator pr) const; + void del(list::iterator pr); + void clear(); + ~FreqList(); +}; + + +#endif //CORANA_FREQLIST_H diff --git a/src/FreqTable.cpp b/src/FreqTable.cpp new file mode 100644 index 0000000..ce8ba64 --- /dev/null +++ b/src/FreqTable.cpp @@ -0,0 +1,345 @@ +// +// Created by Alfred on 2022/7/22. +// + +#include +#include +#include +#include +#include "FreqTable.h" +#include "toolbox.h" + +FreqTable::FreqTable():n(0) { +} + +FreqTable::FreqTable(const size_t& length):n(length) { + size_t len = n * (n - 1) / 2; + cout << "len = " << len << endl; + this->ftable = vector(len); + this->ftable.resize(len); + this->ftable.shrink_to_fit(); +} + +FreqTable::FreqTable(const FreqTable &_a):n(_a.n) { + size_t len = _a.n * (_a.n - 1) / 2; + this->ftable = vector(len); + this->ftable.resize(len); + this->ftable.shrink_to_fit(); +} + +int FreqTable::estimateFrequency(const int& a, const int& b) { + size_t index = loc(a, b); + return this->ftable[index]; +} + +int FreqTable::find(const int& a, const int& b) { + return estimateFrequency(a, b); +} + +size_t FreqTable::loc(const int& a, const int& b) const { + int first, second; + size_t index; + + if(a == b) return -1; + else if(a > b) { + first = b; + second = a; + } else { + first = a; + second = b; + } + index = (2 * n - first - 1) * first / 2 + (second - first - 1); + return index; +} + +int 
FreqTable::at(const int& index) { + return this->ftable[index]; +} + +void FreqTable::add(const int &a, const int &b, const int &num) { + size_t index = loc(a, b); + + for(int i = 0; i < num; i ++) { + this->ftable[index] ++; + } +} + +void FreqTable::write2File(const string &filename) { + ofstream fout(filename); + + if(!fout.is_open()) { + cout << "Error opening files!" << endl; + exit(-1); + } + + fout << this->n << endl; + + for(auto &pr: ftable) { + fout << pr << '\t'; + } + fout << endl; + + fout.close(); +} + +void FreqTable::write4louvain(const std::string& filename) { + ofstream fout(filename); + + if(!fout.is_open()) { + cout << "Error opening files!" << endl; + exit(-1); + } + + for(int i = 0; i != n; i ++) { + for(int j = i + 1; j != n; j ++) { + fout << i << "\t" << j << "\t" << find(i, j) << endl; + } + } + + fout.close(); +} + +void FreqTable::load(const string &filename) { + ifstream fin(filename); + if(!fin.is_open()) { + cout << "Error opening files!" << endl; + exit(-1); + } + + int tmp_n; + fin >> tmp_n; + + if(tmp_n != n) { + cout << "Parameter Error" << endl; + exit(-1); + } + + size_t length = ftable.size(); + int tmp_value; + for(size_t i = 0; i < length; i ++) { + fin >> tmp_value; + ftable[i] = tmp_value; + } +} + +bool FreqTable::cutGraph() { + vector> correlation_group; + bool visited[n]; + bool flag = false; + timeit tm; + + + int max_connect = 0; + int max_item = INT32_MAX; + //vector cgroup; + + //BFS + int head; + int connect_count; + for(int i = 0; i < n; i ++) { + visited[i] = false; + } + //sq.push(max_item); + //visited[max_item] = true; + + size_t all_connection = 0; + tm.start(); + + for(int i = 0; i < n; i ++) { + for(int j = i + 1; j < n; j ++) { + all_connection += this->find(i, j); + } + } + tm.end(); + cout << "Sum using " << tm.passedtime() << endl; + //all_connection = 62332412114572; + cout << "Sum connection = " << all_connection << endl; + + cout << "Graph cutting initialized." 
<< endl; + + while(true) { + int count = 0; + + for(int i = 0; i < n; i ++) { + //flag = flag && visited[i]; + if(!visited[i]) flag = true; + } + + if(!flag) break; + + vector cgroup; + queue sq; + max_item = INT32_MAX; + max_connect = 0; + // get the maximum frequent initial node + tm.start(); + for(int i = 0; i < n; i ++) { + if(visited[i]) continue; + int num = 0; + for(int j = 0; j < n; j ++) { + if(i == j || visited[j]) continue; + //if(i > j) { + num += this->find(i, j); + //} else { + // num += this->find(j, i); + //} + } + if (num > max_connect) { + tm.end(); + max_connect = num; + max_item = i; + cout << "Now max_item = " << i << endl; + cout << "Now max_connection = " << max_connect << endl; + cout << "Now time elapse = " << tm.passedtime() << endl; + count ++; + //if(count == 10) break; + } + } + cgroup.push_back(max_item); + + sq.push(max_item); + visited[max_item] = true; + + cout << "initial node is " << max_item << ", frequency = " << max_connect << endl; + head = sq.front(); + sq.pop(); + count = 0; + for(int loci = 0; loci < n; loci ++) { + if(count == 0) tm.start(); + if(!visited[loci] && this->find(head, loci) != 0 ) { + vector tmp = cgroup; + //tmp.push_back(loci); + FreqTable::group_insert(&tmp, loci); + connect_count = this->getConnection(tmp, n, visited); + if(connect_count <= max_connect) { + cout << "loci = " << loci << endl; + max_connect = connect_count; + //int cinc = this->getInnerConnection(tmp); + vector comp = *FreqTable::complement(tmp, n); + int cing = this->getInnerConnection(comp); + //size_t cing = all_connection - connect_count - this->getInnerConnection(tmp); + //if(cinc < max_connect || cing < max_connect || comp.size() <= 2) { + if( cing < max_connect || comp.size() <= 2) { + //continue; + for(auto &pr : comp){ + FreqTable::group_insert(&cgroup, pr); + visited[pr] = true; + } + } + FreqTable::group_insert(&cgroup, loci); + + visited[loci] = true; + sq.push(loci); + auto *single_node = this->get_single_node(comp, visited); + 
if(!single_node->empty()) { + for(int & value : *single_node) { + FreqTable::group_insert(&cgroup, value); + visited[value] = true; + } + //all_connection = all_connection - this->getInnerConnection(cgroup) - connect_count; + } //else { + //all_connection = cing; + //} + } + } + count ++; + if(count == 0 || count % 10000 == 0) { + tm.end(); + cout << "Time elapse : " << tm.passedtime() << endl; + } + } + cout << "Group is " << endl; + for(auto &pr : cgroup) { + cout << pr << "\t"; + } + cout << endl; + correlation_group.push_back(cgroup); + flag = false; + } + cout << "Sum groups = " << correlation_group.size() << endl; + + ofstream fout("/home/flnan/groups"); + + if(!fout.is_open()) { + cout << "Error opening files!" << endl; + exit(-1); + } + + fout << correlation_group.size() << endl; + + for(auto &pri : correlation_group) { + for(auto &prj: pri) { + fout << prj << "\t"; + } + fout << endl; + } + fout.close(); + + return false; +} + +int FreqTable::getConnection(const vector &group, const size_t& n, bool *visited) { + int num = 0; + for(int value: group) { + for(int i = 0; i < n; i ++) { + if(visited[i]) continue; + bool it = ::binary_search(group.begin(), group.end(), i); + if(it) continue; + num += this->find(value, i); + } + } + return num; +} + +int FreqTable::getInnerConnection(const vector &group) { + int num = 0; + for(int pri : group) { + for(int prj : group) { + if(prj == pri) continue; + num += this->estimateFrequency(pri, prj); + } + } + return num; +} + +// sorted: selective insertion +void FreqTable::group_insert(std::vector *group, const size_t& value) { + for(auto pr = group->begin(); pr != group->end(); pr ++) { + if(*pr >= value) { + group->emplace(pr, value); + return; + } + } + group->emplace(group->end(), value); +} + +std::vector* FreqTable::complement(std::vector group, const size_t& n) { + auto* rst = new vector(); + auto pr = group.begin(); + for(int i = 0; i < n; i ++) { + if(pr != group.end() && i != *pr) { + rst->push_back(i); + }else 
if(pr != group.end()) { + pr ++; + } else { + rst->push_back(i); + } + } + return rst; +} + +vector* FreqTable::get_single_node(std::vector group, const bool *visited) { + auto* rst = new vector(); + for(auto pr = group.begin(); pr != group.end(); pr ++) { + int num = 0; + if(visited[*pr]) continue; + for(auto prj = group.begin(); prj != group.end(); prj ++) { + if(pr == prj) continue; + if(visited[*prj]) continue; + num += this->estimateFrequency(*pr, *prj); + } + if(num == 0) rst->push_back(*pr); + } + return rst; +} + diff --git a/src/FreqTable.h b/src/FreqTable.h new file mode 100644 index 0000000..a65f1f3 --- /dev/null +++ b/src/FreqTable.h @@ -0,0 +1,41 @@ +// +// Created by Alfred on 2022/7/22. +// + +#ifndef CORANA_FREQTABLE_H +#define CORANA_FREQTABLE_H + +#include +#include +using namespace std; + +class FreqTable { +public: + vector ftable; + size_t n; + + explicit FreqTable(const size_t& length); + FreqTable(const FreqTable& _a); + + FreqTable(); + + size_t loc(const int& a, const int &b) const; + void add(const int& a, const int& b, const int& num = 1); + int estimateFrequency(const int& a, const int& b); + int at(const int& index); + int find(const int& a, const int& b); + bool cutGraph(); + void write2File(const std::string& filename); + void write4louvain(const std::string& filename); + void load(const std::string& filename); + +private: + static void group_insert(std::vector* group, const size_t& value); + int getConnection(const std::vector &group, const size_t& n, bool *visited); + int getInnerConnection(const std::vector &group); + static std::vector* complement(std::vector group, const size_t& n); + std::vector* get_single_node(std::vector group, const bool visited[]); +}; + + +#endif //CORANA_FREQTABLE_H diff --git a/src/ListNode.cpp b/src/ListNode.cpp new file mode 100644 index 0000000..a3e3584 --- /dev/null +++ b/src/ListNode.cpp @@ -0,0 +1,60 @@ +// +// Created by Alfred on 2022/7/22. 
+// + +#include "ListNode.h" +#include + +struct WRONG_VALUE_ERR : public exception +{ + const char * what () const throw () + { + return "not the same value"; + } +}; + +ListNode::ListNode(const ListNode &ln) { + this->first = ln.first; + this->second = ln.second; + this->freq = ln.freq; +} + +ListNode::ListNode(const int& a, const int& b) { + if (a < b) { + this->first = a; + this->second = b; + } else { + this->first = b; + this->second = a; + } + this->freq = 1; +} + +bool ListNode::samepair(const ListNode &ln) const { + return (this->first == ln.first) && (this->second == ln.second); +} + +ListNode ListNode::operator+(const ListNode &ln) const { + if(!this->samepair(ln)) { + //return null; + throw WRONG_VALUE_ERR(); + } + + ListNode l(this->first, this->second); + + l.freq ++; + + return l; +} + +bool ListNode::operator==(const ListNode &ln) const { + return this->samepair(ln); +} + +string ListNode::toString() const { + return to_string(this->first) + "," + ::to_string(this->second); +} + +ListNode::~ListNode() = default; + + diff --git a/src/ListNode.h b/src/ListNode.h new file mode 100644 index 0000000..548f7f6 --- /dev/null +++ b/src/ListNode.h @@ -0,0 +1,27 @@ +// +// Created by Alfred on 2022/7/22. +// + +#ifndef UNTITLED_LISTNODE_H +#define UNTITLED_LISTNODE_H + +#include +using namespace std; + +const int MAX_INT = 999999; + +class ListNode { +public: + int first, second; + int freq; + ListNode(const ListNode& ln); + ListNode(const int& a, const int& b); + bool samepair(const ListNode& ln) const; + ListNode operator+(const ListNode& ln) const; + bool operator==(const ListNode& ln) const; + string toString() const; + ~ListNode(); +}; + + +#endif //UNTITLED_LISTNODE_H diff --git a/src/MemcachedClient.cpp b/src/MemcachedClient.cpp new file mode 100644 index 0000000..20c3346 --- /dev/null +++ b/src/MemcachedClient.cpp @@ -0,0 +1,323 @@ +// +// Created by Alfred on 2022/9/13. 
+// + +#include "MemcachedClient.h" +#include +#include +#include +#include "toolbox.h" +#include "config.h" + +using namespace std; + +MemcachedClient::MemcachedClient(vector>& server_info, bool replica) { + + memcached_return rc; + memcached_server_st *server = nullptr; + + this->memc = memcached_create(nullptr); + if(memc == nullptr) { + printf("Error create memcached link.\n"); + exit(-1); + } + + for(auto & pr : server_info) { + //printf("Server = %s:%d\n", pr.first.c_str(), pr.second); + server = memcached_server_list_append(server, pr.first.c_str(), pr.second, &rc); + this->server_key.push_back(pr.first + to_string(pr.second)); + } + + rc = memcached_server_push(memc, server); + if (MEMCACHED_SUCCESS != rc) + cout <<"memcached_server_push failed! rc: " << rc << endl; + + int server_count = memcached_server_count(memc); + + for(int sn = 0; sn < server_count; sn ++) { + } + + memcached_server_list_free(server); + + rc = memcached_behavior_set(memc, MEMCACHED_BEHAVIOR_DISTRIBUTION, MEMCACHED_DISTRIBUTION_CONSISTENT); + if (MEMCACHED_SUCCESS != rc) { + printf("Failing to set!\n"); + exit(-1); + } + + if(replica) { + rc = memcached_behavior_set(memc, MEMCACHED_BEHAVIOR_NUMBER_OF_REPLICAS, 3); + if (MEMCACHED_SUCCESS != rc) { + printf("Setting replications failed!\n"); + exit(-1); + } + + rc = memcached_behavior_set(memc, MEMCACHED_BEHAVIOR_RANDOMIZE_REPLICA_READ, 1); + if (MEMCACHED_SUCCESS != rc) { + printf("Setting replications failed!\n"); + exit(-1); + } + } +} + +bool MemcachedClient::insert(const char *key, const char *value, time_t expiration) { + if (nullptr == key || nullptr == value) + exit(1); + //cout << strlen(key) << " " << strlen(value) << endl; + if(strlen(key) == 0 || strlen(value) == 0) + exit(2); + + uint32_t flags = 0; + memcached_return rc; + + rc = memcached_set(this->memc, key, strlen(key), value, strlen(value), expiration, flags); + // insert ok + if (MEMCACHED_SUCCESS == rc) + return true; + else + return false; +} + +bool 
MemcachedClient::gset(const char *gkey, const char *key, const char *value, time_t expiration) { + if (nullptr == key || nullptr == value || nullptr == gkey) + exit(1); + //cout << strlen(key) << " " << strlen(value) << endl; + if(strlen(key) == 0 || strlen(value) == 0 || strlen(gkey) == 0) + exit(1); + + uint32_t flags = 0; + memcached_return rc; + + rc = memcached_set_by_key(this->memc, gkey, strlen(gkey), key, strlen(key), value, strlen(value), expiration, flags); + + // insert ok + if (MEMCACHED_SUCCESS == rc) + return true; + else + return false; +} + + +bool MemcachedClient::get(const char *key, std::string& value) { + if (nullptr == key) + exit(1); + if(strlen(key) == 0) + exit(1); + + + uint32_t flags = 0; + memcached_return rc, rc2; + size_t value_length; + + char* v = memcached_get(memc, key, strlen(key), &value_length, &flags, &rc); + + // get ok + if(rc == MEMCACHED_SUCCESS) { + value = v; + free(v); + if(value.length() == 0) { + insert(key, "1111"); + v = memcached_get(memc, key, strlen(key), &value_length, &flags, &rc2); + if(rc2 == MEMCACHED_SUCCESS) { + value = v; + free(v); + } + } + return true; + } + return false; +} + +bool MemcachedClient::gget(const char *gkey, const char *key, std::string& value) { + if (nullptr == key || nullptr == gkey) + exit(1); + if(strlen(key) == 0 || strlen(gkey) == 0) + exit(1); + + + uint32_t flags = 0; + memcached_return rc; + size_t value_length; + + char* v = memcached_get_by_key(memc, gkey, strlen(gkey), key, strlen(key), &value_length, &flags, &rc); + + // get ok + if(rc == MEMCACHED_SUCCESS) { + value = v; + free(v); + return true; + } + value = ""; + return false; +} + +std::vector> MemcachedClient::get_stats() { + memcached_stat_st *stats = nullptr; + memcached_return_t rc; + char *args = nullptr; + + stats = memcached_stat(memc, args, &rc); + + int server_count = memcached_server_count(memc); + + for(int sn = 0; sn < server_count; sn ++) { + + const char *ip = 
memcached_server_name(memcached_server_instance_by_position(memc, sn)); + int port = memcached_server_port(memcached_server_instance_by_position(memc, sn)); + rc = memcached_stat_servername(stats, args, ip, port); + if (rc != MEMCACHED_SUCCESS) { + printf("Error\n"); + exit(-1); + } + + char **stats_key; + stats_key = memcached_stat_get_keys(memc, stats, &rc); + if (rc != MEMCACHED_SUCCESS) { + printf("Error\n"); + exit(-1); + } + + //cout << "stats = " << (stats == nullptr) << endl; + + map tmp; + char *ckey = nullptr; + ckey = stats_key[0]; + for (int i = 1; ckey != nullptr; i++) { + //printf("key = %s\n", ckey); + const char *s = ckey; + memcached_return mr; + char *cvalue = memcached_stat_get_value(memc, stats, s, &mr); + if (mr == MEMCACHED_SUCCESS) { + //cout << ckey << " = " << cvalue << endl; + //cout << string(ckey) << " = " << string(cvalue) << endl; + tmp[ckey] = cvalue; + } + ckey = stats_key[i]; + } + + this->server_status.push_back(tmp); + + //free(stats_key); + } + + /*for(int i = 0; i < server_status.size(); i ++) { + cout << "Server Node " << i << " : " << endl; + for(auto &pr: server_status[i]) { + cout << "\t" << pr.first << " : " << pr.second << endl; + } + }*/ + + memcached_stat_free(memc, stats); + //return false; + + return server_status; +} + +static memcached_return_t stat_printer(const memcached_instance_st *server, + const char *key, size_t key_length, + const char *value, size_t value_length, + void *context) +{ + (void)server; + (void)context; + (void)key; + (void)key_length; + (void)value; + (void)value_length; + + return MEMCACHED_SUCCESS; +} + +bool MemcachedClient::flush() { + memcached_return rc; + + rc = memcached_flush(memc, 0); + + if(rc != MEMCACHED_SUCCESS) + return false; + + rc = memcached_stat_execute(memc, "reset", stat_printer, NULL); + if(rc != MEMCACHED_SUCCESS) + return false; + + return true; +} + +MemcachedClient::~MemcachedClient() { + memcached_free(memc); +} + +size_t MemcachedClient::mgget(const char *gkey, char 
**key, size_t *key_len, const size_t& key_num) { + memcached_return rc; + memcached_result_st results_obj; + memcached_result_st *results; + + results= memcached_result_create(memc, &results_obj); + + //size_t key_len[key_num]; + size_t gkey_len = strlen(gkey); + size_t value_len = 0; + + //for(int i = 0 ; i < key_num; i ++) { + // key_len[i] = strlen(key[i]); + //} + + rc = memcached_mget_by_key(memc, gkey, gkey_len, key, key_len, key_num); + if(rc != MEMCACHED_SUCCESS) + return 0; + + while ((results= memcached_fetch_result(memc, &results_obj, &rc))) + { + //if(rc != MEMCACHED_SUCCESS) { + //value_len = 0; + // break; + //} + + value_len += memcached_result_length(results); + } + + //cout << "value len = " << value_len << endl; + + memcached_result_free(&results_obj); + return value_len; +} + +vector MemcachedClient::get_server_key(const int& key_len) { + const memcached_instance_st *mc; + memcached_return_t rc; + + size_t count = memcached_server_count(memc); + cout << "# of servers = " << count << endl; + + vector keys; + set names; + + while(true) { + string key = makeRandStr(key_len, true); + mc = memcached_server_by_key(memc, key.c_str(), key.size(), &rc); + string name = memcached_server_name(mc); + in_port_t port = memcached_server_port(mc); + // cout << name << ":" << port << endl; + string server = name + ":" + to_string(port); + if(names.count(server) == 0) { + keys.push_back(key); + names.insert(server); + } + // cout << "key set size = " << keys.size() << endl; + if(keys.size() == count) break; + } + + cout << keys.size() << " keys in total" << endl; + for(auto &pr: names) { + cout << pr << endl; + } + + /*for(auto & key : keys) { + cout << key << endl; + mc = memcached_server_by_key(memc, key.c_str(), key.size(), &rc); + cout << memcached_server_name(mc) << endl; + }*/ + + return keys; +} + diff --git a/src/MemcachedClient.h b/src/MemcachedClient.h new file mode 100644 index 0000000..b868d8c --- /dev/null +++ b/src/MemcachedClient.h @@ -0,0 +1,35 @@ 
+// +// Created by Alfred on 2022/9/13. +// + +#ifndef CORANA_MEMCACHEDCLIENT_H +#define CORANA_MEMCACHEDCLIENT_H + +#include +#include +#include +#include +#include + +class MemcachedClient { +private: + std::vector server_key; + std::vector> server_status; + +public: + memcached_st *memc; + MemcachedClient(std::vector>& server_info, bool replica = false); + bool insert(const char* key, const char* value, time_t expiration = 0); + bool gset(const char * gkey, const char* key, const char* value, time_t expiration = 0); + bool get(const char* key, std::string& value); + bool gget(const char * gkey, const char* key, std::string& value); + size_t mgget(const char * gkey, char* key[], size_t *key_len, const size_t& key_num); + std::vector get_server_key(const int& key_len); + std::vector> get_stats(); + bool flush(); + ~MemcachedClient(); + +}; + + +#endif //CORANA_MEMCACHEDCLIENT_H diff --git a/src/OurScheme.cpp b/src/OurScheme.cpp new file mode 100644 index 0000000..37c7128 --- /dev/null +++ b/src/OurScheme.cpp @@ -0,0 +1,882 @@ +// +// Created by Alfred on 2022/9/14. 
+// + +#include "OurScheme.h" +#include "FreqList.h" +#include "CMSketch.h" +#include +#include +#include +#include +#include +#include +#include "fmt/core.h" +#include +#include "MemcachedClient.h" + +#include "toolbox.h" + +using namespace std; + +vector gkeys; +pthread_mutex_t oprintmutex; +ConfigParameter cpOur; +vector> fgroup; +vector okeys; +vector::iterator freq_limit_pos; + +vector> distribute_group(int bin_num) { + vector> group_distribution; + Py_Initialize(); + if (!Py_IsInitialized()) + { + printf("Initialization Failed"); + exit(-1); + } + + PyRun_SimpleString("import sys"); + string path = "sys.path.append('"+ cpOur.PATH_PREFIX + "/')"; + string path = "sys.path.append('../')"; + + cout << "Python file path = " << path << endl; + PyRun_SimpleString(path.c_str()); + + + PyObject * pModule = NULL; + PyObject * pFunc = NULL; + pModule = PyImport_ImportModule("stats"); + if (pModule==NULL) + { + cout << "Not found" << endl; + } + pFunc = PyObject_GetAttrString(pModule, "group_distribution2"); + PyObject* pDict = PyDict_New(); + for(int i = freq_limit_pos - okeys.begin(); i < okeys.size(); i ++) { + PyDict_SetItemString(pDict, to_string(i).c_str(), Py_BuildValue("i", okeys[i].freq)); + } + PyObject* pArgs = PyTuple_New(2); + PyTuple_SetItem(pArgs, 0, pDict); + PyTuple_SetItem(pArgs, 1, Py_BuildValue("i", bin_num)); + + PyObject* pRet = PyObject_CallObject(pFunc, pArgs); + + if (PyList_Check(pRet)) { + // okay, it's a list + for (Py_ssize_t i = 0; i < PyList_Size(pRet); ++i) { + vector tmp; + PyObject* next = PyList_GetItem(pRet, i); + if(PyList_Check(next)) { + for (Py_ssize_t j = 0; j < PyList_Size(next); ++j) { + PyObject* snext = PyList_GetItem(next, j); + if(!PyUnicode_Check(snext)) { + cout << "Wrong return" << endl; + exit(-1); + } + + tmp.push_back(atoi((char*)PyUnicode_DATA(snext))); + } + group_distribution.push_back(tmp); + } + + } + } + + //cout << "Finish grouping" << endl; + return group_distribution; +} + +void OurScheme::getFreqKeys(const 
string &stats_file) { + ifstream fin(stats_file); + + if(!fin.is_open()) { + cout << "Error opening stats file!" << endl; + exit(-1); + } + + while(fin.peek() != EOF) { + key_param tmp; + fin >> tmp.key >> tmp.size >> tmp.freq; + okeys.push_back(tmp); + } + + okeys.pop_back(); + + sort(okeys.begin(), okeys.end(), key_freq_comp); + + freq_limit_pos = okeys.end(); + for(auto pr = okeys.begin(); pr != okeys.end(); pr ++) { + if (pr->freq == cpOur.HOTEST_FREQ_LIMIT || ((pr + 1) != okeys.end() && pr->freq > cpOur.HOTEST_FREQ_LIMIT && (pr + 1)->freq < cpOur.HOTEST_FREQ_LIMIT)) + freq_limit_pos = pr; + total_freq += pr->freq; + } + + + sort(okeys.begin(), freq_limit_pos, key_string_comp); + sort(freq_limit_pos, okeys.end(), key_string_comp); + + cout << "NUmber of hot keys = " << freq_limit_pos - okeys.begin() << endl; + + ofstream fout(fmt::format("freqkeys{:02d}_{}", cpOur.TRACE_NO, cpOur.WINDOW_SIZE), ios::out); + if(!fout.is_open()) { + cout << "Error opening file to write freqkeys." << endl; + exit(-1); + } + + for(auto pr = okeys.begin(); pr != freq_limit_pos + 1; pr++) { + fout << pr->key << endl; + } + fout.close(); + + cout << "finished reading hotkyes" << endl; +} + +vector *OurScheme::readStream(const string& fname, int lnum, long long int &cpos) const { + ifstream in(fname); + in.seekg(cpos, ios::beg); + auto* vec = new vector(); + string lcnt, rst; + vector tmp; + int i; + bool flag; + + if (!in.is_open()) { + cout << "Error opening file: " << fname << endl; + exit(0); + } + + i = 0; + while(getline(in, lcnt) && i < lnum) { + switch (wtype) { + case meta: + tmp = split(lcnt, ','); + if(tmp[0] == "key") { + flag = true; //202206 + continue; + } else if (tmp[0] == "op_time") { + flag = false; //202401 + continue; + } + + if(flag) { + rst = tmp[0]; + } else { + rst = tmp[1]; + } + break; + + case twitter: + tmp = split(lcnt, ','); + rst = tmp[1]; + break; + case ibm: + tmp = split(lcnt, ' '); + rst = tmp[2]; + break; + } + vec->push_back(rst); + i ++; + } + + 
cpos = in.tellg(); + in.close(); + return vec; +} + +int OurScheme::isFreqKeys(const string &key) { + /*key_param tmp; + tmp.key = key; + + auto pr = find(keys.begin(), freq_limit_pos + 1, tmp); + if(pr != freq_limit_pos + 1) { + return pr - keys.begin(); + } + return -1;*/ + return FreqSearch(okeys, 0, freq_limit_pos - okeys.begin() + 1, key); +} + +vector* OurScheme::group_stat(const string& filename, const int& group_num) { + ifstream fin(filename); + if(!fin.is_open()) { + cout << "Error opening files!" << endl; + exit(-1); + } + + auto groups = new vector [group_num+100]; + + cout << "group read: group num = " << group_num << endl; + + + size_t num, size, freq; + int node; + int it = 0; + while(fin.peek() != EOF) { + + fin >> num >> size >> freq; + if(fin.eof() or fin.fail() or fin.bad()) break; + //cout << num << ", " << size << ", " << freq << endl; + for(int i = 0; i < num; i ++) { + fin >> node; + groups[it].push_back(node); + } + it ++; + } + + gstat = vector(group_num); + gsize = vector(group_num); + + for(int i = 0; i < group_num; i ++) { + int sum_freq = 0; + int sum_size = 0; + for(int & pr : groups[i]) { + sum_freq += okeys[pr].freq; + sum_size += okeys[pr].size; + } + //cout << sum_freq << endl; + //cout << "origin Group " << i << " frequent = " << sum_freq << endl; + gstat[i] = sum_freq; + gsize[i] = sum_size; + //cout << "origin Group " << i << " frequent = " << gstat[i] << endl; + } + + gstat.resize(group_num); + gsize.resize(group_num); + + return groups; +} + +OurScheme::OurScheme(enum workload_type wt, const int& trace_no, const int& hotlim, const int& snum) +{ + string stat_file; + + this->wtype = wt; + + cpOur = ConfigParameter(wt, trace_no, hotlim, snum); + total_freq = 0; + cout << "Hot objects frequent limit = " << cpOur.HOTEST_FREQ_LIMIT << endl; + + stat_file = cpOur.PATH_PREFIX + "/" + cpOur.STAT_FILE; + + cout << "stat file = " << stat_file << endl; + + getFreqKeys(stat_file); + + cout << "CMSketch = " << freq_limit_pos - 
okeys.begin() << endl; + cout << "FreqSearch = " << FreqSearch(okeys, 0, freq_limit_pos-okeys.begin()+1, (--freq_limit_pos)->key) << endl; + //this->ftable = CMSketch(freq_limit_pos - keys.begin(), cpOur.CMSKETCH_DEVIATION); + this->ftable = FreqTable(freq_limit_pos - okeys.begin()); + +} + +void OurScheme::CorrelationAnalysis(const int& day) { + FreqList flist(cpOur.FREQ_LIST_SIZE, cpOur.FREQ_LIMIT); + //CMSketch ftable(); + + long long current_pos = 0; + long long tail_pos; + int flag = 0; + timeit t; + + string workload_file = cpOur.PATH_PREFIX+ "/" + cpOur.STREAM_FILE_PREFIX + to_string(day + 1); + + cout << "workload_file = " << workload_file << endl; + // get the tail position of the file + ifstream fin(workload_file); + if (!fin.is_open()) { + cout << "Error opening file: " << workload_file << endl; + exit(0); + } + fin.seekg(0, ios::end); + tail_pos = fin.tellg(); + cout << "Tail position: " << tail_pos << endl; + fin.close(); + + t.start(); + vector *rstream = readStream(workload_file, cpOur.ONCE_READ_LIMIT, current_pos); + t.end(); + cout << "Read the stream pos Time used: "<< t.passedtime() << endl; + + /* + * Begin the Correlation Analysis + */ + cout << "Correlation Analysis start" << endl; + auto start_pos = rstream->begin(); + cout << *start_pos << endl; + int start_index = 0; + + while(!rstream->empty()) { + if((start_pos + 1) != rstream->end() && (rstream->size() - (start_pos-rstream->begin())) < cpOur.WINDOW_SIZE + 100 && current_pos != -1) { + if(start_index == rstream->size()) { + rstream->erase(rstream->begin(), rstream->end()); + } else { + rstream->erase(rstream->begin(), start_pos + 1); + } + vector *stmp = readStream(workload_file, cpOur.ONCE_READ_LIMIT, current_pos); + rstream->insert(rstream->end(), stmp->begin(), stmp->end()); + stmp->clear(); + delete stmp; + stmp = nullptr; + start_pos = rstream->begin(); + start_index = 0; + } + + int loc1 = isFreqKeys(*start_pos); + + //if() break; + + while(loc1 == -1 && (start_pos + 1) <= 
rstream->end()) { + start_pos ++; + start_index ++; + loc1 = isFreqKeys(*start_pos); + } + + if(loc1 == -1 && current_pos == -1){ + break; + } else if(loc1 == -1) { + continue; + } + + if(start_pos >= rstream->end()) + break; + + //cout << rstream->size() << endl; + //cout << start_index << endl; + + int size = rstream->end() - start_pos; + int size1 = rstream->size() - start_index; + + int width = size > cpOur.WINDOW_SIZE? cpOur.WINDOW_SIZE: size; + + for(auto pr = start_pos + 1; pr != start_pos + width; pr ++) { + int loc2 = isFreqKeys(*pr); + if(loc2 == -1 || loc2 == loc1) continue; + + int rst = ftable.find(loc1, loc2); + + if(rst != 0) { + ftable.add(loc1, loc2); + ftable.add(loc1, loc2); + } else { + auto index = flist.insert(ListNode(loc1, loc2)); + if(flist.isHot(index)) { + ftable.add(index->first, index->second); + ftable.add(index->first, index->second); + flist.del(index); + } + } + + } + + start_pos++; + start_index++; + flag ++; + if(flag % 100000000 == 0) { + t.end(); + cout << "Time used: "<< t.passedtime() << endl; + // if(flag / 1000000 > 66) +// break; + } + } + //delete rstream; + cout << "Current point position = " << current_pos << endl; + flist.clear(); + ftable.write4louvain(fmt::format("{}/louvain_node_{}_{}", cpOur.PATH_PREFIX, cpOur.TRACE_NO, cpOur.variation)); +} + +void OurScheme::test() { + size_t num_of_keys = okeys.size(); + std::default_random_engine e; + std::uniform_int_distribution u(0, num_of_keys); + + //for (int i = 0; i < 100; ++i) { + int loc = freq_limit_pos - okeys.begin(); //u(e); + cout << "the origin " << loc << " th key = " << okeys[loc].key << endl; + int nloc = FreqSearch(okeys, 0, freq_limit_pos - okeys.begin() + 1, okeys[loc].key); + //if(nloc == -1) { + // nloc = FreqSearch(okeys, freq_limit_pos - okeys.begin(), okeys.size(), okeys[loc].key); + //} + if(nloc != -1) { + cout << "the found " << nloc << " th key = " << okeys[nloc].key << endl; + } else { + cout << "Not found" << endl; + } + //} + +} + +void 
OurScheme::distribute(const string &group_file, const int& group_num, const int& node_num) { + vector > distributed_group; + vector distributed_freq; + vector distributed_size; + + + auto group_divided = group_stat(group_file, group_num); + cout << "group num = " << group_num << endl; + cout << "group size = " << group_divided->size() << endl; + + int dinic_length = 2 + gstat.size() + node_num + 1; + int common_items = gstat.size(); + + vector>> graph; + + graph.resize(dinic_length); + for(int i = 0; i < dinic_length; i ++) { + //graph[i].resize(dinic_length); + for(int j = 0; j < dinic_length; j ++) { + graph[i].push_back(pair(0,0)); + } + } + for(int i = 0; i < common_items + 2; i ++) { + graph[1][i + 2].first = gstat[i]; + graph[i + 2][1].first = -gstat[i]; + } + size_t gfreq = 0; + for(int i = 0; i < common_items; i ++) { + gfreq += gstat[i]; + } + + for(int i = 0; i < node_num; i ++) { + for(int j = 0; j < gstat.size(); j ++) { + graph[2 + gstat.size() + i][2 + j].first = 0 - int(gfreq * 1.0 / node_num); + graph[2 + j][2 + gstat.size() + i].first = int(gfreq * 1.0 / node_num); + } + graph[2 + gstat.size() + i][dinic_length - 1].first = int(gfreq * 1.0 / node_num); + graph[dinic_length - 1][2 + gstat.size() + i].first = 0 - int(gfreq * 1.0 / node_num); + } + + /*for(int i = 1; i < dinic_length; i ++) { + //graph[i].resize(dinic_length); + for(int j = 1; j < dinic_length; j ++) { + cout << graph[i][j].first << "," << graph[i][j].second << " "; + } + cout << endl; + }*/ + + int re = dinic(graph, dinic_length - 1); + + /*cout << "re = " << re << endl; + for(int i = 1; i < dinic_length; i ++) { + //graph[i].resize(dinic_length); + for(int j = 1; j < dinic_length; j ++) { + cout << graph[i][j].first << "," << graph[i][j].second << " "; + } + cout << endl; + }*/ + + vector loc; + loc.resize(gstat.size()); + for(int i = 2; i < 2 + gstat.size(); i ++) { + int max = 0, max_index = 0; + for(int j = 0; j < node_num; j ++) { + if(graph[i][2 + gstat.size() + j].second > 
max) { + max = graph[i][2 + gstat.size() + j].second; + max_index = j; + } + } + loc[i - 2] = max_index; + } + + cout << endl; + for(int i = 0; i < gstat.size(); i ++) + cout << gstat[i] << "," << loc[i] << " "; + cout << endl; + + distributed_group.resize(node_num); + distributed_freq.resize(node_num); + distributed_size.resize(node_num); + + for(int i = 0; i < node_num; i ++) + distributed_freq[i] = 0; + + for(int i = 0; i < gstat.size(); i ++) { + distributed_group[loc[i]].insert(group_divided[i].begin(), group_divided[i].end()); + distributed_freq[loc[i]] += gstat[i]; + distributed_freq[loc[i]] += gsize[i]; + } + + vector> cold_items = distribute_group(node_num); + + for(int i = 0; i < node_num; i ++) { + distributed_group[i].insert(cold_items[i].begin(), cold_items[i].end()); + } + + MemcachedClient mc(cpOur.SERVER_INFO); + + gkeys = mc.get_server_key(100); + cout << "Got MemcachedClient keys" << endl; + + for(int i = 0; i < node_num; i ++) { + for(auto &pr: distributed_group[i]) { + if(okeys[pr].size == 0) okeys[pr].size = 1; + string value(okeys[pr].size, '1'); + mc.gset(gkeys[i].c_str(), okeys[pr].key.c_str(), value.c_str()); + } + } + + //cout << "1111111111111111111111111111111" << endl; + + fgroup.resize(node_num); + for(int i = 0; i < node_num; i ++) { + for(auto &pr: distributed_group[i]) { + fgroup[i].emplace(okeys[pr].key); + } + } + + /*for(auto pr = freq_limit_pos; pr != okeys.end(); pr ++) { + size_t size = 0; + if(pr->size == 0) size = 1; + else size = pr->size; + string value(size, '1'); + mc.insert(pr->key.c_str(), value.c_str()); + }*/ + + + + cout << "Distributed Finished" << endl; +} + +void *twitter_query_exec(void *param) { + timeit tt; + MemcachedClient mc(cpOur.SERVER_INFO); + + string prefix = cpOur.PATH_PREFIX; + + pthread_mutex_lock(&oprintmutex); + //cout << ((thread_param *)param)->tid <<": twitter_query_exec" << endl; + pthread_mutex_unlock(&oprintmutex); + + //pthread_mutex_lock(&oprintmutex); + char filename[255]; + 
//sprintf(filename, "d0t%dp%04d", cpOur.THREAD_NUM, ((thread_param *)param)->tid); + snprintf(filename, sizeof(filename), "w%02dd%dt%dp%04d", cpOur.TRACE_NO, cpOur.DAY, cpOur.THREAD_NUM, ((thread_param *)param)->tid); + //sprintf(filename, "d0t128p%04d", ((thread_param *)param)->tid); + string fname = prefix + "/" + filename; + //pthread_mutex_unlock(&oprintmutex); + + //pthread_mutex_lock (&oprintmutex); + // cout << ((thread_param *)param)->tid <<",filename = " << fname << endl; + //pthread_mutex_unlock (&oprintmutex); + + //pthread_mutex_lock (&oprintmutex); + ifstream fin(fname); + + + if(!fin) { + cout << ((thread_param *)param)->tid <<": Error open trace file" << endl; + exit(-1); + } + //pthread_mutex_unlock (&oprintmutex); + + pthread_mutex_lock (&oprintmutex); + //fprintf(stderr, "start benching using thread%u\n", ((thread_param *)param)->tid); + pthread_mutex_unlock (&oprintmutex); + + //cout << "Location = " << FreqSearch(okeys, 0, freq_limit_pos - okeys.begin() + 1, "vi3.j3_S1b.Iz.9S.sC"); + //cout << "keys size = " << okeys.size() << endl; + + + vector qkeys; + while(fin.peek() != EOF) { + + if(fin.eof() or fin.fail() or fin.bad()) break; + + char line[1000]; + long time_val; + char query_key[200]; + int linenum; + + pthread_mutex_lock (&oprintmutex); + linenum = 0; + while(fin.peek() != EOF and linenum != cpOur.ONCE_READ_LIMIT) { + fin.getline(line, 1000); + time_val = strtol(strtok(line, ","), NULL, 10); // time + qkeys.emplace_back(string(strtok(NULL, ","))); //key + linenum ++; + } + pthread_mutex_unlock (&oprintmutex); + + + + for(int it = 0; it < linenum; it ++) { + //vector skeys; + size_t key_num = 0; + char **skey; + bool flag; + size_t rsize; + int lg = -1; + int sops = 1; + size_t sbegin = -1; + char sgkey[255]; + + if (qkeys[it][qkeys[it].size() - 1] == '\n') qkeys[it] = qkeys[it].substr(0, qkeys[it].size() - 1); + if (qkeys[it][qkeys[it].size() - 1] == '\r') qkeys[it] = qkeys[it].substr(0, qkeys[it].size() - 1); + + //tt.start(); + /*int 
loc = FreqSearch(okeys, 0, freq_limit_pos - okeys.begin() + 1, qkeys[it]); + if(loc == -1) { + loc = FreqSearch(okeys, freq_limit_pos - okeys.begin(), okeys.size(), qkeys[it]); + }*/ + for (int i = 0; i < fgroup.size(); i++) { + if(fgroup[i].count(qkeys[it]) == 1) { + lg = i; + } + } + if(lg != -1) { + strcpy(sgkey, gkeys[lg].c_str()); + //skeys.emplace_back(qkeys[it]); + sbegin = it; + key_num ++; + for (++it;it < linenum; it++) { + /*int loc2 = FreqSearch(okeys, 0, freq_limit_pos - okeys.begin() + 1, qkeys[it]); + if(loc2 == -1) { + loc2 = FreqSearch(okeys, freq_limit_pos - okeys.begin(), okeys.size(), qkeys[it]); + }*/ + if(fgroup[lg].count(qkeys[it]) == 1) { + // skeys.emplace_back(qkeys[it]); + key_num ++; + //if(key_num >= 10) break; + } else { + it --; + break; + } + } + if(key_num > 1) { + size_t key_len[key_num]; + skey = new char* [key_num]; + //key_len = new size_t [key_num]; + for(int i = 0; i < key_num; i ++) { + skey[i] = new char[255]; + strcpy(skey[i], qkeys[sbegin + i].c_str()); + key_len[i] = strlen(skey[i]); //skeys[i].size(); + } + rsize = 0; + //while(rsize == 0) { + //tt.start(); + //for(int ii = 0; ii < 3; ii ++) { + //rsize = mc.mgget(sgkey, skey, key_len, key_num); + memcached_return rc; + memcached_result_st results_obj; + memcached_result_st *results; + + results= memcached_result_create(mc.memc, &results_obj); + + //size_t key_len[key_num]; + size_t gkey_len = strlen(sgkey); + size_t value_len = 0; + + //for(int i = 0 ; i < key_num; i ++) { + // key_len[i] = strlen(key[i]); + //} + tt.start(); + rc = memcached_mget_by_key(mc.memc, sgkey, gkey_len, skey, key_len, key_num); + tt.end(); + //if(rc != MEMCACHED_SUCCESS) + // return 0; + + while ((results= memcached_fetch_result(mc.memc, &results_obj, &rc))) + { + //if(rc != MEMCACHED_SUCCESS) { + //value_len = 0; + // break; + //} + + value_len += memcached_result_length(results); + } + + //cout << "value len = " << value_len << endl; + + memcached_result_free(&results_obj); + // if 
(value_len != 0) { + rsize = value_len; + // break; + // } + //} + + sops = key_num; + + for(int i = 0; i < key_num; i ++) { + delete [] skey[i]; + } + delete [] skey; + //delete [] key_len; + } else { + string rst; + char thekey[500]; + //strcpy(sgkey, gkeys[lg].c_str()); + strcpy(thekey, qkeys[sbegin].c_str()); + rsize = 0; + + char * v; + + for(int ii = 0; ii < 3; ii ++) + { + //flag = mc.gget(sgkey, thekey, rst); + uint32_t flags = 0; + memcached_return rc; + size_t value_length; + + tt.start(); + v = memcached_get_by_key(mc.memc, sgkey, strlen(sgkey), thekey, strlen(thekey), &value_length, &flags, &rc); + tt.end(); + + if (v != NULL) { + break; + } + } + + if(v != NULL) { + rst = v; + } else { + rst = ""; + } + + free(v); + + rsize = rst.size(); + sops = 1; + } + + } else { + string rst; + char thekey[500]; + strcpy(thekey, qkeys[it].c_str()); + rsize = 0; + tt.start(); + for(int ii = 0; ii < 1; ii ++) + { + flag = mc.get(thekey, rst); + if (!rst.empty()) break; + } + tt.end(); + + rsize = rst.size(); + sops = 1; + } + //tt.end(); + + for(int i = 0; i < sops; i ++) { + ((thread_param *) param)->latency.push(tt.passedtime() / sops); + //((thread_param *) param)->latency.push(tt.passedtime()); + if (((thread_param *) param)->latency.size() >= cpOur.LATENCY_NUM) { + ((thread_param *) param)->latency.pop(); + } + } + //total running time + ((thread_param *) param)->runtime += tt.passedtime(); + //sum ops + ((thread_param *) param)->ops += sops; + //sum size + ((thread_param *) param)->size += rsize; + } + qkeys.clear(); + vector().swap(qkeys); + } + fin.close(); + + ((thread_param *)param)->thput_of_ops = ((thread_param *)param)->ops / ((thread_param *)param)->runtime; + ((thread_param *)param)->thput_of_size = 1.0 * ((thread_param *)param)->size / ((thread_param *)param)->runtime / 1024; + + // cout << "Total time: " << ((thread_param *)param)->runtime << endl + // << "Total ops: " << ((thread_param *)param)->ops << endl + // << "Total ops throughput: " << 
((thread_param *)param)->thput_of_ops << endl + // << "Total sizes: " << ((thread_param *)param)->size << endl + // << "Total size throughput: " << ((thread_param *)param)->thput_of_size << " KB" << endl; + + + //free(line); + //memcached_server_list_free(server); + pthread_exit(NULL); +} + +void OurScheme::query() { + //cpOur = ConfigParameter(twitter, snum); + pthread_t threads[cpOur.THREAD_NUM]; + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);; + + pthread_mutex_init(&oprintmutex, NULL); + + thread_param tp[cpOur.THREAD_NUM]; + for (uint32_t t = 0; t < cpOur.THREAD_NUM; t++) + //for (uint32_t t = 0; t < 1; t++) + { + // cout << "Threads = " << t << endl; + //tp[t].queries = queries; + tp[t].tid = t; + // tp[t].sop = sop_tmp; + tp[t].ops = tp[t].size = 0; + tp[t].runtime = tp[t].thput_of_ops = tp[t].thput_of_size = 0.0; + int rci; + if(wtype == twitter) { + rci = pthread_create(&threads[t], &attr, twitter_query_exec, (void *) &tp[t]); + } //else { + //rci = pthread_create(&threads[t], &attr, ibm_query_exec, (void *) &tp[t]); + //} + if (rci) { + perror("failed: pthread_create\n"); + exit(-1); + } + + } + + double total_ops_thputs = 0.0; + long double total_size_thputs = 0.0; + int total_ops = 0; + double total_time = 0.0; + unsigned long long total_size = 0; + vector latency; + + int nthreads = cpOur.THREAD_NUM; + // cout << "end:333333333333333333333" << endl; + + for (uint32_t t = 0; t < cpOur.THREAD_NUM; t++) { + //for (uint32_t t = 0; t < 1; t++) { + void *status; + int rci = pthread_join(threads[t], &status); + if (rci) { + perror("error, pthread_join\n"); + exit(-1); + } + + + total_time = total_time > tp[t].runtime? 
total_time: tp[t].runtime; + total_ops += tp[t].ops; + total_ops_thputs += tp[t].thput_of_ops; + total_size += tp[t].size; + total_size_thputs += tp[t].thput_of_size; + while(!tp[t].latency.empty()) { + latency.push_back(tp[t].latency.top()); + tp[t].latency.pop(); + } + //latency.insert(latency.end(), tp[t].latency.begin(),tp[t].latency.end()); + } + sort(latency.rbegin(),latency.rend()); + double latency95 = latency[total_ops - int(total_ops * 0.95)]; + double latency99 = latency[total_ops - int(total_ops * 0.99)]; + double latency9999 = latency[total_ops - int(total_ops * 0.9999)]; + + cout << "Total time: " << total_time << endl + << "Total ops: " << total_ops << endl + << "Total op throughput: " << total_ops_thputs << endl + << "Total sizes: " << total_size << endl + << "Total size throughput: " << total_size_thputs << endl + << "95\% latency: " << latency95 *1000 << endl + << "99\% latency: " << latency99 *1000 << endl + << "99.99\% latency: " << latency9999 *1000 << endl; + + ofstream fout(cpOur.PATH_PREFIX + "/result", ios::out|ios::app); + //fout << snum << endl; + //fout << "Our Scheme" << endl; + fout << "AC-Cache" << "\t" << nthreads << "\t" << total_time << "\t" << total_ops << "\t" << total_ops_thputs << "\t" + << total_size << "\t" << total_size_thputs << "\t" + << latency95 << "\t" << latency99 << "\t" << latency9999 << endl; + fout.close(); + + pthread_attr_destroy(&attr); +} + +OurScheme::~OurScheme() { + gkeys.clear(); + okeys.clear(); +} diff --git a/src/OurScheme.h b/src/OurScheme.h new file mode 100644 index 0000000..a49f671 --- /dev/null +++ b/src/OurScheme.h @@ -0,0 +1,41 @@ +// +// Created by Alfred on 2022/9/14. 
+// + +#ifndef CORANA_OURSCHEME_H +#define CORANA_OURSCHEME_H + +#include "config.h" +//#include "CMSketch.h" +#include "FreqTable.h" +#include + +class OurScheme { +private: + void getFreqKeys(const string& stats_file); + vector* readStream(const string& fname, int lnum, long long& cpos) const; + int isFreqKeys(const string& key); + vector* group_stat(const string& filename, const int& group_num); +private: + enum workload_type wtype; + vector gstat; + vector gsize; + size_t total_freq; + +public: + //CMSketch ftable; + FreqTable ftable; + +public: + explicit OurScheme(enum workload_type wt = twitter, const int& trace_no = 2, const int& hotlim = 3444, const int& snum = 9); + void CorrelationAnalysis(const int& day = 0); + void distribute(const string& group_file, const int& group_num, const int& node_num); + //void MemcachedInit(); + void query(); + //void clean(); + ~OurScheme(); + void test(); +}; + + +#endif //CORANA_OURSCHEME_H diff --git a/src/Random.cpp b/src/Random.cpp new file mode 100644 index 0000000..d532841 --- /dev/null +++ b/src/Random.cpp @@ -0,0 +1,274 @@ +// +// Created by Alfred on 2022/10/19. +// + +#include +#include "Random.h" +#include "MemcachedClient.h" +#include "toolbox.h" +#include "pthread.h" + +ConfigParameter cp; +vector keys; +workload_type wt; +pthread_mutex_t printmutex; +int server_num = 0; + +void random_read_file(const workload_type& type, const int& snum) { + wt =type; + cp = ConfigParameter(type, snum); + keys = readStat(cp.PATH_PREFIX + "/" + cp.STAT_FILE); +} + +void twitter_init(){ + char single = '1'; + MemcachedClient mc(cp.SERVER_INFO); + cout << "init: 1111111111111111" << endl; + for(auto & key : keys) { + int tmp_size = key.size == 0? 1:key.size; + string value = string(tmp_size, single); + mc.insert(key.key.c_str(), value.c_str()); + } + cout << "init: 22222222222222222" << endl; + + + cout << "Twitter init finished." 
<< endl; +} + +void random_init() { + if(wt == twitter) { + twitter_init(); + } + + cout << "Workload init finished." << endl; +} + +void *twitter_query_exec(void *param) { + timeit tt; + MemcachedClient mc(cp.SERVER_INFO); + + string prefix = cp.PATH_PREFIX; + + pthread_mutex_lock(&printmutex); + cout << ((thread_param *)param)->tid <<": twitter_query_exec" << endl; + pthread_mutex_unlock(&printmutex); + + //pthread_mutex_lock(&printmutex); + char filename[255]; + //sprintf(filename, "d0t%dp%04d", cp.THREAD_NUM, ((thread_param *)param)->tid); + snprintf(filename, sizeof(filename), "d%dt%dp%04d", cp.DAY, cp.THREAD_NUM, ((thread_param *)param)->tid); + //sprintf(filename, "d0t128p%04d", ((thread_param *)param)->tid); + string fname = prefix + "/" + filename; + //pthread_mutex_unlock(&printmutex); + + //pthread_mutex_lock (&printmutex); + cout << ((thread_param *)param)->tid <<",filename = " << fname << endl; + //pthread_mutex_unlock (&printmutex); + + //pthread_mutex_lock (&printmutex); + ifstream fin(fname); + + + if(!fin) { + cout << ((thread_param *)param)->tid <<": Error open trace file" << endl; + exit(-1); + } + //pthread_mutex_unlock (&printmutex); + + pthread_mutex_lock (&printmutex); + fprintf(stderr, "start benching using thread%u\n", ((thread_param *)param)->tid); + pthread_mutex_unlock (&printmutex); + + + vector qkeys; + vector ops; + while(fin.peek() != EOF) { + + char line[1000]; + long time_val; + char query_key[200]; + int linenum; + + pthread_mutex_lock (&printmutex); + linenum = 0; + while(fin.peek() != EOF and linenum != cp.ONCE_READ_LIMIT) { + fin.getline(line, 1000); + time_val = strtol(strtok(line, ","), NULL, 10); // time + qkeys.emplace_back(string(strtok(NULL, ","))); //key + time_val = strtol(strtok(NULL, ","), NULL, 10); //key len + time_val = strtol(strtok(NULL, ","), NULL, 10); //value len + time_val = strtol(strtok(NULL, ","), NULL, 10); //client id + ops.emplace_back(string(strtok(NULL, ","))); //ops + linenum ++; + } + 
pthread_mutex_unlock (&printmutex); + + + for(int it = 0; it != linenum; it ++) { + string rst; + bool flag; + + tt.start(); +// if (ops[it] == "set") { +// flag = mc.insert(qkeys[it].c_str(), "1111"); +// } else { + for(int ii = 0; ii < 3; ii ++) {//while (true) { + flag = mc.get(qkeys[it].c_str(), rst); + if (!rst.empty()) break; + } + //} + tt.end(); + + //tail latency + /*int left = 0; + int right = ((thread_param *) param)->latency.size() - 1; + int mid = 0; + //找a[i]应该插入的位置 + while (left <= right) { + mid = (left + right) / 2; + if (tt.passedtime() < ((thread_param *) param)->latency[mid]) { + left = mid + 1; + } else { + right = mid + -1; + } + } + ((thread_param *) param)->latency.emplace(((thread_param *) param)->latency.begin()+left, tt.passedtime());*/ + ((thread_param *) param)->latency.push(tt.passedtime()); + /*auto pr = ((thread_param *) param)->latency.begin(); + for (; pr != ((thread_param *) param)->latency.end(); pr++) { + if (tt.passedtime() >= *pr) { + break; + } + } + ((thread_param *) param)->latency.emplace(pr, tt.passedtime());*/ + if (((thread_param *) param)->latency.size() >= cp.LATENCY_NUM) { + // ((thread_param *) param)->latency.pop_back(); + // ((thread_param *) param)->latency.shrink_to_fit(); + ((thread_param *) param)->latency.pop(); + } + //total running time + ((thread_param *) param)->runtime += tt.passedtime(); + //sum ops + ((thread_param *) param)->ops++; + //sum size + ((thread_param *) param)->size += rst.size(); + } + qkeys.clear(); + vector().swap(qkeys); + } + fin.close(); + + ((thread_param *)param)->thput_of_ops = ((thread_param *)param)->ops / ((thread_param *)param)->runtime; + ((thread_param *)param)->thput_of_size = 1.0 * ((thread_param *)param)->size / ((thread_param *)param)->runtime / 1024; + + cout << "Total time: " << ((thread_param *)param)->runtime << endl + << "Total ops: " << ((thread_param *)param)->ops << endl + << "Total ops throughput: " << ((thread_param *)param)->thput_of_ops << endl + << "Total 
sizes: " << ((thread_param *)param)->size << endl + << "Total size throughput: " << ((thread_param *)param)->thput_of_size << " KB" << endl; + + + //free(line); + //memcached_server_list_free(server); + pthread_exit(NULL); +} + +void *ibm_query_exec(void *param) { + return nullptr; +} + +void random_test(const workload_type& type, const int& snum) { + wt = type; + cp = ConfigParameter(type, snum); + + //cout << "11111111111111111" << endl; + //random_read_file(); + //cout << "22222222222222222" << endl; + //random_init(); + //cout << "33333333333333333" << endl; + + pthread_t threads[cp.THREAD_NUM]; + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);; + + pthread_mutex_init(&printmutex, NULL); + + thread_param tp[cp.THREAD_NUM]; + for (uint32_t t = 0; t < cp.THREAD_NUM; t++) + { + cout << "Threads = " << t << endl; + //tp[t].queries = queries; + tp[t].tid = t; + // tp[t].sop = sop_tmp; + tp[t].ops = tp[t].size = 0; + tp[t].runtime = tp[t].thput_of_ops = tp[t].thput_of_size = 0.0; + int rci; + if(wt == twitter) { + rci = pthread_create(&threads[t], &attr, twitter_query_exec, (void *) &tp[t]); + } else { + rci = pthread_create(&threads[t], &attr, ibm_query_exec, (void *) &tp[t]); + } + if (rci) { + perror("failed: pthread_create\n"); + exit(-1); + } + + } + + double total_ops_thputs = 0.0; + long double total_size_thputs = 0.0; + int total_ops = 0; + double total_time = 0.0; + unsigned long long total_size = 0; + vector latency; + + int nthreads = cp.THREAD_NUM; + cout << "end:333333333333333333333" << endl; + + for (uint32_t t = 0; t < cp.THREAD_NUM; t++) { + void *status; + int rci = pthread_join(threads[t], &status); + if (rci) { + perror("error, pthread_join\n"); + exit(-1); + } + + + total_time = total_time > tp[t].runtime? 
total_time: tp[t].runtime; + total_ops += tp[t].ops; + total_ops_thputs += tp[t].thput_of_ops; + total_size += tp[t].size; + total_size_thputs += tp[t].thput_of_size; + while(!tp[t].latency.empty()) { + latency.push_back(tp[t].latency.top()); + tp[t].latency.pop(); + } + //latency.insert(latency.end(), tp[t].latency.begin(),tp[t].latency.end()); + } + sort(latency.rbegin(),latency.rend()); + + double latency95 = latency[total_ops - int(total_ops * 0.95)]; + double latency99 = latency[total_ops - int(total_ops * 0.99)]; + double latency9999 = latency[total_ops - int(total_ops * 0.9999)]; + + cout << "Total time: " << total_time << endl + << "Total ops: " << total_ops << endl + << "Total op throughput: " << total_ops_thputs << endl + << "Total sizes: " << total_size << endl + << "Total size throughput: " << total_size_thputs << endl + << "95\% latency: " << latency95 *1000 << endl + << "99\% latency: " << latency99 *1000 << endl + << "99.99\% latency: " << latency9999 *1000 << endl; + + ofstream fout("/data/result", ios::out|ios::app); + //fout << snum << endl; + fout << "Random" << "\t" << nthreads << "\t" << total_time << "\t" << total_ops << "\t" << total_ops_thputs << "\t" + << total_size << "\t" << total_size_thputs << "\t" + << latency95 << "\t" << latency99 << "\t" << latency9999 << endl; + fout.close(); + + pthread_attr_destroy(&attr); +} + + diff --git a/src/Random.h b/src/Random.h new file mode 100644 index 0000000..4b5fed9 --- /dev/null +++ b/src/Random.h @@ -0,0 +1,12 @@ +#ifndef __RANDOM_H_ +#define __RANDOM_H_ + +#include "config.h" + +void random_read_file(const workload_type& type, const int& snum); +void random_init(); +static void *twitter_query_exec(void* param); +static void *ibm_query_exec(void* param); +void random_test(const workload_type& type, const int& snum); // Main Test + +#endif //__RANDOM_H_ \ No newline at end of file diff --git a/src/SPCache.cpp b/src/SPCache.cpp new file mode 100644 index 0000000..e33545c --- /dev/null +++ 
b/src/SPCache.cpp @@ -0,0 +1,273 @@ +// +// Created by Alfred on 2022/11/9. +// + +#include +#include "SPCache.h" +#include "toolbox.h" +#include "MemcachedClient.h" + +namespace SPCache { + static pthread_mutex_t printmutex; + workload_type wtype; + double SPFactor = 0.000006; + ConfigParameter cp; + vector ukeys; + map> kmeta; + + void initial(workload_type wt, const int& snum) { + wtype = wt; + if(wtype == twitter) { + cp = ConfigParameter(twitter, snum); + } else { + cp = ConfigParameter(ibm, snum); + } + + ukeys = readStat(cp.PATH_PREFIX + "/" + cp.STAT_FILE); + } + + void distribution() { + vector kvector; + MemcachedClient mc(cp.SERVER_INFO); + //size_t maxk = 0; + + for(auto &k: ukeys) { + //cout << "key = " << k.key << ", size = " << k.size << ", k = " << k.freq * k.size * SPFactor << endl; + //maxk = maxk > k.freq * k.size * SPFactor ? maxk: k.freq * k.size * SPFactor; + int knum = max(int(min(size_t(k.freq * k.size * SPFactor), cp.SERVER_INFO.size())), 1); + if(k.size / knum < 64) knum = k.size / 64; + knum = max(knum, 1); + //cout << "knum = " << knum << endl; + kvector.push_back(knum); + } + + //cout << "max k = " << maxk << endl; + + for(int i = 0; i < ukeys.size(); i ++) { + for(int j = 0; j < kvector[i]; j++) { + string ktmp = makeRandStr(100, true); + kmeta[ukeys[i].key].push_back(ktmp); + if(ukeys[i].size == 0) ukeys[i].size = 1; + string value = string(ukeys[i].size / kvector[i], '1'); + mc.insert(ktmp.c_str(), value.c_str()); + } + } + + cout << "Distribution finished!" 
<< endl; + } + + static void *twitter_query_exec(void *param) { + timeit tt; + MemcachedClient mc(cp.SERVER_INFO); + + string prefix = cp.PATH_PREFIX; + + pthread_mutex_lock(&printmutex); + cout << ((thread_param *)param)->tid <<": twitter_query_exec" << endl; + pthread_mutex_unlock(&printmutex); + + //pthread_mutex_lock(&printmutex); + char filename[255]; + pthread_mutex_lock(&printmutex); + //sprintf(filename, "d0t%dp%04d", cp.THREAD_NUM, ((thread_param *)param)->tid); + snprintf(filename, sizeof(filename), "d%dt%dp%04d", cp.DAY, cp.THREAD_NUM, ((thread_param *)param)->tid); + pthread_mutex_unlock(&printmutex); + //sprintf(filename, "d0t128p%04d", ((thread_param *)param)->tid); + string fname = prefix + "/" + filename; + //pthread_mutex_unlock(&printmutex); + + //pthread_mutex_lock (&printmutex); + cout << ((thread_param *)param)->tid <<",filename = " << fname << endl; + //pthread_mutex_unlock (&printmutex); + + //pthread_mutex_lock (&printmutex); + ifstream fin(fname); + + + if(!fin) { + cout << ((thread_param *)param)->tid <<": Error open trace file" << endl; + exit(-1); + } + //pthread_mutex_unlock (&printmutex); + + pthread_mutex_lock (&printmutex); + fprintf(stderr, "start benching using thread%u\n", ((thread_param *)param)->tid); + pthread_mutex_unlock (&printmutex); + + + vector qkeys; + while(fin.peek() != EOF) { + + char line[1000]; + long time_val; + char query_key[200]; + int linenum; + + pthread_mutex_lock (&printmutex); + linenum = 0; + while(fin.peek() != EOF and linenum != cp.ONCE_READ_LIMIT) { + fin.getline(line, 1000); + time_val = strtol(strtok(line, ","), NULL, 10); // time + qkeys.emplace_back(string(strtok(NULL, ","))); //key + linenum ++; + } + pthread_mutex_unlock (&printmutex); + + + for(int it = 0; it != linenum; it ++) { + string rst; + bool flag; + double max_time = 0; + size_t tsize = 0; + + //int tloc = FreqSearch(ukeys, 0, ukeys.size(), qkeys[it]); + if (kmeta.count(qkeys[it]) == 1) { + for(auto &pr: kmeta[qkeys[it]]) { + tt.start(); 
+ while (true) { + flag = mc.get(pr.c_str(), rst); + if (!rst.empty() || flag) break; + } + tsize += rst.size(); + tt.end(); + max_time = max_time > tt.passedtime()? max_time: tt.passedtime(); + } + } else { + tt.start(); + for(int ii = 0; ii < 3; ii ++) { + flag = mc.get(qkeys[it].c_str(), rst); + if (!rst.empty() || flag) break; + } + tsize = rst.size(); + tt.end(); + max_time = tt.passedtime(); + } + + + //tail latency + ((thread_param *) param)->latency.push(max_time); + + if (((thread_param *) param)->latency.size() >= cp.LATENCY_NUM) { + ((thread_param *) param)->latency.pop(); + } + //total running time + ((thread_param *) param)->runtime += max_time; //tt.passedtime(); + //sum ops + ((thread_param *) param)->ops++; + //sum size + ((thread_param *) param)->size += tsize; + } + qkeys.clear(); + vector().swap(qkeys); + } + fin.close(); + + ((thread_param *)param)->thput_of_ops = ((thread_param *)param)->ops / ((thread_param *)param)->runtime; + ((thread_param *)param)->thput_of_size = 1.0 * ((thread_param *)param)->size / ((thread_param *)param)->runtime / 1024; + + cout << "Total time: " << ((thread_param *)param)->runtime << endl + << "Total ops: " << ((thread_param *)param)->ops << endl + << "Total ops throughput: " << ((thread_param *)param)->thput_of_ops << endl + << "Total sizes: " << ((thread_param *)param)->size << endl + << "Total size throughput: " << ((thread_param *)param)->thput_of_size << " KB" << endl; + + + //free(line); + //memcached_server_list_free(server); + pthread_exit(NULL); + } + + static void *ibm_query_exec(void *param) {} + + void test(const int& snum) { + cp = ConfigParameter(twitter, snum); + pthread_t threads[cp.THREAD_NUM]; + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);; + + pthread_mutex_init(&printmutex, NULL); + + thread_param tp[cp.THREAD_NUM]; + for (uint32_t t = 0; t < cp.THREAD_NUM; t++) + { + cout << "Threads = " << t << endl; + //tp[t].queries = queries; 
+ tp[t].tid = t; + // tp[t].sop = sop_tmp; + tp[t].ops = tp[t].size = 0; + tp[t].runtime = tp[t].thput_of_ops = tp[t].thput_of_size = 0.0; + int rci; + if(wtype == ibm) { + rci = pthread_create(&threads[t], &attr, ibm_query_exec, (void *) &tp[t]); + } else { + rci = pthread_create(&threads[t], &attr, twitter_query_exec, (void *) &tp[t]); + } + if (rci) + { + perror("failed: pthread_create\n"); + exit(-1); + } + } + + double total_ops_thputs = 0.0; + long double total_size_thputs = 0.0; + int total_ops = 0; + double total_time = 0.0; + unsigned long long total_size = 0; + vector latency; + + int nthreads = cp.THREAD_NUM; + cout << "333333333333333333333" << endl; + + for (uint32_t t = 0; t < cp.THREAD_NUM; t++) { + void *status; + int rci = pthread_join(threads[t], &status); + if (rci) { + perror("error, pthread_join\n"); + exit(-1); + } + + + total_time = total_time > tp[t].runtime ? total_time: tp[t].runtime; + total_ops += tp[t].ops; + total_ops_thputs += tp[t].thput_of_ops; + total_size += tp[t].size; + total_size_thputs += tp[t].thput_of_size; + while(!tp[t].latency.empty()) { + latency.push_back(tp[t].latency.top()); + tp[t].latency.pop(); + } + } + cout << "4444444444444444444444444" << endl; + sort(latency.rbegin(),latency.rend()); + + double latency95 = latency[total_ops - int(total_ops * 0.95)]; + double latency99 = latency[total_ops - int(total_ops * 0.99)]; + double latency9999 = latency[total_ops - int(total_ops * 0.9999)]; + + cout << "Total time: " << total_time << endl + << "Total ops: " << total_ops << endl + << "Total op throughput: " << total_ops_thputs << endl + << "Total sizes: " << total_size << endl + << "Total size throughput: " << total_size_thputs << endl + << "95\% latency: " << latency95 << endl + << "99\% latency: " << latency99 << endl + << "99.99\% latency: " << latency9999 << endl; + + ofstream fout("/data/result", ios::out|ios::app); + //fout << "SP-Cache" << endl; + //fout << snum << endl; + fout << "SPCache" << "\t" << nthreads << 
"\t" << total_time << "\t" << total_ops << "\t" << total_ops_thputs << "\t" + << total_size << "\t" << total_size_thputs << "\t" + << latency95 << "\t" << latency99 << "\t" << latency9999 << endl; + fout.close(); + + pthread_attr_destroy(&attr); + //return 0; + ukeys.clear(); + vector().swap(ukeys); + kmeta.clear(); + } + +} \ No newline at end of file diff --git a/src/SPCache.h b/src/SPCache.h new file mode 100644 index 0000000..199fa4a --- /dev/null +++ b/src/SPCache.h @@ -0,0 +1,17 @@ +// +// Created by Alfred on 2022/11/9. +// + +#ifndef CORANA_SPCACHE_H +#define CORANA_SPCACHE_H + +#include "config.h" + +namespace SPCache { + void initial(workload_type wt, const int& snum); + void distribution(); + void test(const int& snum); +}; + + +#endif //CORANA_SPCACHE_H diff --git a/src/config.h b/src/config.h new file mode 100644 index 0000000..74d3863 --- /dev/null +++ b/src/config.h @@ -0,0 +1,310 @@ +// +// Created by Alfred on 2022/7/22. +// + +#ifndef CORANA_CONFIG_H +#define CORANA_CONFIG_H + +#include +#include +#include +#include +#include "fmt/core.h" + +using namespace std; + +enum workload_type{ + twitter = 0, + meta, + ibm +}; + +typedef struct { + uint32_t tid; + double runtime; + //vector latency; + priority_queue, greater<> > latency; + int ops; + double thput_of_ops; + unsigned long long size; + long double thput_of_size; +} thread_param; + +typedef struct k{ + string key; + size_t size; + int freq; + bool operator==(const struct k& ln) const { + return this->key == ln.key; + } +}key_param; + +typedef struct { + string ckey; // chunk key + size_t offset; + size_t length; // value length + size_t stripe_id; + short chunk_id; +}agg_key; + +static bool key_freq_comp(const key_param &a, const key_param &b) { + if (a.freq > b.freq) { + return true; + } else { + return false; + } +} + +static bool key_string_comp(const key_param &a, const key_param &b) { + if (a.key > b.key) { + return true; + } else { + return false; + } +} + +//static void readStat(const 
string& stats_file, vector& keys) {
+/**
+ * Load the key statistics file: whitespace-separated "key size freq" records.
+ * Prints an error and exits the process if the file cannot be opened.
+ *
+ * @param stats_file path of the statistics file
+ * @return the records in file order
+ */
+static vector<key_param> readStat(const string& stats_file) {
+    vector<key_param> keys;
+    ifstream fin(stats_file);
+
+    if(!fin.is_open()) {
+        cout << "Error opening stats file!" << endl;
+        exit(-1);
+    }
+
+    // Extraction is the loop condition, so a truncated or malformed trailing
+    // record is dropped instead of being stored with uninitialized size/freq.
+    key_param tmp;
+    while(fin >> tmp.key >> tmp.size >> tmp.freq) {
+        keys.push_back(tmp);
+    }
+
+    // The line count is reported twice, as before (the old separate counter
+    // always equalled keys.size()).
+    cout << "stat line = " << keys.size() << endl;
+    cout << "stat line = " << keys.size() << endl;
+
+    return keys;
+}
+
+/**
+ * Binary search for the record whose key equals ss in keys[start, end).
+ *
+ * The search moves right when keys[mid].key > ss, so the range must be sorted
+ * in DESCENDING key order (the order key_string_comp produces).
+ *
+ * @return index of the match, or -1 if ss is absent
+ */
+static int FreqSearch(const vector<key_param>& keys, long start, long end, const string& ss){
+    long left = start;
+    long right = end - 1;
+
+    // the search domain is the closed interval [left, right]
+    while(left <= right) {
+        // measured from left so the midpoint computation cannot overflow
+        const long mid = left + (right - left) / 2;
+        if(keys[mid].key == ss) {
+            return static_cast<int>(mid);
+        }
+        if(keys[mid].key > ss) {
+            left = mid + 1;
+        } else {
+            right = mid - 1;
+        }
+    }
+    return -1;
+}
+
+/**
+ * Same search over a plain list of key strings; the range must likewise be
+ * sorted in DESCENDING order.
+ *
+ * @return index of the match, or -1 if ss is absent
+ */
+static int FreqSearch(const vector<string>& keys, long start, long end, const string& ss){
+    long left = start;
+    long right = end - 1;
+
+    // the search domain is the closed interval [left, right]
+    while(left <= right) {
+        // measured from left so the midpoint computation cannot overflow
+        const long mid = left + (right - left) / 2;
+        if(keys[mid] == ss) {
+            return static_cast<int>(mid);
+        }
+        if(keys[mid] > ss) {
+            left = mid + 1;
+        } else {
+            right = mid - 1;
+        }
+    }
+    return -1;
+}
+
+/**
+ * Experiment configuration read from ../config.json.
+ *
+ * Every scalar has a default member initializer so that an object built when
+ * the config file is missing or unparsable holds zeros rather than
+ * indeterminate values (TRACE_NO is also never assigned for the ibm workload).
+ */
+class ConfigParameter {
+public:
+    int TRACE_NO = 0;
+    string STAT_FILE;
+    string STREAM_FILE_PREFIX;
+    string PATH_PREFIX;
+    int HOTEST_FREQ_LIMIT = 0;
+    int FREQ_LIST_SIZE = 0;
+    int CMSKETCH_DEVIATION = 0;
+    int DAY = 0;
+    int DAY_NUM = 0;
+    int WINDOW_SIZE = 0;
+    int FREQ_LIMIT = 0;
+    int ONCE_READ_LIMIT = 0;
+    int EC_K = 0;
+    int EC_N = 0;
+    int THREAD_NUM = 0;
+    int LATENCY_NUM = 0;
+    int LOW_LIMIT = 0;
+    int CHUNK_SIZE = 0;
+    int SERVER_NUM = 0;
+    int variation = 0;
+    vector<pair<string, int>> SERVER_INFO;   // (ip, port) of each cache server
+
+    /*ConfigParameter() {
+        this->STAT_FILE = "";
+        this->STREAM_FILE_PREFIX= "";
+        this->PATH_PREFIX = "";
+        this->FREQ_LIST_SIZE = 0;
+        this->DAY_NUM = 0;
+        this->WINDOW_SIZE = 0;
+        this->HOTEST_FREQ_LIMIT = 0;
+        this->FREQ_LIMIT = 0;
+        this->ONCE_READ_LIMIT = 0;
+        this->CMSKETCH_DEVIATION = 0;
+    }*/
+
+    /**
+     * Read ../config.json and fill the members for the chosen workload.
+     *
+     * @param wt       workload whose JSON section is read
+     * @param trace_no trace number (twitter/meta only; overrides the JSON value)
+     * @param hotlim   value stored in HOTEST_FREQ_LIMIT (twitter/meta only)
+     * @param snum     experiment variable, stored in `variation`; it is also
+     *                 used as SERVER_NUM (twitter), WINDOW_SIZE (meta) or DAY (ibm)
+     *
+     * On an open or parse failure a message is printed and the members keep
+     * their defaults.
+     */
+    explicit ConfigParameter(enum workload_type wt = twitter, int trace_no = 2, int hotlim = 3444, int snum = 9) {
+        Json::Reader reader;
+        Json::Value root;
+
+// #ifdef LOCAL_TEST
+//         ifstream in("../config_loc.json", ios::binary);
+//         cout << "Local configure" << endl;
+// #else
+        ifstream in("../config.json", ios::binary);
+        cout << "Standard configure" << endl;
+// #endif
+
+        if (!in.is_open()) {
+            cout << "Error opening config file" << endl;
+            return;
+        }
+
+        cout << "workloadtype = " << wt << endl;
+        this->variation = snum;
+        if(reader.parse(in, root)) {
+            switch (wt) {
+                case twitter:
+                    // this->TRACE_NO = root["twitter"]["trace_no"].asInt();
+                    this->TRACE_NO = trace_no;
+                    this->STAT_FILE = "stat" + fmt::format("{:02d}", this->TRACE_NO);
+                    this->STREAM_FILE_PREFIX= "workload"+fmt::format("{:02d}_", this->TRACE_NO);
+                    this->PATH_PREFIX = root["twitter"]["path_prefix"].asString();
+                    this->FREQ_LIST_SIZE = root["twitter"]["freq_list_size"].asInt();
+                    this->DAY = root["twitter"]["day"].asInt();
+                    //this->DAY = snum;
+                    this->DAY_NUM = root["twitter"]["day_num"].asInt();
+                    this->WINDOW_SIZE = root["twitter"]["window_size"].asInt();
+                    // this->WINDOW_SIZE = snum;
+                    //this->HOTEST_FREQ_LIMIT = root["twitter"]["hotest_freq_limit"].asInt();
+                    this->HOTEST_FREQ_LIMIT = hotlim;
+                    this->FREQ_LIMIT = root["twitter"]["freq_limit"].asInt();
+                    //this->FREQ_LIMIT = snum;
+                    this->ONCE_READ_LIMIT = root["twitter"]["once_read_num"].asInt();
+                    this->CMSKETCH_DEVIATION = root["twitter"]["cmsketch_deviation"].asInt();
+                    this->LATENCY_NUM = root["twitter"]["latency_num"].asInt();
+                    this->LOW_LIMIT = root["twitter"]["low_limit"].asInt();
+                    // this->SERVER_NUM = root["twitter"]["server_num"].asInt();
+                    this->SERVER_NUM = this->variation;
+                    break;
+                case meta:
+                    // this->TRACE_NO = root["meta"]["trace_no"].asInt();
+                    this->TRACE_NO = trace_no;
+                    this->STAT_FILE = "stat" + fmt::format("{:02d}", this->TRACE_NO);
+                    this->STREAM_FILE_PREFIX= "kvcache_traces_";
+                    this->PATH_PREFIX = root["meta"]["path_prefix"].asString() + to_string(this->TRACE_NO);
+                    this->FREQ_LIST_SIZE = root["meta"]["freq_list_size"].asInt();
+                    this->DAY = root["meta"]["day"].asInt();
+                    //this->DAY = snum;
+                    this->DAY_NUM = root["meta"]["day_num"].asInt();
+                    // this->WINDOW_SIZE = root["meta"]["window_size"].asInt();
+                    this->WINDOW_SIZE = snum;
+                    //this->HOTEST_FREQ_LIMIT = root["meta"]["hotest_freq_limit"].asInt();
+                    this->HOTEST_FREQ_LIMIT = hotlim;
+                    this->FREQ_LIMIT = root["meta"]["freq_limit"].asInt();
+                    //this->FREQ_LIMIT = snum;
+                    this->ONCE_READ_LIMIT = root["meta"]["once_read_num"].asInt();
+                    this->CMSKETCH_DEVIATION = root["meta"]["cmsketch_deviation"].asInt();
+                    this->LATENCY_NUM = root["meta"]["latency_num"].asInt();
+                    this->LOW_LIMIT = root["meta"]["low_limit"].asInt();
+                    this->SERVER_NUM = root["meta"]["server_num"].asInt();
+                    //this->SERVER_NUM = snum;
+                    break;
+                case ibm:
+                    this->STAT_FILE = root["ibm"]["stat_file"].asString();
+                    this->STREAM_FILE_PREFIX= root["ibm"]["stream_file_prefix"].asString();
+                    this->PATH_PREFIX = root["ibm"]["path_prefix"].asString();
+                    this->FREQ_LIST_SIZE = root["ibm"]["freq_list_size"].asInt();
+                    //this->DAY = root["ibm"]["day"].asInt();
+                    this->DAY = snum;
+                    this->DAY_NUM = root["ibm"]["day_num"].asInt();
+                    this->WINDOW_SIZE = root["ibm"]["window_size"].asInt();
+                    this->HOTEST_FREQ_LIMIT = root["ibm"]["hotest_freq_limit"].asInt();
+                    this->FREQ_LIMIT = root["ibm"]["freq_limit"].asInt();
+                    this->ONCE_READ_LIMIT = root["ibm"]["once_read_num"].asInt();
+                    this->CMSKETCH_DEVIATION = root["ibm"]["cmsketch_deviation"].asInt();
+                    this->LATENCY_NUM = root["ibm"]["latency_num"].asInt();
+                    this->LOW_LIMIT = root["ibm"]["low_limit"].asInt();
+                    this->SERVER_NUM = root["ibm"]["server_num"].asInt();
+                    //this->SERVER_NUM = snum;
+                    break;
+                default:
+                    cout << "workloadtype2 = " << wt << endl;
+            }
+            this->EC_N = root["ec_n"].asInt();
+            this->EC_K = root["ec_k"].asInt();
+            this->THREAD_NUM = root["thread_num"].asInt();
+            this->CHUNK_SIZE = root["chunk_size"].asInt();
+
+            // keep only the first SERVER_NUM entries of "server_info"
+            int count = 0;
+            const Json::Value arrayObj = root["server_info"];
+            for (const auto & i : arrayObj)
+            {
+                pair<string, int> tmp;
+                tmp.first = i["ip"].asString();
+                tmp.second = i["port"].asInt();
+                SERVER_INFO.push_back(tmp);
+                count ++;
+                if(count == this->SERVER_NUM) break;
+            }
+
+            cout << "Configuration Parameters :" << this->PATH_PREFIX << endl;
+        } else {
+            cout << "parse error" << endl;
+        }
+
+        in.close();
+    }
+
+    // Member-wise copy. The previous hand-written copy constructor skipped
+    // `variation`, leaving it indeterminate in every copy.
+    ConfigParameter(const ConfigParameter& cp) = default;
+
+};
+
+#endif //CORANA_CONFIG_H
diff --git a/src/config.json b/src/config.json
new file mode 100644
index 0000000..3cc7d1c
--- /dev/null
+++ b/src/config.json
@@ -0,0 +1,115 @@
+{
+  "twitter": {
+    "trace_no": 99,
+    "stat_file" : "stat99",
+    "stream_file_prefix": "workload",
+    "path_prefix": "/home/flnan/twitter",
+    "freq_list_size": 12000,
+    "day": 0,
+    "day_num": 7,
+    "window_size": 100,
+    "hotest_freq_limit": 42,
+    "cmsketch_deviation": 5000,
+    "freq_limit": 812,
+    "once_read_num": 1000000,
+    "latency_num": 800000,
+    "low_limit": 1048576,
+    "server_num": 6
+  },
+  "meta": {
+    "trace_no": 202206,
+    "stat_file" : "stat",
+    "stream_file_prefix": "kvcache_traces_",
+    "path_prefix": "/data/kvcache",
+
"freq_list_size": 12000, + "day": 0, + "day_num": 5, + "window_size": 100, + "hotest_freq_limit": 403, + "cmsketch_deviation": 5000, + "freq_limit": 812, + "once_read_num": 1000000, + "latency_num": 800000, + "low_limit": 1048576, + "server_num": 6 + }, + "ibm": { + "stat_file": "stat01", + "stream_file_prefix": "work_cluster", + "path_prefix": "/home/flnan/ibm", + "freq_list_size": 12000, + "day_num": 7, + "window_size": 100, + "hotest_freq_limit": 10, + "cmsketch_deviation": 900, + "freq_limit": 2, + "once_read_num": 10000, + "latency_num": 10000, + "low_limit": 1048576, + "server_num": 13 + }, + "ec_n" : 5, + "ec_k" : 3, + "thread_num" : 128, + "chunk_size" : 4096, + "server_info": [ + { + "ip": "10.26.43.54", + "port": 11211 + },{ + "ip": "10.26.43.54", + "port": 11212 + },{ + "ip": "10.26.43.54", + "port": 11213 + },{ + "ip": "10.26.43.54", + "port": 11214 + },{ + "ip": "10.26.43.54", + "port": 11215 + },{ + "ip": "10.26.43.54", + "port": 11216 + },{ + "ip": "172.18.96.16", + "port": 11211 + },{ + "ip": "172.18.96.17", + "port": 11211 + },{ + "ip": "172.18.96.18", + "port": 11211 + },{ + "ip": "172.18.96.19", + "port": 11211 + },{ + "ip": "172.18.96.20", + "port": 11211 + },{ + "ip": "172.18.96.21", + "port": 11211 + },{ + "ip": "172.18.96.22", + "port": 11211 + },{ + "ip": "172.18.96.23", + "port": 11211 + },{ + "ip": "172.18.96.24", + "port": 11211 + },{ + "ip": "172.18.96.25", + "port": 11211 + },{ + "ip": "172.18.96.26", + "port": 11211 + },{ + "ip": "172.18.96.27", + "port": 11211 + },{ + "ip": "172.18.96.28", + "port": 11211 + } + ] +} diff --git a/src/eccache.cpp b/src/eccache.cpp new file mode 100644 index 0000000..8455904 --- /dev/null +++ b/src/eccache.cpp @@ -0,0 +1,561 @@ +// +// Created by Alfred on 2022/9/11. 
+//
+
+#include "eccache.h"
+#include "MemcachedClient.h"
+#include
+#include "toolbox.h"
+#include
+#include
+
+// NOTE(review): the header names of the three bare #include lines above were
+// lost when this patch was extracted; restore them from the repository.
+
+namespace eccache {
+    // One parity chunk waiting to be written: its cache key and its bytes.
+    typedef struct {
+        string pkey;
+        string value;
+    } pinfo;
+
+
+    int k;   // number of data chunks per stripe (EC_K)
+    int m;   // number of parity chunks per stripe (EC_N - EC_K)
+    //const int LOW_LIMIT = 1024 * 1024; // 1mb, lower than 1mb using replicas
+    map<string, vector<string>> key_record; // object key -> names of its chunks
+    vector<key_param> keys;                 // sorted descending by key in init()
+    workload_type wtype;
+    ConfigParameter cpl;
+    //static std::vector>& server_info;
+    vector<agg_key> stripe_key;             // stripe_key[i] locates keys[i]
+    // NOTE(review): the element type of chunk_keys was lost in extraction and
+    // nothing in this namespace uses it; the declaration is kept as found.
+    vector > chunk_keys;
+    pthread_mutex_t printmutex;
+    vector<pinfo> parity;
+
+
+    /**
+     * Build the chunk-name table for every object.
+     *
+     * An object larger than LOW_LIMIT is split into the smallest number of
+     * stripes whose per-chunk size is at most LOW_LIMIT; each stripe
+     * contributes EC_K data names ("<key><stripe:04d><i>") followed by
+     * EC_N - EC_K parity names ("<key><stripe:04d>p<i>"). Any other object
+     * maps to its own key.
+     */
+    map<string, vector<string>> keyRecord(vector<key_param> &keys) {
+        map<string, vector<string>> record;
+        for (auto &key: keys) {
+            if (key.size > eccache::cpl.LOW_LIMIT) {
+                int nstrip;
+                for (nstrip = 1;; nstrip++) {
+                    if (key.size / eccache::cpl.EC_K / nstrip <= eccache::cpl.LOW_LIMIT) break;
+                }
+                vector<string> &names = record[key.key];
+                for (int strip = 0; strip < nstrip; strip++) {
+                    char buffer[250];
+                    for (int i = 0; i < eccache::cpl.EC_K; i++) {
+                        snprintf(buffer, 250, "%s%04d%d", key.key.c_str(), strip, i);
+                        names.push_back(buffer);
+                    }
+                    for (int i = 0; i < eccache::cpl.EC_N - eccache::cpl.EC_K; i++) {
+                        snprintf(buffer, 250, "%s%04dp%d", key.key.c_str(), strip, i);
+                        names.push_back(buffer);
+                    }
+                }
+            } else {
+                record[key.key].push_back(key.key);
+                // cout << key.key << endl;
+            }
+        }
+        return record;
+    }
+
+    /**
+     * Store the configuration, load the statistics file and build the chunk
+     * table. The key list is then sorted with key_string_comp, the order
+     * FreqSearch relies on.
+     */
+    void init(const ConfigParameter &cp, const workload_type &wt)
+    {
+        k = cp.EC_K;
+        m = cp.EC_N - cp.EC_K;
+        wtype = wt;
+        cpl = cp;
+        keys = readStat(cp.PATH_PREFIX + "/" + cp.STAT_FILE);
+        key_record = keyRecord(eccache::keys);
+        //readStat(cp.PATH_PREFIX + "/" + cp.STAT_FILE, skeys);
+        sort(keys.begin(), keys.end(), key_string_comp);
+
+        cout << "initialization finished" << endl;
+    }
+
+    /**
+     * Write every object of the IBM workload to the cache servers. Objects
+     * above LOW_LIMIT are erasure-coded stripe by stripe; the others are
+     * written whole.
+     */
+    void ibm_distribution() {
+        MemcachedClient mc(cpl.SERVER_INFO);
+        prealloc_encode pEncode(cpl.EC_N, cpl.EC_K);
+        const int n = k + m;
+        ErasureCode ec(n, k);
+
+        int tloc = FreqSearch(keys, 0, keys.size(), "00000971a34ea8a0");
+        cout << "loc = " << tloc << endl;
+        //key_param key = keys[tloc];
+        if (keys.size() >= 2) {   // keys.end() - 2 is only valid with two or more keys
+            cout << "last one = " << (keys.end() - 2)->key << endl;
+        }
+        cout << "keys size = " << keys.size() << endl;
+        for (auto &key: keys) {
+
+            if (key.size > cpl.LOW_LIMIT) {
+                const vector<string> &chunk_names = key_record[key.key];
+                //string object = string(key.size, '1');
+                int nstrip = chunk_names.size() / n;
+                auto strip_len = size_t(key.size / k / nstrip);
+                //cout << "stripe len = " << strip_len << endl;
+                if (size_t(strip_len * k * nstrip) < key.size) {
+                    strip_len += 1;   // round up so all stripes together cover the object
+                }
+                cout << "key = " << key.key << endl;
+                //cout << strip_len * k * nstrip - key.size << endl;
+                //cout << "stripe len = " << strip_len << endl;
+
+                //cout << "size = " << key.size << endl;
+                //cout << "nstripe = " << nstrip << endl;
+                const int fill_len = strip_len * k * nstrip - key.size;
+                //cout << "filling len = " << fill_len << endl;
+                //object += string(fill_len, '0');
+                for (int i = 0; i < nstrip; i++) {
+                    int flen = 0;
+                    string stripe;
+                    if (i < nstrip - 1) {
+                        stripe = string(k * strip_len, '1');
+                    } else {
+                        // the last stripe carries the zero padding
+                        stripe = string(k * strip_len - fill_len, '1') + string(fill_len, '0');
+                    }
+                    uint8_t **source = ec.string2array(stripe, &flen);
+                    if (flen != 0) {
+                        cout << "error long" << endl;
+                    }
+                    ec.encode_data(pEncode, source, strip_len);
+                    for (int j = 0; j < n; j++) {
+                        cout << "i = " << i << ", j = " << j << endl;
+                        cout << "stripe len = " << strip_len << endl;
+                        string chunk = ec.get_line(source, strip_len, j);
+                        // Each stripe owns n consecutive names (k data + m parity).
+                        // The old index i * k + j reused names across stripes.
+                        mc.insert(chunk_names[i * n + j].c_str(), chunk.c_str());
+                    }
+                    // Array form, as twitter_distribution() frees the same buffers.
+                    // NOTE(review): assumes string2array allocates with new[] - confirm.
+                    for (int c = 0; c < n; c++) {
+                        delete [] source[c];
+                    }
+                    delete [] source;
+                }
+            } else {
+                string object = string(key.size, '1');
+                cout << key_record[key.key][0] << endl;
+                //cout << key.size << endl;
+                mc.insert(key_record[key.key][0].c_str(), object.c_str());
+
+            }
+        }
+    }
+
+    /**
+     * Pack the small objects of the Twitter/Meta workloads into stripes of
+     * k * CHUNK_SIZE bytes, record where each object lives in stripe_key,
+     * compute the parity chunks, then write objects and parities.
+     *
+     * An object of a full stripe or more is not packed; it gets a random
+     * chunk key of its own.
+     */
+    void twitter_distribution() {
+        MemcachedClient mc(cpl.SERVER_INFO);
+        const int n = k + m;
+        ErasureCode ec(n, k);
+
+        prealloc_encode pEncode(cpl.EC_N, cpl.EC_K);
+
+        // Start from empty tables: stripe_key is indexed by position in keys,
+        // so entries left over from an earlier run would be misattributed.
+        stripe_key.clear();
+        parity.clear();
+
+        size_t pr = 0;
+        int stripe_id = 0;
+        while (true) {
+
+            string stripe;
+            vector<string> ckey(n);
+            for (auto &name : ckey) {
+                name = makeRandStr(100, true);
+            }
+
+            const size_t stripe_capacity = cpl.CHUNK_SIZE * k;
+            size_t clength = stripe_capacity;   // bytes still free in this stripe
+
+            for (; pr != keys.size(); pr++) {
+                if (keys[pr].size < clength) {
+                    agg_key tmp;
+                    short kno = stripe.size() / cpl.CHUNK_SIZE;
+                    tmp.ckey = ckey[kno];
+                    tmp.offset = stripe.size() % cpl.CHUNK_SIZE;
+                    if (keys[pr].size == 0) keys[pr].size = 1;
+                    tmp.length = keys[pr].size;
+                    tmp.chunk_id = kno;
+                    tmp.stripe_id = stripe_id;
+                    clength -= keys[pr].size;
+                    stripe_key.push_back(tmp);
+                    stripe += string(keys[pr].size, '1');
+                } else if (keys[pr].size >= stripe_capacity) {
+                    // ">=" rather than ">": an object of exactly one stripe can
+                    // never satisfy "size < clength", so with ">" it was retried
+                    // on a fresh stripe forever.
+                    //cout << "key = " << keys[pr].key << ", size = " << keys[pr].size << endl;
+                    agg_key tmp;
+                    tmp.ckey = makeRandStr(100, true);
+                    stripe_key.push_back(tmp);
+                    continue;
+                    //exit(-1);
+                } else {
+                    // does not fit in what is left: pad, close the stripe, retry it next time
+                    stripe += string(clength, '#');
+                    clength = 0;
+                    break;
+                }
+            }
+
+            if (clength != 0) stripe += string(clength, '#');
+
+            int fill = 0;
+            uint8_t **source = ec.string2array(stripe, &fill);
+            ec.encode_data(pEncode, source, cpl.CHUNK_SIZE);
+
+            /*cout << "stripe len = " << stripe.length() << endl;
+            for(int i = 0; i < n; i ++) {
+                cout << source[i] << endl;
+            }*/
+
+            for (int i = 0; i < m; i++) {
+                pinfo tmp;
+                tmp.value = ec.get_line(source, cpl.CHUNK_SIZE, k + i);
+                //tmp.value = source[k+i];
+                tmp.pkey = ckey[k + i];
+                //cout << "parity " << i << " value is " << tmp.value << endl;
+                parity.push_back(tmp);
+            }
+
+            // Release the buffers before the exit test; the last stripe's
+            // buffers used to leak because the break came first.
+            for(int i = 0; i < n; i ++) {
+                delete [] source[i];
+            }
+            delete [] source;
+
+            if (pr == keys.size())
+                break;
+            stripe_id++;
+        }
+
+        cout << "22222222222222222222222222" << endl;
+
+        for (size_t i = 0; i < keys.size(); i++) {
+            size_t size = keys[i].size;
+            if (size == 0) {
+                size = 1;
+            }
+            //cout << "key = " << keys[i].key << ", size = " << size << endl;
+            mc.gset(stripe_key[i].ckey.c_str(), keys[i].key.c_str(), string(size, '1').c_str());
+        }
+
+        for (auto &p: parity) {
+            mc.insert(p.pkey.c_str(), p.value.c_str());
+        }
+    }
+
+    /** Run the distribution routine that matches the workload chosen in init(). */
+    void distribution() {
+        if (wtype == ibm) {
+            ibm_distribution();
+        } else {
+            twitter_distribution();
+        }
+
+        cout << "Objects distribution finished." << endl;
+    }
+
+    /**
+     * Worker thread for the Twitter/Meta workloads: replays the trace file
+     * "<PATH_PREFIX>/d<DAY>t<THREAD_NUM>p<tid>" (lines "time,key,...") and
+     * records latency, operation count and bytes in the thread_param.
+     *
+     * @param param pointer to this thread's thread_param
+     */
+    static void *twitter_query_exec(void *param) {
+        thread_param *tp = (thread_param *) param;
+        timeit tt;
+        MemcachedClient mc(cpl.SERVER_INFO);
+
+        string prefix = cpl.PATH_PREFIX;
+
+        pthread_mutex_lock(&printmutex);
+        cout << tp->tid << ": twitter_query_exec" << endl;
+        pthread_mutex_unlock(&printmutex);
+
+        char filename[255];
+        // %04u: tid is unsigned (same text as the old %04d for real thread ids)
+        snprintf(filename, sizeof(filename), "d%dt%dp%04u", cpl.DAY, cpl.THREAD_NUM, tp->tid);
+        //sprintf(filename, "d0t128p%04d", ((thread_param *)param)->tid);
+        string fname = prefix + "/" + filename;
+
+        cout << tp->tid << ",filename = " << fname << endl;
+
+        ifstream fin(fname);
+
+
+        if (!fin) {
+            cout << tp->tid << ": Error open trace file" << endl;
+            exit(-1);
+        }
+
+        pthread_mutex_lock(&printmutex);
+        fprintf(stderr, "start benching using thread%u\n", tp->tid);
+        pthread_mutex_unlock(&printmutex);
+
+
+        vector<string> qkeys;
+        while (fin.peek() != EOF) {
+
+            char line[1000];
+            int linenum;
+
+            // strtok keeps hidden static state, so this mutex also keeps the
+            // threads from tokenizing at the same time.
+            pthread_mutex_lock(&printmutex);
+            linenum = 0;
+            while (fin.peek() != EOF and linenum != cpl.ONCE_READ_LIMIT) {
+                fin.getline(line, sizeof(line));
+                char *time_tok = strtok(line, ",");                      // time (unused)
+                char *key_tok = time_tok ? strtok(NULL, ",") : NULL;     // key
+                // A blank or key-less line is skipped; its NULL token used to
+                // go straight into strtol / string.
+                if (key_tok == NULL) continue;
+                qkeys.emplace_back(key_tok);
+                linenum++;
+            }
+            pthread_mutex_unlock(&printmutex);
+
+
+            for (int it = 0; it != linenum; it++) {
+                string rst;
+                bool flag;
+
+                tt.start();
+                int tloc = FreqSearch(keys, 0, keys.size(), qkeys[it]);
+                if(tloc != -1) {
+                    // known object: fetch it from its chunk, up to three attempts
+                    string ckeyt = stripe_key[tloc].ckey;
+                    for(int ii = 0; ii < 3; ii ++) {
+                        flag = mc.gget(ckeyt.c_str(), qkeys[it].c_str(), rst);
+                        if (!rst.empty() || flag) break;
+                    }
+                } else {
+                    for(int ii = 0; ii < 3; ii ++) {
+                        flag = mc.get(qkeys[it].c_str(), rst);
+                        if (!rst.empty() || flag) break;
+                    }
+                }
+                tt.end();
+
+                //tail latency: the min-heap drops its smallest sample once it
+                //reaches LATENCY_NUM entries, so only the slowest requests stay
+                tp->latency.push(tt.passedtime());
+
+                if (tp->latency.size() >= cpl.LATENCY_NUM) {
+                    tp->latency.pop();
+                }
+                //total running time
+                tp->runtime += tt.passedtime();
+                //sum ops
+                tp->ops++;
+                //sum size
+                tp->size += rst.size();
+            }
+            qkeys.clear();
+            vector<string>().swap(qkeys);
+        }
+        fin.close();
+
+        // a thread that served nothing keeps its zero throughputs (no division by 0)
+        if (tp->runtime > 0) {
+            tp->thput_of_ops = tp->ops / tp->runtime;
+            tp->thput_of_size = 1.0 * tp->size / tp->runtime / 1024;
+        }
+
+        cout << "Total time: " << tp->runtime << endl
+             << "Total ops: " << tp->ops << endl
+             << "Total ops throughput: " << tp->thput_of_ops << endl
+             << "Total sizes: " << tp->size << endl
+             << "Total size throughput: " << tp->thput_of_size << " KB" << endl;
+
+
+        //free(line);
+        //memcached_server_list_free(server);
+        return NULL;
+    }
+
+    /**
+     * Worker thread for the IBM workload: replays the trace file
+     * "d<DAY>t<THREAD_NUM>p<tid>" (space-separated, key in the third field)
+     * and records latency, operation count and fetched bytes.
+     *
+     * NOTE(review): unlike twitter_query_exec the file name is not prefixed
+     * with PATH_PREFIX, so it is resolved against the working directory -
+     * confirm this is intended.
+     *
+     * @param param pointer to this thread's thread_param
+     */
+    static void *ibm_query_exec(void *param) {
+        thread_param *tp = (thread_param *) param;
+        cout << tp->tid << " random_query_exec: 11111111111111111" << endl;
+        MemcachedClient mc(cpl.SERVER_INFO);
+        timeit tt;
+
+
+        char filename[255];
+        snprintf(filename, sizeof(filename), "d%dt%dp%04u", cpl.DAY, cpl.THREAD_NUM, tp->tid);
+        string fname = filename;
+        //string fname = "/home/flnan/ibm/t16_ibm00d01p" + to_string(((thread_param *)param)->tid);
+        pthread_mutex_lock(&printmutex);
+        cout << tp->tid << ",filename = " << fname << endl;
+        pthread_mutex_unlock(&printmutex);
+        ifstream fin(fname);
+
+
+        if (!fin) {
+            cout << "Error open trace file" << endl;
+            exit(-1);
+        }
+
+        pthread_mutex_lock(&printmutex);
+        cout << tp->tid << " random_query_exec:2222222222222222222222222" << endl;
+        pthread_mutex_unlock(&printmutex);
+
+        pthread_mutex_lock(&printmutex);
+        fprintf(stderr, "start benching using thread%u\n", tp->tid);
+        pthread_mutex_unlock(&printmutex);
+
+
+        // heap-backed batch buffer (was a stack array of ONCE_READ_LIMIT strings)
+        vector<string> qkeys;
+        while (fin.peek() != EOF) {
+            char line[1000];
+            bool flag;
+            int linenum;
+
+            qkeys.clear();
+
+            // strtok keeps hidden static state, so this mutex also keeps the
+            // threads from tokenizing at the same time.
+            pthread_mutex_lock(&printmutex);
+            linenum = 0;
+            while (fin.peek() != EOF and linenum != cpl.ONCE_READ_LIMIT) {
+                fin.getline(line, sizeof(line));
+                char *time_tok = strtok(line, " ");                        // time (unused)
+                char *second_tok = time_tok ? strtok(NULL, " ") : NULL;    // second field (unused)
+                char *key_tok = second_tok ? strtok(NULL, " ") : NULL;     // key
+                // Short lines are skipped; the old code copied the second field
+                // into a 100-byte buffer with strcpy and never checked for NULL.
+                if (key_tok == NULL) continue;
+                qkeys.emplace_back(key_tok);
+                linenum++;
+            }
+            pthread_mutex_unlock(&printmutex);
+
+            //cout << "Search = " << FreqSearch(keys, 0 , keys.size(), "56f24fa744aa5b54");
+
+            for (int it = 0; it != linenum; it++) {
+                string &query = qkeys[it];
+                string rst;
+                size_t fetched = 0;   // bytes returned for this request
+                tt.start();
+                if (!query.empty() && query[query.size() - 1] == '\n') query = query.substr(0, query.size() - 1);
+                if (!query.empty() && query[query.size() - 1] == '\r') query = query.substr(0, query.size() - 1);
+                //strcpy(query_key, qkeys[it].c_str());
+                int loc = FreqSearch(keys, 0, keys.size(), query);
+                if (loc == -1) {
+                    //cout << (qkeys[it][qkeys[it].size() - 1] == '\n') << endl;
+                    //cout << "Error keys = " << qkeys[it] << endl;
+                    //exit(-1);
+                    for(int ii = 0; ii < 3; ii ++) {//while (true) {
+                        flag = mc.get(query.c_str(), rst);
+                        if (!rst.empty()) break;
+                    }
+                    fetched = rst.size();
+                } else {
+                    // find() instead of operator[]: the table is shared by all
+                    // worker threads and must not be modified here.
+                    auto rec = key_record.find(query);
+                    if (rec != key_record.end() && !rec->second.empty()) {
+                        const vector<string> &chunk_names = rec->second;
+                        // "<=" mirrors keyRecord(), which stripes only sizes > LOW_LIMIT;
+                        // with "<" an object of exactly LOW_LIMIT bytes was never fetched.
+                        if (keys[loc].size <= cpl.LOW_LIMIT) {
+                            flag = mc.get(chunk_names[0].c_str(), rst);
+                            fetched = rst.size();
+                            //cout << rst << endl;
+                            //sleep(100);
+                        } else {
+                            int stripe_num = chunk_names.size() / cpl.EC_N;
+                            for (int i = 0; i < stripe_num; i++) {
+                                // read EC_K + 1 chunks of stripe i; the old index [i]
+                                // fetched one unrelated name over and over
+                                for (int j = 0; j < cpl.EC_K + 1; j++) {
+                                    size_t idx = size_t(i) * cpl.EC_N + j;
+                                    if (idx >= chunk_names.size()) break;
+                                    flag = mc.get(chunk_names[idx].c_str(), rst);
+                                    fetched += rst.size();
+                                    //sleep(100);
+                                }
+                            }
+                        }
+                    }
+                }
+                tt.end();
+                (void) flag;
+                double tmp_time = tt.passedtime();
+
+                //tail latency
+                /*auto pr = ((thread_param *) param)->latency.begin();
+                for (; pr != ((thread_param *) param)->latency.end(); pr++) {
+                    if (tmp_time >= *pr) {
+                        break;
+                    }
+                }
+                ((thread_param *) param)->latency.emplace(pr, tmp_time);*/
+                tp->latency.push(tmp_time);
+                if (tp->latency.size() >= cpl.LATENCY_NUM) {
+                    tp->latency.pop();
+                }
+                //total running time
+                tp->runtime += tmp_time;
+                //sum ops
+                tp->ops++;
+                //sum size: bytes actually fetched (the old code added a
+                //variable that was never assigned)
+                tp->size += fetched;
+            }
+        }
+        fin.close();
+
+        // a thread that served nothing keeps its zero throughputs (no division by 0)
+        if (tp->runtime > 0) {
+            tp->thput_of_ops = tp->ops / tp->runtime;
+            tp->thput_of_size = 1.0 * tp->size / tp->runtime / 1024;
+        }
+
+        cout << "Total time: " << tp->runtime << endl
+             << "Total ops: " << tp->ops << endl
+             << "Total ops throughput: " << tp->thput_of_ops << endl
+             << "Total sizes: " << tp->size << endl
+             << "Total size throughput: " << tp->thput_of_size << " KB" << endl;
+
+        return NULL;
+    }
+
+    /**
+     * Run the benchmark: start THREAD_NUM worker threads, join them, merge
+     * their statistics, print the totals and append one tab-separated line
+     * to /data/result.
+     *
+     * @param cp   configuration to use for this run
+     * @param snum experiment variable (currently unused)
+     */
+    void test(const ConfigParameter& cp, const int& snum) {
+        (void) snum;
+        cpl = cp;
+
+        const int nthreads = cpl.THREAD_NUM;
+        if (nthreads <= 0) {
+            cout << "error, THREAD_NUM must be positive" << endl;
+            return;
+        }
+
+        // vectors instead of variable-length arrays; neither is resized after
+        // the threads start, so the &tp[t] handed to each thread stays valid
+        vector<pthread_t> threads(nthreads);
+        vector<thread_param> tp(nthreads);
+
+        pthread_attr_t attr;
+        pthread_attr_init(&attr);
+        pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+
+        pthread_mutex_init(&printmutex, NULL);
+
+        for (int t = 0; t < nthreads; t++) {
+            cout << "Threads = " << t << endl;
+            //tp[t].queries = queries;
+            tp[t].tid = t;
+            // tp[t].sop = sop_tmp;
+            tp[t].ops = tp[t].size = 0;
+            tp[t].runtime = tp[t].thput_of_ops = tp[t].thput_of_size = 0.0;
+            int rci;
+            if (wtype == ibm) {
+                rci = pthread_create(&threads[t], &attr, ibm_query_exec, (void *) &tp[t]);
+            } else {
+                rci = pthread_create(&threads[t], &attr, twitter_query_exec, (void *) &tp[t]);
+            }
+            if (rci) {
+                perror("failed: pthread_create\n");
+                exit(-1);
+            }
+        }
+
+        double total_ops_thputs = 0.0;
+        long double total_size_thputs = 0.0;
+        int total_ops = 0;
+        double total_time = 0.0;
+        unsigned long long total_size = 0;
+        vector<double> latency;
+
+        cout << "333333333333333333333" << endl;
+
+        for (int t = 0; t < nthreads; t++) {
+            void *status;
+            int rci = pthread_join(threads[t], &status);
+            if (rci) {
+                perror("error, pthread_join\n");
+                exit(-1);
+            }
+
+
+            // wall time of the run is the slowest thread's accumulated time
+            total_time = total_time > tp[t].runtime ? total_time : tp[t].runtime;
+            total_ops += tp[t].ops;
+            total_ops_thputs += tp[t].thput_of_ops;
+            total_size += tp[t].size;
+            total_size_thputs += tp[t].thput_of_size;
+            while (!tp[t].latency.empty()) {
+                latency.push_back(tp[t].latency.top());
+                tp[t].latency.pop();
+            }
+        }
+        cout << "4444444444444444444444444" << endl;
+        sort(latency.rbegin(), latency.rend());   // descending: slowest first
+
+        // Position of a percentile in the descending list, counted against ALL
+        // operations. Only the slowest samples of each thread are kept, so the
+        // index is clamped to the samples that exist (0 when there are none)
+        // instead of reading past the end of the vector.
+        auto tail_latency = [&](double fraction) -> double {
+            if (latency.empty()) return 0.0;
+            size_t idx = total_ops - int(total_ops * fraction);
+            if (idx >= latency.size()) idx = latency.size() - 1;
+            return latency[idx];
+        };
+
+        double latency95 = tail_latency(0.95);
+        double latency99 = tail_latency(0.99);
+        double latency9999 = tail_latency(0.9999);
+
+        cout << "Total time: " << total_time << endl
+             << "Total ops: " << total_ops << endl
+             << "Total op throughput: " << total_ops_thputs << endl
+             << "Total sizes: " << total_size << endl
+             << "Total size throughput: " << total_size_thputs << endl
+             << "95% latency: " << latency95 << endl
+             << "99% latency: " << latency99 << endl
+             << "99.99% latency: " << latency9999 << endl;
+
+        ofstream fout("/data/result", ios::out|ios::app);
+        //fout << snum << endl;
+        fout << "ECCache" << "\t" << nthreads << "\t" << total_time << "\t" << total_ops << "\t" << total_ops_thputs << "\t"
+             << total_size << "\t" << total_size_thputs << "\t"
+             << latency95 << "\t" << latency99 << "\t" << latency9999 << endl;
+        fout.close();
+
+        pthread_attr_destroy(&attr);
+        pthread_mutex_destroy(&printmutex);
+        //return 0;
+    }
+
+}
\ No newline at end of file
diff --git a/src/eccache.h b/src/eccache.h new file mode 100644 index 0000000..3464822 --- /dev/null +++ b/src/eccache.h @@ -0,0 +1,29 @@ +// +// Created by Alfred on 2022/9/11. +// + +#ifndef CORANA_ECCACHE_H +#define CORANA_ECCACHE_H + +#include "config.h" +#include "ErasureCode/ErasureCode.h" +#include +#include +#include +#include + +using namespace std; + +namespace eccache { + void init(const ConfigParameter& cp, const workload_type& wt); + void distribution(); + void test(const ConfigParameter& cp, const int& snum); + + static void *twitter_query_exec(void* param); + static void *ibm_query_exec(void* param); + void ibm_distribution(); + void twitter_distribution(); +}; + + +#endif //CORANA_ECCACHE_H diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..9af7808 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,310 @@ +//#include "Random.h" +//#include "eccache.h" +#include +#include +#include "SPCache.h" +#include "MemcachedClient.h" +#include "OurScheme.h" +#include "Random.h" +#include "eccache.h" +#include "parameter.h" +#include "fmt/core.h" + +int main() { + ConfigParameter gcp1(twitter, 9); + + // Exp#1 (Tail latency and access throughput) & Exp#4 (Impact of number of caching nodes) + for(int k = 0; k < 3; k ++) { // number of runs + for(int i = 0; i < 10; i ++) { // traceno + for(int j = 0; j < 5; j ++) { // Server number + ofstream fout(gcp1.PATH_PREFIX + "/result", ios::out|ios::app); + fout << variation[j] << endl; + fout.close(); + + cout << "Server num = " << SERVER_NUM[j] << endl; + ConfigParameter gcp(meta, traceno[i], hotlim[i][2], SERVER_NUM[j]); + MemcachedClient mca(gcp.SERVER_INFO); + mca.flush(); + + //AC-Cache + OurScheme os(meta, traceno[i], hotlim[i][2], SERVER_NUM[j]); + os.distribute(gcp.PATH_PREFIX + "/" + fmt::format("graph{:02d}_{}_agg", gcp.TRACE_NO, groupnum[i][j]), groupnum[i][j], SERVER_NUM[j]); + os.query(); + mca.flush(); + + //EC-Cache + eccache::init(gcp, twitter); + eccache::distribution(); + 
eccache::test(gcp, SERVER_NUM[j]); + mca.flush(); + + //SP-Cache + SPCache::initial(twitter, SERVER_NUM[j]); + SPCache::distribution(); + SPCache::test(SERVER_NUM[j]); + mca.flush(); + + random_read_file(twitter, SERVER_NUM[j]); + random_init(); + random_test(twitter, SERVER_NUM[j]); + mca.flush(); + } + } + } + + // Exp#2 (Percent imbalance) + // You need to change the parameter in config.h + for(int k = 0; k < 3; k ++) { // number of runs + for(int i = 0; i < 10; i ++) { // traceno + for(int j = 0; j < 7; j ++) { // day number + ofstream fout(gcp1.PATH_PREFIX + "/result", ios::out|ios::app); + fout << variation[j] << endl; + fout.close(); + + cout << "Server num = " << day_num[j] << endl; + ConfigParameter gcp(meta, traceno[i], hotlim[i][2], day_num[j]); + MemcachedClient mca(gcp.SERVER_INFO); + mca.flush(); + + //AC-Cache + OurScheme os(meta, traceno[i], hotlim[i][2], day_num[j]); + os.distribute(gcp.PATH_PREFIX + "/" + fmt::format("graph{:02d}_{}_agg", gcp.TRACE_NO, groupnum[i][2]), groupnum[i][2], day_num[j]); + os.query(); + + auto stats = mca.get_stats(); + ofstream fout2(gcp.PATH_PREFIX + "/ACCache_" + to_string(day_num[j]) + "n" + to_string(i)); + for(int i = 0; i < stats.size(); i ++) { + fout2 << "Server No. " << i << " : " << endl; + for(auto &pr: stats[i]) { + fout2 << "\t" << pr.first << " : " << pr.second << endl; + } + } + fout2.close(); + mca.flush(); + + //EC-Cache + eccache::init(gcp, twitter); + eccache::distribution(); + eccache::test(gcp, day_num[j]); + stats = mca.get_stats(); + fout2 = ofstream(gcp.PATH_PREFIX + "/ECCache_" + to_string(day_num[j]) + "n" + to_string(i)); + for(int i = 0; i < stats.size(); i ++) { + fout2 << "Server No. 
" << i << " : " << endl; + for(auto &pr: stats[i]) { + fout2 << "\t" << pr.first << " : " << pr.second << endl; + } + } + fout2.close(); + mca.flush(); + + //SP-Cache + SPCache::initial(twitter, day_num[j]); + SPCache::distribution(); + SPCache::test(day_num[j]); + stats = mca.get_stats(); + fout2 = ofstream(gcp.PATH_PREFIX + "/SPCache_" + to_string(day_num[j]) + "n" + to_string(i)); + for(int i = 0; i < stats.size(); i ++) { + fout2 << "Server No. " << i << " : " << endl; + for(auto &pr: stats[i]) { + fout2 << "\t" << pr.first << " : " << pr.second << endl; + } + } + fout2.close(); + mca.flush(); + + random_read_file(twitter, day_num[j]); + random_init(); + random_test(twitter, day_num[j]); + stats = mca.get_stats(); + fout2 = ofstream(gcp.PATH_PREFIX + "/Random_" + to_string(day_num[j]) + "n" + to_string(i)); + for(int i = 0; i < stats.size(); i ++) { + fout2 << "Server No. " << i << " : " << endl; + for(auto &pr: stats[i]) { + fout2 << "\t" << pr.first << " : " << pr.second << endl; + } + } + fout2.close(); + mca.flush(); + } + } + } + + /*for(int i = 0; i < 3; i ++) { + for (int snum = 4; snum < 20; snum += 3) { + ofstream fout(gcp.PATH_PREFIX + "/result", ios::out|ios::app); + fout << snum << endl; + + cout << "Server num = " << snum << endl; + ConfigParameter cp(twitter, snum); + MemcachedClient mc(cp.SERVER_INFO); + + OurScheme os(twitter, snum); + cout << cp.PATH_PREFIX + "/newgroup34" << endl; + os.distribute(cp.PATH_PREFIX + "/newgroup34", 246, snum); + os.query(snum); + + //MemcachedClient mc(cp.SERVER_INFO); + auto stats = mc.get_stats(); + + ofstream fout2(cp.PATH_PREFIX + "/Clime_" + to_string(snum) + "n" + to_string(i)); + for(int i = 0; i < stats.size(); i ++) { + fout2 << "Server Num " << i << " : " << endl; + for(auto &pr: stats[i]) { + fout2 << "\t" << pr.first << " : " << pr.second << endl; + } + } + fout.close(); + fout2.close(); + mc.flush();*/ + + /*eccache::init(cp, twitter); + eccache::distribution(); + eccache::test(cp, snum); + 
MemcachedClient mc(cp.SERVER_INFO); + mc.flush();*/ + + /*SPCache::initial(twitter, snum); + SPCache::distribution(); + SPCache::test(snum); + mc.flush(); + + random_read_file(twitter, snum); + random_init(); + random_test(twitter, snum); + mc.flush();*/ + //} + //ConfigParameter cp(twitter, 6); + //} + + /*for(int i = 0; i < 3; i ++) { + eccache::init(gcp, twitter); + eccache::distribution(); + cout << "The " << i << "th time" << endl; + for (int snum = 0; snum < 7; snum++) { + cout << "Server num = " << snum << endl; + ConfigParameter cp(twitter, snum); + + eccache::test(cp, snum); + //SPCache::initial(twitter, snum); + //SPCache::distribution(); + //SPCache::test(snum); + //random_test(twitter, snum); + MemcachedClient mc(cp.SERVER_INFO); + auto stats = mc.get_stats(); + + ofstream fout(cp.PATH_PREFIX + "/ECCache_day" + to_string(snum) + "n" + to_string(i)); + for(int i = 0; i < stats.size(); i ++) { + fout << "Server Node " << i << " : " << endl; + for(auto &pr: stats[i]) { + fout << "\t" << pr.first << " : " << pr.second << endl; + } + } + fout.close(); + + //mc.get_stats(); + // + } + MemcachedClient mcc(gcp.SERVER_INFO); + mcc.flush(); + }*/ + + /*for(int i = 0; i < 3; i ++) { + random_read_file(twitter, 0); + random_init(); + cout << "The " << i << "th time" << endl; + for (int snum = 0; snum < 7; snum++) { + cout << "Server num = " << snum << endl; + ConfigParameter cp(twitter, snum); + + random_test(twitter, snum); + //SPCache::initial(twitter, snum); + //SPCache::distribution(); + //SPCache::test(snum); + //random_test(twitter, snum); + MemcachedClient mc(cp.SERVER_INFO); + auto stats = mc.get_stats(); + + ofstream fout(cp.PATH_PREFIX + "/Random_day" + to_string(snum) + "n" + to_string(i)); + for(int i = 0; i < stats.size(); i ++) { + fout << "Server Node " << i << " : " << endl; + for(auto &pr: stats[i]) { + fout << "\t" << pr.first << " : " << pr.second << endl; + } + } + fout.close(); + + //mc.get_stats(); + // + } + MemcachedClient 
mcc(gcp.SERVER_INFO); + mcc.flush(); + } + + for(int i = 0; i < 3; i ++) { + SPCache::initial(twitter, 0); + SPCache::distribution(); + cout << "The " << i << "th time" << endl; + for (int snum = 0; snum < 7; snum++) { + cout << "Server num = " << snum << endl; + ConfigParameter cp(twitter, snum); + + //random_test(twitter, snum); + //SPCache::initial(twitter, snum); + //SPCache::distribution(); + SPCache::test(snum); + //random_test(twitter, snum); + MemcachedClient mc(cp.SERVER_INFO); + auto stats = mc.get_stats(); + + ofstream fout(cp.PATH_PREFIX + "/SPCache_day" + to_string(snum) + "n" + to_string(i)); + for(int i = 0; i < stats.size(); i ++) { + fout << "Server Node " << i << " : " << endl; + for(auto &pr: stats[i]) { + fout << "\t" << pr.first << " : " << pr.second << endl; + } + } + fout.close(); + + //mc.get_stats(); + // + } + MemcachedClient mcc(gcp.SERVER_INFO); + mcc.flush(); + }*/ + +/* for(int i = 0; i < 2; i ++) { + OurScheme os(twitter, 0); + os.distribute(gcp.PATH_PREFIX + "/newgroup34", 246, gcp.SERVER_NUM); + cout << "The " << i << "th time" << endl; + for (int snum = 0; snum < 7; snum++) { + cout << "Server num = " << snum << endl; + ConfigParameter cp(twitter, snum); + + //random_test(twitter, snum); + os.query(snum); + //SPCache::initial(twitter, snum); + //SPCache::distribution(); + //SPCache::test(snum); + //random_test(twitter, snum); + MemcachedClient mc(cp.SERVER_INFO); + auto stats = mc.get_stats(); + + ofstream fout(cp.PATH_PREFIX + "/Clime_day" + to_string(snum) + "n" + to_string(i)); + for(int i = 0; i < stats.size(); i ++) { + fout << "Server Node " << i << " : " << endl; + for(auto &pr: stats[i]) { + fout << "\t" << pr.first << " : " << pr.second << endl; + } + } + fout.close(); + + //mc.get_stats(); + // + } + MemcachedClient mcc(gcp.SERVER_INFO); + mcc.flush(); + } +*/ + + return 0; +} diff --git a/src/main_correlation.cpp b/src/main_correlation.cpp new file mode 100644 index 0000000..2f40f8f --- /dev/null +++ 
b/src/main_correlation.cpp @@ -0,0 +1,18 @@ +#include +#include +#include "MemcachedClient.h" +#include "OurScheme.h" +#include "parameter.h" + +int main() { + for(int i = 8; i < 10; i ++) { + //if (i == 1) continue; // ignore trace 02 for now + for(int j = 0; j < 5; j ++) { + ConfigParameter gcp(meta, traceno[i], hotlim[i][2], 6); + OurScheme os(meta, traceno[i], hotlim[i][2], variation[j]); + os.CorrelationAnalysis(0); + } + } + + return 0; +} diff --git a/src/parameter.h b/src/parameter.h new file mode 100644 index 0000000..1ff2f7c --- /dev/null +++ b/src/parameter.h @@ -0,0 +1,20 @@ +#pragma once +int traceno[10] = {1, 2, 23, 25, 99, 50, 51, 52, 202206, 202401}; // trace no +int variation[5] = {10, 50, 100, 300, 500}; //freq limit for FQueue +int hotlim[10][6] = { {812, 35, 5, 0, 0, 265494}, + {10762, 5022, 3444, 2624, 1910, 804}, + {277, 182, 147, 134, 116, 2}, + {192, 183, 174, 168, 152, 15295354}, + {140, 59, 42, 35, 29, 0}, + {13987, 5628, 3769, 2834, 2044, 0}, + {14004, 5638, 3776, 2840, 2049, 0}, + {14148, 5671, 3792, 2851, 2058, 0}, + {1817, 655, 403, 294, 210, 8}, + {1116, 430, 274, 204, 148}}; //10k, 30k, 50k, 70k, 100k. 80% +int groupnum[4][5] = {{22828/2, 66662/2, 75164/2, 85326/2, 88720/2}, + {178/2, 182/2, 828/2, 23016/2, 61188/2}, + {34/2, 16/2, 12/2, 12/2, 19536/2}, + {112/2, 146/2, 170/2, 106/2, 46/2}}; + +int SERVER_NUM[5]={3, 6, 9, 12, 15}; +int day_num[7]={0, 1, 2, 3, 4, 5, 6}; \ No newline at end of file diff --git a/src/replicas.cpp b/src/replicas.cpp new file mode 100644 index 0000000..54f4e0d --- /dev/null +++ b/src/replicas.cpp @@ -0,0 +1,5 @@ +// +// Created by Alfred on 2022/9/13. +// + +#include "replicas.h" diff --git a/src/replicas.h b/src/replicas.h new file mode 100644 index 0000000..c7d2063 --- /dev/null +++ b/src/replicas.h @@ -0,0 +1,25 @@ +// +// Created by Alfred on 2022/9/13. 
+// + +#ifndef CORANA_REPLICAS_H +#define CORANA_REPLICAS_H + +#include +#include "config.h" +#include + +using namespace std; + +class replicas { +public: + vector keys; + map> key_record; + + replicas(ConfigParameter cp, workload_type wt); + void workload_init(); + +}; + + +#endif //CORANA_REPLICAS_H diff --git a/src/stats.py b/src/stats.py new file mode 100644 index 0000000..c3f8f59 --- /dev/null +++ b/src/stats.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 + +""" get the freqs of all groups and test multiway partition problem +""" + +import prtpy +from time import perf_counter + +#groupfile = "newgroup34" +#path_prefix = "/home/flnan/group_divided/" + +def group_distribution(groupfile: str, bin_num: int): + with open(groupfile,'r') as fin: + cnts = fin.readlines() + + group_freqs = list() + for i in range(0, len(cnts), 2): + num, size, freq = cnts[i].strip().split("\t") + group_freqs.append(int(freq)) + + + gf = dict() + for no, freq in enumerate(group_freqs): + gf[no] = freq + + + res = prtpy.partition(algorithm=prtpy.partitioning.greedy, numbins=bin_num, items=gf) + + for i in range(len(res)): + res[i].sort() + #start = perf_counter() + #print(f"\t {perf_counter()-start} seconds") + + return res + + +def group_distribution2(gf: dict, bin_num: int): + res = prtpy.partition(algorithm=prtpy.partitioning.greedy, numbins=bin_num, items=gf) + + for i in range(len(res)): + res[i].sort() + #start = perf_counter() + #print(f"\t {perf_counter()-start} seconds") + + return res + +def hello123(name) -> str: + print("pointed") + return "Hello %s" % name + +def test1(aa: dict): + print("pointed") + for it in aa: + print("dict[%s] = %d" % (it, aa[it])) + + +if __name__ == "__main__": + with open("/home/flnan/twitter/stat34", 'r') as fin: + cnts = fin.readlines() + + keys = dict() + i = 0 + for item in cnts: + k,s,f = item.strip().split() + keys[i] = int(f) + i = i + 1 + +# print(group_distribution2(keys, 19)) + + print([1,2,3]) + +def test2(): + with 
//
// Created by Alfred on 2022/7/22.
//

#ifndef CORANA_TOOLBOX_H
#define CORANA_TOOLBOX_H

#pragma once

#include <algorithm>
#include <chrono>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <queue>
#include <random>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;
using namespace chrono;

/*
 * Split `str` on every occurrence of `symbol`.
 *
 * Empty fields are kept: "a,,b" yields {"a", "", "b"}, a trailing separator
 * yields a trailing empty field, and an empty input yields one empty field.
 * The argument is taken by const reference so temporaries and const strings
 * can be passed as well as named lvalues.
 */
inline vector<string> split(const string& str, char symbol)
{
    vector<string> result;
    stringstream ss(str);

    while (ss.good()) {
        string field;
        getline(ss, field, symbol);
        result.push_back(field);
    }

    return result;
}

/*
 * Copy a row x column byte matrix into a char matrix, element by element.
 *
 * The caller owns both buffers: `dest` must already provide `row` rows of at
 * least `column` chars each. No terminating '\0' is written.
 */
inline void arr_uint2char(char **dest, uint8_t **matrix, const int& row, const int& column)
{
    for (int r = 0; r < row; r++) {
        for (int c = 0; c < column; c++) {
            dest[r][c] = matrix[r][c];
        }
    }
}

/*
 * Level-graph construction step of Dinic's algorithm.
 *
 * Vertices are numbered 1..vertexNum; vertex 1 is the source and vertex
 * vertexNum is the sink. graph[u][v] is {capacity, flow}, so `graph` must be
 * at least (vertexNum + 1) x (vertexNum + 1). `level` is grown if it is too
 * short and its first vertexNum + 1 entries are overwritten with the BFS depth
 * (source = 1, unreachable = 0).
 *
 * Returns true when the sink is reachable through edges with residual capacity.
 */
inline bool dinicBFS(vector<vector<pair<int, int>>>& graph, vector<int>& level, int vertexNum)
{
    if (vertexNum < 1)
        return false;  // no source vertex to start from

    const size_t needed = static_cast<size_t>(vertexNum) + 1;
    if (level.size() < needed)
        level.resize(needed);
    fill(level.begin(), level.begin() + vertexNum + 1, 0);

    queue<int> q;
    q.push(1);
    level[1] = 1;
    while (!q.empty()) {
        const int u = q.front();
        q.pop();
        for (int v = 1; v <= vertexNum; v++) {
            if (!level[v] && graph[u][v].first > graph[u][v].second) {
                level[v] = level[u] + 1;
                q.push(v);
            }
        }
    }
    return level[vertexNum] != 0;
}

/*
 * Augmenting step of Dinic's algorithm.
 *
 * Pushes up to `cp` units of flow from `currentVertex` towards the sink
 * (vertex vertexNum), following only edges that go one level deeper in
 * `level`, and updates the forward and reverse flow entries in `graph`.
 * `flag[u][v]` must be true for an edge to be used; dinic() passes an
 * all-true matrix.
 *
 * Returns the amount of flow actually pushed (0 when no path exists).
 */
inline int dinicDFS(vector<vector<pair<int, int>>>& graph, vector<int>& level, int vertexNum, int currentVertex, int cp, vector<vector<bool>>& flag)
{
    if (currentVertex == vertexNum)
        return cp;

    int remaining = cp;
    for (int v = 1; v <= vertexNum && remaining; v++) {
        if (level[currentVertex] + 1 != level[v])
            continue;
        if (graph[currentVertex][v].first > graph[currentVertex][v].second && flag[currentVertex][v]) {
            const int pushed = dinicDFS(graph, level, vertexNum, v,
                                        min(remaining, graph[currentVertex][v].first - graph[currentVertex][v].second), flag);
            graph[currentVertex][v].second += pushed;
            // the reverse edge carries negative flow so its residual grows by `pushed`
            graph[v][currentVertex].second -= pushed;
            remaining -= pushed;
        }
    }
    return cp - remaining;
}

/*
 * Maximum flow from vertex 1 to vertex vertexNum (Dinic's algorithm).
 *
 * pair
 * first: capacity
 * second: flow
 *
 * `graph` must be at least (vertexNum + 1) x (vertexNum + 1); the flow
 * entries are updated in place. Returns 0 when vertexNum < 2: with fewer than
 * two vertices the source and sink coincide (or do not exist), and the
 * augmenting loop would otherwise never terminate.
 */
inline int dinic(vector<vector<pair<int, int>>>& graph, int vertexNum)
{
    if (vertexNum < 2)
        return 0;

    const size_t n = static_cast<size_t>(vertexNum) + 1;
    vector<vector<bool>> flag(n, vector<bool>(n, true));
    vector<int> level(n);

    int sum = 0;
    while (dinicBFS(graph, level, vertexNum)) {
        int tf = 0;
        while ((tf = dinicDFS(graph, level, vertexNum, 1, INT_MAX, flag)))
            sum += tf;
    }
    return sum;
}

// Character set that printable random strings are sampled from
static constexpr char CCH[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

/*
 * Build a random string.
 *
 * sz:        length of the string; values <= 0 give an empty string
 * printable: true  -> every character is drawn from CCH
 *            false -> raw byte values (strings used as keys may contain
 *                     non-printable characters)
 *
 * The generator is seeded once per thread instead of on every call.
 */
inline string makeRandStr(int sz, bool printable)
{
    if (sz <= 0)
        return string();

    static thread_local mt19937 rng(random_device{}());
    // NOTE(review): the raw-byte range stops at 0xFE, as the previous
    // `rng() % 0xFF` did; confirm whether 0xFF should be included.
    uniform_int_distribution<int> pick_index(0, static_cast<int>(sizeof(CCH)) - 2);
    uniform_int_distribution<int> pick_byte(0, 0xFE);

    string ret(static_cast<size_t>(sz), '\0');
    for (int i = 0; i < sz; ++i) {
        ret[i] = printable ? CCH[pick_index(rng)] : static_cast<char>(pick_byte(rng));
    }

    return ret;
}

/*
 * Minimal stopwatch: call start(), then end(), then read passedtime().
 */
class timeit {
public:
    // NOTE(review): system_clock is not monotonic; kept because s and e are
    // public members whose type callers may depend on.
    time_point<system_clock> s, e;

    void start() {
        s = system_clock::now();
    }

    void end() {
        e = system_clock::now();
    }

    // Seconds between the last start() and the last end(), at microsecond resolution.
    double passedtime() const {
        const auto elapsed = duration_cast<microseconds>(this->e - this->s);
        return double(elapsed.count()) * microseconds::period::num / microseconds::period::den;
    }
};

#endif //CORANA_TOOLBOX_H
b/src/twitter_trace.h new file mode 100644 index 0000000..839e148 --- /dev/null +++ b/src/twitter_trace.h @@ -0,0 +1,270 @@ +// +// Created by Alfred on 2022/7/22. +// + +#ifndef CORANA_TWITTER_TRACE_H +#define CORANA_TWITTER_TRACE_H + +#include +#include +#include +#include +#include + +#include + +#include "config.h" +#include "FreqList.h" +#include "FreqTable.h" +#include "ListNode.h" +#include "toolbox.h" +#include "CMSketch.h" + +// global definitions +vector* freqkeys; + +class tkeys +{ +public: + tkeys(const string& a, const int b) + : key(a), freq(b) + {} + string key; + int freq; + bool operator<(const tkeys& m) const { + return freq > m.freq; + } +}; + +vector* getFeqKeys(string str, int size); +vector* readStream(string fname, int lnum, long long& cpos); +int isFreqkeys(string str); + +void twitter_trace() +{ + ConfigParameter cp; + FreqList flist(cp.FREQ_LIST_SIZE, cp.FREQ_LIMIT); + //FreqTable ftable(cp.HOTEST_NUM); + CMSketch ftable(9e10, 900); + vector* rstream; // the stream list + + // temp variables + string cnts; + int stream_size; + int flag, readnum; + int width; // the width of the window + long long cur_pos = 0; // the current position of the trace file + long long ftail; // the tail position of the trace file + string tname; // the name of the reading trace file + vector* tmp; + + //temp for timing + //auto start = system_clock::now(); + //auto end = system_clock::now(); + //auto duration = duration_cast(end - start); + timeit t = timeit(); + + for(int day = 0; day < cp.DAY_NUM; day ++) + { + cout << "The " << day + 1 << "th day." 
<< endl; + + flag = 0; + readnum = 0; + int tmp_size = cp.ONCE_READ_LIMIT; + /** + * Read the trace and get the hotest keys + */ + ifstream fin(cp.STAT_FILE); + + if (!fin.is_open()) { + cout << "Error opening file: " << cp.STAT_FILE << endl; + exit(1); + } + for(int i = 0; i < day; i ++) { + getline(fin, cnts); + getline(fin, cnts); + getline(fin, cnts); + } + getline(fin, cnts); + cout << "day = " << cnts << endl; + getline(fin, cnts); + getline(fin, cnts); + fin.close(); + + // Get the top-k most frequent keys + freqkeys = getFeqKeys(cnts, cp.HOTEST_NUM); + ofstream oof("Freqkeys" + to_string(day)); + for(auto & freqkey : *freqkeys) { + oof << freqkey << endl; + } + oof.close(); + cout << "finished reading hotkyes" << endl; + + // first read the stream + + tname = cp.PATH_PREFIX + cp.STREAM_FILE_PREFIX + to_string(day); + // get the tail position of the file + fin.open(tname); + fin.seekg(0, ios::end); + ftail = fin.tellg(); + cout << "Tail position: " << ftail << endl; + fin.close(); + + cur_pos = 0; + rstream = readStream(tname, cp.ONCE_READ_LIMIT, cur_pos); + stream_size = cp.ONCE_READ_LIMIT; + + /* + * Begin the Correlation Analysis + */ + cout << "Correlation Analysis start" << endl; + while(stream_size != 0) { + //stream_size = rstream->size(); + if (flag == 0) { + //start = system_clock::now(); + t.start(); + } + if (stream_size < cp.WINDOW_SIZE + 100 && tmp_size >= cp.ONCE_READ_LIMIT) { + tmp = readStream(tname, cp.ONCE_READ_LIMIT, cur_pos); + rstream->insert(rstream->end(), tmp->begin(), tmp->end()); + tmp_size = tmp->size(); + stream_size += tmp_size; //tmp->size(); + delete tmp; + readnum++; + } + + int i = 0; + + auto stream_start = rstream->begin(); + + int loc1 = isFreqkeys(*stream_start); + while( loc1 == -1) { + stream_start ++; + loc1 = isFreqkeys(*stream_start); + } + + int size = rstream->end() - stream_start; + width = size > cp.WINDOW_SIZE? 
cp.WINDOW_SIZE: size; + for(auto iter = stream_start + 1; iter < stream_start + width; iter ++, i ++) { + int loc2 = isFreqkeys(*iter); + if( loc2 == -1 || loc1 == loc2) { + continue; + } + + ListNode ln = ListNode(loc1, loc2, 0); + int rst = ftable.find(ln.first, ln.second); + if(rst != 0) { + ftable.add(ln.first, ln.second); + } else { + auto index = flist.insert(ln); + //auto index = flist.index(ln); + if(flist.isHot(index)) { + ftable.add(index->first, index->second); + flist.del(index); + } + } + + } + rstream->erase(rstream->begin(), stream_start); + stream_size -= stream_start - rstream->begin(); + flag ++; + if(flag % 1000000 == 0) { + //end = system_clock::now(); + //duration = duration_cast(end - start); + t.end(); + + cout << "Time used: "<< t.passedtime() << endl; + // if(flag / 1000000 > 66) +// break; + } + } + + delete rstream; + delete freqkeys; + cout << "Current point position = " << cur_pos << endl; + ofstream fout("ftable_rst" + to_string(day)); + for(int i = 0; i < cp.HOTEST_NUM; i ++) { + for(int j = i + 1; j < cp.HOTEST_NUM; j ++) + fout << to_string(i) + "," + to_string(j) << "," << ftable.find(i, j) << endl; + } + fout.close(); + flist.clear(); + //ftable.clear(); + t.end(); + cout << "Day " << day <<" Total time used: "<< t.passedtime() << "s" << endl; + } + +} + +vector* getFeqKeys(const string& str, const int size) +{ + vector* dkeys = new vector(); + vector twkeys; + int i; + + Json::Reader reader; + Json::Value value; + Json::Value::Members mem; + + if (reader.parse(str, value)) { + if(value.size() < (unsigned)size) { + return nullptr; + } + mem = value.getMemberNames(); + for(auto iter = mem.begin(); iter != mem.end(); iter ++) { + tkeys tmp(*iter, value[*iter].asInt()); + twkeys.push_back(tmp); + } + } + partial_sort(twkeys.begin(), twkeys.begin()+size, twkeys.end()); + i = 0; + for(auto iter = twkeys.begin(); iter != twkeys.end() && i < size; i ++, iter ++) { + dkeys->push_back(iter->key); + } + sort(dkeys->begin(), dkeys->end()); + 
return dkeys; +} + +vector* readStream(string fname, int lnum, long long& cpos) +{ + ifstream in(fname); + in.seekg(cpos, ios::beg); + auto* vec = new vector(); + string lcnt, rst; + vector tmp; + int i; + + if (!in.is_open()) { + cout << "Error opening file: " << fname << endl; + exit(0); + //return nullptr; + } + + i = 0; + while(getline(in, lcnt) && i < lnum) { + tmp = split(lcnt, ','); + rst = tmp[1]; + /*auto pr = equal_range(freqkeys->begin(), freqkeys->end(), rst); + if((pr.first)->compare(rst) == 0) { + vec->push_back(to_string(pr.first - freqkeys->begin())); + i ++; + }*/ + vec->push_back(rst); + i ++; + } + + cpos = in.tellg(); + in.close(); + return vec; +} + +int isFreqkeys(string str) +{ + auto pr = equal_range(freqkeys->begin(), freqkeys->end(), str); + if((pr.first)->compare(str) == 0) { + return pr.first - freqkeys->begin(); + } + return -1; +} + +#endif //CORANA_TWITTER_TRACE_H