From 55758b3681a23a55af48677b24f1007fb9cdb4c7 Mon Sep 17 00:00:00 2001 From: JohnaLiu Date: Thu, 26 Mar 2015 09:13:53 -0500 Subject: [PATCH 1/6] Create Intro.txt --- Intro.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Intro.txt diff --git a/Intro.txt b/Intro.txt new file mode 100644 index 00000000..c0ad870a --- /dev/null +++ b/Intro.txt @@ -0,0 +1,3 @@ +This is our project for 613 Advanced Operating Systems. +The three group members are Feiyu Yu, Zhijiao Liu and Jiacheng Gu. +Gu will be responsible for the setup of test environment, Liu will collect test data and Yu will mainly focus on analyzing the performance of GraphChi From dbced52f5d76761bbdcbfe4f874052bea3d20bda Mon Sep 17 00:00:00 2001 From: feiyuyu Date: Mon, 4 May 2015 18:11:33 -0500 Subject: [PATCH 2/6] Create BP.cpp BP --- example_apps/BP.cpp | 296 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100644 example_apps/BP.cpp diff --git a/example_apps/BP.cpp b/example_apps/BP.cpp new file mode 100644 index 00000000..50312297 --- /dev/null +++ b/example_apps/BP.cpp @@ -0,0 +1,296 @@ + +/** + * @file + * @author Aapo Kyrola + * @version 1.0 + * + * @section LICENSE + * + * Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University] + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + + * + * @section DESCRIPTION + * + * Template for GraphChi applications. To create a new application, duplicate + * this template. + */ + +//Modified by Feiyu Yu + +#include +#include +#include +#include +#include + +#include "graphchi_basic_includes.hpp" +#include "util/toplistBP.hpp" +using namespace graphchi; + + +#define GRAPHCHI_DISABLE_COMPRESSION +#define THRESHOLD 1e-1 +#define RANDOMRESETPROB 0.15 +#define Init 1 + + + + + +/** + * Type definitions. Remember to create suitable graph shards using the + * Sharder-program. + */ +typedef VertextValue VertexDataType; +typedef EdgeValue EdgeDataType; + + +void parse(VertextValue &x, const char * s) { } // Do nothing +/** + * GraphChi programs need to subclass GraphChiProgram + * class. The main logic is usually in the update function. + */ +struct MyGraphChiProgram : public GraphChiProgram { + + float normP; + float normN; + int flag; + /** + * Vertex update function. + */ + void update(graphchi_vertex &vertex, graphchi_context &gcontext) { + + if (gcontext.iteration == 0) { + /* On first iteration, initialize vertex (and its edges). This is usually required, because + on each run, GraphChi will modify the data files. To start from scratch, it is easiest + do initialize the program in code. Alternatively, you can keep a copy of initial data files. */ + // vertex.set_data(init_value); + //(std::rand()%100+1)/100 + for(int i=0; i < vertex.num_inedges(); i++) { + VertextValue veValue; + + veValue.belifP =0; + veValue.belifN =0; + + + vertex.set_data(veValue); + + EdgeValue EdValue; + EdValue.PhiP = (float)(std::rand()%100)/100; + //if(EdValue.PhiP<0.1) + // EdValue.PhiP = EdValue.PhiP +0.1; + EdValue.PhiN = (float)(std::rand()%100)/100; + //if(EdValue.PhiN<0.1) + // EdValue.PhiN = EdValue.PhiN +0.1; + //std::cout< set_data(EdValue); + } + + } + + else{ + + if(flag ==1){ + + for(int i=0; i < vertex.num_inedges(); i++) { + EdgeValue value = vertex.inedge(i)->get_data(); + + value.mP = value.mP/normP; + value.mN = value.mN/normN; + + vertex.inedge(i)->set_data(value); + } + + + } + + else if(flag == 0) { + + float sumP=0; + float sumN=0; + + /* Do computation */ + + ///* Loop over in-edges (example) */ + for(int i=0; i < vertex.num_inedges(); i++) { + + EdgeValue EdValue; + EdValue = vertex.inedge(i) -> get_data(); + float mp1 = EdValue.PhiP * EdValue.PsiPP; + float mp2 = EdValue.PhiP * EdValue.PsiPN; + float mp3 = EdValue.PhiN * EdValue.PsiNP; + float mp4 = EdValue.PhiN * EdValue.PsiNN; + + float PiMP = 1; + float PiMN = 1; + for(int j=0; j < vertex.num_inedges(); j++){ + if(j != i) + { + EdgeValue EdValueS; + EdValueS = vertex.inedge(i) -> get_data(); + PiMP = PiMP * EdValueS.mP; + PiMN = PiMN * EdValueS.mN; + } + } + + + EdValue.mP = (mp1+mp2) * PiMP; + EdValue.mN = (mp3+mp4) * PiMN; + sumP = sumP + EdValue.mP; + sumN = sumN + EdValue.mN; + //vertex.inedge(i) -> set_data(EdValue); + + } + + normP = normP + sumP * sumP; + normN = normN + sumN * sumN; + + } + + else if(flag == 2) { + float PiMjiP = 1; + float PiMjiN = 1; + VertextValue VeValue; + VeValue = vertex.get_data(); + for(int i=0; i < vertex.num_inedges(); i++) { + EdgeValue EdValueS; + EdValueS = vertex.inedge(i) -> get_data(); + PiMjiP = PiMjiP * EdValueS.mP; + + PiMjiN = PiMjiN * EdValueS.mN; + //std::cout< get_data(); + VeValue.belifP = EdValueS.PhiP * PiMjiP; + VeValue.belifN = EdValueS.PhiN * PiMjiN; + vertex.set_data(VeValue); + } + } + } + } + + /** + * Called before an iteration starts. + */ + void before_iteration(int iteration, graphchi_context &gcontext) { + + if(iteration == 0) + { + flag = 1; + normP=0; + normN=0; + } + if(flag == 0) + { + normP=0; + normN=0; + } + else if (flag == 1) + { + //std::cout<<"iterBefore:"<(filename, + get_option_string("nshards", "auto")); + + /* Run */ + MyGraphChiProgram program; + graphchi_engine engine(filename, nshards, scheduler, m); + engine.run(program, niters); + + /* Output top 20 authorities*/ + std::vector< vertex_value > topA = get_top_vertices(filename, ntop,1); + std::cout << "Print top " << ntop << " Negative" << std::endl; + for(int i=0; i < (int)topA.size(); i++) { + std::cout << (i+1) << ". " << topA[i].vertex << "\t" << topA[i].value.belifN << std::endl; + } + + /* Output top 20 authorities*/ + std::vector< vertex_value > topH = get_top_vertices(filename, ntop,0); + std::cout << "Print top " << ntop << " Positive:" << std::endl; + for(int i=0; i < (int)topH.size(); i++) { + std::cout << (i+1) << ". " << topH[i].vertex << "\t" << topH[i].value.belifP << std::endl; + } + /* Report execution metrics */ + metrics_report(m); + return 0; +} From b68923c04a1a6fe32fc7d8b0b3fb0dc06d85a998 Mon Sep 17 00:00:00 2001 From: feiyuyu Date: Mon, 4 May 2015 18:12:10 -0500 Subject: [PATCH 3/6] Create HITS.cpp --- example_apps/HITS.cpp | 322 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 322 insertions(+) create mode 100644 example_apps/HITS.cpp diff --git a/example_apps/HITS.cpp b/example_apps/HITS.cpp new file mode 100644 index 00000000..f4aaa1c4 --- /dev/null +++ b/example_apps/HITS.cpp @@ -0,0 +1,322 @@ + +/** + * @file + * @author Aapo Kyrola + * @version 1.0 + * + * @section LICENSE + * + * Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University] + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + + * + * @section DESCRIPTION + * + * Template for GraphChi applications. To create a new application, duplicate + * this template. + */ + +//Modified by Feiyu Yu +///#define DYNAMICEDATA 1 +//#define DYNAMICVERTEXDATA 1 + +#include +#include +#include +#include +#include + + + +#include "graphchi_basic_includes.hpp" +//#include "api/dynamicdata/chivector.hpp" + +//#include "util/toplist2.hpp" +#include "util/toplistHITS.hpp" +/* ALS-related classes are contained in als.hpp */// +//#include "als.hpp" + +using namespace graphchi; + + +#define GRAPHCHI_DISABLE_COMPRESSION +#define THRESHOLD 1e-1 +#define RANDOMRESETPROB 0.15 +#define Init 1 +/** + * Type definitions. Remember to create suitable graph shards using the + * Sharder-program. + */ +//typedef my_vertex_type VertexDataType; +//typedef my_edge_type EdgeDataType; + +//struct HA_label { +// float hub; +// float auth; +//}; + + + +typedef HA_label VertexDataType; +//typedef float VertexDataType; +typedef HA_label EdgeDataType; +//typedef chivector VertexDataType; +//typedef int VertexDataType; +/** + * GraphChi programs need to subclass GraphChiProgram + * class. The main logic is usually in the update function. + */ +void parse(HA_label &x, const char * s) { } // Do nothing + + + +struct HITSProgram : public GraphChiProgram { + + float normH; + float normA; + int flag; + + void update(graphchi_vertex &v, graphchi_context &ginfo) { + + + + + if (ginfo.iteration == 0) { + for(int i=0; i < v.num_edges(); i++) { + HA_label havalue; + havalue.hub = 1; + havalue.auth = 1; + v.edge(i)->set_data(havalue); + + HA_label havalueVex; + havalueVex.hub = 0; + havalueVex.auth = 0; + v.set_data(havalueVex); + //v.set_data(0); + } + + } + else{ + + if(flag == 1){ + + + for(int i=0; i < v.num_outedges(); i++) { + HA_label havalue = v.outedge(i)->get_data(); + //std::cout<<"normH: "< 1) + // {std::cout<<">1: "<set_data(havalue); + + // HA_label Vexhavalue = v.get_data(); + // Vexhavalue.hub = Vexhavalue.hub ; + // Vexhavalue.auth = havalue.auth; + //v.set_data(Vexhavalue); + //v.set_data(havalue.auth); + } + + /* for(int i=0; i < v.num_outedges(); i++) { + HA_label havalue = v.outedge(0)->get_data(); + HA_label Vexhavalue = v.get_data(); + Vexhavalue.hub = havalue.hub; + Vexhavalue.auth = Vexhavalue.auth; + v.set_data(Vexhavalue); + }*/ + } + else if(flag == 2){ + + for(int i=0; i < v.num_inedges(); i++) { + HA_label havalue = v.inedge(i)->get_data(); + + HA_label Vexhavalue = v.get_data(); + Vexhavalue.hub = Vexhavalue.hub ; + //std::cout<<"qurt: "<get_data(); + HA_label Vexhavalue = v.get_data(); + Vexhavalue.hub = havalue.hub; + + Vexhavalue.auth = Vexhavalue.auth; + v.set_data(Vexhavalue); + } + } + else if(flag == 0){ + float sumA=0; + float sumH=0; + //std::cout<<"pow: "< > * outedge = v.outedge(i); + HA_label havalue = v.outedge(i)->get_data(); + float val = havalue.auth; + //std::cout< > * outedge = v.outedge(i); + HA_label havalue = v.outedge(i)->get_data(); + havalue.hub = sumH; + havalue.auth = havalue.auth; + v.outedge(i)->set_data(havalue); + } + //for(int i=0; i < v.num_outedges(); i++) { + // //graphchi_edge > * outedge = v.outedge(i); + // HA_label havalue = v.outedge(i)->get_data(); + // + // } + + ///A(x) + for(int i=0; i < v.num_inedges(); i++) { + + //graphchi_edge > * inedge = v.inedge(i); + HA_label havalue = v.inedge(i)->get_data(); + float val = havalue.hub; + sumA = sumA + val; + + } + normA = normA + sumA * sumA; + + for(int i=0; i < v.num_inedges(); i++) { + HA_label havalue = v.inedge(i)->get_data(); + havalue.hub = havalue.hub; + havalue.auth = sumA; + v.inedge(i)->set_data(havalue); + } + + //v.set_data(sumA); + + + } + + + } + + } + + /** + * Called before an iteration starts. + */ + void before_iteration(int iteration, graphchi_context &gcontext) { + + if(iteration == 0) + { + flag = 1; + normH=0; + normA=0; + } + if(flag == 0) + { + normH=0; + normA=0; + } + else if (flag == 1) + { + //std::cout<<"iterBefore:"< AValue; + + /* Parameters */ + std::string filename = get_option_string("file"); // Base filename + int niters = get_option_int("niters", 10); + bool scheduler = false; // Non-dynamic version of pagerank. + int ntop = get_option_int("top", 20); + + //bool preexisting_shards; + //int nshards = convert_if_notexists(filename, get_option_string("nshards", "auto"), preexisting_shards); + /* Process input file - if not already preprocessed */ + int nshards = convert_if_notexists(filename, get_option_string("nshards", "auto")); + + + + /* Run */ + graphchi_engine< VertexDataType, EdgeDataType > engine(filename, nshards, scheduler, m); + + HITSProgram program; + engine.run(program, niters); + /*if (preexisting_shards) { + engine.reinitialize_edge_data(0); + }*/ + /* Output top 20 authorities*/ + std::vector< vertex_value > topA = get_top_vertices(filename, ntop,1); + std::cout << "Print top " << ntop << " Authorities:" << std::endl; + for(int i=0; i < (int)topA.size(); i++) { + std::cout << (i+1) << ". " << topA[i].vertex << "\t" << topA[i].value.auth << std::endl; + } + + /* Output top 20 authorities*/ + std::vector< vertex_value > topH = get_top_vertices(filename, ntop,0); + std::cout << "Print top " << ntop << " Hub:" << std::endl; + for(int i=0; i < (int)topH.size(); i++) { + std::cout << (i+1) << ". " << topH[i].vertex << "\t" << topH[i].value.hub << std::endl; + } + /* Report execution metrics */ + metrics_report(m); + return 0; +} From 72b496885aeea8004294d45cb70519857e9326f6 Mon Sep 17 00:00:00 2001 From: feiyuyu Date: Mon, 4 May 2015 18:12:42 -0500 Subject: [PATCH 4/6] Create ReadMeHITS&BP --- example_apps/ReadMeHITS&BP | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 example_apps/ReadMeHITS&BP diff --git a/example_apps/ReadMeHITS&BP b/example_apps/ReadMeHITS&BP new file mode 100644 index 00000000..7e721268 --- /dev/null +++ b/example_apps/ReadMeHITS&BP @@ -0,0 +1,11 @@ +1 Put BP.cpp and HITS.cpp under the folder ./graphchi-cpp-master/example_apps + +2 Put toplistHITS.hpp and toplistBP.hpp under the folder ./graphchi-cpp-master/src/util + +3 Compile .cpp file using + make example_apps/BP + make example_apps/HITS + +4 To run the program + bin/example_apps/HITS file ./folderName/fileName.txt (It's relative path version, you also can use absolute path) + bin/example_apps/BP file ./folderName/fileName.txt From df16cf6e34f654b80fecda9a6a1971369e81ed45 Mon Sep 17 00:00:00 2001 From: feiyuyu Date: Mon, 4 May 2015 18:15:03 -0500 Subject: [PATCH 5/6] Create toplistBP.hpp --- src/util/toplistBP.hpp | 206 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 src/util/toplistBP.hpp diff --git a/src/util/toplistBP.hpp b/src/util/toplistBP.hpp new file mode 100644 index 00000000..93f82a72 --- /dev/null +++ b/src/util/toplistBP.hpp @@ -0,0 +1,206 @@ +/** + * @file + * @author Aapo Kyrola + * @version 1.0 + * + * @section LICENSE + * + * Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University] + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + + * + * @section DESCRIPTION + * + * Tools for listing the TOP K values from a verte data file. + */ + +//Modified by Feiyu Yu + +#ifndef DEF_GRAPHCHI_TOPLIST +#define DEF_GRAPHCHI_TOPLIST + +#include +#include +#include +#include + +#include "io/stripedio.hpp" +#include "logger/logger.hpp" +#include "util/merge.hpp" +#include "util/ioutil.hpp" +#include "util/qsort.hpp" +#include "api/chifilenames.hpp" +#include "engine/auxdata/vertex_data.hpp" + +namespace graphchi { + + template + struct vertex_value { + vid_t vertex; + VertexDataType value; + vertex_value() {} + vertex_value(vid_t v, VertexDataType x) : vertex(v), value(x) {} + }; + + struct VertextValue { + + float belifP; + float belifN; + }; + +struct EdgeValue { + float PhiP; + float PhiN; + float PsiPP; + float PsiPN; + float PsiNP; + float PsiNN; + float mP; + float mN; + }; + + + template + bool vertex_value_greaterP(const vertex_value &a, const vertex_value &b) { + return a.value.belifP > b.value.belifP; + } + template + bool vertex_value_greaterN(const vertex_value &a, const vertex_value &b) { + return a.value.belifN > b.value.belifN; + } + + /** + * Reads the vertex data file and returns top N values. + * Vertex value type must be given as a template parameter. + * This method has been implemented in a manner to consume very little + * memory, i.e the whole file is not loaded into memory (unless ntop = nvertices). + * @param basefilename name of the graph + * @param ntop number of top values to return (if ntop is smaller than the total number of vertices, returns all in sorted order) + * @param from first vertex to include (default, 0) + * @param to last vertex to include (default, all) + * @return a vector of top ntop values + */ + template + std::vector > get_top_vertices(std::string basefilename, int ntop,int para, vid_t from=0, vid_t to=0) { + typedef vertex_value vv_t; + + /* Initialize striped IO manager */ + metrics m("toplist"); + stripedio * iomgr = new stripedio(m); + + /* Initialize the vertex-data reader */ + vid_t readwindow = 1024 * 1024; + size_t numvertices = get_num_vertices(basefilename); + vertex_data_store * vertexdata = + new vertex_data_store(basefilename, numvertices, iomgr); + + if ((size_t)ntop > numvertices) { + ntop = (int)numvertices; + } + + /* Initialize buffer */ + vv_t * buffer_idxs = (vv_t*) calloc(readwindow, sizeof(vv_t)); + vv_t * topbuf = (vv_t*) calloc(ntop, sizeof(vv_t)); + vv_t * mergearr = (vv_t*) calloc(ntop * 2, sizeof(vv_t)); + + /* Iterate the vertex values and maintain the top-list */ + size_t idx = 0; + vid_t st = 0; + vid_t en = numvertices - 1; + + int count = 0; + while(st <= numvertices - 1) { + en = st + readwindow - 1; + if (en >= numvertices - 1) en = numvertices - 1; + + /* Load the vertex values */ + vertexdata->load(st, en); + + int nt = en - st + 1; + int k = 0; + VertexDataType minima = VertexDataType(); + if (count > 0) { + minima = topbuf[ntop - 1].value; // Minimum value that should be even considered + } + for(int j=0; j < nt; j++) { + VertexDataType& val = *vertexdata->vertex_data_ptr(j + st); + + float valresult; + float minimaresult; + if(para == 0){ + valresult = val.belifP; + minimaresult = minima.belifP; + } + else if(para == 1){ + valresult = val.belifN; + minimaresult = minima.belifN; + } + if (count == 0 || (valresult > minimaresult)) { + //if (count == 0 || (val.auth > minima.auth)) { + buffer_idxs[k] = vv_t((vid_t)idx + from, val); + k++; + } + idx++; + } + nt = k; /* How many were actually included */ + + /* Sort buffer-idxs */ + if(para == 0){ + quickSort(buffer_idxs, nt, vertex_value_greaterP); + } + else + { + quickSort(buffer_idxs, nt, vertex_value_greaterN); + } + + /* Merge the top with the current top */ + if (count == 0) { + /* Nothing to merge, just copy */ + memcpy(topbuf, buffer_idxs, ntop * sizeof(vv_t)); + } else { + // void merge(ET* S1, int l1, ET* S2, int l2, ET* R, F f) { + if(para == 0){ + merge(topbuf, ntop, buffer_idxs, std::min(ntop, nt), mergearr, vertex_value_greaterP); + } + else if(para == 1) + { + merge(topbuf, ntop, buffer_idxs, std::min(ntop, nt), mergearr, vertex_value_greaterN); + } + + memcpy(topbuf, mergearr, ntop * sizeof(vv_t)); + } + + count++; + st += readwindow; + } + + /* Return */ + std::vector< vv_t > ret; + for(int i=0; i < ntop; i++) { + ret.push_back(topbuf[i]); + } + free(buffer_idxs); + free(mergearr); + free(topbuf); + + delete vertexdata; + delete iomgr; + + return ret; + } + + +}; + +#endif From 8f4bb48a081586b4042246dfd2dd52a585885f43 Mon Sep 17 00:00:00 2001 From: feiyuyu Date: Mon, 4 May 2015 18:15:24 -0500 Subject: [PATCH 6/6] Create toplistHITS.hpp --- src/util/toplistHITS.hpp | 196 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 src/util/toplistHITS.hpp diff --git a/src/util/toplistHITS.hpp b/src/util/toplistHITS.hpp new file mode 100644 index 00000000..3f42eddd --- /dev/null +++ b/src/util/toplistHITS.hpp @@ -0,0 +1,196 @@ + +/** + * @file + * @author Aapo Kyrola + * @version 1.0 + * + * @section LICENSE + * + * Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University] + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + + * + * @section DESCRIPTION + * + * Tools for listing the TOP K values from a verte data file. + */ + +//Modified by Feiyu Yu + +#ifndef DEF_GRAPHCHI_TOPLIST +#define DEF_GRAPHCHI_TOPLIST + +#include +#include +#include +#include + +#include "io/stripedio.hpp" +#include "logger/logger.hpp" +#include "util/merge.hpp" +#include "util/ioutil.hpp" +#include "util/qsort.hpp" +#include "api/chifilenames.hpp" +#include "engine/auxdata/vertex_data.hpp" + +namespace graphchi { + + + + + template + struct vertex_value { + vid_t vertex; + VertexDataType value; + vertex_value() {} + vertex_value(vid_t v, VertexDataType x) : vertex(v), value(x) {} + }; + struct HA_label { + float hub; + float auth; + }; + + + template + bool vertex_value_greaterA(const vertex_value &a, const vertex_value &b) { + return a.value.auth > b.value.auth; + } + template + bool vertex_value_greaterH(const vertex_value &a, const vertex_value &b) { + return a.value.hub > b.value.hub; + } + /** + * Reads the vertex data file and returns top N values. + * Vertex value type must be given as a template parameter. + * This method has been implemented in a manner to consume very little + * memory, i.e the whole file is not loaded into memory (unless ntop = nvertices). + * @param basefilename name of the graph + * @param ntop number of top values to return (if ntop is smaller than the total number of vertices, returns all in sorted order) + * @param from first vertex to include (default, 0) + * @param to last vertex to include (default, all) + * @return a vector of top ntop values + */ + template + std::vector > get_top_vertices(std::string basefilename, int ntop,int para, vid_t from=0, vid_t to=0) { + typedef vertex_value vv_t; + + /* Initialize striped IO manager */ + metrics m("toplist"); + stripedio * iomgr = new stripedio(m); + + /* Initialize the vertex-data reader */ + vid_t readwindow = 1024 * 1024; + size_t numvertices = get_num_vertices(basefilename); + vertex_data_store * vertexdata = + new vertex_data_store(basefilename, numvertices, iomgr); + + if ((size_t)ntop > numvertices) { + ntop = (int)numvertices; + } + + /* Initialize buffer */ + vv_t * buffer_idxs = (vv_t*) calloc(readwindow, sizeof(vv_t)); + vv_t * topbuf = (vv_t*) calloc(ntop, sizeof(vv_t)); + vv_t * mergearr = (vv_t*) calloc(ntop * 2, sizeof(vv_t)); + + /* Iterate the vertex values and maintain the top-list */ + size_t idx = 0; + vid_t st = 0; + vid_t en = numvertices - 1; + + int count = 0; + while(st <= numvertices - 1) { + en = st + readwindow - 1; + if (en >= numvertices - 1) en = numvertices - 1; + + /* Load the vertex values */ + vertexdata->load(st, en); + + int nt = en - st + 1; + int k = 0; + VertexDataType minima = VertexDataType(); + if (count > 0) { + minima = topbuf[ntop - 1].value; // Minimum value that should be even considered + } + for(int j=0; j < nt; j++) { + VertexDataType& val = *vertexdata->vertex_data_ptr(j + st); + //if (count == 0 || (val > minima)) { + float valresult; + float minimaresult; + if(para == 0){ + valresult = val.hub; + minimaresult = minima.hub; + } + else if(para == 1){ + valresult = val.auth; + minimaresult = minima.auth; + } + if (count == 0 || (valresult > minimaresult)) { + //if (count == 0 || (val.auth > minima.auth)) { + buffer_idxs[k] = vv_t((vid_t)idx + from, val); + k++; + } + idx++; + } + nt = k; /* How many were actually included */ + + /* Sort buffer-idxs */ + if(para == 0){ + quickSort(buffer_idxs, nt, vertex_value_greaterH); + } + else + { + quickSort(buffer_idxs, nt, vertex_value_greaterA); + } + /* Merge the top with the current top */ + if (count == 0) { + /* Nothing to merge, just copy */ + memcpy(topbuf, buffer_idxs, ntop * sizeof(vv_t)); + } else { + // void merge(ET* S1, int l1, ET* S2, int l2, ET* R, F f) { + if(para == 0){ + merge(topbuf, ntop, buffer_idxs, std::min(ntop, nt), mergearr, vertex_value_greaterH); + } + else if(para == 1) + { + merge(topbuf, ntop, buffer_idxs, std::min(ntop, nt), mergearr, vertex_value_greaterA); + } + + memcpy(topbuf, mergearr, ntop * sizeof(vv_t)); + } + + count++; + st += readwindow; + } + + /* Return */ + std::vector< vv_t > ret; + for(int i=0; i < ntop; i++) { + ret.push_back(topbuf[i]); + } + free(buffer_idxs); + free(mergearr); + free(topbuf); + + delete vertexdata; + delete iomgr; + + return ret; + } + + +}; + +#endif +