diff --git a/c_fast.c b/c_fast.c index 1b7e080..111240a 100644 --- a/c_fast.c +++ b/c_fast.c @@ -24,7 +24,7 @@ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************* - + See https://github.com/logicchains/LPATHBench The edges are stored as an array of pointers to arrays of edges, i.e: the edges @@ -34,13 +34,13 @@ maximize the density of useful data in caches * it achieves performance almost as good as a statically sized matrix, while allowing dynamic sizing - + This has only been optimised for Tegra K1, a Cortex-A15-based SoC when compiled with gcc 4.8.2. Performance seems highly dependent on code alignment. - + Peformance (with provided benchmark graph): around 10% speedup compared to cpp, and around 15% compared to C/HIGHBIT. - + Note that C/HIGHBIT has been observed to be faster on a second sparse graph with 35 nodes. */ @@ -82,23 +82,22 @@ void parse_graph(edge_t ***costs_p, int *no_of_nodes_p) { int target_node; int cost; int index = 0; - int wp; int no_of_nodes; edge_t **costs; - + f = fopen("agraph", "r"); assert(f != NULL); - + ret = fscanf(f, "%d", no_of_nodes_p); assert (ret == 1); no_of_nodes = *no_of_nodes_p; - + costs = malloc(sizeof(edge_t*) * no_of_nodes); assert(costs != NULL); *costs_p = costs; edge_t *buf = malloc(sizeof(edge_t) * no_of_nodes); assert(buf != NULL); - + while(fscanf(f, "%d %d %d\n", &c_node, &target_node, &cost) == 3) { assert((c_node == prev_node || c_node == (prev_node + 1)) && c_node < no_of_nodes && cost >= 0); if (c_node != prev_node) { @@ -111,17 +110,17 @@ void parse_graph(edge_t ***costs_p, int *no_of_nodes_p) { } buf[target_node].target = target_node; buf[target_node].cost = cost; - + index++; } - + insert_node_edges(costs, prev_node, buf, no_of_nodes, index); } int get_max_cost_small(edge_t **c, const int c_node, uint32_t visited) { int max = 0; int dist; - + visited |= 1 << c_node; for (int index = 0; c[c_node][index].cost >= 0; index++) { if (!(visited & (1 << c[c_node][index].target))) { @@ -130,7 +129,7 @@ int get_max_cost_small(edge_t **c, const int c_node, uint32_t visited) { } } visited &= ~(1 << c_node); - + return max; } @@ -138,7 +137,7 @@ int get_max_cost(edge_t **c, const int c_node, uint32_t *visited) { int max = 0; int dist; int target; - + visited[c_node >> 5] |= 1 << (c_node & 0x1f); for (int index = 0; c[c_node][index].cost >= 0; index++) { target = c[c_node][index].target; @@ -148,7 +147,7 @@ int get_max_cost(edge_t **c, const int c_node, uint32_t *visited) { } } visited[c_node >> 5] &= ~(1 << (c_node & 0x1f)); - + return max; } @@ -159,7 +158,7 @@ int main() { struct timeval start, end, duration; edge_t **costs; int no_of_nodes; - + parse_graph(&costs, &no_of_nodes); gettimeofday(&start, NULL); @@ -171,9 +170,8 @@ int main() { result = get_max_cost_small(costs, 0, 0); } gettimeofday(&end, NULL); - + timersub(&end, &start, &duration); ms = duration.tv_sec*1000 + duration.tv_usec/1000; - printf("%d LANGUAGE C-fast %llu\n", result, ms); + printf("%d LANGUAGE C-fast %lu\n", result, ms); } - diff --git a/makefile b/makefile index 918a222..0dda4cf 100644 --- a/makefile +++ b/makefile @@ -1,15 +1,26 @@ NUM_NODES = 10 WORLD_SIZE = 1000 +COMMON_CFLAGS = -std=gnu99 -O2 -march=native -fomit-frame-pointer -Wall -Wextra +COMMON_CXXFLAGS = -std=c++14 -Wall -O2 -march=native buildall: c_fast c_fast_arm f03 c fsharp cpp_gcc cpp_clang cpp_cached racket csharp java haskell ocaml lisp rust rust_unsafe go gccgo d nim oraclejava crystal +clean: + rm -f c_fast_arm c_fast f03 fs.exe cpp_gcc cpp_clang cpp_plain cpp_cached \ + cs.exe jv.class hs ml lisp rs rs_unsafe go gccgo d nim crystal d \ + c +# C targets +c: c.c + $(CC) $(COMMON_CFLAGS) c.c -o c -DUSE_HIGHBIT + c_fast_arm: c_fast.c - gcc -marm -falign-functions=32 -g -std=gnu99 -O2 -mcpu=native -fomit-frame-pointer c_fast.c -o ./c_fast_arm - + $(CC) -marm -falign-functions=32 $(COMMON_CFLAGS) c_fast.c -o ./c_fast_arm + c_fast: c_fast.c - gcc -falign-functions=32 -g -std=gnu99 -O2 -mcpu=native -fomit-frame-pointer c_fast.c -o ./c_fast + $(CC) -falign-functions=32 $(COMMON_CFLAGS) c_fast.c -o ./c_fast +# Other f03: f03.f03 gfortran -O2 -mcpu=native f03.f03 -o f03 @@ -17,19 +28,16 @@ fsharp: fs.fs fsharpc fs.fs cpp_gcc: cpp.cpp - g++ cpp.cpp -std=c++14 -Wall -O2 -mcpu=native -DCOMPILER='"gcc"' -o cpp_gcc + g++ cpp.cpp $(COMMON_CXXFLAGS) -DCOMPILER='"gcc"' -o cpp_gcc cpp_clang: cpp.cpp - clang++ cpp.cpp -std=c++14 -Wall -O2 -mcpu=native -DCOMPILER='"clang"' -o cpp_clang + clang++ cpp.cpp $(COMMON_CXXFLAGS) -DCOMPILER='"clang"' -o cpp_clang cpp_plain: cpp_plain.cpp - clang++ cpp_plain.cpp -std=c++14 -Wall -O2 -mcpu=native -DCOMPILER='"clang"' -o cpp_plain + clang++ cpp_plain.cpp $(COMMON_CXXFLAGS) -DCOMPILER='"clang"' -o cpp_plain cpp_cached: cpp_cached.cpp - clang++ cpp_cached.cpp -std=c++14 -Wall -O2 -mcpu=native -o cpp_cached - -c: c.c - gcc -g -std=gnu99 -Wall -Wextra c.c -O2 -mcpu=native -o c -DUSE_HIGHBIT + clang++ cpp_cached.cpp $(COMMON_CXXFLAGS) -o cpp_cached racket: rkt.rkt raco exe rkt.rkt @@ -59,7 +67,7 @@ rust: rs.rs rustc rs.rs --opt-level=3 -C no-stack-check rust_unsafe: rs_unsafe.rs - rustc rs_unsafe.rs --opt-level=3 + rustc rs_unsafe.rs --opt-level=3 go: go.go go build go.go @@ -80,7 +88,7 @@ nim: nim.nim nim c --cc:clang --passC:-mcpu=native -d:release nim.nim scala: scala.scala - scalac scala.scala + scalac scala.scala graphbuilder: mkgraph.go go build mkgraph.go