From 92ec1ab414e9785c609b6a242423cc5418370621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Lindqvist?= Date: Sat, 11 Jun 2016 03:00:50 +0200 Subject: [PATCH 1/5] COMMON_CFLAGS variable --- makefile | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/makefile b/makefile index 918a222..ea0c28e 100644 --- a/makefile +++ b/makefile @@ -1,14 +1,15 @@ NUM_NODES = 10 WORLD_SIZE = 1000 +COMMON_CFLAGS = -g -std=gnu99 -O2 -mcpu=native -fomit-frame-pointer -Wall -Wextra buildall: c_fast c_fast_arm f03 c fsharp cpp_gcc cpp_clang cpp_cached racket csharp java haskell ocaml lisp rust rust_unsafe go gccgo d nim oraclejava crystal c_fast_arm: c_fast.c - gcc -marm -falign-functions=32 -g -std=gnu99 -O2 -mcpu=native -fomit-frame-pointer c_fast.c -o ./c_fast_arm - + gcc -marm -falign-functions=32 $(COMMON_CFLAGS) c_fast.c -o ./c_fast_arm + c_fast: c_fast.c - gcc -falign-functions=32 -g -std=gnu99 -O2 -mcpu=native -fomit-frame-pointer c_fast.c -o ./c_fast + gcc -falign-functions=32 $(COMMON_CFLAGS) c_fast.c -o ./c_fast f03: f03.f03 gfortran -O2 -mcpu=native f03.f03 -o f03 @@ -29,7 +30,7 @@ cpp_cached: cpp_cached.cpp clang++ cpp_cached.cpp -std=c++14 -Wall -O2 -mcpu=native -o cpp_cached c: c.c - gcc -g -std=gnu99 -Wall -Wextra c.c -O2 -mcpu=native -o c -DUSE_HIGHBIT + gcc $(COMMON_CFLAGS) c.c -o c -DUSE_HIGHBIT racket: rkt.rkt raco exe rkt.rkt @@ -59,7 +60,7 @@ rust: rs.rs rustc rs.rs --opt-level=3 -C no-stack-check rust_unsafe: rs_unsafe.rs - rustc rs_unsafe.rs --opt-level=3 + rustc rs_unsafe.rs --opt-level=3 go: go.go go build go.go @@ -80,7 +81,7 @@ nim: nim.nim nim c --cc:clang --passC:-mcpu=native -d:release nim.nim scala: scala.scala - scalac scala.scala + scalac scala.scala graphbuilder: mkgraph.go go build mkgraph.go From 42b9a991d7ac1d624b210d6160fc1a8b5f3e6eba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Lindqvist?= Date: Sat, 11 Jun 2016 03:05:17 +0200 Subject: [PATCH 2/5] A clean target --- makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/makefile b/makefile index ea0c28e..45b89bf 100644 --- a/makefile +++ b/makefile @@ -5,6 +5,11 @@ COMMON_CFLAGS = -g -std=gnu99 -O2 -mcpu=native -fomit-frame-pointer -Wall -Wextr buildall: c_fast c_fast_arm f03 c fsharp cpp_gcc cpp_clang cpp_cached racket csharp java haskell ocaml lisp rust rust_unsafe go gccgo d nim oraclejava crystal +clean: + rm -f c_fast_arm c_fast f03 fs.exe cpp_gcc cpp_clang cpp_plain cpp_cached \ + cs.exe jv.class hs ml lisp rs rs_unsafe go gccgo d nim crystal d \ + c + c_fast_arm: c_fast.c gcc -marm -falign-functions=32 $(COMMON_CFLAGS) c_fast.c -o ./c_fast_arm From 3341b090ac1e079ee27a882ba3c89dee642de97b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Lindqvist?= Date: Sat, 11 Jun 2016 03:13:24 +0200 Subject: [PATCH 3/5] Overridable CC (clang 3.6 way faster than 5.2) and -march replaces -mcpu --- makefile | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/makefile b/makefile index 45b89bf..9f0e03f 100644 --- a/makefile +++ b/makefile @@ -1,7 +1,7 @@ NUM_NODES = 10 WORLD_SIZE = 1000 -COMMON_CFLAGS = -g -std=gnu99 -O2 -mcpu=native -fomit-frame-pointer -Wall -Wextra +COMMON_CFLAGS = -std=gnu99 -O2 -march=native -fomit-frame-pointer -Wall -Wextra buildall: c_fast c_fast_arm f03 c fsharp cpp_gcc cpp_clang cpp_cached racket csharp java haskell ocaml lisp rust rust_unsafe go gccgo d nim oraclejava crystal @@ -9,13 +9,17 @@ clean: rm -f c_fast_arm c_fast f03 fs.exe cpp_gcc cpp_clang cpp_plain cpp_cached \ cs.exe jv.class hs ml lisp rs rs_unsafe go gccgo d nim crystal d \ c +# C targets +c: c.c + $(CC) $(COMMON_CFLAGS) c.c -o c -DUSE_HIGHBIT c_fast_arm: c_fast.c - gcc -marm -falign-functions=32 $(COMMON_CFLAGS) c_fast.c -o ./c_fast_arm + $(CC) -marm -falign-functions=32 $(COMMON_CFLAGS) c_fast.c -o ./c_fast_arm c_fast: c_fast.c - gcc -falign-functions=32 $(COMMON_CFLAGS) c_fast.c -o ./c_fast + $(CC) -falign-functions=32 $(COMMON_CFLAGS) c_fast.c -o ./c_fast +# Other f03: f03.f03 gfortran -O2 -mcpu=native f03.f03 -o f03 @@ -34,9 +38,6 @@ cpp_plain: cpp_plain.cpp cpp_cached: cpp_cached.cpp clang++ cpp_cached.cpp -std=c++14 -Wall -O2 -mcpu=native -o cpp_cached -c: c.c - gcc $(COMMON_CFLAGS) c.c -o c -DUSE_HIGHBIT - racket: rkt.rkt raco exe rkt.rkt From e6849802fb576f626908abe844dd853b2bd9c158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Lindqvist?= Date: Sat, 11 Jun 2016 03:27:06 +0200 Subject: [PATCH 4/5] Fixing c++ flags too --- makefile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/makefile b/makefile index 9f0e03f..0dda4cf 100644 --- a/makefile +++ b/makefile @@ -2,6 +2,7 @@ NUM_NODES = 10 WORLD_SIZE = 1000 COMMON_CFLAGS = -std=gnu99 -O2 -march=native -fomit-frame-pointer -Wall -Wextra +COMMON_CXXFLAGS = -std=c++14 -Wall -O2 -march=native buildall: c_fast c_fast_arm f03 c fsharp cpp_gcc cpp_clang cpp_cached racket csharp java haskell ocaml lisp rust rust_unsafe go gccgo d nim oraclejava crystal @@ -27,16 +28,16 @@ fsharp: fs.fs fsharpc fs.fs cpp_gcc: cpp.cpp - g++ cpp.cpp -std=c++14 -Wall -O2 -mcpu=native -DCOMPILER='"gcc"' -o cpp_gcc + g++ cpp.cpp $(COMMON_CXXFLAGS) -DCOMPILER='"gcc"' -o cpp_gcc cpp_clang: cpp.cpp - clang++ cpp.cpp -std=c++14 -Wall -O2 -mcpu=native -DCOMPILER='"clang"' -o cpp_clang + clang++ cpp.cpp $(COMMON_CXXFLAGS) -DCOMPILER='"clang"' -o cpp_clang cpp_plain: cpp_plain.cpp - clang++ cpp_plain.cpp -std=c++14 -Wall -O2 -mcpu=native -DCOMPILER='"clang"' -o cpp_plain + clang++ cpp_plain.cpp $(COMMON_CXXFLAGS) -DCOMPILER='"clang"' -o cpp_plain cpp_cached: cpp_cached.cpp - clang++ cpp_cached.cpp -std=c++14 -Wall -O2 -mcpu=native -o cpp_cached + clang++ cpp_cached.cpp $(COMMON_CXXFLAGS) -o cpp_cached racket: rkt.rkt raco exe rkt.rkt From bc8cbacae7bc0a44bf67ffe5829e96193be77420 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Lindqvist?= Date: Sat, 11 Jun 2016 03:31:26 +0200 Subject: [PATCH 5/5] Fix compiler warnings. --- c_fast.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/c_fast.c b/c_fast.c index 1b7e080..111240a 100644 --- a/c_fast.c +++ b/c_fast.c @@ -24,7 +24,7 @@ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************* - + See https://github.com/logicchains/LPATHBench The edges are stored as an array of pointers to arrays of edges, i.e: the edges @@ -34,13 +34,13 @@ maximize the density of useful data in caches * it achieves performance almost as good as a statically sized matrix, while allowing dynamic sizing - + This has only been optimised for Tegra K1, a Cortex-A15-based SoC when compiled with gcc 4.8.2. Performance seems highly dependent on code alignment. - + Peformance (with provided benchmark graph): around 10% speedup compared to cpp, and around 15% compared to C/HIGHBIT. - + Note that C/HIGHBIT has been observed to be faster on a second sparse graph with 35 nodes. */ @@ -82,23 +82,22 @@ void parse_graph(edge_t ***costs_p, int *no_of_nodes_p) { int target_node; int cost; int index = 0; - int wp; int no_of_nodes; edge_t **costs; - + f = fopen("agraph", "r"); assert(f != NULL); - + ret = fscanf(f, "%d", no_of_nodes_p); assert (ret == 1); no_of_nodes = *no_of_nodes_p; - + costs = malloc(sizeof(edge_t*) * no_of_nodes); assert(costs != NULL); *costs_p = costs; edge_t *buf = malloc(sizeof(edge_t) * no_of_nodes); assert(buf != NULL); - + while(fscanf(f, "%d %d %d\n", &c_node, &target_node, &cost) == 3) { assert((c_node == prev_node || c_node == (prev_node + 1)) && c_node < no_of_nodes && cost >= 0); if (c_node != prev_node) { @@ -111,17 +110,17 @@ void parse_graph(edge_t ***costs_p, int *no_of_nodes_p) { } buf[target_node].target = target_node; buf[target_node].cost = cost; - + index++; } - + insert_node_edges(costs, prev_node, buf, no_of_nodes, index); } int get_max_cost_small(edge_t **c, const int c_node, uint32_t visited) { int max = 0; int dist; - + visited |= 1 << c_node; for (int index = 0; c[c_node][index].cost >= 0; index++) { if (!(visited & (1 << c[c_node][index].target))) { @@ -130,7 +129,7 @@ int get_max_cost_small(edge_t **c, const int c_node, uint32_t visited) { } } visited &= ~(1 << c_node); - + return max; } @@ -138,7 +137,7 @@ int get_max_cost(edge_t **c, const int c_node, uint32_t *visited) { int max = 0; int dist; int target; - + visited[c_node >> 5] |= 1 << (c_node & 0x1f); for (int index = 0; c[c_node][index].cost >= 0; index++) { target = c[c_node][index].target; @@ -148,7 +147,7 @@ int get_max_cost(edge_t **c, const int c_node, uint32_t *visited) { } } visited[c_node >> 5] &= ~(1 << (c_node & 0x1f)); - + return max; } @@ -159,7 +158,7 @@ int main() { struct timeval start, end, duration; edge_t **costs; int no_of_nodes; - + parse_graph(&costs, &no_of_nodes); gettimeofday(&start, NULL); @@ -171,9 +170,8 @@ int main() { result = get_max_cost_small(costs, 0, 0); } gettimeofday(&end, NULL); - + timersub(&end, &start, &duration); ms = duration.tv_sec*1000 + duration.tv_usec/1000; - printf("%d LANGUAGE C-fast %llu\n", result, ms); + printf("%d LANGUAGE C-fast %lu\n", result, ms); } -