Summary of this patch (free-form preamble; patch tools skip everything before
the first "diff --git" line).

  * common/keccak/keccak1600/amd64/mmx1/Ec.mk is renamed to Makefile and gains
    an "entry: extract" line.
  * keccak1600.jinc: the Keccak state is passed and returned as
    `reg ptr u64[25]` instead of `stack u64[25]`; the `reg bool` loop flags
    are replaced by `while ( cond )`; the `#mmx` spill variables are renamed
    from x_* to s_*; the trail byte is carried in an `#mmx reg u64`.
  * keccakf1600.jinc: `__rhotates_nomodmsf` becomes `__rhotates_spec`; two
    `#inline if` become plain `if`; `__round_mmx1` takes the round constant
    in a `reg u64`, moves it to an `#mmx reg u64` itself and returns only the
    state it wrote; the round loop no longer spills its counter; an inline
    wrapper `_keccakf1600_mmx1_` is added.
  * New directory crypto_xof/shake256/amd64/mmx1 with Makefile, include/api.h,
    shake256.jinc (trail byte 0x1F, rate 1088/8) and xof.jazz.

Reviewer notes:
  * The patch text had lost its line breaks and indentation. Line breaks were
    rebuilt so that every hunk matches the counts in its "@@" header;
    indentation, the exact position of blank context lines and whitespace-only
    changes are best-effort and must be checked against the repository.
  * include/api.h had a bare "#include" although it uses uint8_t/uint64_t;
    the operand is restored as <stdint.h>.

diff --git a/src/common/keccak/keccak1600/amd64/mmx1/Ec.mk b/src/common/keccak/keccak1600/amd64/mmx1/Makefile
similarity index 87%
rename from src/common/keccak/keccak1600/amd64/mmx1/Ec.mk
rename to src/common/keccak/keccak1600/amd64/mmx1/Makefile
index fa930736..7e8e000b 100644
--- a/src/common/keccak/keccak1600/amd64/mmx1/Ec.mk
+++ b/src/common/keccak/keccak1600/amd64/mmx1/Makefile
@@ -1,4 +1,5 @@
 ECFN := -ec _keccak1600_mmx1
 JEXT := jinc
 SRCS := keccak1600.jinc
+entry: extract
 include ../../../../../Makefile.common
diff --git a/src/common/keccak/keccak1600/amd64/mmx1/keccak1600.jinc b/src/common/keccak/keccak1600/amd64/mmx1/keccak1600.jinc
index 70cb6bfe..601e6586 100644
--- a/src/common/keccak/keccak1600/amd64/mmx1/keccak1600.jinc
+++ b/src/common/keccak/keccak1600/amd64/mmx1/keccak1600.jinc
@@ -2,28 +2,25 @@ param int KECCAK_ROUNDS=24;
 
 require "keccakf1600.jinc"
 
-inline fn __keccak_init_mmx1() -> stack u64[25]
+
+inline fn __keccak_init_mmx1(reg ptr u64[25] state) -> reg ptr u64[25]
 {
-  stack u64[25] state;
-  reg u64 t;
   inline int i;
+  reg u64 t;
 
-  _,_,_,_,_, t = #set0();
-
-  for i = 0 to 25
-  { state[i] = t;
-    i += 1;
-  }
+  ?{}, t = #set0();
+  for i=0 to 25
+  { state[i] = t; }
 
   return state;
 }
 
 
 inline fn __add_full_block_mmx1(
-  stack u64[25] state,
+  reg ptr u64[25] state,
   reg u64 in inlen,
   reg u64 rate
-) -> stack u64[25], reg u64, reg u64
+) -> reg ptr u64[25], reg u64, reg u64
 {
   reg u64 i t rate64;
 
@@ -46,20 +43,19 @@ inline fn __add_full_block_mmx1(
 
 // obs: @pre: inlen < rate_in_bytes
 inline fn __add_final_block_mmx1(
-  stack u64[25] state,
-  reg u64 in inlen,
-  reg u8 trail_byte,
-  reg u64 rate
-) -> stack u64[25]
+  reg ptr u64[25] state,
+  reg u64         in inlen,
+  reg u8          trail_byte,
+  reg u64         rate
+) -> reg ptr u64[25]
 {
   reg u64 i t inlen8;
   reg u8 c;
-  reg bool b;
 
   inlen8 = inlen;
   inlen8 >>= 3;
   i = 0;
-  while { b = i < inlen8;} (b)
+  while ( i < inlen8 )
   {
     t = [in + 8*i];
     state[(int)i] ^= t;
@@ -67,7 +63,7 @@ inline fn __add_final_block_mmx1(
   }
 
   i <<= 3;
-  while { b = i < inlen; } (b)
+  while ( i < inlen )
   {
     c = (u8)[in + i];
     state[u8 (int)i] ^= c;
@@ -85,35 +81,35 @@ inline fn __add_final_block_mmx1(
 
 
 inline fn __absorb_mmx1(
-  stack u64[25] state,
-  reg u64 in inlen,
-  stack u8 s_trail_byte,
-  reg u64 rate // rate already in bytes -- it is returned bc of spills
-) -> stack u64[25], reg u64
+  reg ptr u64[25] state,
+  reg u64         in inlen,
+  #mmx reg u64    s_trail_byte,
+  reg u64         rate // rate already in bytes -- it is returned bc of spills
+) -> reg ptr u64[25], reg u64
 {
-  #mmx reg u64 x_in x_inlen x_rate;
+  #mmx reg u64 s_in s_inlen s_rate;
   reg u8 trail_byte;
   reg u64 t;
-  reg bool b;
 
   // intermediate blocks
-  while {b = inlen >= rate;} ( b )
+  while ( inlen >= rate )
   {
     state, in, inlen = __add_full_block_mmx1(state, in, inlen, rate);
 
-    x_in = in;
-    x_inlen = inlen;
-    x_rate = rate;
+    s_in = in;
+    s_inlen = inlen;
+    s_rate = rate;
 
     state = _keccakf1600_mmx1(state);
 
-    in = x_in;
-    inlen = x_inlen;
-    rate = x_rate;
+    in = s_in;
+    inlen = s_inlen;
+    rate = s_rate;
   }
 
   // final block
-  trail_byte = s_trail_byte;
+  t = s_trail_byte;
+  trail_byte = (8u) t;
 
   state = __add_final_block_mmx1(state, in, inlen, trail_byte, rate);
 
@@ -122,18 +118,17 @@ inline fn __absorb_mmx1(
 
 
 inline fn __xtr_full_block_mmx1(
-  stack u64[25] state,
+  reg ptr u64[25] state,
   reg u64 out outlen,
   reg u64 rate
 ) -> reg u64, reg u64
 {
   reg u64 i t rate64;
-  reg bool b;
 
   rate64 = rate;
   rate64 >>= 3;
   i = 0;
-  while { b = i < rate64; } (b)
+  while ( i < rate64 )
   {
     t = state[(int)i];
     [out + 8*i] = t;
@@ -148,7 +143,7 @@ inline fn __xtr_full_block_mmx1(
 
 
 inline fn __xtr_bytes_mmx1(
-  stack u64[25] state,
+  reg ptr u64[25] state,
   reg u64 out outlen
 ) -> reg u64
 {
@@ -179,38 +174,37 @@ inline fn __xtr_bytes_mmx1(
 
 
 inline fn __squeeze_mmx1(
-  stack u64[25] state,
-  #mmx reg u64 x_out,
-  reg u64 outlen,
-  reg u64 rate
+  reg mut ptr u64[25] state,
+  #mmx reg u64        s_out,
+  reg u64             outlen,
+  reg u64             rate
 )
 {
   reg u64 out;
-  #mmx reg u64 x_outlen x_rate;
-  reg bool b;
+  #mmx reg u64 s_outlen s_rate;
 
   // intermediate blocks
-  while {b = outlen > rate;} (b)
+  while ( outlen > rate )
   {
-    x_outlen = outlen;
-    x_rate = rate;
+    s_outlen = outlen;
+    s_rate = rate;
 
     state = _keccakf1600_mmx1(state);
 
-    out = x_out;
-    outlen = x_outlen;
-    rate = x_rate;
+    out = s_out;
+    outlen = s_outlen;
+    rate = s_rate;
 
     out, outlen = __xtr_full_block_mmx1(state, out, outlen, rate);
-    x_out = out;
+    s_out = out;
   }
 
-  x_outlen = outlen;
+  s_outlen = outlen;
 
   state = _keccakf1600_mmx1(state);
 
-  out = x_out;
-  outlen = x_outlen;
+  out = s_out;
+  outlen = s_outlen;
 
   out = __xtr_bytes_mmx1(state, out, outlen);
 }
@@ -218,22 +212,26 @@ inline fn __squeeze_mmx1(
 
 inline fn __keccak1600_mmx1(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate)
 {
-  stack u64[25] state;
-  #mmx reg u64 x_out x_outlen;
-  stack u8 s_trail_byte;
+  stack u64[25] _state;
+  reg ptr u64[25] state;
+  #mmx reg u64 s_out s_outlen;
+  #mmx reg u64 s_trail_byte_64;
+  reg u64 t;
 
-  x_out = out;
-  x_outlen = outlen;
-  s_trail_byte = trail_byte;
+  s_out = out;
+  s_outlen = outlen;
+  t = (64u) trail_byte;
+  s_trail_byte_64 = t;
 
-  state = __keccak_init_mmx1();
+  state = _state;
+  state = __keccak_init_mmx1(state);
 
   // absorb
-  state, rate = __absorb_mmx1(state, in, inlen, s_trail_byte, rate);
+  state, rate = __absorb_mmx1(state, in, inlen, s_trail_byte_64, rate);
 
   // squeeze
-  outlen = x_outlen;
-  __squeeze_mmx1(state, x_out, outlen, rate);
+  outlen = s_outlen;
+  __squeeze_mmx1(state, s_out, outlen, rate);
 }
 
 
@@ -242,3 +240,5 @@ fn _keccak1600_mmx1(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate
 {
   __keccak1600_mmx1(out, outlen, in, inlen, trail_byte, rate);
 }
+
+
diff --git a/src/common/keccak/keccak1600/amd64/mmx1/keccakf1600.jinc b/src/common/keccak/keccak1600/amd64/mmx1/keccakf1600.jinc
index 7fa42412..a0bface3 100644
--- a/src/common/keccak/keccak1600/amd64/mmx1/keccakf1600.jinc
+++ b/src/common/keccak/keccak1600/amd64/mmx1/keccakf1600.jinc
@@ -1,5 +1,5 @@
 from Jade require "common/keccak/keccak1600/amd64/spec/keccakf1600_globals.jinc" // KECCAK1600_RC
-from Jade require "common/keccak/keccak1600/amd64/spec/keccakf1600.jinc" // __rhotates
+from Jade require "common/keccak/keccak1600/amd64/spec/keccakf1600.jinc" // __rhotates_spec
 
 // C[x] = A[x,0] ^ A[x,1] ^ A[x,2] ^ A[x,3] ^ A[x,4]
 inline fn __theta_sum_mmx1(reg ptr u64[25] a) -> reg u64[5]
@@ -21,7 +21,7 @@ inline fn __theta_sum_mmx1(reg ptr u64[25] a) -> reg u64[5]
 }
 
 
-// D[x] = C[x-1] ^ ROT(C[x+1], 1) 
+// D[x] = C[x-1] ^ ROT(C[x+1], 1)
 inline fn __theta_rol_mmx1(reg u64[5] c) -> reg u64[5]
 {
   inline int x;
@@ -60,7 +60,7 @@ inline fn __rol_sum_mmx1(
   {
     x_ = (x + 3*y) % 5;
     y_ = x;
-    r = __rhotates_nomodmsf(x_, y_);
+    r = __rhotates_spec(x_, y_);
 
     // B[x] = A[x',y']
     b[x] = a[x_ + y_*5];
@@ -69,7 +69,7 @@ inline fn __rol_sum_mmx1(
     b[x] ^= d[x_];
 
     // B[x] = ROT( B[x], r[x',y'] );
-    #inline if(r != 0)
+    if(r != 0)
     { _, _, b[x] = #ROL_64(b[x], r); }
   }
 
@@ -84,19 +84,22 @@ inline fn __set_row_mmx1(
   reg ptr u64[25] e,
   reg u64[5] b,
   inline int y,
-  stack u64 s_rc
+  #mmx reg u64 s_rc
 ) -> reg ptr u64[25]
 {
   inline int x x1 x2;
-  reg u64 t;
+  reg u64 t rc;
 
   for x=0 to 5
-  { 
+  {
     x1 = (x + 1) % 5;
     x2 = (x + 2) % 5;
-    t = !b[x1] & b[x2];
+
+    //t = !b[x1] & b[x2];
+    t = b[x1]; t = !t; t &= b[x2];
+
     t ^= b[x];
-    #inline if( x==0 && y==0 ){ t ^= s_rc; }
+    if( x==0 && y==0 ){ rc = s_rc; t ^= rc; }
     e[x + y*5] = t;
   }
 
@@ -104,57 +107,51 @@ inline fn __set_row_mmx1(
 }
 
 
-inline fn __round_mmx1(reg ptr u64[25] e a, stack u64 s_rc) -> reg ptr u64[25], reg ptr u64[25]
+inline fn __round_mmx1(reg ptr u64[25] e a, reg u64 rc) -> reg ptr u64[25]
 {
   inline int y;
   reg u64[5] b c d;
+  #mmx reg u64 s_rc;
+
+  s_rc = rc;
 
   c = __theta_sum_mmx1(a);
   d = __theta_rol_mmx1(c);
 
   for y = 0 to 5
-  {
-    b = __rol_sum_mmx1(a, d, y);
+  { b = __rol_sum_mmx1(a, d, y);
     e = __set_row_mmx1(e, b, y, s_rc);
   }
 
-  return e, a;
+  return e;
 }
 
 
 inline fn __keccakf1600_mmx1(reg ptr u64[25] a) -> reg ptr u64[25]
 {
   reg ptr u64[24] RC;
-  #mmx reg ptr u64[24] x_RC;
+  #mmx reg ptr u64[24] s_RC;
   stack u64[25] s_e;
   reg ptr u64[25] e;
   reg u64 c rc;
-  #mmx reg u64 x_c x_rc;
-  reg bool b;
 
   RC = KECCAK1600_RC;
-  x_RC = RC;
+  s_RC = RC;
   e = s_e;
 
   c = 0;
-  while
+  while (c < KECCAK_ROUNDS - 1)
   {
-    x_c = c;
-
-    RC = x_RC;
+    RC = s_RC;
     rc = RC[(int) c];
-    x_rc = rc;
-    e, a = __round_mmx1(e, a, x_rc);
+    e = __round_mmx1(e, a, rc);
 
-    RC = x_RC;
+    RC = s_RC;
     rc = RC[(int) c + 1];
-    x_rc = rc;
-    a, e = __round_mmx1(a, e, x_rc);
+    a = __round_mmx1(a, e, rc);
 
-    c = x_c;
     c += 2;
-    b = c < KECCAK_ROUNDS;
-  } (b) { }
+  }
 
   return a;
 }
@@ -166,3 +163,14 @@ fn _keccakf1600_mmx1(reg ptr u64[25] a) -> reg ptr u64[25]
   a = __keccakf1600_mmx1(a);
   return a;
 }
+
+
+inline fn _keccakf1600_mmx1_(reg ptr u64[25] a) -> reg ptr u64[25]
+{
+  a = a;
+  a = _keccakf1600_mmx1(a);
+  a = a;
+  return a;
+}
+
+
diff --git a/src/crypto_xof/shake256/amd64/mmx1/Makefile b/src/crypto_xof/shake256/amd64/mmx1/Makefile
new file mode 100644
index 00000000..ffd4f141
--- /dev/null
+++ b/src/crypto_xof/shake256/amd64/mmx1/Makefile
@@ -0,0 +1,3 @@
+SRCS := xof.jazz
+SCT_FLAGS := -checkSCTafter unroll
+include ../../../../Makefile.common
diff --git a/src/crypto_xof/shake256/amd64/mmx1/include/api.h b/src/crypto_xof/shake256/amd64/mmx1/include/api.h
new file mode 100644
index 00000000..f2e83c42
--- /dev/null
+++ b/src/crypto_xof/shake256/amd64/mmx1/include/api.h
@@ -0,0 +1,17 @@
+#ifndef JADE_XOF_shake256_amd64_mmx1_API_H
+#define JADE_XOF_shake256_amd64_mmx1_API_H
+
+#define JADE_XOF_shake256_amd64_mmx1_ALGNAME "SHAKE256"
+#define JADE_XOF_shake256_amd64_mmx1_ARCH "amd64"
+#define JADE_XOF_shake256_amd64_mmx1_IMPL "mmx1"
+
+#include <stdint.h>
+
+int jade_xof_shake256_amd64_mmx1(
+  uint8_t *output,
+  uint64_t output_length,
+  const uint8_t *input,
+  uint64_t input_length
+);
+
+#endif
diff --git a/src/crypto_xof/shake256/amd64/mmx1/shake256.jinc b/src/crypto_xof/shake256/amd64/mmx1/shake256.jinc
new file mode 100644
index 00000000..fe03242b
--- /dev/null
+++ b/src/crypto_xof/shake256/amd64/mmx1/shake256.jinc
@@ -0,0 +1,14 @@
+from Jade require "common/keccak/keccak1600/amd64/mmx1/keccak1600.jinc"
+
+inline fn __shake256_mmx1(reg u64 out outlen in inlen)
+{
+  reg u64 rate;
+  reg u8 trail_byte;
+
+  trail_byte = 0x1F;
+  rate = (1088/8);
+
+  _keccak1600_mmx1(out, outlen, in, inlen, trail_byte, rate);
+}
+
+
diff --git a/src/crypto_xof/shake256/amd64/mmx1/xof.jazz b/src/crypto_xof/shake256/amd64/mmx1/xof.jazz
new file mode 100644
index 00000000..056cb48f
--- /dev/null
+++ b/src/crypto_xof/shake256/amd64/mmx1/xof.jazz
@@ -0,0 +1,12 @@
+require "shake256.jinc"
+
+export fn jade_xof_shake256_amd64_mmx1(reg u64 output output_length input input_length) -> reg u64
+{
+  reg u64 r;
+  _ = #init_msf();
+  __shake256_mmx1(output, output_length, input, input_length);
+  ?{}, r = #set0();
+  return r;
+}
+
+