diff --git a/src/crypto_kem/frodo/common/amd64/ref/encode.jinc b/src/crypto_kem/frodo/common/amd64/ref/encode.jinc new file mode 100644 index 00000000..28975366 --- /dev/null +++ b/src/crypto_kem/frodo/common/amd64/ref/encode.jinc @@ -0,0 +1,76 @@ +fn __encode(reg ptr u16[NBAR * NBAR]out, reg ptr u8[EXTRACTED_BITS * NBAR * NBAR / 8] in) -> stack u16[NBAR * NBAR] { // each EXTRACTED_BITS-bit chunk of in becomes one u16 of out, shifted left by (D - EXTRACTED_BITS) + inline int k; + reg u64 i j t tmp tmp2 mask offset; + + mask = (1 << EXTRACTED_BITS) - 1; + + i = 0; + while (i < NBAR) { + tmp = 0; + + offset = i * EXTRACTED_BITS; + for k = 0 to EXTRACTED_BITS { // gather EXTRACTED_BITS input bytes into tmp, byte k at bit 8*k + tmp2 = (64u)in[offset + k]; + tmp2 <<= 8 * k; + tmp |= tmp2; + } + + j = 0; + while (j < 8) { // NOTE(review): the literal 8 presumably stands for NBAR; confirm + t = tmp; + t &= mask; + t <<= D - EXTRACTED_BITS; + offset = #LEA(i*NBAR+j); + out[offset] = (16u)t; + tmp >>= EXTRACTED_BITS; + + j += 1; + } + + i += 1; + } + + return out; +} + +fn __decode(reg ptr u8[EXTRACTED_BITS * NBAR] out, reg ptr u16[NBAR * NBAR] in) -> stack u8[EXTRACTED_BITS * NBAR] { // rounds each u16 of in down to EXTRACTED_BITS bits and packs 8 of them per row into EXTRACTED_BITS bytes of out + reg u32 tmplong tmp mask d; + reg u64 i j offset; + + d = 1 << (D - EXTRACTED_BITS - 1); // rounding term: half of the step removed by the right shift below + mask = (1 << EXTRACTED_BITS) - 1; + + out = out; + in = in; // self-assignments, no visible effect on values; NOTE(review): presumably a compiler hint, confirm + + i = 0; + while (i < NBAR) { + tmplong = 0; + + j = 0; + while (j < 8) { // visits columns 7 down to 0, so column 0 ends up in the low bits of tmplong + offset = #LEA(i * NBAR + 7); + offset -= j; + tmp = (32u)in[offset]; + tmp += d; + tmp >>= D - EXTRACTED_BITS; + tmp &= mask; + + tmplong <<= EXTRACTED_BITS; + tmplong |= tmp; + j += 1; + } + + j = 0; + offset = i*EXTRACTED_BITS; + while (j < EXTRACTED_BITS) { // emit tmplong least-significant byte first + out[offset] = (8u)tmplong; + tmplong >>= 8; + j += 1; + offset += 1; + } + i += 1; + } + + return out; +} diff --git a/src/crypto_kem/frodo/common/amd64/ref/matrix.jinc b/src/crypto_kem/frodo/common/amd64/ref/matrix.jinc new file mode 100644 index 00000000..535c338a --- /dev/null +++ b/src/crypto_kem/frodo/common/amd64/ref/matrix.jinc @@ -0,0 +1,108 @@ +fn __matrix_add(reg ptr u16[NBAR * NBAR] a b) -> stack u16[NBAR * NBAR] { // a = (a plus b) masked to D bits, element-wise; the result overwrites a + reg u64 i; + reg u16 tmp; + + i = 0; + while (i < NBAR * NBAR) { + tmp = a[i]; + tmp += b[i]; + tmp &= (1 << 
D) - 1; + a[i] = tmp; + i += 1; + } + + return a; +} + +#[returnaddress="stack"] +// a = (b - a) masked to D bits, element-wise; the result overwrites a +fn __matrix_sub(reg ptr u16[NBAR * NBAR] a b) -> stack u16[NBAR * NBAR] { + reg u64 i; + reg u16 tmp; + + i = 0; + while (i < NBAR * NBAR) { + tmp = b[i]; + tmp -= a[i]; + tmp &= (1 << D) - 1; + a[i] = tmp; + i += 1; + } + + return a; +} + +#[returnaddress="stack"] +fn __ct_verify_NNBAR(reg ptr u16[NNBAR] a b) -> stack u8 { // returns 0x00 when a and b agree in every element, 0xFF otherwise + reg u64 i; + reg u16 ac tmp; + reg u8 r; + + i = 0; + ac = 0; + while (i < NNBAR) { // ac collects the OR of all element-wise XORs; the loop bound does not depend on the data + tmp = a[(int) i]; + tmp ^= b[(int)i]; + ac |= tmp; + i += 1; + } + + tmp = ac * -1; // ac | (0 - ac) has bit 15 set exactly when ac is non-zero + ac |= tmp; + ac >>= 15; + ac *= (-1); // 0 stays 0, 1 becomes 0xFFFF + + r = (8u)ac; + + return r; +} + +#[returnaddress="stack"] +fn __ct_verify_NBAR2(reg ptr u16[NBAR * NBAR] a b) -> stack u8 { // same computation as __ct_verify_NNBAR over NBAR * NBAR elements + reg u64 i; + reg u16 ac tmp; + reg u8 r; + + i = 0; + ac = 0; + while (i < NBAR * NBAR) { + tmp = a[(int) i]; + tmp ^= b[(int)i]; + ac |= tmp; + i += 1; + } + + tmp = ac * -1; + ac |= tmp; + ac >>= 15; + ac *= (-1); + + r = (8u) ac; + + return r; +} + +#[returnaddress="stack"] +fn __ct_select(reg ptr u8[BYTES_SEC] out a b, reg u8 selector) -> stack u8[BYTES_SEC] { // out[i] = (a[i] & ~selector) | (b[i] & selector); selector 0x00 yields a, 0xFF yields b + reg u64 i; + + reg u8 n_selector tmp; + + n_selector = selector; + n_selector ^= 0xFF; + + i = 0; + while (i < BYTES_SEC) { + tmp = a[i]; + tmp &= n_selector; + out[i] = tmp; + + tmp = b[i]; + tmp &= selector; + out[i] |= tmp; + + i += 1; + } + + return out; +} diff --git a/src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc b/src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc new file mode 100644 index 00000000..0008277a --- /dev/null +++ b/src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc @@ -0,0 +1,191 @@ +from Jade require "crypto_kem/frodo/common/amd64/ref/shake128.jinc" + +#[returnaddress="stack"] +fn __AS_plus_E(reg ptr u16[NNBAR] B, reg ptr u8[BYTES_SEED_A]seedA, reg ptr u16[NNBAR] S E) -> stack u16[NNBAR] { // copies E into B, then for each of the N generated rows of A adds its dot product with S[i * N ..] to entry i of the matching row of B (B rows are NBAR wide) + stack ptr u16[NNBAR] s_B; + stack u16[N] A_row; + stack ptr u8[BYTES_SEED_A] s_seedA; + stack ptr u16[NNBAR] s_S s_E; + stack u8[2 
+ BYTES_SEED_A] b; // 2-byte row index followed by a copy of seedA + + reg u64 j k; stack u64 s_j s_k; + reg u16 tmp ac; + inline int i l; + + s_B = B; s_S = S; s_E = E; + + // copy seedA behind the 2-byte row index + for i = 0 to BYTES_SEED_A { + b[i + 2] = seedA[i]; + } + s_seedA = seedA; + + // first set B = E + B = s_B; E = s_E; + + j = 0; + while (j < NNBAR) { + for l = 0 to 4 { + B[(int)j + l] = E[(int)j + l]; + } + j += 4; + } + + s_B = B; s_E = E; + + // calculate A and B += A * S + b[u16 0] = 0; + k = 0; + + while (b[u16 0] < N) { + s_j = j; s_k = k; s_S = S; s_B = B; // park register values in stack slots around the call + A_row = __shake128_gen_A(A_row, b); + j = s_j; k = s_k; S = s_S; B = s_B; + + for i = 0 to NBAR { + ac = 0; + j = 0; + + // A_row * S_T_row + while (j < N) { + tmp = A_row[(int)j]; + tmp *= S[i * N + (int)j]; + ac += tmp; + j += 1; + } + + B[(int)k + i] += ac; + } + k += NBAR; + + b[u16 0] += 1; + } + + return B; +} + +#[returnaddress="stack"] +fn __SA_plus_E(reg ptr u16[NNBAR] B, reg ptr u8[BYTES_SEED_A]seedA, reg ptr u16[NNBAR] S E) -> stack u16[NNBAR] { // copies E into B, then for each generated row r of A adds S[l][r] times that row to row l of B (S and B rows are N wide) + stack ptr u16[NNBAR] s_B; + stack u16[N] A_row; + stack ptr u8[BYTES_SEED_A] s_seedA; + stack ptr u16[NNBAR] s_S s_E; + stack u8[2 + BYTES_SEED_A] b; + + reg u64 j k; stack u64 s_j s_k; + reg u16 tmp s; + inline int l; + + // copy seedA behind the 2-byte row index + for l = 0 to BYTES_SEED_A { + b[l + 2] = seedA[l]; + } + s_seedA = seedA; + + j = 0; + while (j < NNBAR) { + for l = 0 to 4 { + B[(int)j + l] = E[(int)j + l]; + } + j += 4; + } + s_B = B; s_S = S; s_E = E; + + // calculate A and B += S * A + b[u16 0] = 0; + + while (b[u16 0] < N) { + A_row = __shake128_gen_A(A_row, b); + + for l = 0 to NBAR { + k = s_k; S = s_S; // NOTE(review): on the first pass s_k has not been written yet; k is overwritten on the next statement + + k = (64u)b[u16 0]; + s = S[l * N + (int)k]; + + s_k = k; s_S = S; + + j = s_j; B = s_B; // NOTE(review): s_j is likewise read before its first write; j is reset just below + j = 0; + while (j < N) { + tmp = A_row[(int)j]; + tmp *= s; + B[l * N + (int)j] += tmp; + + j += 1; + } + s_j = j; s_B = B; + } + + b[u16 0] += 1; + } + + return B; +} + +#[returnaddress="stack"] +fn __SB_plus_E(reg ptr u16[NBAR * NBAR] V, reg ptr u16[NNBAR] S B, reg ptr u16[NBAR * NBAR] E) -> stack u16[NBAR * NBAR] { // V = E, then V[i][j] accumulates the sum over k of S[i][k] * B[k][j] (S rows are N wide, B rows NBAR wide) and is masked to D bits + reg 
u64 k tj; + reg u16 tmp ac; + inline int i j l; + + k = 0; + while (k < NBAR * NBAR) { // V = E, four elements per iteration + for l = 0 to 4 { + V[(int)k + l] = E[(int)k + l]; + } + k += 4; + } + + for i = 0 to NBAR { + for j = 0 to NBAR { + k = 0; + ac = 0; + while (k < N) { + tmp = S[i * N + (int)k]; + + // NOTE: why is this needed ? (the index into B is first computed into the register tj) + tj = j + NBAR * k; + tmp *= B[(int)tj]; + + ac += tmp; + k += 1; + } + + V[i * NBAR + j] += ac; + V[i * NBAR + j] &= (1 << D) - 1; + } + } + + return V; +} + +#[returnaddress="stack"] +fn __mul_BS(reg ptr u16[NBAR * NBAR] M, reg ptr u16[NNBAR]B S) -> stack u16[NBAR * NBAR] { // M[i][j] = sum over k of B[i][k] * S[j][k], masked to D bits (B and S rows are both N wide) + reg u64 k tj; + reg u16 tmp; + inline int i j; + + for i = 0 to NBAR { + for j = 0 to NBAR { + M[i * NBAR + j] = 0; + + k = 0; + while (k < N) { + tmp = B[i * N + (int)k]; + + tj = j * N; + // NOTE: tj is built in two steps; per the original note this is for register allocation, which otherwise raises "k and tj must be merged" + tj += k; + tmp *= S[(int)tj]; + + M[i * NBAR + j] += tmp; + + k += 1; + } + M[i * NBAR + j] &= (1 << D) - 1; + } + } + + return M; +} diff --git a/src/crypto_kem/frodo/common/amd64/ref/matrix_mul_opt.jinc b/src/crypto_kem/frodo/common/amd64/ref/matrix_mul_opt.jinc new file mode 100644 index 00000000..deca2d84 --- /dev/null +++ b/src/crypto_kem/frodo/common/amd64/ref/matrix_mul_opt.jinc @@ -0,0 +1,299 @@ +// notes: "16" instead of BYTES_SEED_A on purpose +// compilation should fail if BYTES_SEED_A changes + +inline fn __pad_seedA(reg ptr u8[16] seedA) -> reg ptr u8[2 + 16 + 6] { // builds a 24-byte buffer: u16 index (zero), the 16 seed bytes, 0x1f, then 5 zero bytes + reg u64 i j; + stack u8[2+16+ 6] s_index_seed_padding; + reg u8 v; + + // setup "index || seed || padding" + s_index_seed_padding[u16 0] = 0; + s_index_seed_padding[u64 2] = 0; // zeroes bytes 16..23; bytes 16 and 17 are overwritten by the seed copy below + + i = 0; j = 2; + while (i < 16) + { v = seedA[i]; + s_index_seed_padding[j] = v; + i += 1; + j += 1; + } + + s_index_seed_padding[18] = 0x1f; // byte 18 directly follows the 16 seed bytes + + return s_index_seed_padding; +} + +fn __AS_plus_E_opt( + #spill_to_mmx reg ptr u16[NNBAR] B, + reg ptr u8[16] seedA, + #spill_to_mmx reg ptr u16[NNBAR] S, + #spill_to_mmx reg ptr u16[NNBAR] E) + -> + reg ptr u16[NNBAR] +{ // copies E into B, then accumulates A * S while generating A eight rows at a time + 
reg ptr u8[2+16+ 6] index_seed_padding; + + stack u16[N * 8] s_A; // buffer for 8 generated rows of A + reg ptr u16[N * 8] A; + reg u64 A_offset B_offset S_offset; + + inline int p; + #spill_to_mmx reg u64 i j ij jN q; + reg u32[8] sum; + reg u32 mul sp; + + index_seed_padding = __pad_seedA(seedA); + + i = 0; + while (i < NNBAR) { // B = E + B[i] = E[i]; + i += 1; + } + + () = #spill(E, index_seed_padding); + + A = s_A; + i = 0; + B_offset = 0; + while( i < N ) { + () = #spill(B, S); + () = #unspill(index_seed_padding); + + A_offset = 0; + j = 0; + while( j < 8 ) // fill the buffer with 8 consecutive rows of A, starting at row i + { + ij = #LEA(i + j); + + () = #spill(i, j); + + index_seed_padding[u16 0] = (16u) ij; // the row index goes into the first two bytes of the buffer + A, A_offset = __shake128_gen_A_opt(A, A_offset, index_seed_padding); + + () = #unspill(i, j); + j += 1; + } + + () = #unspill(B, S); + () = #spill(index_seed_padding); + + j = 0; + jN = 0; + while ( j < NBAR ) + { + B_offset = #LEA(i * NBAR + j); + + () = #spill(i, j, jN); + for p = 0 to 8 { sum[p] = (32u)B[B_offset + p*NBAR]; } // column j of 8 consecutive rows of B, starting at row i + + q = 0; + while (q < N) { + () = #unspill(jN); + + S_offset = #LEA(jN + q); + sp = (32u)S[S_offset]; + + () = #spill(jN); + + for p = 0 to 8 + { + mul = (32u)A[p*N + q]; + mul *= sp; + sum[p] += mul; + } + + q += 1; + } + + for p = 0 to 8 { + B[B_offset + p*NBAR] = (16u)sum[p]; + } + + () = #unspill(i, j, jN); + + j += 1; + jN += N; + } + + i += 8; + } + + return B; +} + +fn __SA_plus_E_opt( + #spill_to_mmx reg ptr u16[NNBAR] B, // initial value is set to E's to avoid copy + reg ptr u8[16] seedA, + #spill_to_mmx reg ptr u16[NNBAR] S) + -> + reg ptr u16[NNBAR] +{ // adds S * A into B while generating A eight rows at a time; B is expected to hold E on entry (see the parameter comment) + reg ptr u8[2+16+ 6] index_seed_padding; + + stack u16[N * 8] s_A; + reg ptr u16[N * 8] A; + reg u64 A_offset B_offset S_offset; + + inline int p; + #spill_to_mmx reg u64 i j ij jN q; + reg u32[8] sp; + reg u32 sum mul; + + index_seed_padding = __pad_seedA(seedA); + + () = #spill(index_seed_padding); + + A = s_A; + i = 0; + while( i < N ) + { + + () = #spill(B, S); + () = #unspill(index_seed_padding); + + // fill the buffer with 8 consecutive rows of A, starting at row i + A_offset = 0; + j = 0; + while( j < 8 ) + { + ij = #LEA(i + j); + 
+ () = #spill(i, j); + + index_seed_padding[u16 0] = (16u) ij; + A, A_offset = __shake128_gen_A_opt(A, A_offset, index_seed_padding); + + () = #unspill(i, j); + j += 1; + } + + () = #unspill(B, S); + () = #spill(index_seed_padding); + + // update each of the NBAR rows of B with the 8 buffered rows of A + j = 0; + jN = 0; + while ( j < NBAR ) + { + () = #spill(j); + + // load 8 consecutive coefficients of S: row j, starting at column i + S_offset = #LEA(jN + i); + for p=0 to 8 + { sp[p] = (32u) S[S_offset + p]; } + + () = #spill(S); + + // sweep the N entries of row j of B + q = 0; + B_offset = jN; + + () = #spill(jN); + + while( q < N ) + { + sum = (32u) B[B_offset]; + + for p=0 to 8 + { + mul = (32u) A[p*N + q]; + mul *= sp[p]; + sum += mul; + } + + B[B_offset] = (16u) sum; + + q += 1; + B_offset += 1; + } + + () = #unspill(j, S, jN); + + j += 1; + jN += N; + } + + i += 8; + } + + return B; +} + +fn __SB_plus_E_opt( + #spill_to_mmx reg ptr u16[NBAR * NBAR] V, // initial value is set to E's to avoid copy + #spill_to_mmx reg ptr u16[NNBAR] S B) +-> reg ptr u16[NBAR * NBAR] { // V[i][j] accumulates the sum over k of S[i][k] * B[k][j] and is then masked to D bits; V is expected to hold E on entry + reg u64 i j k iN V_offset offset; + reg u32 sum mul t32; + + i = 0; iN = 0; + while (i < NBAR) { + j = 0; + while (j < NBAR) { + k = 0; + V_offset = #LEA(i*NBAR+j); + sum = (32u)V[V_offset]; + while (k < N) { + offset = #LEA(iN+k); + mul = (32u)S[offset]; + + offset = #LEA(j + NBAR*k); + t32 = (32u)B[offset]; + mul *= t32; + + sum += mul; + k += 1; + } + + sum &= (1 << D) - 1; + V[V_offset] = (16u)sum; + + j += 1; + } + i += 1; + iN += N; + } + + return V; +} + +fn __mul_BS_opt( + #spill_to_mmx reg ptr u16[NBAR * NBAR] M, + #spill_to_mmx reg ptr u16[NNBAR]B S) +-> reg ptr u16[NBAR * NBAR] { // M[i][j] = sum over k of B[i][k] * S[j][k], accumulated in 32 bits and masked to D bits + reg u64 i j k iN jN offset; + reg u32 sum mul t32; + + i = 0; iN = 0; + while (i < NBAR) { + j = 0; jN = 0; + while (j < NBAR) { + sum = 0; + + k = 0; + while (k < N) { + offset = #LEA(iN+k); + mul = (32u)B[offset]; + + offset = #LEA(jN+k); + t32 = (32u)S[offset]; + mul *= t32; + + sum += mul; + + k += 1; + } + sum &= (1 << D) - 1; + offset = #LEA(i*NBAR+j); + M[offset] = (16u)sum; + + j += 1; + jN += N; + } + + i += 1; + iN += N; + } + + return M; +} diff --git 
a/src/crypto_kem/frodo/common/amd64/ref/noise.jinc b/src/crypto_kem/frodo/common/amd64/ref/noise.jinc new file mode 100644 index 00000000..313f28b9 --- /dev/null +++ b/src/crypto_kem/frodo/common/amd64/ref/noise.jinc @@ -0,0 +1,88 @@ +fn __sample_2NNBAR(reg ptr u16[2 * NNBAR] s) -> stack u16[2 * NNBAR] { // replaces every s[i] in place: bit 0 gives the sign, the upper 15 bits are compared against CDF_TABLE to get the magnitude + reg ptr u16[CDF_TABLE_LEN] cdftp; + cdftp = CDF_TABLE; + + reg u64 i j; + reg u16 sample prnd sign; + + i = 0; + while (i < 2 * NNBAR) { + sample = 0; + + // prnd = s[i] >> 1 + prnd = s[i]; + prnd >>= 1; + + // sign = s[i] & 0x1 + sign = s[i]; + sign &= 0x1; + + // no need to compare with the last value + j = 0; + while (j < CDF_TABLE_LEN - 1) { // every table entry except the last is visited, whatever prnd is + // sample += (CDF_TABLE[j] - prnd) >> 15 + + reg u16 tmp_sample; + tmp_sample = cdftp[j]; + tmp_sample -= prnd; + tmp_sample >>= 15; // bit 15 of the 16-bit difference; 1 when CDF_TABLE[j] is below prnd, assuming table entries fit in 15 bits (TODO confirm) + sample += tmp_sample; + + j += 1; + } + + // s[i] = ((-sign) ^ sample) + sign + s[i] = 0; + s[i] -= sign; + s[i] ^= sample; + s[i] += sign; + + i += 1; + } + + return s; +} + +fn __sample_NBAR2(reg ptr u16[NBAR * NBAR] s) -> stack u16[NBAR * NBAR] { // same computation as __sample_2NNBAR over NBAR * NBAR elements + reg ptr u16[CDF_TABLE_LEN] cdftp; + cdftp = CDF_TABLE; + + reg u64 i j; + reg u16 sample prnd sign; + i = 0; + while (i < NBAR * NBAR) { + sample = 0; + + // prnd = s[i] >> 1 + prnd = s[i]; + prnd >>= 1; + + // sign = s[i] & 0x1 + sign = s[i]; + sign &= 0x1; + + // no need to compare with the last value + j = 0; + while (j < CDF_TABLE_LEN - 1) { + // sample += (CDF_TABLE[j] - prnd) >> 15 + + reg u16 tmp_sample; + tmp_sample = cdftp[j]; + tmp_sample -= prnd; + tmp_sample >>= 15; + sample += tmp_sample; + + j += 1; + } + + // s[i] = ((-sign) ^ sample) + sign + s[i] = 0; + s[i] -= sign; + s[i] ^= sample; + s[i] += sign; + + i += 1; + } + + return s; +} diff --git a/src/crypto_kem/frodo/common/amd64/ref/pack.jinc b/src/crypto_kem/frodo/common/amd64/ref/pack.jinc new file mode 100644 index 00000000..0880eb61 --- /dev/null +++ b/src/crypto_kem/frodo/common/amd64/ref/pack.jinc @@ -0,0 +1,415 @@ +fn __pack_B(reg ptr u8[D * N]out, reg ptr u16[NNBAR] 
in) -> stack u8[D * N] { // packs the low D bits of every u16 of in; each iteration turns 8 input values into D output bytes + reg u64 i j l offset; + inline int k MID TERM Mask; + reg u64 ac tmp; + reg u16 acm tm; + + Mask = (1 << D) - 1; + TERM = (D - 1)/2; + MID = D - TERM * 2; + + i = 0; j = 0; + + // process 8 input values (16 * 8 bits) at a time + // process 2 parts: | 4 * D bits | 4 * D bits | + // separate into: | TERM bytes | 1st half of MID bytes | 2nd half of MID bytes | TERM bytes | + while (i < NNBAR) { + ac = 0; + acm = 0; + + // aggregate 1st half (16 * 4 bits) into 4 * D bits in ac + l = 0; + while (l < 4) { + offset = #LEA(i + l); + tmp = (64u)in[offset]; + tmp &= Mask; + ac <<= D; + ac |= tmp; + + l += 1; + } + + // aggregate the 1st half of the MID in acm from ac + for k = 0 to MID { // moves the low MID nibbles of ac into acm + tm = ac; + tm &= 0x0F; + tm <<= 4 * k; + acm |= tm; + ac >>= 4; + } + + // process the 1st TERM in ac + l = 0; + while (l < TERM) { // low byte of ac goes to the highest address of the TERM group first + offset = #LEA(j + TERM - 1); + offset -= l; + out[offset] = ac; + ac >>= 8; + + l += 1; + } + + // aggregate 2nd half (16 * 4 bits) into 4 * D bits in ac + l = 0; + while (l < 4) { + offset = #LEA(i + 4 + l); + tmp = (64u)in[offset]; + tmp &= Mask; + ac <<= D; + ac |= tmp; + + l += 1; + } + + // process the 2nd TERM in ac + l = 0; + while (l < TERM) { + offset = #LEA(j + D - 1); + offset -= l; + out[offset] = ac; + ac >>= 8; + + l += 1; + } + + // aggregate the 2nd half of the MID in acm from ac + acm <<= 4 * MID; + for k = 0 to MID { + tm = ac; + tm &= 0x0F; + tm <<= 4 * k; + acm |= tm; + ac >>= 4; + } + + l = 0; + while (l < MID) { // store the MID bytes collected in acm between the two TERM groups + offset = #LEA(j + TERM + MID - 1); + offset -= l; + out[offset] = acm; + acm >>= 8; + + l += 1; + } + + i += 8; + j += D; + } + + return out; +} + +fn __unpack_B(reg ptr u16[NNBAR]out, reg ptr u8[D * N]in) -> stack u16[NNBAR] { // each iteration reads D input bytes and writes 8 u16 values masked to D bits + inline int k TERM MID MASK; + + reg u64 i j l ac tmp offset; + reg u16 acm tm; + + // D = TERM + MID + TERM + TERM = (D - 1)/2; + MID = D - TERM * 2; + MASK = (1 << D) - 1; + + i = 0; j = 0; + + // process D bytes at a time + // separate processing into: | TERM bytes | MID bytes | TERM bytes 
| + // combined 1st half 4 * D bits: | TERM | 1st half of MID | + // combined 2nd half 4 * D bits: | 2nd half of MID | TERM | + while (i < D * N) { + ac = 0; + acm = 0; + + // accumulate the MID bytes (8/16 bits for D = 15/16) in acm (u16) + l = 0; + while (l < MID) { + offset = #LEA(i + TERM + l); + tm = (16u)in[offset]; + acm <<= 8; + acm |= tm; + + l += 1; + } + + // aggregate 2nd half of MID from acm in ac + for k = 0 to MID { // consumes the low MID nibbles of acm + tmp = (64u)acm; + tmp &= 0x0F; + tmp <<= 4 * k; + ac |= tmp; + acm >>= 4; + } + + // accumulate 2nd TERM bytes in ac + l = 0; + while (l < TERM) { + offset = #LEA(i + TERM + MID + l); + tmp = (64u)in[offset]; + ac <<= 8; + ac |= tmp; + + l += 1; + } + + // result in 4 * D bits in 2nd half of output + l = 0; + while (l < 4) { // the low D bits of ac go to out[j, offset 7], then offsets 6, 5, 4 + tm = ac; + tm &= MASK; + offset = #LEA(j + 7); + offset -= l; + out[offset] = tm; + ac >>= D; + + l += 1; + } + + // accumulate 1st TERM bytes in ac + l = 0; + while (l < TERM) { + offset = #LEA(i + l); + tmp = (64u)in[offset]; + ac <<= 8; + ac |= tmp; + + l += 1; + } + + // aggregate 1st half of MID (the nibbles still left in acm) into ac + ac <<= 4 * MID; + for k = 0 to MID { + tmp = (64u)acm; + tmp &= 0x0F; + tmp <<= 4 * k; + ac |= tmp; + acm >>= 4; + } + + // result in 4 * D bits in 1st half of output + l = 0; + while (l < 4) { + // elements are written from offset 3 down to offset 0; the access below is an ordinary scaled index + offset = #LEA(j + 3); + offset -= l; + tm = ac; + tm &= MASK; + out[offset] = tm; + ac >>= D; + + l += 1; + } + + i += D; + j += 8; + } + + return out; +} + +fn __pack_C(reg ptr u8[D * NBAR]out, reg ptr u16[NBAR * NBAR] in) -> stack u8[D * NBAR] { // same packing steps as __pack_B, applied to an NBAR * NBAR input + reg u64 i j l offset; + inline int k MID TERM Mask; + reg u64 ac tmp; + reg u16 acm tm; + + Mask = (1 << D) - 1; + TERM = (D - 1)/2; + MID = D - TERM * 2; + + i = 0; j = 0; + + // process 8 input values (16 * 8 bits) at a time + // process 2 parts: | 4 * D bits | 4 * D bits | + // separate into: | TERM bytes | 1st half of MID bytes | 2nd half of MID bytes | TERM bytes | + while (i < NBAR * NBAR) { + ac = 0; + acm = 0; + + // 
aggregate 1st half (16 * 4 bits) into 4 * D bits in ac + l = 0; + while (l < 4) { + offset = #LEA(i + l); + tmp = (64u)in[offset]; + tmp &= Mask; + ac <<= D; + ac |= tmp; + + l += 1; + } + + // aggregate the 1st half of the MID in acm from ac + for k = 0 to MID { // moves the low MID nibbles of ac into acm + tm = ac; + tm &= 0x0F; + tm <<= 4 * k; + acm |= tm; + ac >>= 4; + } + + // process the 1st TERM in ac + l = 0; + while (l < TERM) { + offset = #LEA(j + TERM - 1); + offset -= l; + out[offset] = ac; + ac >>= 8; + + l += 1; + } + + // aggregate 2nd half (16 * 4 bits) into 4 * D bits in ac + l = 0; + while (l < 4) { + offset = #LEA(i + 4 + l); + tmp = (64u)in[offset]; + tmp &= Mask; + ac <<= D; + ac |= tmp; + + l += 1; + } + + // process the 2nd TERM in ac + l = 0; + while (l < TERM) { + offset = #LEA(j + D - 1); + offset -= l; + out[offset] = ac; + ac >>= 8; + + l += 1; + } + + // aggregate the 2nd half of the MID in acm from ac + acm <<= 4 * MID; + for k = 0 to MID { + tm = ac; + tm &= 0x0F; + tm <<= 4 * k; + acm |= tm; + ac >>= 4; + } + + l = 0; + while (l < MID) { // store the MID bytes collected in acm between the two TERM groups + offset = #LEA(j + TERM + MID - 1); + offset -= l; + out[offset] = acm; + acm >>= 8; + + l += 1; + } + + i += 8; + j += D; + } + + return out; +} + +fn __unpack_C(reg ptr u16[NBAR * NBAR]out, reg ptr u8[D * NBAR]in) -> stack u16[NBAR * NBAR] { // same unpacking steps as __unpack_B, applied to D * NBAR input bytes + inline int k TERM MID MASK; + + reg u64 i j l ac tmp offset; + reg u16 acm tm; + + // D = TERM + MID + TERM + TERM = (D - 1)/2; + MID = D - TERM * 2; + MASK = (1 << D) - 1; + + i = 0; j = 0; + + // process D bytes each time + // separate processing into: | TERM bytes | MID bytes | TERM bytes | + // combined 1st half 4 * D bits: | TERM | 1st half of MID | + // combined 2nd half 4 * D bits: | 2nd half of MID | TERM | + while (i < D * NBAR) { + ac = 0; + acm = 0; + + // accumulate the MID bytes (8/16 bits for D = 15/16) in acm (u16) + l = 0; + while (l < MID) { + offset = #LEA(i + TERM + l); + tm = (16u)in[offset]; + acm <<= 8; + acm |= tm; + + l += 1; + } + + // aggregate 2nd half of MID from acm in ac + for 
k = 0 to MID { + tmp = (64u)acm; + tmp &= 0x0F; + tmp <<= 4 * k; + ac |= tmp; + acm >>= 4; + } + + // accumulate 2nd TERM bytes in ac + l = 0; + while (l < TERM) { + offset = #LEA(i + TERM + MID + l); + tmp = (64u)in[offset]; + ac <<= 8; + ac |= tmp; + + l += 1; + } + + // result in 4 * D bits in 2nd half of output + l = 0; + while (l < 4) { + offset = #LEA(j + 7); + offset -= l; + tm = ac; + tm &= MASK; + out[offset] = tm; + ac >>= D; + + l += 1; + } + + // accumulate 1st TERM bytes in ac + l = 0; + while (l < TERM) { + offset = #LEA(i + l); + tmp = (64u)in[offset]; + ac <<= 8; + ac |= tmp; + + l += 1; + } + + // aggregate 1st half of MID (the nibbles still left in acm) into ac + ac <<= 4 * MID; + for k = 0 to MID { + tmp = (64u)acm; + tmp &= 0x0F; + tmp <<= 4 * k; + ac |= tmp; + acm >>= 4; + } + + // result in 4 * D bits in 1st half of output + l = 0; + while (l < 4) { + // elements are written from offset 3 down to offset 0; the access below is an ordinary scaled index + offset = #LEA(j + 3); + offset -= l; + tm = ac; + tm &= MASK; + out[offset] = tm; + ac >>= D; + + l += 1; + } + + i += D; + j += 8; + } + + return out; +} diff --git a/src/crypto_kem/frodo/common/amd64/ref/shake128.jinc b/src/crypto_kem/frodo/common/amd64/ref/shake128.jinc new file mode 100644 index 00000000..d287adb3 --- /dev/null +++ b/src/crypto_kem/frodo/common/amd64/ref/shake128.jinc @@ -0,0 +1,388 @@ +from Jade require "common/keccak/keccak1600/amd64/ref1/keccak1600.jinc" + +param int SHAKE128_RATE = 168; + +#[returnaddress="stack"] +fn __shake128_gen_A(reg ptr u8[2 * N] out, reg const ptr u8[2 + BYTES_SEED_A] in) -> stack u8[2 * N] +{ // absorbs the INLEN input bytes in one block and squeezes 2 * N output bytes into out + stack ptr u8[2 * N] s_out; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + inline int INLEN OUTLEN OUTRND i k; + reg u64 j; stack u64 s_j; + + INLEN = 2 + BYTES_SEED_A; + OUTLEN = 2 * N; + OUTRND = OUTLEN / SHAKE128_RATE; + + state = s_state; + state = __keccak_init_ref1(state); + + for i = 0 to INLEN { + c = in[i]; + state[u8 i] ^= c; + } + state[u8 INLEN] ^= 0x1f; // padding: 0x1f right after the input, 0x80 in the last byte of the rate + state[u8 SHAKE128_RATE-1] ^= 0x80; + + 
for i = 0 to OUTRND { // one permutation per full SHAKE128_RATE-byte output block + s_out = out; s_j = j; + + state = __keccakf1600_ref1(state); + + out = s_out; j = s_j; + + j = 0; + while (j < SHAKE128_RATE) { + for k = 0 to 4 { + out[(int)j + k + i * SHAKE128_RATE] = state[u8 (int)j + k]; + } + j += 4; + } + } + + s_out = out; s_j = j; + + state = __keccakf1600_ref1(state); // runs even when OUTLEN % SHAKE128_RATE is 0, in which case the loop below copies nothing + + out = s_out; + + j = 0; + while (j < OUTLEN % SHAKE128_RATE) { // copies 4 bytes per step, so the remainder is assumed to be a multiple of 4 + for k = 0 to 4 { + out[(int)j + k + OUTRND * SHAKE128_RATE] = state[u8 (int)j + k]; + } + j += 4; + } + + return out; +} + +#[returnaddress="stack"] +fn __shake128_seed_A(reg ptr u8[BYTES_SEED_A] out, reg const ptr u8[BYTES_SEED_A] in) -> stack u8[BYTES_SEED_A] +{ // absorbs BYTES_SEED_A input bytes and outputs BYTES_SEED_A bytes with a single permutation + stack ptr u8[BYTES_SEED_A] s_out; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + inline int INLEN OUTLEN i; + + INLEN = BYTES_SEED_A; OUTLEN = BYTES_SEED_A; + + s_out = out; + + state = s_state; + state = __keccak_init_ref1(state); + + for i = 0 to INLEN { + c = in[i]; + state[u8 i] ^= c; + } + state[u8 INLEN] ^= 0x1f; + state[u8 SHAKE128_RATE-1] ^= 0x80; + + state = __keccakf1600_ref1(state); + + out = s_out; + + for i = 0 to OUTLEN { + out[i] = state[u8 i]; + } + + return out; +} + +#[returnaddress="stack"] +fn __shake128_r(reg ptr u8[4 * NNBAR] out, reg const ptr u8[1 + BYTES_SEED_SE] in) -> stack u8[4 * NNBAR] +{ // absorbs the INLEN input bytes in one block and squeezes 4 * NNBAR output bytes into out + stack ptr u8[4 * NNBAR] s_out; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + inline int INLEN OUTLEN OUTRND k; + reg u64 i j; + stack u64 s_i s_j; + + INLEN = 1 + BYTES_SEED_SE; + OUTLEN = 4 * NNBAR; + OUTRND = OUTLEN / SHAKE128_RATE; + + s_out = out; + + state = s_state; + state = __keccak_init_ref1(state); + + for k = 0 to INLEN { + c = in[k]; + state[u8 k] ^= c; + } + + state[u8 INLEN] ^= 0x1f; + state[u8 SHAKE128_RATE-1] ^= 0x80; + + i = 0; + while (i < OUTRND * SHAKE128_RATE) { // i counts output bytes written so far + s_i = i; s_j = j; s_out = out; + + state = __keccakf1600_ref1(state); + + out = s_out; i = s_i; j = s_j; + j = 0; + + while (j < SHAKE128_RATE) { + for k = 0 to 4 { + out[(int)i + k] = 
state[u8 (int)j + k]; + } + j += 4; + i += 4; + } + } + + s_i = i; s_j = j; s_out = out; + + state = __keccakf1600_ref1(state); + + out = s_out; j = s_j; + j = 0; + + while (j < OUTLEN % SHAKE128_RATE) { // copies 4 bytes per step, so the remainder is assumed to be a multiple of 4 + for k = 0 to 4 { + out[(int) j + k + OUTRND * SHAKE128_RATE] = state[u8 (int) j + k]; + } + j += 4; + } + + return out; +} + +#[returnaddress="stack"] +fn __shake128_pkh(reg ptr u8[BYTES_SEC] out, reg const ptr u8[BYTES_PK] in) -> stack u8[BYTES_SEC] +{ // absorbs the BYTES_PK input bytes block by block and outputs BYTES_SEC bytes + stack ptr u8[BYTES_SEC] s_out; + stack ptr u8[BYTES_PK] s_in; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + reg u64 i j; stack u64 s_i s_j; + inline int INLEN OUTLEN INRND k; + + INLEN = BYTES_PK; OUTLEN = BYTES_SEC; + INRND = INLEN / SHAKE128_RATE; + s_out = out; + s_in = in; + + state = s_state; + state = __keccak_init_ref1(state); + + i = 0; + while (i < INRND * SHAKE128_RATE) { // full input blocks: XOR SHAKE128_RATE bytes into the state, then permute + in = s_in; + j = 0; + while (j < SHAKE128_RATE) { + for k = 0 to 4 { + c = in[(int)i + k]; + state[u8 (int)j + k] ^= c; + } + + i += 4; + j += 4; + } + + s_in = in; s_i = i; s_j = j; + state = __keccakf1600_ref1(state); + i = s_i; j = s_j; + } + + in = s_in; + j = 0; + while (j < INLEN % SHAKE128_RATE) { // absorbs 4 bytes per step, so the remainder is assumed to be a multiple of 4 + for k = 0 to 4 { + c = in[(int)j + INRND * SHAKE128_RATE + k]; + state[u8 (int)j + k] ^= c; + } + j += 4; + } + + state[u8 INLEN % SHAKE128_RATE] ^= 0x1f; + state[u8 SHAKE128_RATE-1] ^= 0x80; + + s_in = in; s_i = i; s_j = j; + + state = __keccakf1600_ref1(state); + + out = s_out; + + for k = 0 to OUTLEN { + out[k] = state[u8 k]; + } + + return out; +} + +#[returnaddress="stack"] +fn __shake128_SE_k(reg ptr u8[BYTES_SEED_SE + BYTES_SEC] out, reg const ptr u8[2 * BYTES_SEC + BYTES_SALT] in) -> stack u8[BYTES_SEED_SE + BYTES_SEC] { // absorbs INLEN input bytes and outputs OUTLEN bytes using a single permutation, so both are assumed to fit in one rate block + stack ptr u8[BYTES_SEED_SE + BYTES_SEC] s_out; + stack ptr u8[2 * BYTES_SEC + BYTES_SALT] s_in; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + inline int INLEN OUTLEN k; + reg u64 i; stack u64 s_i; + + INLEN = 2 * BYTES_SEC + BYTES_SALT; + OUTLEN = BYTES_SEED_SE + 
BYTES_SEC; + + s_out = out; s_in = in; + + state = s_state; + state = __keccak_init_ref1(state); + + in = s_in; + for k = 0 to INLEN { + c = in[k]; + state[u8 k] ^= c; + } + + state[u8 INLEN] ^= 0x1f; + state[u8 SHAKE128_RATE-1] ^= 0x80; + + s_in = in; s_i = i; // NOTE(review): i has not been assigned at this point; it is set to 0 after the call + + state = __keccakf1600_ref1(state); + + out = s_out; i = s_i; + i = 0; + + for k = 0 to OUTLEN { + out[k] = state[u8 k]; + } + + return out; +} + + +#[returnaddress="stack"] +fn __shake128_encap_r(reg ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] out, reg const ptr u8[1 + BYTES_SEED_SE] in) -> stack u8[2 * (2 * NNBAR + NBAR * NBAR)] { // absorbs the INLEN input bytes in one block and squeezes OUTLEN output bytes into out + stack ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] s_out; + stack ptr u8[1 + BYTES_SEED_SE] s_in; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + inline int INLEN OUTLEN OUTRND k; + reg u64 i j; stack u64 s_i s_j; + + INLEN = 1 + BYTES_SEED_SE; + OUTLEN = 2 * (2 * NNBAR + NBAR * NBAR); + OUTRND = OUTLEN / SHAKE128_RATE; + + s_out = out; + + state = s_state; + state = __keccak_init_ref1(state); + + for k = 0 to INLEN { + c = in[k]; + state[u8 k] ^= c; + } + + state[u8 INLEN] ^= 0x1f; + state[u8 SHAKE128_RATE-1] ^= 0x80; + + s_in = in; + + i = 0; + while (i < OUTRND * SHAKE128_RATE) { // i counts output bytes written so far + s_out = out; s_i = i; s_j = j; + + state = __keccakf1600_ref1(state); + + out = s_out; i = s_i; j = s_j; + j = 0; + while (j < SHAKE128_RATE) { + for k = 0 to 4 { + out[(int)i + k] = state[u8 (int)j + k]; + } + i += 4; + j += 4; + } + } + + s_out = out; s_i = i; s_j = j; + state = __keccakf1600_ref1(state); + out = s_out; i = s_i; j = s_j; + + j = 0; + while (j < OUTLEN % SHAKE128_RATE) { // copies 4 bytes per step, so the remainder is assumed to be a multiple of 4 + for k = 0 to 4 { + out[(int) j + OUTRND * SHAKE128_RATE + k] = state[u8 (int)j + k]; + } + j += 4; + } + + return out; +} + +#[returnaddress="stack"] +fn __shake128_ss(reg ptr u8[BYTES_SEC] out, reg const ptr u8[BYTES_CT + BYTES_SEC] in) -> stack u8[BYTES_SEC] +{ // absorbs the INLEN input bytes block by block and outputs BYTES_SEC bytes + stack ptr u8[BYTES_SEC] s_out; + stack ptr u8[BYTES_CT + BYTES_SEC] s_in; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + reg 
u64 i j; stack u64 s_i s_j; + inline int INLEN OUTLEN INRND k; + + INLEN = BYTES_CT + BYTES_SEC; + OUTLEN = BYTES_SEC; + INRND = INLEN / SHAKE128_RATE; + + s_out = out; s_in = in; + + state = s_state; + state = __keccak_init_ref1(state); + + i = 0; + while (i < INRND * SHAKE128_RATE) { // full input blocks: XOR SHAKE128_RATE bytes into the state, then permute + in = s_in; + j = 0; + while (j < SHAKE128_RATE) { + for k = 0 to 4 { + c = in[(int)i + k]; + state[u8 (int)j + k] ^= c; + } + + i += 4; + j += 4; + } + + s_in = in; s_i = i; s_j = j; + state = __keccakf1600_ref1(state); + i = s_i; j = s_j; + } + + in = s_in; + s_i = i; + j = 0; + while (j < INLEN % SHAKE128_RATE) { // the remainder is absorbed one byte at a time + c = in[(int)j + INRND * SHAKE128_RATE]; + state[u8 (int)j] ^= c; + j += 1; + } + s_in = in; s_j = j; + + state[u8 INLEN % SHAKE128_RATE] ^= 0x1f; + state[u8 SHAKE128_RATE-1] ^= 0x80; + + state = __keccakf1600_ref1(state); + + out = s_out; i = s_i; + + i = 0; + for k = 0 to OUTLEN { + out[k] = state[u8 k]; + } + + return out; +} diff --git a/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc b/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc new file mode 100644 index 00000000..320c29af --- /dev/null +++ b/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc @@ -0,0 +1,474 @@ +from Jade require "common/keccak/keccak1600/amd64/ref1/keccak1600.jinc" + +param int SHAKE128_RATE = 168; +fn __shake128_gen_A_opt( + #spill_to_mmx reg ptr u8[2 * N * 8] out, + #spill_to_mmx reg u64 out_offset, + #spill_to_mmx reg const ptr u8[BYTES_SEED_A + 2 + 6] in) +-> reg ptr u8[2 * N * 8], + reg u64 +{ // hashes the buffer in, writes 2 * N output bytes to out starting at u64 index out_offset, and returns out with the advanced offset + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 j t0 zero; + inline int k INLEN OUTLEN OUTRND; + + INLEN = BYTES_SEED_A + 2; + OUTLEN = 2 * N; + OUTRND = OUTLEN/SHAKE128_RATE; + + state = s_state; + + ?{}, zero = #set0(); + state[u8 0] = 0x5F; // NOTE(review): overwritten by the first u64 copied from in just below + + for k = 0 to INLEN/8 + 1 { // the state is written directly (no XOR): whole u64 words of in, which include bytes past INLEN + t0 = in[u64 k]; + state[u64 k] = t0; + } + + i = INLEN/8 + 1; + while (i < 25) { + state[i] = zero; + i += 1; + } + + state[u8 INLEN] = 0x1f; + state[u8 
SHAKE128_RATE-1] = 0x80; + + i = 0; + while (i < OUTRND * SHAKE128_RATE/8) { // full output blocks, copied one u64 at a time + () = #spill(i, j, out, out_offset); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j, out, out_offset); + + j = 0; + while (j < SHAKE128_RATE/8) { + t0 = state[u64 j]; + out[u64 out_offset] = t0; + out_offset += 1; + + j += 1; + } + + i += SHAKE128_RATE/8; + } + + () = #spill(i, j, out, out_offset); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j, out, out_offset); + + i = 0; + while (i < (OUTLEN % SHAKE128_RATE) / 8) { // remainder copied in whole u64 words, so it is assumed to be a multiple of 8 + t0 = state[u64 i]; + out[u64 out_offset] = t0; + out_offset += 1; + i += 1; + } + + return out, out_offset; +} + +fn __shake128_seed_A_opt( + #spill_to_mmx reg ptr u8[BYTES_SEED_A] out, + #spill_to_mmx reg const ptr u8[BYTES_SEED_A] in) + -> reg ptr u8[BYTES_SEED_A] { // hashes BYTES_SEED_A input bytes into BYTES_SEED_A output bytes with a single permutation, moving 8 bytes at a time + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 t0 zero; + inline int k INLEN OUTLEN; + + INLEN = BYTES_SEED_A; + OUTLEN = BYTES_SEED_A; + + state = s_state; + + for k = 0 to INLEN/8 { + t0 = in[u64 k]; + state[k] = t0; + } + ?{}, zero = #set0(); + + i = INLEN/8; + while (i < 25) { // zero the rest of the state + state[i] = zero; + i += 1; + } + + state[u8 INLEN] = 0x1f; + state[u8 SHAKE128_RATE-1] = 0x80; + + () = #spill(out); + + state = __keccakf1600_ref1(state); + + () = #unspill(out); + + for k = 0 to OUTLEN/8 { + t0 = state[u64 k]; + out[u64 k] = t0; + } + + return out; +} + +fn __shake128_r_opt( + #spill_to_mmx reg ptr u8[4 * NNBAR] out, + #spill_to_mmx reg const ptr u8[BYTES_SEED_SE] in) +-> reg ptr u8[4 * NNBAR] +{ // hashes the byte 0x5F followed by the bytes of in, and squeezes 4 * NNBAR output bytes into out + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 j offset t0 zero; + inline int INLEN OUTLEN OUTRND; + + INLEN = 1 + BYTES_SEED_SE; + OUTLEN = 4 * NNBAR; + OUTRND = OUTLEN/SHAKE128_RATE; + + state = s_state; + + ?{}, zero = #set0(); + state[u8 0] = 0x5F; + state[INLEN/8] = zero; // cleared first: the last shifted store below spills one byte into this word + + i = 0; + while (i < INLEN/8) { // word i of in is stored one byte past state word i, leaving byte 0 for the prefix + t0 = in[u64 i]; + offset = #LEA(1+8*i); + state.[u64 offset] = t0; + + i += 1; + } + + i = 
INLEN/8 + 1; + while (i < 25) { + state[i] = zero; + i += 1; + } + + state[u8 INLEN] = 0x1f; + state[u8 SHAKE128_RATE-1] = 0x80; + + i = 0; + while (i < OUTRND * SHAKE128_RATE/8) { // full output blocks, copied one u64 at a time + () = #spill(i, j, out); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j, out); + + j = 0; + while (j < SHAKE128_RATE/8) { + t0 = state[u64 j]; + offset = #LEA(i+j); + out[u64 offset] = t0; + + j += 1; + } + + i += SHAKE128_RATE/8; + } + + () = #spill(i, j, out); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j, out); + + i = 0; + while (i < (OUTLEN % SHAKE128_RATE) / 8) { // remainder copied in whole u64 words, so it is assumed to be a multiple of 8 + t0 = state[u64 i]; + out[u64 OUTRND*SHAKE128_RATE/8 + i] = t0; + i += 1; + } + + return out; +} + +fn __shake128_pkh_opt( + #spill_to_mmx reg ptr u8[BYTES_SEC] out, + #spill_to_mmx reg const ptr u8[BYTES_PK] in) +-> reg ptr u8[BYTES_SEC] { // absorbs the BYTES_PK input bytes 8 at a time and outputs BYTES_SEC bytes + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 j offset t0 zero; + inline int INLEN OUTLEN INRND; + + INLEN = BYTES_PK; + INRND = INLEN/SHAKE128_RATE; + OUTLEN = BYTES_SEC; + + state = s_state; + + ?{}, zero = #set0(); + + i = 0; + while (i < 25) { // start from an all-zero state + state[i] = zero; + i += 1; + } + + () = #spill(out); + + i = 0; + while (i < INRND * SHAKE128_RATE/8) { // full input blocks; i and j count u64 words + j = 0; + while (j < SHAKE128_RATE/8) { + offset = #LEA(i+j); + t0 = in[u64 offset]; + state[u64 j] ^= t0; + j += 1; + } + + () = #spill(i, j); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j); + + i += SHAKE128_RATE/8; + } + + i = 0; + while (i < (INLEN % SHAKE128_RATE)/8) { // remainder absorbed in whole u64 words, so it is assumed to be a multiple of 8 + t0 = in[u64 INRND * SHAKE128_RATE/8 + i]; + state[u64 i] ^= t0; + + i += 1; + } + + state[u8 INLEN%SHAKE128_RATE] ^= 0x1f; + state[u8 SHAKE128_RATE-1] ^= 0x80; + + () = #spill(i, j); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j, out); + + i = 0; + while (i < OUTLEN/8) { + out[u64 i] = state[u64 i]; + i += 1; + } + + return out; +} + +fn __shake128_SE_k_opt( + #spill_to_mmx reg ptr u8[1 + BYTES_SEED_SE + BYTES_SEC] out, + #spill_to_mmx reg const ptr u8[2 * 
BYTES_SEC + BYTES_SALT] in) +-> reg ptr u8[1 + BYTES_SEED_SE + BYTES_SEC] { + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 offset t0 zero; + inline int INLEN OUTLEN; + + INLEN = 2 * BYTES_SEC + BYTES_SALT; + OUTLEN = BYTES_SEED_SE + BYTES_SEC; + + state = s_state; + + i = 0; + while (i < INLEN/8) { + t0 = in[u64 i]; + state[i] = t0; + + i += 1; + } + ?{}, zero = #set0(); + + i = INLEN/8; + while (i < 25) { + state[i] = zero; + i += 1; + } + + state[u8 INLEN] = 0x1f; + state[u8 SHAKE128_RATE-1] = 0x80; + + () = #spill(out); + + state = __keccakf1600_ref1(state); + + () = #unspill(out); + + i = 0; + while (i < OUTLEN/8) { + t0 = state[u64 i]; + offset = #LEA(1+8*i); + out.[u64 offset] = t0; + + i += 1; + } + + return out; +} + +fn __shake128_encap_r_opt( + #spill_to_mmx reg ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] out, + #spill_to_mmx reg const ptr u8[1 + BYTES_SEED_SE] in) +-> reg ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] { + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 j offset t0 zero; + reg u8 t; + inline int k INLEN OUTLEN OUTRND; + + INLEN = 1 + BYTES_SEED_SE; + OUTLEN = 2 * (2 * NNBAR + NBAR * NBAR); + OUTRND = OUTLEN/SHAKE128_RATE; + + state = s_state; + + ?{}, zero = #set0(); + state[u8 0] = 0x5F; + state[INLEN/8] = zero; + + i = 0; + while (i < INLEN/8) { + t0 = in[u64 i]; + state[u64 i] = t0; + i += 1; + } + + for k = 0 to INLEN%8 { + t = in[INLEN-INLEN%8 + k]; + state[u8 INLEN-INLEN%8 + k] = t; + } + + i = INLEN/8 + 1; + while (i < 25) { + state[i] = zero; + i += 1; + } + + state[u8 INLEN] = 0x1f; + state[u8 SHAKE128_RATE-1] = 0x80; + + i = 0; + while (i < OUTRND * SHAKE128_RATE/8) { + () = #spill(i, out); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, out); + + j = 0; + while (j < SHAKE128_RATE/8) { + t0 = state[u64 j]; + offset = #LEA(i+j); + out[u64 offset] = t0; + + j += 1; + } + + i += SHAKE128_RATE/8; + } + + () = #spill(i, j, out); + + state = 
__keccakf1600_ref1(state); + + () = #unspill(i, j, out); + + i = 0; + while (i < (OUTLEN % SHAKE128_RATE) / 8) { + t0 = state[u64 i]; + out[u64 OUTRND*SHAKE128_RATE/8 + i] = t0; + + i += 1; + } + + return out; +} + +fn __shake128_ss_opt( + #spill_to_mmx reg ptr u8[BYTES_SEC] out, + #spill_to_mmx reg const ptr u8[BYTES_CT + BYTES_SEC] in) +-> reg ptr u8[BYTES_SEC] { + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 j offset t0 zero; + inline int INLEN OUTLEN INRND; + + INLEN = BYTES_CT + BYTES_SEC; + INRND = INLEN/SHAKE128_RATE; + OUTLEN = BYTES_SEC; + + state = s_state; + + ?{}, zero = #set0(); + + i = 0; + while (i < 25) { + state[i] = zero; + i += 1; + } + + () = #spill(out); + + i = 0; + while (i < INRND*SHAKE128_RATE/8) { + j = 0; + while (j < SHAKE128_RATE/8) { + offset = #LEA(i+j); + t0 = in[u64 offset]; + state[u64 j] ^= t0; + + j += 1; + } + + () = #spill(i, j); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j); + + i += SHAKE128_RATE/8; + } + + i = 0; + while (i < (INLEN % SHAKE128_RATE)/8) { + t0 = in[u64 INRND * SHAKE128_RATE/8 + i]; + state[u64 i] ^= t0; + + i += 1; + } + + state[u8 INLEN%SHAKE128_RATE] ^= 0x1f; + state[u8 SHAKE128_RATE-1] ^= 0x80; + + () = #spill(i, j); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j, out); + + i = 0; + while (i < OUTLEN/8) { + out[u64 i] = state[u64 i]; + i += 1; + } + + return out; +} diff --git a/src/crypto_kem/frodo/common/amd64/ref/shake256.jinc b/src/crypto_kem/frodo/common/amd64/ref/shake256.jinc new file mode 100644 index 00000000..1ed5acf0 --- /dev/null +++ b/src/crypto_kem/frodo/common/amd64/ref/shake256.jinc @@ -0,0 +1,338 @@ +from Jade require "common/keccak/keccak1600/amd64/ref1/keccak1600.jinc" + +param int SHAKE256_RATE = 136; + +#[returnaddress="stack"] +fn __shake256_seed_A(reg ptr u8[BYTES_SEED_A] out, reg const ptr u8[BYTES_SEED_A] in) -> stack u8[BYTES_SEED_A] +{ + stack ptr u8[BYTES_SEED_A] s_out; + stack u64[25] s_state; + reg ptr 
u64[25] state; + reg u8 c; + reg u64 i; stack u64 s_i; + inline int INLEN OUTLEN; + + INLEN = BYTES_SEED_A; OUTLEN = BYTES_SEED_A; + + s_out = out; + + state = s_state; + state = __keccak_init_ref1(state); + + i = 0; + while (i < INLEN) { + c = in[(int) i]; + state[u8 (int) i] ^= c; + i += 1; + } + state[u8 (int) i] ^= 0x1f; + state[u8 SHAKE256_RATE-1] ^= 0x80; + + s_i = i; + + state = _keccakf1600_ref1(state); + + out = s_out; + + i = s_i; + i = 0; + while (i < OUTLEN) { + out[(int) i] = state[u8 (int) i]; + i += 1; + } + + return out; +} + +#[returnaddress="stack"] +fn __shake256_r(reg ptr u8[4 * NNBAR] out, reg const ptr u8[1 + BYTES_SEED_SE] in) -> stack u8[4 * NNBAR] +{ + stack ptr u8[4 * NNBAR] s_out; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + inline int INLEN OUTLEN OUTRND; + reg u64 i j; + stack u64 s_i s_j; + + INLEN = 1 + BYTES_SEED_SE; + OUTLEN = 4 * NNBAR; + OUTRND = OUTLEN / SHAKE256_RATE; + + s_out = out; + + state = s_state; + state = __keccak_init_ref1(state); + + i = 0; + while (i < INLEN) { + c = in[(int) i]; + state[u8 (int) i] ^= c; + i += 1; + } + + state[u8 INLEN] ^= 0x1f; + state[u8 SHAKE256_RATE-1] ^= 0x80; + + i = 0; + while (i < OUTRND * SHAKE256_RATE) { + s_i = i; s_j = j; s_out = out; + + state = _keccakf1600_ref1(state); + + out = s_out; i = s_i; j = s_j; + j = 0; + + while (j < SHAKE256_RATE) { + out[(int)i] = state[u8 (int)j]; + j += 1; + i += 1; + } + } + + s_i = i; s_j = j; s_out = out; + + state = _keccakf1600_ref1(state); + + out = s_out; i = s_i; j = s_j; + j = 0; + + while (i < OUTLEN) { + out[(int) i] = state[u8 (int) j]; + j += 1; + i += 1; + } + + return out; +} + +#[returnaddress="stack"] +fn __shake256_pkh(reg ptr u8[BYTES_SEC] out, reg const ptr u8[BYTES_PK] in) -> stack u8[BYTES_SEC] +{ + stack ptr u8[BYTES_SEC] s_out; + stack ptr u8[BYTES_PK] s_in; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + reg u64 i j; stack u64 s_i s_j; + inline int INLEN OUTLEN INRND; + + INLEN = BYTES_PK; 
OUTLEN = BYTES_SEC; + INRND = INLEN / SHAKE256_RATE; + s_out = out; + s_in = in; + + state = s_state; + state = __keccak_init_ref1(state); + + i = 0; + while (i < INRND * SHAKE256_RATE) { + in = s_in; + j = 0; + while (j < SHAKE256_RATE) { + c = in[(int)i]; + state[u8 (int)j] ^= c; + + i += 1; + j += 1; + } + + s_in = in; s_i = i; s_j = j; + state = _keccakf1600_ref1(state); + i = s_i; j = s_j; + } + + in = s_in; + j = 0; + while (i < INLEN) { + c = in[(int)i]; + state[u8 (int)j] ^= c; + i += 1; + j += 1; + } + + state[u8 INLEN - INRND * SHAKE256_RATE] ^= 0x1f; + state[u8 SHAKE256_RATE-1] ^= 0x80; + + s_in = in; s_i = i; s_j = j; + + state = _keccakf1600_ref1(state); + + out = s_out; i = s_i; + + i = 0; + while (i < OUTLEN) { + out[(int) i] = state[u8 (int) i]; + i += 1; + } + + return out; +} + +#[returnaddress="stack"] +fn __shake256_SE_k(reg ptr u8[BYTES_SEED_SE + BYTES_SEC] out, reg const ptr u8[2 * BYTES_SEC + BYTES_SALT] in) -> stack u8[BYTES_SEED_SE + BYTES_SEC] { + stack ptr u8[BYTES_SEED_SE + BYTES_SEC] s_out; + stack ptr u8[2 * BYTES_SEC + BYTES_SALT] s_in; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + inline int INLEN OUTLEN; + reg u64 i; stack u64 s_i; + + INLEN = 2 * BYTES_SEC + BYTES_SALT; + OUTLEN = BYTES_SEED_SE + BYTES_SEC; + + s_out = out; s_in = in; + + state = s_state; + state = __keccak_init_ref1(state); + + in = s_in; + i = 0; + while (i < INLEN) { + c = in[(int) i]; + state[u8 (int) i] ^= c; + i += 1; + } + + state[u8 INLEN] ^= 0x1f; + state[u8 SHAKE256_RATE-1] ^= 0x80; + + s_in = in; s_i = i; + + state = _keccakf1600_ref1(state); + + out = s_out; i = s_i; + i = 0; + + while (i < OUTLEN) { + out[(int) i] = state[u8 (int) i]; + i += 1; + } + + return out; +} + + +#[returnaddress="stack"] +fn __shake256_encap_r(reg ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] out, reg const ptr u8[1 + BYTES_SEED_SE] in) -> stack u8[2 * (2 * NNBAR + NBAR * NBAR)] { + stack ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] s_out; + stack ptr u8[1 + BYTES_SEED_SE] 
s_in; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + inline int INLEN OUTLEN OUTRND; + reg u64 i j; stack u64 s_i s_j; + + INLEN = 1 + BYTES_SEED_SE; + OUTLEN = 2 * (2 * NNBAR + NBAR * NBAR); + OUTRND = OUTLEN / SHAKE256_RATE; + + s_out = out; + + state = s_state; + state = __keccak_init_ref1(state); + + i = 0; + while (i < INLEN) { + c = in[(int)i]; + state[u8 (int)i] ^= c; + i += 1; + } + + state[u8 INLEN] ^= 0x1f; + state[u8 SHAKE256_RATE-1] ^= 0x80; + + s_in = in; + + i = 0; + while (i < OUTRND * SHAKE256_RATE) { + s_out = out; s_i = i; s_j = j; + + state = _keccakf1600_ref1(state); + + out = s_out; i = s_i; j = s_j; + j = 0; + while (j < SHAKE256_RATE) { + out[(int)i] = state[u8 (int)j]; + i += 1; + j += 1; + } + } + + s_out = out; s_i = i; s_j = j; + state = _keccakf1600_ref1(state); + out = s_out; i = s_i; j = s_j; + + j = 0; + while (i < OUTLEN) { + out[(int) i] = state[u8 (int)j]; + i += 1; + j += 1; + } + + return out; +} + +#[returnaddress="stack"] +fn __shake256_ss(reg ptr u8[BYTES_SEC] out, reg const ptr u8[BYTES_CT + BYTES_SEC] in) -> stack u8[BYTES_SEC] +{ + stack ptr u8[BYTES_SEC] s_out; + stack ptr u8[BYTES_CT + BYTES_SEC] s_in; + stack u64[25] s_state; + reg ptr u64[25] state; + reg u8 c; + reg u64 i j; stack u64 s_i s_j; + inline int INLEN OUTLEN INRND; + + INLEN = BYTES_CT + BYTES_SEC; + OUTLEN = BYTES_SEC; + INRND = INLEN / SHAKE256_RATE; + + s_out = out; s_in = in; + + state = s_state; + state = __keccak_init_ref1(state); + + i = 0; + while (i < INRND * SHAKE256_RATE) { + in = s_in; + j = 0; + while (j < SHAKE256_RATE) { + c = in[(int)i]; + state[u8 (int)j] ^= c; + + i += 1; + j += 1; + } + + s_in = in; s_i = i; s_j = j; + state = _keccakf1600_ref1(state); + i = s_i; j = s_j; + } + + in = s_in; + j = 0; + while (i < INLEN) { + c = in[(int)i]; + state[u8 (int)j] ^= c; + i += 1; + j += 1; + } + s_in = in; s_i = i; s_j = j; + + state[u8 INLEN - INRND * SHAKE256_RATE] ^= 0x1f; + state[u8 SHAKE256_RATE-1] ^= 0x80; + + state = 
_keccakf1600_ref1(state); + + out = s_out; i = s_i; + + i = 0; + while (i < OUTLEN) { + out[(int)i] = state[u8 (int)i]; + i += 1; + } + + return out; +} diff --git a/src/crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc b/src/crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc new file mode 100644 index 00000000..177ee356 --- /dev/null +++ b/src/crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc @@ -0,0 +1,401 @@ +from Jade require "common/keccak/keccak1600/amd64/ref1/keccak1600.jinc" + +param int SHAKE256_RATE = 136; + +fn __shake256_seed_A_opt( + #spill_to_mmx reg ptr u8[BYTES_SEED_A] out, + #spill_to_mmx reg const ptr u8[BYTES_SEED_A] in) + -> reg ptr u8[BYTES_SEED_A] { + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 t0 zero; + inline int k INLEN OUTLEN; + + INLEN = BYTES_SEED_A; + OUTLEN = BYTES_SEED_A; + + state = s_state; + + for k = 0 to INLEN/8 { + t0 = in[u64 k]; + state[k] = t0; + } + ?{}, zero = #set0(); + + i = INLEN/8; + while (i < 25) { + state[i] = zero; + i += 1; + } + + state[u8 INLEN] = 0x1f; + state[u8 SHAKE256_RATE-1] = 0x80; + + () = #spill(out); + + state = __keccakf1600_ref1(state); + + () = #unspill(out); + + for k = 0 to OUTLEN/8 { + t0 = state[u64 k]; + out[u64 k] = t0; + } + + return out; +} + +fn __shake256_r_opt( + #spill_to_mmx reg ptr u8[4 * NNBAR] out, + #spill_to_mmx reg const ptr u8[BYTES_SEED_SE] in) +-> reg ptr u8[4 * NNBAR] +{ + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 j offset t0 zero; + inline int INLEN OUTLEN OUTRND; + + INLEN = 1 + BYTES_SEED_SE; + OUTLEN = 4 * NNBAR; + OUTRND = OUTLEN/SHAKE256_RATE; + + state = s_state; + + ?{}, zero = #set0(); + state[u8 0] = 0x5F; + state[INLEN/8] = zero; + + i = 0; + while (i < INLEN/8) { + t0 = in[u64 i]; + offset = #LEA(1+8*i); + state.[u64 offset] = t0; + + i += 1; + } + + i = INLEN/8 + 1; + while (i < 25) { + state[i] = zero; + i += 1; + } + + state[u8 INLEN] = 0x1f; + state[u8 SHAKE256_RATE-1] 
= 0x80; + + i = 0; + while (i < OUTRND * SHAKE256_RATE/8) { + () = #spill(i, j, out); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j, out); + + j = 0; + while (j < SHAKE256_RATE/8) { + t0 = state[u64 j]; + offset = #LEA(i+j); + out[u64 offset] = t0; + + j += 1; + } + + i += SHAKE256_RATE/8; + } + + () = #spill(i, j, out); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j, out); + + i = 0; + while (i < (OUTLEN % SHAKE256_RATE) / 8) { + t0 = state[u64 i]; + out[u64 OUTRND*SHAKE256_RATE/8 + i] = t0; + i += 1; + } + + return out; +} + +fn __shake256_pkh_opt( + #spill_to_mmx reg ptr u8[BYTES_SEC] out, + #spill_to_mmx reg const ptr u8[BYTES_PK] in) +-> reg ptr u8[BYTES_SEC] { + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 j offset t0 zero; + inline int INLEN OUTLEN INRND; + + INLEN = BYTES_PK; + INRND = INLEN/SHAKE256_RATE; + OUTLEN = BYTES_SEC; + + state = s_state; + + ?{}, zero = #set0(); + + i = 0; + while (i < 25) { + state[i] = zero; + i += 1; + } + + () = #spill(out); + + i = 0; + while (i < INRND * SHAKE256_RATE/8) { + j = 0; + while (j < SHAKE256_RATE/8) { + offset = #LEA(i+j); + t0 = in[u64 offset]; + state[u64 j] ^= t0; + j += 1; + } + + () = #spill(i, j); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j); + + i += SHAKE256_RATE/8; + } + + i = 0; + while (i < (INLEN % SHAKE256_RATE)/8) { + t0 = in[u64 INRND * SHAKE256_RATE/8 + i]; + state[u64 i] ^= t0; + + i += 1; + } + + state[u8 INLEN%SHAKE256_RATE] ^= 0x1f; + state[u8 SHAKE256_RATE-1] ^= 0x80; + + () = #spill(i, j); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j, out); + + i = 0; + while (i < OUTLEN/8) { + out[u64 i] = state[u64 i]; + i += 1; + } + + return out; +} + +fn __shake256_SE_k_opt( + #spill_to_mmx reg ptr u8[1 + BYTES_SEED_SE + BYTES_SEC] out, + #spill_to_mmx reg const ptr u8[2 * BYTES_SEC + BYTES_SALT] in) +-> reg ptr u8[1 + BYTES_SEED_SE + BYTES_SEC] { + #spill_to_mmx reg u64 i; + + stack u64[25] 
s_state; + reg ptr u64[25] state; + reg u64 offset t0 zero; + inline int INLEN OUTLEN; + + INLEN = 2 * BYTES_SEC + BYTES_SALT; + OUTLEN = BYTES_SEED_SE + BYTES_SEC; + + state = s_state; + + i = 0; + while (i < INLEN/8) { + t0 = in[u64 i]; + state[i] = t0; + + i += 1; + } + ?{}, zero = #set0(); + + i = INLEN/8; + while (i < 25) { + state[i] = zero; + i += 1; + } + + state[u8 INLEN] = 0x1f; + state[u8 SHAKE256_RATE-1] = 0x80; + + () = #spill(out); + + state = __keccakf1600_ref1(state); + + () = #unspill(out); + + i = 0; + while (i < OUTLEN/8) { + t0 = state[u64 i]; + offset = #LEA(1+8*i); + out.[u64 offset] = t0; + + i += 1; + } + + return out; +} + +fn __shake256_encap_r_opt( + #spill_to_mmx reg ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] out, + #spill_to_mmx reg const ptr u8[1 + BYTES_SEED_SE] in) +-> reg ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] { + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 j offset t0 zero; + reg u8 t; + inline int k INLEN OUTLEN OUTRND; + + INLEN = 1 + BYTES_SEED_SE; + OUTLEN = 2 * (2 * NNBAR + NBAR * NBAR); + OUTRND = OUTLEN/SHAKE256_RATE; + + state = s_state; + + ?{}, zero = #set0(); + state[u8 0] = 0x5F; + state[INLEN/8] = zero; + + i = 0; + while (i < INLEN/8) { + t0 = in[u64 i]; + state[u64 i] = t0; + i += 1; + } + + for k = 0 to INLEN%8 { + t = in[INLEN-INLEN%8 + k]; + state[u8 INLEN-INLEN%8 + k] = t; + } + + i = INLEN/8 + 1; + while (i < 25) { + state[i] = zero; + i += 1; + } + + state[u8 INLEN] = 0x1f; + state[u8 SHAKE256_RATE-1] = 0x80; + + i = 0; + while (i < OUTRND * SHAKE256_RATE/8) { + () = #spill(i, out); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, out); + + j = 0; + while (j < SHAKE256_RATE/8) { + t0 = state[u64 j]; + offset = #LEA(i+j); + out[u64 offset] = t0; + + j += 1; + } + + i += SHAKE256_RATE/8; + } + + () = #spill(i, j, out); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j, out); + + i = 0; + while (i < (OUTLEN % SHAKE256_RATE) / 8) { + t0 = state[u64 i]; + 
out[u64 OUTRND*SHAKE256_RATE/8 + i] = t0; + + i += 1; + } + + return out; +} + +fn __shake256_ss_opt( + #spill_to_mmx reg ptr u8[BYTES_SEC] out, + #spill_to_mmx reg const ptr u8[BYTES_CT + BYTES_SEC] in) +-> reg ptr u8[BYTES_SEC] { + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 j offset t0 zero; + inline int INLEN OUTLEN INRND; + + INLEN = BYTES_CT + BYTES_SEC; + INRND = INLEN/SHAKE256_RATE; + OUTLEN = BYTES_SEC; + + state = s_state; + + ?{}, zero = #set0(); + + i = 0; + while (i < 25) { + state[i] = zero; + i += 1; + } + + () = #spill(out); + + i = 0; + while (i < INRND*SHAKE256_RATE/8) { + j = 0; + while (j < SHAKE256_RATE/8) { + offset = #LEA(i+j); + t0 = in[u64 offset]; + state[u64 j] ^= t0; + + j += 1; + } + + () = #spill(i, j); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j); + + i += SHAKE256_RATE/8; + } + + i = 0; + while (i < (INLEN % SHAKE256_RATE)/8) { + t0 = in[u64 INRND * SHAKE256_RATE/8 + i]; + state[u64 i] ^= t0; + + i += 1; + } + + state[u8 INLEN%SHAKE256_RATE] ^= 0x1f; + state[u8 SHAKE256_RATE-1] ^= 0x80; + + () = #spill(i, j); + + state = __keccakf1600_ref1(state); + + () = #unspill(i, j, out); + + i = 0; + while (i < OUTLEN/8) { + out[u64 i] = state[u64 i]; + i += 1; + } + + return out; +} diff --git a/src/crypto_kem/frodo/common/frodo640_params.jinc b/src/crypto_kem/frodo/common/frodo640_params.jinc new file mode 100644 index 00000000..2656cc7e --- /dev/null +++ b/src/crypto_kem/frodo/common/frodo640_params.jinc @@ -0,0 +1,35 @@ +// params of frodo 640 + +param int EXTRACTED_BITS = 2; +param int D = 15; +param int N = 640; +param int NBAR = 8; + +param int NNBAR = N * NBAR; + +param int BYTES_SEED_A = 16; +param int BYTES_SEC = 16; +param int BYTES_SEED_SE = 32; +param int BYTES_SALT = 32; + +param int BYTES_PK = BYTES_SEED_A + D * N; +param int BYTES_SK = BYTES_SEC + BYTES_PK + 2 * NNBAR + BYTES_SEC; +param int BYTES_CT = D * N + D * NBAR + BYTES_SALT; + +param int CDF_TABLE_LEN = 13; + 
+u16[CDF_TABLE_LEN] CDF_TABLE = { + 4643, + 13363, + 20579, + 25843, + 29227, + 31145, + 32103, + 32525, + 32689, + 32745, + 32762, + 32766, + 32767 +}; diff --git a/src/crypto_kem/frodo/common/frodo976_params.jinc b/src/crypto_kem/frodo/common/frodo976_params.jinc new file mode 100644 index 00000000..49c32cd5 --- /dev/null +++ b/src/crypto_kem/frodo/common/frodo976_params.jinc @@ -0,0 +1,33 @@ +// params of frodo 976 + +param int EXTRACTED_BITS = 3; +param int D = 16; +param int N = 976; +param int NBAR = 8; + +param int NNBAR = N * NBAR; + +param int BYTES_SEED_A = 16; +param int BYTES_SEC = 24; +param int BYTES_SEED_SE = 48; +param int BYTES_SALT = 48; + +param int BYTES_PK = BYTES_SEED_A + D * N; +param int BYTES_SK = BYTES_SEC + BYTES_PK + 2 * NNBAR + BYTES_SEC; +param int BYTES_CT = D * N + D * NBAR + BYTES_SALT; + +param int CDF_TABLE_LEN = 11; + +u16[CDF_TABLE_LEN] CDF_TABLE = { + 5638, + 15915, + 23689, + 28571, + 31116, + 32217, + 32613, + 32731, + 32760, + 32766, + 32767 +}; diff --git a/src/crypto_kem/frodo/frodo640shake/META.yml b/src/crypto_kem/frodo/frodo640shake/META.yml new file mode 100644 index 00000000..a3dbdce3 --- /dev/null +++ b/src/crypto_kem/frodo/frodo640shake/META.yml @@ -0,0 +1,26 @@ +name: frodo640shake +type: kem +checksumsmall: 24cf060c7263b4e138de49f07c70d0c3392e1d57b1295f25bff27eee7c88ebe5 +checksumbig: ac84a42c2750be4a04256b3fb4817a8483d3422d5f557e067e320491147f5401 +claimed-nist-level: 1 +claimed-security: IND-CCA2 +length-public-key: 9616 +length-ciphertext: 9752 +length-secret-key: 19888 +length-shared-secret: 16 +principal-submitters: + - TODO +auxiliary-submitters: + - TODO +implementations: + - name: amd64/ref + version: TODO + supported_platforms: + - architecture: x86_64 + operating_systems: + - Linux + - Darwin + required_flags: # FIXME + - avx2 + - bmi2 + - popcnt diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/Makefile b/src/crypto_kem/frodo/frodo640shake/amd64/ref/Makefile new file mode 100644 index 
00000000..29a80faa --- /dev/null +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/Makefile @@ -0,0 +1,2 @@ +SRCS := kem.jazz +include ../../../../../Makefile.common diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/include/api.h b/src/crypto_kem/frodo/frodo640shake/amd64/ref/include/api.h new file mode 100644 index 00000000..3eebf599 --- /dev/null +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/include/api.h @@ -0,0 +1,36 @@ +#ifndef JADE_KEM_frodo_frodo640shake_amd64_ref_API_H +#define JADE_KEM_frodo_frodo640shake_amd64_ref_API_H + +#include <stdint.h> + +#define JADE_KEM_frodo_frodo640shake_amd64_ref_SECRETKEYBYTES 19888 +#define JADE_KEM_frodo_frodo640shake_amd64_ref_PUBLICKEYBYTES 9616 +#define JADE_KEM_frodo_frodo640shake_amd64_ref_CIPHERTEXTBYTES 9752 +#define JADE_KEM_frodo_frodo640shake_amd64_ref_KEYPAIRCOINBYTES 64 +#define JADE_KEM_frodo_frodo640shake_amd64_ref_ENCCOINBYTES 48 +#define JADE_KEM_frodo_frodo640shake_amd64_ref_BYTES 16 + +#define JADE_KEM_frodo_frodo640shake_amd64_ref_ALGNAME "Frodo640" +#define JADE_KEM_frodo_frodo640shake_amd64_ref_ARCH "amd64" +#define JADE_KEM_frodo_frodo640shake_amd64_ref_IMPL "ref" + +// kem api +int jade_kem_frodo_frodo640shake_amd64_ref_keypair_derand( + uint8_t *public_key, uint8_t *secret_key, const uint8_t *coins); + +int jade_kem_frodo_frodo640shake_amd64_ref_keypair(uint8_t *public_key, + uint8_t *secret_key); + +int jade_kem_frodo_frodo640shake_amd64_ref_enc_derand( + uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key, + const uint8_t *coins); + +int jade_kem_frodo_frodo640shake_amd64_ref_enc(uint8_t *ciphertext, + uint8_t *shared_secret, + const uint8_t *public_key); + +int jade_kem_frodo_frodo640shake_amd64_ref_dec(uint8_t *shared_secret, + uint8_t *ciphertext, + uint8_t *secret_key); + +#endif diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jazz b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jazz new file mode 100644 index 00000000..b6dd27f1 --- /dev/null +++ 
b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jazz @@ -0,0 +1,37 @@ +from Jade require "crypto_kem/frodo/common/frodo640_params.jinc" +from Jade require "crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc" + +export fn jade_kem_frodo_frodo640shake_amd64_ref_keypair_derand(#public reg u64 pkp skp coinsp) -> #public reg u64 { + reg u64 r; + _frodo_amd64_ref_keypair_derand(pkp, skp, coinsp); + ?{}, r = #set0(); + return r; +} + +export fn jade_kem_frodo_frodo640shake_amd64_ref_keypair(#public reg u64 pkp skp) -> #public reg u64 { + reg u64 r; + _frodo_amd64_ref_keypair(pkp, skp); + ?{}, r = #set0(); + return r; +} + +export fn jade_kem_frodo_frodo640shake_amd64_ref_enc_derand(#public reg u64 ctp ssp pkp coinsp) -> #public reg u64 { + reg u64 r; + _frodo_amd64_ref_enc_derand(ctp, ssp, pkp, coinsp); + ?{}, r = #set0(); + return r; +} + +export fn jade_kem_frodo_frodo640shake_amd64_ref_enc(#public reg u64 ctp ssp pkp) -> #public reg u64 { + reg u64 r; + _frodo_amd64_ref_enc(ctp, ssp, pkp); + ?{}, r = #set0(); + return r; +} + +export fn jade_kem_frodo_frodo640shake_amd64_ref_dec(#public reg u64 ssp ctp skp) -> #public reg u64 { + reg u64 r; + _frodo_amd64_ref_dec(ssp, ctp, skp); + ?{}, r = #set0(); + return r; +} diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc new file mode 100644 index 00000000..922c5a50 --- /dev/null +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc @@ -0,0 +1,360 @@ +from Jade require "crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc" +from Jade require "crypto_kem/frodo/common/amd64/ref/encode.jinc" +from Jade require "crypto_kem/frodo/common/amd64/ref/noise.jinc" +from Jade require "crypto_kem/frodo/common/amd64/ref/matrix.jinc" +from Jade require "crypto_kem/frodo/common/amd64/ref/matrix_mul_opt.jinc" +from Jade require "crypto_kem/frodo/common/amd64/ref/pack.jinc" + +// coins = s || seed SE || z +fn __frodo_amd64_ref_keypair_derand( + reg u64 pkp skp, + 
#spill_to_mmx reg ptr u8[BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC] coins) { + stack u16[2 * NNBAR] SE; + stack u16[NNBAR] B; + + inline int k; + reg u64 i j; + + // seedA || b + stack u8[BYTES_PK] pk; + + // s || seedA || b || S_T || pkh + stack u8[BYTES_SK] sk; + + () = #spill(i, j, pkp, skp); + + for k = 0 to BYTES_SEC/8 { + sk[u64 k] = coins[u64 k]; + } + + // gen seedA + pk[0:BYTES_SEED_A] = __shake128_seed_A_opt(pk[0:BYTES_SEED_A], coins[BYTES_SEC + BYTES_SEED_SE:BYTES_SEED_A]); + + // gen S || E + SE = __shake128_r_opt(SE, coins[BYTES_SEC:BYTES_SEED_SE]); + SE = __sample_2NNBAR(SE); + + () = #spill(coins); + + // B = A*S+E + B = __AS_plus_E_opt(B, pk[0:BYTES_SEED_A], SE[0:NNBAR], SE[NNBAR:NNBAR]); + + // pack + pk[BYTES_SEED_A:D * N] = __pack_B(pk[BYTES_SEED_A:D * N], B); + + () = #unspill(i); + i = 0; + while (i < BYTES_PK/8) { + sk[u64 BYTES_SEC/8 + i] = pk[u64 i]; + i += 1; + } + + i = 0; + while (i < 2 * NNBAR / 8) { + sk[u64 BYTES_SEC/8 + BYTES_PK/8 + i] = SE[u64 i]; + i += 1; + } + () = #spill(i); + + sk[BYTES_SEC + BYTES_PK + 2 * NNBAR : BYTES_SEC] = __shake128_pkh_opt(sk[BYTES_SEC + BYTES_PK + 2 * NNBAR:BYTES_SEC], pk); + + () = #unspill(i, j, pkp, skp); + i = 0; j = 0; + while (i < BYTES_PK/8) { + [pkp + j] = pk[u64 i]; + i += 1; + j += 8; + } + + i = 0; j = 0; + while (i < BYTES_SK/8) { + [skp + j] = sk[u64 i]; + i += 1; + j += 8; + } +} + +#[returnaddress="stack"] +fn __frodo_amd64_ref_enc_derand( + reg u64 ctp ssp pkp, + #spill_to_mmx reg ptr u8[BYTES_SEC + BYTES_SALT] coins) { + reg u64 i j; + inline int k; + + // seedA || b + #public stack u8[BYTES_PK] pk; + + // c1 || c2 || salt || k + stack u8[BYTES_CT + BYTES_SEC] ct_k; + + // pkh || u || salt + stack u8[BYTES_SEC * 2 + BYTES_SALT] pkh_u_salt; + // 0x96 || seedSE || k + stack u8[1 + BYTES_SEED_SE + BYTES_SEC] seedSE_k; + seedSE_k[0] = 0x96; + + // S' || E' || E'' + stack u16[2 * NNBAR + NBAR * NBAR] SEE; + + stack u16[NNBAR] B; + reg ptr u16[NNBAR] Bp; + stack u16[NBAR * NBAR] C; + reg ptr 
u16[NBAR * NBAR] V; + stack u8[BYTES_SEC] ss; + + pkp = pkp; + () = #spill(ctp, ssp, i, j); + + // gen u || salt + for k = 0 to (BYTES_SEC + BYTES_SALT)/8 { + pkh_u_salt[u64 BYTES_SEC/8 + k] = coins[u64 k]; + } + + for k = 0 to BYTES_SALT/8 { + ct_k[u64 (D * N + D * NBAR)/8 + k] = pkh_u_salt[u64 (BYTES_SEC * 2)/8 + k]; + } + + () = #unspill(i, j); + // read pk + i = 0; j = 0; + while (i < BYTES_PK/8) { + #declassify pk[u64 i] = [pkp + j]; + i += 1; + j += 8; + } + () = #spill(i, j); + + // pkh + pkh_u_salt[0:BYTES_SEC] = __shake128_pkh_opt(pkh_u_salt[0:BYTES_SEC], pk); + + // seedSE || k + seedSE_k = __shake128_SE_k_opt(seedSE_k, pkh_u_salt); + + // copy k + for k = 0 to BYTES_SEC/8 { + ct_k[u64 BYTES_CT/8 + k] = seedSE_k.[u64 1 + BYTES_SEED_SE + 8*k]; + } + + // gen input bit string for sampling S and E + SEE = __shake128_encap_r_opt(SEE, seedSE_k[0 : 1 + BYTES_SEED_SE]); + + // S' || E' + SEE[0:2 * NNBAR] = __sample_2NNBAR(SEE[0:2 * NNBAR]); + // E'' + SEE[NNBAR * 2:NBAR * NBAR] = __sample_NBAR2(SEE[NNBAR * 2:NBAR * NBAR]); + + // B' = S'A + E' + Bp = SEE[NNBAR:NNBAR]; + Bp = __SA_plus_E_opt(Bp, pk[0:BYTES_SEED_A], SEE[0:NNBAR]); + + // c1 <- Pack(B') + ct_k[0:D * N] = __pack_B(ct_k[0:D * N], Bp); + + // B <- Unpack(b) + B = __unpack_B(B, pk[BYTES_SEED_A:D * N]); + + // V = S'B + E'' + V = SEE[NNBAR*2:NBAR*NBAR]; + V = __SB_plus_E_opt(V, SEE[0:NNBAR], B); + + // C = V + Encode(u) + C = __encode(C, pkh_u_salt[BYTES_SEC:BYTES_SEC]); + C = __matrix_add(C, V); + + // c2 <- Pack(C) + ct_k[D * N: D * NBAR] = __pack_C(ct_k[D * N: D * NBAR], C); + + // ss <- shake(c1 || c2 || salt || k) + ss = __shake128_ss_opt(ss, ct_k); + + () = #unspill(i, j, ctp, ssp); + i = 0; j = 0; + while (i < BYTES_CT/8) { + [ctp + j] = ct_k[u64 i]; + i += 1; + j += 8; + } + + for k = 0 to BYTES_SEC/8 { + [ssp + 8*k] = ss[u64 k]; + } +} + +#[returnaddress="stack"] +fn _frodo_amd64_ref_dec(reg u64 ssp ctp skp) { + #public stack u8[BYTES_PK] pk; + stack u8[2 * NNBAR] ST; + stack u8[BYTES_SEC] s; 
+ stack u8[BYTES_CT + BYTES_SEC] ct_k; + stack u16[NNBAR] B Bp; + reg ptr u16[NNBAR] Bpp; + stack u16[NBAR * NBAR] M C Cp; + reg ptr u16[NBAR * NBAR] V; + stack u8[BYTES_SEC * 2 + BYTES_SALT] pkh_u_salt; + stack u8[1 + BYTES_SEED_SE + BYTES_SEC] seedSE_k; + stack u8[BYTES_SEC] ss; + + // S' || E' || E'' + stack u16[2 * NNBAR + NBAR * NBAR] SEE; + + reg u8 s1 s2; + reg u64 i j t; + stack u64 s_ssp s_skp; + inline int k; + + ctp = ctp; + skp = skp; + s_ssp = ssp; + + // copy pkh + for k = 0 to BYTES_SEC/8 { + pkh_u_salt[u64 k] = [skp + BYTES_SK - BYTES_SEC + 8*k]; + } + s_skp = skp; + + // read ct + i = 0; j = 0; + while (i < BYTES_CT/8) { + t = [ctp + j]; + ct_k[u64 i] = t; + i += 1; + j += 8; + } + + for k = 0 to BYTES_SEC/8 { + s[u64 k] = [skp + 8*k]; + } + + i = 0; j = 0; + while (i < BYTES_PK/8) { + #declassify pk[u64 i] = [skp + BYTES_SEC + j]; + i += 1; + j += 8; + } + + i = 0; j = 0; + while (i < 2 * NNBAR/8) { + ST[u64 i] = [skp + BYTES_SEC + BYTES_PK + j]; + i += 1; + j += 8; + } + + () = #spill(i); + + // copy salt + for k = 0 to BYTES_SALT/8 { + pkh_u_salt[u64 (BYTES_SEC * 2)/8 + k] = ct_k[u64 (BYTES_CT - BYTES_SALT)/8 + k]; + } + + // B' <- Unpack(c1) + Bp = __unpack_B(Bp, ct_k[0:D * N]); + // C <- Unpack(c2) + C = __unpack_C(C, ct_k[D * N:D * NBAR]); + + // M = C - B'S + M = __mul_BS_opt(M, Bp, ST); + M = __matrix_sub(M, C); + + pkh_u_salt[BYTES_SEC:BYTES_SEC] = __decode(pkh_u_salt[BYTES_SEC:BYTES_SEC], M); + + seedSE_k[0] = 0x96; + seedSE_k = __shake128_SE_k_opt(seedSE_k, pkh_u_salt); + + SEE = __shake128_encap_r_opt(SEE, seedSE_k[0: 1 + BYTES_SEED_SE]); + + // S' || E' + SEE[0:2 * NNBAR] = __sample_2NNBAR(SEE[0:2 * NNBAR]); + // E'' + SEE[NNBAR * 2:NBAR * NBAR] = __sample_NBAR2(SEE[NNBAR * 2:NBAR * NBAR]); + + // B'' = S'A + E' + Bpp = SEE[NNBAR:NNBAR]; + Bpp = __SA_plus_E_opt(Bpp, pk[0:BYTES_SEED_A], SEE[0:NNBAR]); + + // B'' (mod q) + () = #unspill(i); + i = 0; + while (i < NNBAR) { + Bpp[i] &= (1 << D) - 1; + i += 1; + } + + // + B = __unpack_B(B, 
pk[BYTES_SEED_A:BYTES_PK - BYTES_SEED_A]); + + V = SEE[NNBAR*2:NBAR*NBAR]; + V = __SB_plus_E_opt(V, SEE[0:NNBAR], B); + + Cp = __encode(Cp, pkh_u_salt[BYTES_SEC:BYTES_SEC]); + Cp = __matrix_add(Cp, V); + + s1 = __ct_verify_NNBAR(Bp, Bpp); + s2 = __ct_verify_NBAR2(C, Cp); + s1 |= s2; + + ct_k[BYTES_CT:BYTES_SEC] = __ct_select(ct_k[BYTES_CT:BYTES_SEC], seedSE_k[1+BYTES_SEED_SE:BYTES_SEC], s, s1); + + ss = __shake128_ss_opt(ss, ct_k); + + ssp = s_ssp; + for k = 0 to BYTES_SEC/8 { + [ssp + 8*k] = ss[u64 k]; + } +} + +fn _frodo_amd64_ref_keypair(reg u64 pkp skp) { + #public stack u8[BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC] coins; + + pkp = pkp; + skp = skp; + + #declassify coins = #randombytes(coins); + + __frodo_amd64_ref_keypair_derand(pkp, skp, coins); +} + +fn _frodo_amd64_ref_keypair_derand(reg u64 pkp skp coinsp) { + #public stack u8[BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC] coins; + reg u64 i; stack u64 s_i; + + pkp = pkp; + skp = skp; + + i = 0; + while (i < BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC) { + #declassify coins[(int)i] = (u8)[coinsp + i]; + i += 1; + } + + s_i = i; + __frodo_amd64_ref_keypair_derand(pkp, skp, coins); +} + +fn _frodo_amd64_ref_enc_derand(reg u64 ctp ssp pkp coinsp) { + stack u8[BYTES_SEC + BYTES_SALT] coins; + reg u64 i; stack u64 s_i; + + pkp = pkp; + ctp = ctp; + ssp = ssp; + + i = 0; + while (i < BYTES_SEC + BYTES_SALT) { + coins[(int)i] = (u8)[coinsp + i]; + i += 1; + } + s_i = i; + + __frodo_amd64_ref_enc_derand(ctp, ssp, pkp, coins); +} + +fn _frodo_amd64_ref_enc(reg u64 ctp ssp pkp) { + stack u8[BYTES_SEC + BYTES_SALT] coins; + pkp = pkp; + ctp = ctp; + ssp = ssp; + + coins = #randombytes(coins); + + __frodo_amd64_ref_enc_derand(ctp, ssp, pkp, coins); +} diff --git a/src/crypto_kem/frodo/frodo976shake/META.yml b/src/crypto_kem/frodo/frodo976shake/META.yml new file mode 100644 index 00000000..8179c4c3 --- /dev/null +++ b/src/crypto_kem/frodo/frodo976shake/META.yml @@ -0,0 +1,26 @@ +name: frodo976shake +type: kem +checksumsmall: 
0fc98ee686f97225c0f831be7a72d45a7ad2848811adaa7cc4a9bc651c245fa8 +checksumbig: de7124ccc70e76c6058b517a55789fe7b53a2a668d4dfb5651242d4afcd2d53a +claimed-nist-level: 3 +claimed-security: IND-CCA2 +length-public-key: 15632 +length-ciphertext: 15792 +length-secret-key: 31296 +length-shared-secret: 24 +principal-submitters: + - TODO +auxiliary-submitters: + - TODO +implementations: + - name: amd64/ref + version: TODO + supported_platforms: + - architecture: x86_64 + operating_systems: + - Linux + - Darwin + required_flags: # FIXME + - avx2 + - bmi2 + - popcnt diff --git a/src/crypto_kem/frodo/frodo976shake/amd64/ref/Makefile b/src/crypto_kem/frodo/frodo976shake/amd64/ref/Makefile new file mode 100644 index 00000000..29a80faa --- /dev/null +++ b/src/crypto_kem/frodo/frodo976shake/amd64/ref/Makefile @@ -0,0 +1,2 @@ +SRCS := kem.jazz +include ../../../../../Makefile.common diff --git a/src/crypto_kem/frodo/frodo976shake/amd64/ref/include/api.h b/src/crypto_kem/frodo/frodo976shake/amd64/ref/include/api.h new file mode 100644 index 00000000..bb30c2ac --- /dev/null +++ b/src/crypto_kem/frodo/frodo976shake/amd64/ref/include/api.h @@ -0,0 +1,38 @@ +#ifndef JADE_KEM_frodo_frodo976shake_amd64_ref_API_H +#define JADE_KEM_frodo_frodo976shake_amd64_ref_API_H + +#include <stdint.h> + +#define JADE_KEM_frodo_frodo976shake_amd64_ref_SECRETKEYBYTES 31296 +#define JADE_KEM_frodo_frodo976shake_amd64_ref_PUBLICKEYBYTES 15632 +#define JADE_KEM_frodo_frodo976shake_amd64_ref_CIPHERTEXTBYTES 15792 +#define JADE_KEM_frodo_frodo976shake_amd64_ref_KEYPAIRCOINBYTES 88 +#define JADE_KEM_frodo_frodo976shake_amd64_ref_ENCCOINBYTES 72 +#define JADE_KEM_frodo_frodo976shake_amd64_ref_BYTES 24 + +#define JADE_KEM_frodo_frodo976shake_amd64_ref_ALGNAME "Frodo976shake" +#define JADE_KEM_frodo_frodo976shake_amd64_ref_ARCH "amd64" +#define JADE_KEM_frodo_frodo976shake_amd64_ref_IMPL "ref" + +// kem api +int jade_kem_frodo_frodo976shake_amd64_ref_keypair_derand(uint8_t *public_key, + uint8_t *secret_key, + uint8_t 
*coins); + +int jade_kem_frodo_frodo976shake_amd64_ref_keypair(uint8_t *public_key, + uint8_t *secret_key); + +int jade_kem_frodo_frodo976shake_amd64_ref_enc_derand(uint8_t *ciphertext, + uint8_t *shared_secret, + uint8_t *public_key, + uint8_t *coins); + +int jade_kem_frodo_frodo976shake_amd64_ref_enc(uint8_t *ciphertext, + uint8_t *shared_secret, + uint8_t *public_key); + +int jade_kem_frodo_frodo976shake_amd64_ref_dec(uint8_t *shared_secret, + uint8_t *ciphertext, + uint8_t *secret_key); + +#endif diff --git a/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jazz b/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jazz new file mode 100644 index 00000000..c64c8260 --- /dev/null +++ b/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jazz @@ -0,0 +1,37 @@ +from Jade require "crypto_kem/frodo/common/frodo976_params.jinc" +from Jade require "crypto_kem/frodo/frodo976shake/amd64/ref/kem.jinc" + +export fn jade_kem_frodo_frodo976shake_amd64_ref_keypair_derand(#public reg u64 pkp skp coinsp) -> #public reg u64 { + reg u64 r; + _frodo_amd64_ref_keypair_derand(pkp, skp, coinsp); + ?{}, r = #set0(); + return r; +} + +export fn jade_kem_frodo_frodo976shake_amd64_ref_keypair(#public reg u64 pkp skp) -> #public reg u64 { + reg u64 r; + _frodo_amd64_ref_keypair(pkp, skp); + ?{}, r = #set0(); + return r; +} + +export fn jade_kem_frodo_frodo976shake_amd64_ref_enc_derand(#public reg u64 ctp ssp pkp coinsp) -> #public reg u64 { + reg u64 r; + _frodo_amd64_ref_enc_derand(ctp, ssp, pkp, coinsp); + ?{}, r = #set0(); + return r; +} + +export fn jade_kem_frodo_frodo976shake_amd64_ref_enc(#public reg u64 ctp ssp pkp) -> #public reg u64 { + reg u64 r; + _frodo_amd64_ref_enc(ctp, ssp, pkp); + ?{}, r = #set0(); + return r; +} + +export fn jade_kem_frodo_frodo976shake_amd64_ref_dec(#public reg u64 ssp ctp skp) -> #public reg u64 { + reg u64 r; + _frodo_amd64_ref_dec(ssp, ctp, skp); + ?{}, r = #set0(); + return r; +} diff --git a/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jinc 
b/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jinc
new file mode 100644
index 00000000..c1ed48cd
--- /dev/null
+++ b/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jinc
@@ -0,0 +1,361 @@
+from Jade require "crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc"
+from Jade require "crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc"
+from Jade require "crypto_kem/frodo/common/amd64/ref/encode.jinc"
+from Jade require "crypto_kem/frodo/common/amd64/ref/noise.jinc"
+from Jade require "crypto_kem/frodo/common/amd64/ref/matrix.jinc"
+from Jade require "crypto_kem/frodo/common/amd64/ref/matrix_mul_opt.jinc"
+from Jade require "crypto_kem/frodo/common/amd64/ref/pack.jinc"
+
+// Derandomized key generation. coins = s || seed SE || z; stores BYTES_PK bytes at pkp and BYTES_SK bytes at skp.
+fn __frodo_amd64_ref_keypair_derand(
+  reg u64 pkp skp,
+  #spill_to_mmx reg ptr u8[BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC] coins) {
+  stack u16[2 * NNBAR] SE;
+  stack u16[NNBAR] B;
+
+  inline int k;
+  reg u64 i j;
+
+  // seedA || b
+  stack u8[BYTES_PK] pk;
+
+  // s || seedA || b || S_T || pkh
+  stack u8[BYTES_SK] sk;
+
+  () = #spill(i, j, pkp, skp); // NOTE(review): i and j have not been assigned yet when they are spilled here.
+
+  for k = 0 to BYTES_SEC/8 { // sk[0:BYTES_SEC] <- s (first BYTES_SEC bytes of coins)
+    sk[u64 k] = coins[u64 k];
+  }
+
+  // gen seedA from z (the BYTES_SEED_A bytes of coins after s || seed SE)
+  pk[0:BYTES_SEED_A] = __shake256_seed_A_opt(pk[0:BYTES_SEED_A], coins[BYTES_SEC + BYTES_SEED_SE:BYTES_SEED_A]);
+
+  // gen S || E from seed SE (coins[BYTES_SEC:BYTES_SEED_SE])
+  SE = __shake256_r_opt(SE, coins[BYTES_SEC:BYTES_SEED_SE]);
+  SE = __sample_2NNBAR(SE); // presumably turns the SHAKE output into noise samples in place; see noise.jinc
+
+  () = #spill(coins);
+
+  // B = A*S+E
+  B = __AS_plus_E_opt(B, pk[0:BYTES_SEED_A], SE[0:NNBAR], SE[NNBAR:NNBAR]);
+
+  // b <- Pack(B), stored after seedA in pk
+  pk[BYTES_SEED_A:D * N] = __pack_B(pk[BYTES_SEED_A:D * N], B);
+
+  () = #unspill(i);
+  i = 0;
+  while (i < BYTES_PK/8) { // sk <- pk (seedA || b), placed right after s
+    sk[u64 BYTES_SEC/8 + i] = pk[u64 i];
+    i += 1;
+  }
+
+  i = 0;
+  while (i < 2 * NNBAR / 8) { // sk <- first 2 * NNBAR bytes of SE (the S half), placed after pk
+    sk[u64 BYTES_SEC/8 + BYTES_PK/8 + i] = SE[u64 i];
+    i += 1;
+  }
+  () = #spill(i);
+
+  sk[BYTES_SEC + BYTES_PK + 2 * NNBAR : BYTES_SEC] = __shake256_pkh_opt(sk[BYTES_SEC + BYTES_PK + 2 * NNBAR:BYTES_SEC], pk); // pkh slot of sk, computed from pk
+
+  () = #unspill(i, j, pkp, skp);
+  i = 0; j = 0;
+  while (i < BYTES_PK/8) { // store pk to memory at pkp, 8 bytes per iteration
+    [pkp + j] = pk[u64 i];
+    i += 1;
+    j += 8;
+  }
+
+  i = 0; j = 0;
+  while (i < BYTES_SK/8) { // store sk to memory at skp, 8 bytes per iteration
+    [skp + j] = sk[u64 i];
+    i += 1;
+    j += 8;
+  }
+}
+
+#[returnaddress="stack"]
+fn __frodo_amd64_ref_enc_derand( // Derandomized encapsulation: reads pk from pkp, uses coins = u || salt, stores the ciphertext at ctp and the shared secret at ssp.
+  reg u64 ctp ssp pkp,
+  #spill_to_mmx reg ptr u8[BYTES_SEC + BYTES_SALT] coins) {
+  reg u64 i j;
+  inline int k;
+
+  // seedA || b
+  #public stack u8[BYTES_PK] pk;
+
+  // c1 || c2 || salt || k
+  stack u8[BYTES_CT + BYTES_SEC] ct_k;
+
+  // pkh || u || salt
+  stack u8[BYTES_SEC * 2 + BYTES_SALT] pkh_u_salt;
+  // 0x96 || seedSE || k
+  stack u8[1 + BYTES_SEED_SE + BYTES_SEC] seedSE_k;
+  seedSE_k[0] = 0x96;
+
+  // S' || E' || E''
+  stack u16[2 * NNBAR + NBAR * NBAR] SEE;
+
+  stack u16[NNBAR] B;
+  reg ptr u16[NNBAR] Bp;
+  stack u16[NBAR * NBAR] C;
+  reg ptr u16[NBAR * NBAR] V;
+  stack u8[BYTES_SEC] ss;
+
+  pkp = pkp;
+  () = #spill(ctp, ssp, i, j); // NOTE(review): i and j have not been assigned yet when they are spilled here.
+
+  // gen u || salt (copied from coins into the slots after pkh)
+  for k = 0 to (BYTES_SEC + BYTES_SALT)/8 {
+    pkh_u_salt[u64 BYTES_SEC/8 + k] = coins[u64 k];
+  }
+
+  for k = 0 to BYTES_SALT/8 { // copy salt into ct_k right after c1 || c2
+    ct_k[u64 (D * N + D * NBAR)/8 + k] = pkh_u_salt[u64 (BYTES_SEC * 2)/8 + k];
+  }
+
+  () = #unspill(i, j);
+  // read pk
+  i = 0; j = 0;
+  while (i < BYTES_PK/8) {
+    #declassify pk[u64 i] = [pkp + j];
+    i += 1;
+    j += 8;
+  }
+  () = #spill(i, j);
+
+  // pkh
+  pkh_u_salt[0:BYTES_SEC] = __shake256_pkh_opt(pkh_u_salt[0:BYTES_SEC], pk);
+
+  // seedSE || k
+  seedSE_k = __shake256_SE_k_opt(seedSE_k, pkh_u_salt);
+
+  // copy k into ct_k after the ciphertext bytes (source offset is 1 + BYTES_SEED_SE + 8*k)
+  for k = 0 to BYTES_SEC/8 {
+    ct_k[u64 BYTES_CT/8 + k] = seedSE_k.[u64 1 + BYTES_SEED_SE + 8*k];
+  }
+
+  // gen input bit string for sampling S and E
+  SEE = __shake256_encap_r_opt(SEE, seedSE_k[0 : 1 + BYTES_SEED_SE]);
+
+  // S' || E'
+  SEE[0:2 * NNBAR] = __sample_2NNBAR(SEE[0:2 * NNBAR]);
+  // E''
+  SEE[NNBAR * 2:NBAR * NBAR] = __sample_NBAR2(SEE[NNBAR * 2:NBAR * NBAR]);
+
+  // B' = S'A + E' (Bp points at the E' slice of SEE)
+  Bp = SEE[NNBAR:NNBAR];
+  Bp = __SA_plus_E_opt(Bp, pk[0:BYTES_SEED_A], SEE[0:NNBAR]);
+
+  // c1 <- Pack(B')
+  ct_k[0:D * N] = __pack_B(ct_k[0:D * N], Bp);
+
+  // B <- Unpack(b)
+  B = __unpack_B(B, pk[BYTES_SEED_A:D * N]);
+
+  // V = S'B + E'' (V points at the E'' slice of SEE)
+  V = SEE[NNBAR*2:NBAR*NBAR];
+  V = __SB_plus_E_opt(V, SEE[0:NNBAR], B);
+
+  // C = V + Encode(u)
+  C = __encode(C, pkh_u_salt[BYTES_SEC:BYTES_SEC]);
+  C = __matrix_add(C, V);
+
+  // c2 <- Pack(C)
+  ct_k[D * N: D * NBAR] = __pack_C(ct_k[D * N: D * NBAR], C);
+
+  // ss <- shake(c1 || c2 || salt || k)
+  ss = __shake256_ss_opt(ss, ct_k);
+
+  () = #unspill(i, j, ctp, ssp);
+  i = 0; j = 0;
+  while (i < BYTES_CT/8) { // store c1 || c2 || salt to memory at ctp, 8 bytes per iteration
+    [ctp + j] = ct_k[u64 i];
+    i += 1;
+    j += 8;
+  }
+
+  for k = 0 to BYTES_SEC/8 { // store the shared secret to memory at ssp
+    [ssp + 8*k] = ss[u64 k];
+  }
+}
+
+#[returnaddress="stack"]
+fn _frodo_amd64_ref_dec(reg u64 ssp ctp skp) { // Decapsulation: reads the ciphertext at ctp and the secret key at skp, stores the shared secret at ssp.
+  #public stack u8[BYTES_PK] pk;
+  stack u8[2 * NNBAR] ST;
+  stack u8[BYTES_SEC] s;
+  stack u8[BYTES_CT + BYTES_SEC] ct_k;
+  stack u16[NNBAR] B Bp;
+  reg ptr u16[NNBAR] Bpp;
+  stack u16[NBAR * NBAR] M C Cp;
+  reg ptr u16[NBAR * NBAR] V;
+  stack u8[BYTES_SEC * 2 + BYTES_SALT] pkh_u_salt;
+  stack u8[1 + BYTES_SEED_SE + BYTES_SEC] seedSE_k;
+  stack u8[BYTES_SEC] ss;
+
+  // S' || E' || E''
+  stack u16[2 * NNBAR + NBAR * NBAR] SEE;
+
+  reg u8 s1 s2;
+  reg u64 i j t;
+  stack u64 s_ssp s_skp;
+  inline int k;
+
+  ctp = ctp;
+  skp = skp;
+  s_ssp = ssp; // ssp is reloaded from this stack copy just before the final store
+
+  // copy pkh (last BYTES_SEC bytes of sk)
+  for k = 0 to BYTES_SEC/8 {
+    pkh_u_salt[u64 k] = [skp + BYTES_SK - BYTES_SEC + 8*k];
+  }
+  s_skp = skp; // NOTE(review): s_skp is written but never read in this function; confirm whether it is still needed.
+
+  // read ct
+  i = 0; j = 0;
+  while (i < BYTES_CT/8) {
+    t = [ctp + j];
+    ct_k[u64 i] = t;
+    i += 1;
+    j += 8;
+  }
+
+  for k = 0 to BYTES_SEC/8 { // s <- first BYTES_SEC bytes of sk
+    s[u64 k] = [skp + 8*k];
+  }
+
+  i = 0; j = 0;
+  while (i < BYTES_PK/8) { // pk (seedA || b) <- the BYTES_PK bytes of sk that follow s
+    #declassify pk[u64 i] = [skp + BYTES_SEC + j];
+    i += 1;
+    j += 8;
+  }
+
+  i = 0; j = 0;
+  while (i < 2 * NNBAR/8) { // ST <- the 2 * NNBAR bytes of sk that follow pk
+    ST[u64 i] = [skp + BYTES_SEC + BYTES_PK + j];
+    i += 1;
+    j += 8;
+  }
+
+  () = #spill(i);
+
+  // copy salt (last BYTES_SALT bytes of the ciphertext)
+  for k = 0 to BYTES_SALT/8 {
+    pkh_u_salt[u64 (BYTES_SEC * 2)/8 + k] = ct_k[u64 (BYTES_CT - BYTES_SALT)/8 + k];
+  }
+
+  // B' <- Unpack(c1)
+  Bp = __unpack_B(Bp, ct_k[0:D * N]);
+  // C <- Unpack(c2)
+  C = __unpack_C(C, ct_k[D * N:D * NBAR]);
+
+  // M = C - B'S
+  M = __mul_BS_opt(M, Bp, ST);
+  M = __matrix_sub(M, C);
+
+  pkh_u_salt[BYTES_SEC:BYTES_SEC] = __decode(pkh_u_salt[BYTES_SEC:BYTES_SEC], M); // u' <- Decode(M), written into the u slot between pkh and salt
+
+  seedSE_k[0] = 0x96; // same prefix byte as in encapsulation
+  seedSE_k = __shake256_SE_k_opt(seedSE_k, pkh_u_salt);
+
+  SEE = __shake256_encap_r_opt(SEE, seedSE_k[0: 1 + BYTES_SEED_SE]); // same expansion call as in encapsulation
+
+  // S' || E'
+  SEE[0:2 * NNBAR] = __sample_2NNBAR(SEE[0:2 * NNBAR]);
+  // E''
+  SEE[NNBAR * 2:NBAR * NBAR] = __sample_NBAR2(SEE[NNBAR * 2:NBAR * NBAR]);
+
+  // B'' = S'A + E'
+  Bpp = SEE[NNBAR:NNBAR];
+  Bpp = __SA_plus_E_opt(Bpp, pk[0:BYTES_SEED_A], SEE[0:NNBAR]);
+
+  // B'' (mod q)
+  () = #unspill(i);
+  i = 0;
+  while (i < NNBAR) {
+    Bpp[i] &= (1 << D) - 1;
+    i += 1;
+  }
+
+  // B <- Unpack(b)
+  B = __unpack_B(B, pk[BYTES_SEED_A:BYTES_PK - BYTES_SEED_A]);
+
+  V = SEE[NNBAR*2:NBAR*NBAR]; // V = S'B + E'' (V points at the E'' slice of SEE)
+  V = __SB_plus_E_opt(V, SEE[0:NNBAR], B);
+
+  Cp = __encode(Cp, pkh_u_salt[BYTES_SEC:BYTES_SEC]); // C' = V + Encode(u')
+  Cp = __matrix_add(Cp, V);
+
+  s1 = __ct_verify_NNBAR(Bp, Bpp); // B' vs B''
+  s2 = __ct_verify_NBAR2(C, Cp); // C vs C'
+  s1 |= s2; // combined selector passed to __ct_select below
+
+  ct_k[BYTES_CT:BYTES_SEC] = __ct_select(ct_k[BYTES_CT:BYTES_SEC], seedSE_k[1+BYTES_SEED_SE:BYTES_SEC], s, s1); // choose between k (from seedSE_k) and s according to the selector s1
+
+  ss = __shake256_ss_opt(ss, ct_k);
+
+  ssp = s_ssp;
+  for k = 0 to BYTES_SEC/8 { // store the shared secret to memory at ssp
+    [ssp + 8*k] = ss[u64 k];
+  }
+}
+
+fn _frodo_amd64_ref_keypair(reg u64 pkp skp) { // Randomized key generation: fills a stack coin buffer with #randombytes, then calls the derandomized routine.
+  #public stack u8[BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC] coins;
+
+  pkp = pkp;
+  skp = skp;
+
+  #declassify coins = #randombytes(coins); // NOTE(review): the whole coin buffer (s || seed SE || z) is declassified here; confirm this is intended for s and seed SE.
+
+  __frodo_amd64_ref_keypair_derand(pkp, skp, coins);
+}
+
+fn _frodo_amd64_ref_keypair_derand(reg u64 pkp skp coinsp) { // Derandomized key generation: copies the caller's coins from memory at coinsp to the stack, then calls the inner routine.
+  #public stack u8[BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC] coins;
+  reg u64 i; stack u64 s_i;
+
+  pkp = pkp;
+  skp = skp;
+
+  i = 0;
+  while (i < BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC) { // byte-by-byte copy of the coins
+    #declassify coins[(int)i] = (u8)[coinsp + i];
+    i += 1;
+  }
+
+  s_i = i; // NOTE(review): s_i is written but never read in this function; confirm whether it is still needed.
+  __frodo_amd64_ref_keypair_derand(pkp, skp, coins);
+}
+
+fn _frodo_amd64_ref_enc_derand(reg u64 ctp ssp pkp coinsp) { // Derandomized encapsulation: copies BYTES_SEC + BYTES_SALT coin bytes from coinsp to the stack, then calls the inner routine.
+  stack u8[BYTES_SEC + BYTES_SALT] coins;
+  reg u64 i; stack u64 s_i;
+
+  pkp = pkp;
+  ctp = ctp;
+  ssp = ssp;
+
+  i = 0;
+  while (i < BYTES_SEC + BYTES_SALT) { // byte-by-byte copy of the coins
+    coins[(int)i] = (u8)[coinsp + i];
+    i += 1;
+  }
+  s_i = i; // NOTE(review): s_i is written but never read in this function; confirm whether it is still needed.
+
+  __frodo_amd64_ref_enc_derand(ctp, ssp, pkp, coins);
+}
+
+fn _frodo_amd64_ref_enc(reg u64 ctp ssp pkp) { // Randomized encapsulation: fills the coin buffer with #randombytes, then calls the derandomized routine.
+  stack u8[BYTES_SEC + BYTES_SALT] coins;
+  pkp = pkp;
+  ctp = ctp;
+  ssp = ssp;
+
+  coins = #randombytes(coins);
+
+  __frodo_amd64_ref_enc_derand(ctp, ssp, pkp, coins);
+}