Skip to content

Commit

Permalink
Patch Kyber AVX2
Browse files (browse the repository at this point in the history)
  • Loading branch information
sarranz committed Oct 17, 2023
1 parent 5f2c188 commit 88aabfb
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 59 deletions.
50 changes: 39 additions & 11 deletions src/crypto_kem/kyber/common/amd64/avx2/poly.jinc
Original file line number Diff line number Diff line change
Expand Up @@ -776,28 +776,54 @@ fn _poly_getnoise(reg ptr u16[KYBER_N] rp, reg ptr u8[KYBER_SYMBYTES] seed, reg
}

// __shake256_squeezenblocks4x: for each i in [0, NOISE_NBLOCKS), calls
// __shake256_squeezeblock4x on the i-th SHAKE256_RATE-byte slice of
// buf0..buf3, then returns the state and the four buffers.
//
// NOTE(review): this is a diff hunk rendered without its +/- markers, so the
// pre-patch and post-patch versions of changed lines appear back to back.
// The labels below follow the hunk's line counts (-776,28 +776,54).
inline
// Pre-patch signature (no `ms` parameter or result):
fn __shake256_squeezenblocks4x(reg ptr u256[25] state, reg ptr u8[NOISE_NBLOCKS * SHAKE256_RATE] buf0 buf1 buf2 buf3) -> reg ptr u256[25], reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE], reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE], reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE], reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE]
// Post-patch signature: takes `ms` as an extra argument and returns it as an
// extra result annotated `#msf`.
fn __shake256_squeezenblocks4x(
reg ptr u256[25] state,
reg ptr u8[NOISE_NBLOCKS * SHAKE256_RATE] buf0 buf1 buf2 buf3,
reg u64 ms)
->
reg ptr u256[25],
reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE],
reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE],
reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE],
reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE],
#msf reg u64
{
inline int i;

for i = 0 to NOISE_NBLOCKS
{
// Pre-patch call (single line, `ms` not threaded through):
state, buf0[i*SHAKE256_RATE:SHAKE256_RATE], buf1[i*SHAKE256_RATE:SHAKE256_RATE], buf2[i*SHAKE256_RATE:SHAKE256_RATE], buf3[i*SHAKE256_RATE:SHAKE256_RATE] = __shake256_squeezeblock4x(state, buf0[i*SHAKE256_RATE:SHAKE256_RATE], buf1[i*SHAKE256_RATE:SHAKE256_RATE], buf2[i*SHAKE256_RATE:SHAKE256_RATE], buf3[i*SHAKE256_RATE:SHAKE256_RATE]);
// Post-patch call: same slices, with `ms` passed in and received back.
state,
buf0[i*SHAKE256_RATE:SHAKE256_RATE],
buf1[i*SHAKE256_RATE:SHAKE256_RATE],
buf2[i*SHAKE256_RATE:SHAKE256_RATE],
buf3[i*SHAKE256_RATE:SHAKE256_RATE],
ms
= __shake256_squeezeblock4x(
state,
buf0[i*SHAKE256_RATE:SHAKE256_RATE],
buf1[i*SHAKE256_RATE:SHAKE256_RATE],
buf2[i*SHAKE256_RATE:SHAKE256_RATE],
buf3[i*SHAKE256_RATE:SHAKE256_RATE],
ms
);
// NOTE(review): this overwrites the `ms` just returned by the call above on
// every iteration; the __shake128_squeezenblocks4x loops in this same commit
// have no such line. Confirm this re-initialisation is intended.
ms = #init_msf();
}

// Pre-patch return:
return state, buf0, buf1, buf2, buf3;
// Post-patch return (adds `ms`):
return state, buf0, buf1, buf2, buf3, ms;
}

#[returnaddress="stack"]
fn _poly_getnoise_eta1_4x(
reg ptr u16[KYBER_N] r0 r1 r2 r3,
reg ptr u8[KYBER_SYMBYTES] seed,
reg u8 nonce)
reg u8 nonce,
reg u64 ms)
->
reg ptr u16[KYBER_N],
reg ptr u16[KYBER_N],
reg ptr u16[KYBER_N],
reg ptr u16[KYBER_N]
reg ptr u16[KYBER_N],
#msf reg u64
{
reg u256 f;
stack u256[25] state;
Expand All @@ -821,27 +847,29 @@ fn _poly_getnoise_eta1_4x(
buf3.[32] = nonce;

state = _shake256_absorb4x_33(state, buf0[0:33], buf1[0:33], buf2[0:33], buf3[0:33]);
state, buf0, buf1, buf2, buf3 = __shake256_squeezenblocks4x(state, buf0, buf1, buf2, buf3);
state, buf0, buf1, buf2, buf3, ms = __shake256_squeezenblocks4x(state, buf0, buf1, buf2, buf3, ms);

r0 = _r0;
r0 = __poly_cbd_eta1(r0, buf0[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]);
r1 = __poly_cbd_eta1(r1, buf1[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]);
r2 = __poly_cbd_eta1(r2, buf2[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]);
r3 = __poly_cbd_eta1(r3, buf3[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]);

return r0, r1, r2, r3;
return r0, r1, r2, r3, ms;
}

#[returnaddress="stack"]
fn _poly_getnoise_eta1122_4x(
reg ptr u16[KYBER_N] r0 r1 r2 r3,
reg ptr u8[KYBER_SYMBYTES] seed,
reg u8 nonce)
reg u8 nonce,
reg u64 ms)
->
reg ptr u16[KYBER_N],
reg ptr u16[KYBER_N],
reg ptr u16[KYBER_N],
reg ptr u16[KYBER_N]
reg ptr u16[KYBER_N],
#msf reg u64
{
reg u256 f;
stack u256[25] state;
Expand All @@ -865,15 +893,15 @@ fn _poly_getnoise_eta1122_4x(
buf3.[32] = nonce;

state = _shake256_absorb4x_33(state, buf0[0:33], buf1[0:33], buf2[0:33], buf3[0:33]);
state, buf0, buf1, buf2, buf3 = __shake256_squeezenblocks4x(state, buf0, buf1, buf2, buf3);
state, buf0, buf1, buf2, buf3, ms = __shake256_squeezenblocks4x(state, buf0, buf1, buf2, buf3, ms);

r0 = _r0;
r0 = __poly_cbd_eta1(r0, buf0[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]);
r1 = __poly_cbd_eta1(r1, buf1[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]);
r2 = __poly_cbd_eta2(r2, buf2[0:KYBER_ETA2*KYBER_N/4]);
r3 = __poly_cbd_eta2(r3, buf3[0:KYBER_ETA2*KYBER_N/4]);

return r0, r1, r2, r3;
return r0, r1, r2, r3, ms;
}


Expand Down
51 changes: 43 additions & 8 deletions src/crypto_kem/kyber/kyber512/amd64/avx2/gen_matrix.jinc
Original file line number Diff line number Diff line change
Expand Up @@ -263,17 +263,39 @@ u8[2048] ru_idx = {-1, -1, -1, -1, -1, -1, -1, -1,
0, 2, 4, 6, 8, 10, 12, 14};

// __shake128_squeezenblocks4x (kyber512): for each i in
// [0, GENMATRIX_NBLOCKS), calls __shake128_squeezeblock4x on the i-th
// SHAKE128_RATE-byte slice of h0..h3, then returns the state and the four
// buffers.
//
// NOTE(review): this is a diff hunk rendered without its +/- markers, so the
// pre-patch and post-patch versions of changed lines appear back to back.
// The labels below follow the hunk's line counts (-263,17 +263,39).
inline
// Pre-patch signature (two lines, no `ms` parameter or result):
fn __shake128_squeezenblocks4x(reg ptr u256[25] state, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] h0 h1 h2 h3)
-> reg ptr u256[25], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN]
// Post-patch signature: takes `ms` as an extra argument and returns it as an
// extra result annotated `#msf`.
fn __shake128_squeezenblocks4x(
reg ptr u256[25] state,
reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] h0 h1 h2 h3,
reg u64 ms)
->
reg ptr u256[25],
reg ptr u8[REJ_UNIFORM_AVX_BUFLEN],
reg ptr u8[REJ_UNIFORM_AVX_BUFLEN],
reg ptr u8[REJ_UNIFORM_AVX_BUFLEN],
reg ptr u8[REJ_UNIFORM_AVX_BUFLEN],
#msf reg u64
{
inline int i;

for i = 0 to GENMATRIX_NBLOCKS
{
// Pre-patch call (single line, `ms` not threaded through):
state, h0[i*SHAKE128_RATE:SHAKE128_RATE], h1[i*SHAKE128_RATE:SHAKE128_RATE], h2[i*SHAKE128_RATE:SHAKE128_RATE], h3[i*SHAKE128_RATE:SHAKE128_RATE] = __shake128_squeezeblock4x(state, h0[i*SHAKE128_RATE:SHAKE128_RATE], h1[i*SHAKE128_RATE:SHAKE128_RATE], h2[i*SHAKE128_RATE:SHAKE128_RATE], h3[i*SHAKE128_RATE:SHAKE128_RATE]);
// Post-patch call: same slices, with `ms` passed in and received back.
state,
h0[i*SHAKE128_RATE:SHAKE128_RATE],
h1[i*SHAKE128_RATE:SHAKE128_RATE],
h2[i*SHAKE128_RATE:SHAKE128_RATE],
h3[i*SHAKE128_RATE:SHAKE128_RATE],
ms
= __shake128_squeezeblock4x(
state,
h0[i*SHAKE128_RATE:SHAKE128_RATE],
h1[i*SHAKE128_RATE:SHAKE128_RATE],
h2[i*SHAKE128_RATE:SHAKE128_RATE],
h3[i*SHAKE128_RATE:SHAKE128_RATE],
ms
);
}

// Pre-patch return:
return state, h0, h1, h2, h3;
// Post-patch return (adds `ms`):
return state, h0, h1, h2, h3, ms;
}

inline
Expand Down Expand Up @@ -634,8 +656,7 @@ fn __gen_matrix(stack u8[KYBER_SYMBYTES] seed, inline int transposed, reg u64 ms
}

state = _shake128_absorb4x_34(state, buf0[0:34], buf1[0:34], buf2[0:34], buf3[0:34]);
state, buf0, buf1, buf2, buf3 = __shake128_squeezenblocks4x(state, buf0, buf1, buf2, buf3);
ms = #init_msf();
state, buf0, buf1, buf2, buf3, ms = __shake128_squeezenblocks4x(state, buf0, buf1, buf2, buf3, ms);

tmp, rr[0*KYBER_VECN+0*KYBER_N:KYBER_N], ms = _rej_uniform_avx(rr[0*KYBER_VECN+0*KYBER_N:KYBER_N], buf0, ms);

Expand Down Expand Up @@ -669,8 +690,22 @@ fn __gen_matrix(stack u8[KYBER_SYMBYTES] seed, inline int transposed, reg u64 ms
b = bflg == 0;
while(!b) {
ms = #update_msf(!b, ms);
state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE] = __shake128_squeezeblock4x(state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE]);
ms = #init_msf();

state,
buf0[0:SHAKE128_RATE],
buf1[0:SHAKE128_RATE],
buf2[0:SHAKE128_RATE],
buf3[0:SHAKE128_RATE],
ms
= __shake128_squeezeblock4x(
state,
buf0[0:SHAKE128_RATE],
buf1[0:SHAKE128_RATE],
buf2[0:SHAKE128_RATE],
buf3[0:SHAKE128_RATE],
ms
);

ctr0, rr[0*KYBER_VECN+0*KYBER_N:KYBER_N], ms = __rej_uniform(rr[0*KYBER_VECN+0*KYBER_N:KYBER_N], ctr0, buf0[0:SHAKE128_RATE], SHAKE128_RATE, ms);
ctr1, rr[0*KYBER_VECN+1*KYBER_N:KYBER_N], ms = __rej_uniform(rr[0*KYBER_VECN+1*KYBER_N:KYBER_N], ctr1, buf1[0:SHAKE128_RATE], SHAKE128_RATE, ms);
ctr2, rr[1*KYBER_VECN+0*KYBER_N:KYBER_N], ms = __rej_uniform(rr[1*KYBER_VECN+0*KYBER_N:KYBER_N], ctr2, buf2[0:SHAKE128_RATE], SHAKE128_RATE, ms);
Expand Down
21 changes: 3 additions & 18 deletions src/crypto_kem/kyber/kyber512/amd64/avx2/indcpa.jinc
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,8 @@ fn __indcpa_keypair_derand(reg u64 pkp, reg u64 skp, reg ptr u8[KYBER_SYMBYTES]

aa, ms = __gen_matrix(publicseed, 0, ms);

#mmx reg u64 _ms;
_ms = #mov_msf(ms);

nonce = 0;
skpv[0:KYBER_N], skpv[KYBER_N:KYBER_N], e[0:KYBER_N], e[KYBER_N:KYBER_N] = _poly_getnoise_eta1_4x(skpv[0:KYBER_N], skpv[KYBER_N:KYBER_N], e[0:KYBER_N], e[KYBER_N:KYBER_N], noiseseed, nonce);

ms = #init_msf(); //#mov_msf(_ms);
skpv[0:KYBER_N], skpv[KYBER_N:KYBER_N], e[0:KYBER_N], e[KYBER_N:KYBER_N], ms = _poly_getnoise_eta1_4x(skpv[0:KYBER_N], skpv[KYBER_N:KYBER_N], e[0:KYBER_N], e[KYBER_N:KYBER_N], noiseseed, nonce, ms);

skpv = __polyvec_ntt(skpv);
e = __polyvec_ntt(e);
Expand Down Expand Up @@ -106,13 +101,8 @@ fn __indcpa_enc_0(stack u64 sctp, reg ptr u8[KYBER_INDCPA_MSGBYTES] msgp, reg u6
lnoiseseed = s_noiseseed;
lnoiseseed = #protect_ptr(lnoiseseed, ms);

#mmx reg u64 _ms;
_ms = #mov_msf(ms);

nonce = 0;
sp[0:KYBER_N], sp[KYBER_N:KYBER_N], ep[0:KYBER_N], ep[KYBER_N:KYBER_N] = _poly_getnoise_eta1122_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], ep[0:KYBER_N], ep[KYBER_N:KYBER_N], lnoiseseed, nonce);

ms = #init_msf(); //#mov_msf(_ms);
sp[0:KYBER_N], sp[KYBER_N:KYBER_N], ep[0:KYBER_N], ep[KYBER_N:KYBER_N], ms = _poly_getnoise_eta1122_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], ep[0:KYBER_N], ep[KYBER_N:KYBER_N], lnoiseseed, nonce, ms);

nonce = 4;
lnoiseseed = s_noiseseed;
Expand Down Expand Up @@ -185,13 +175,8 @@ fn __indcpa_enc_1(reg ptr u8[KYBER_INDCPA_BYTES] ctp, reg ptr u8[KYBER_INDCPA_MS
lnoiseseed = s_noiseseed;
lnoiseseed = #protect_ptr(lnoiseseed, ms);

#mmx reg u64 _ms;
_ms = #mov_msf(ms);

nonce = 0;
sp[0:KYBER_N], sp[KYBER_N:KYBER_N], ep[0:KYBER_N], ep[KYBER_N:KYBER_N] = _poly_getnoise_eta1122_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], ep[0:KYBER_N], ep[KYBER_N:KYBER_N], lnoiseseed, nonce);

ms = #init_msf(); //#mov_msf(_ms);
sp[0:KYBER_N], sp[KYBER_N:KYBER_N], ep[0:KYBER_N], ep[KYBER_N:KYBER_N], ms = _poly_getnoise_eta1122_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], ep[0:KYBER_N], ep[KYBER_N:KYBER_N], lnoiseseed, nonce, ms);

nonce = 4;
lnoiseseed = s_noiseseed;
Expand Down
38 changes: 30 additions & 8 deletions src/crypto_kem/kyber/kyber768/amd64/avx2/gen_matrix.jinc
Original file line number Diff line number Diff line change
Expand Up @@ -276,17 +276,39 @@ fn __shake128_squeezenblocks(reg u256[7] state, stack u8[REJ_UNIFORM_AVX_BUFLEN]
}

// __shake128_squeezenblocks4x (kyber768): for each i in
// [0, GENMATRIX_NBLOCKS), calls __shake128_squeezeblock4x on the i-th
// SHAKE128_RATE-byte slice of h0..h3, then returns the state and the four
// buffers.
//
// NOTE(review): this is a diff hunk rendered without its +/- markers, so the
// pre-patch and post-patch versions of changed lines appear back to back.
// The labels below follow the hunk's line counts (-276,17 +276,39).
inline
// Pre-patch signature (two lines, no `ms` parameter or result):
fn __shake128_squeezenblocks4x(reg ptr u256[25] state, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] h0 h1 h2 h3)
-> reg ptr u256[25], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN]
// Post-patch signature: takes `ms` as an extra argument and returns it as an
// extra result annotated `#msf`.
fn __shake128_squeezenblocks4x(
reg ptr u256[25] state,
reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] h0 h1 h2 h3,
reg u64 ms)
->
reg ptr u256[25],
reg ptr u8[REJ_UNIFORM_AVX_BUFLEN],
reg ptr u8[REJ_UNIFORM_AVX_BUFLEN],
reg ptr u8[REJ_UNIFORM_AVX_BUFLEN],
reg ptr u8[REJ_UNIFORM_AVX_BUFLEN],
#msf reg u64
{
inline int i;

for i = 0 to GENMATRIX_NBLOCKS
{
// Pre-patch call (single line, `ms` not threaded through):
state, h0[i*SHAKE128_RATE:SHAKE128_RATE], h1[i*SHAKE128_RATE:SHAKE128_RATE], h2[i*SHAKE128_RATE:SHAKE128_RATE], h3[i*SHAKE128_RATE:SHAKE128_RATE] = __shake128_squeezeblock4x(state, h0[i*SHAKE128_RATE:SHAKE128_RATE], h1[i*SHAKE128_RATE:SHAKE128_RATE], h2[i*SHAKE128_RATE:SHAKE128_RATE], h3[i*SHAKE128_RATE:SHAKE128_RATE]);
// Post-patch call: same slices, with `ms` passed in and received back.
state,
h0[i*SHAKE128_RATE:SHAKE128_RATE],
h1[i*SHAKE128_RATE:SHAKE128_RATE],
h2[i*SHAKE128_RATE:SHAKE128_RATE],
h3[i*SHAKE128_RATE:SHAKE128_RATE],
ms
= __shake128_squeezeblock4x(
state,
h0[i*SHAKE128_RATE:SHAKE128_RATE],
h1[i*SHAKE128_RATE:SHAKE128_RATE],
h2[i*SHAKE128_RATE:SHAKE128_RATE],
h3[i*SHAKE128_RATE:SHAKE128_RATE],
ms
);
}

// Pre-patch return:
return state, h0, h1, h2, h3;
// Post-patch return (adds `ms`):
return state, h0, h1, h2, h3, ms;
}

inline
Expand Down Expand Up @@ -660,7 +682,7 @@ fn __gen_matrix(stack u8[KYBER_SYMBYTES] seed, inline int transposed, reg u64 ms
}

state = _shake128_absorb4x_34(state, buf0[0:34], buf1[0:34], buf2[0:34], buf3[0:34]);
state, buf0, buf1, buf2, buf3 = __shake128_squeezenblocks4x(state, buf0, buf1, buf2, buf3);
state, buf0, buf1, buf2, buf3, ms = __shake128_squeezenblocks4x(state, buf0, buf1, buf2, buf3, ms);

tmp, rr[0*KYBER_VECN+0*KYBER_N:KYBER_N], ms = _rej_uniform_avx(rr[0*KYBER_VECN+0*KYBER_N:KYBER_N], buf0, ms);
ctr0 = tmp;
Expand Down Expand Up @@ -688,7 +710,7 @@ fn __gen_matrix(stack u8[KYBER_SYMBYTES] seed, inline int transposed, reg u64 ms
_, _, _, _, zf, _ = #OR_8(flg0, bflg);

while (!zf) {
state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE] = __shake128_squeezeblock4x(state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE]);
state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE], ms = __shake128_squeezeblock4x(state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE], ms);

ctr0, rr[0*KYBER_VECN+0*KYBER_N:KYBER_N], ms = __rej_uniform(rr[0*KYBER_VECN+0*KYBER_N:KYBER_N], ctr0, buf0[0:SHAKE128_RATE], SHAKE128_RATE, ms);
ctr1, rr[0*KYBER_VECN+1*KYBER_N:KYBER_N], ms = __rej_uniform(rr[0*KYBER_VECN+1*KYBER_N:KYBER_N], ctr1, buf1[0:SHAKE128_RATE], SHAKE128_RATE, ms);
Expand Down Expand Up @@ -745,7 +767,7 @@ fn __gen_matrix(stack u8[KYBER_SYMBYTES] seed, inline int transposed, reg u64 ms
}

state = _shake128_absorb4x_34(state, buf0[0:34], buf1[0:34], buf2[0:34], buf3[0:34]);
state, buf0, buf1, buf2, buf3 = __shake128_squeezenblocks4x(state, buf0, buf1, buf2, buf3);
state, buf0, buf1, buf2, buf3, ms = __shake128_squeezenblocks4x(state, buf0, buf1, buf2, buf3, ms);

tmp, rr[1*KYBER_VECN+1*KYBER_N:KYBER_N], ms = _rej_uniform_avx(rr[1*KYBER_VECN+1*KYBER_N:KYBER_N], buf0, ms);
ctr0 = tmp;
Expand Down Expand Up @@ -774,7 +796,7 @@ fn __gen_matrix(stack u8[KYBER_SYMBYTES] seed, inline int transposed, reg u64 ms


while(!zf) {
state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE] = __shake128_squeezeblock4x(state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE]);
state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE], ms = __shake128_squeezeblock4x(state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE], ms);

ctr0, rr[1*KYBER_VECN+1*KYBER_N:KYBER_N], ms = __rej_uniform(rr[1*KYBER_VECN+1*KYBER_N:KYBER_N], ctr0, buf0[0:SHAKE128_RATE], SHAKE128_RATE, ms);
ctr1, rr[1*KYBER_VECN+2*KYBER_N:KYBER_N], ms = __rej_uniform(rr[1*KYBER_VECN+2*KYBER_N:KYBER_N], ctr1, buf1[0:SHAKE128_RATE], SHAKE128_RATE, ms);
Expand Down
12 changes: 6 additions & 6 deletions src/crypto_kem/kyber/kyber768/amd64/avx2/indcpa.jinc
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ fn __indcpa_keypair_derand(reg u64 pkp, reg u64 skp, reg ptr u8[KYBER_SYMBYTES]
aa, ms = __gen_matrix(publicseed, 0, ms);

nonce = 0;
skpv[0:KYBER_N], skpv[KYBER_N:KYBER_N], skpv[2*KYBER_N:KYBER_N], e[0:KYBER_N] = _poly_getnoise_eta1_4x(skpv[0:KYBER_N], skpv[KYBER_N:KYBER_N], skpv[2*KYBER_N:KYBER_N], e[0:KYBER_N], noiseseed, nonce);
skpv[0:KYBER_N], skpv[KYBER_N:KYBER_N], skpv[2*KYBER_N:KYBER_N], e[0:KYBER_N], ms = _poly_getnoise_eta1_4x(skpv[0:KYBER_N], skpv[KYBER_N:KYBER_N], skpv[2*KYBER_N:KYBER_N], e[0:KYBER_N], noiseseed, nonce, ms);

nonce = 4;
e[KYBER_N:KYBER_N], e[2*KYBER_N:KYBER_N], pkpv[0:KYBER_N], pkpv[KYBER_N:KYBER_N] = _poly_getnoise_eta1_4x(e[KYBER_N:KYBER_N], e[2*KYBER_N:KYBER_N], pkpv[0:KYBER_N], pkpv[KYBER_N:KYBER_N], noiseseed, nonce);
e[KYBER_N:KYBER_N], e[2*KYBER_N:KYBER_N], pkpv[0:KYBER_N], pkpv[KYBER_N:KYBER_N], ms = _poly_getnoise_eta1_4x(e[KYBER_N:KYBER_N], e[2*KYBER_N:KYBER_N], pkpv[0:KYBER_N], pkpv[KYBER_N:KYBER_N], noiseseed, nonce, ms);

skpv = __polyvec_ntt(skpv);
e = __polyvec_ntt(e);
Expand Down Expand Up @@ -105,10 +105,10 @@ fn __indcpa_enc_0(stack u64 sctp, reg ptr u8[KYBER_INDCPA_MSGBYTES] msgp, reg u6
lnoiseseed = #protect_ptr(lnoiseseed, ms);

nonce = 0;
sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N] = _poly_getnoise_eta1_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N], lnoiseseed, nonce);
sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N], ms = _poly_getnoise_eta1_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N], lnoiseseed, nonce, ms);

nonce = 4;
ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N] = _poly_getnoise_eta1_4x(ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N], lnoiseseed, nonce);
ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N], ms = _poly_getnoise_eta1_4x(ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N], lnoiseseed, nonce, ms);

sp = __polyvec_ntt(sp);

Expand Down Expand Up @@ -177,10 +177,10 @@ fn __indcpa_enc_1(reg ptr u8[KYBER_INDCPA_BYTES] ctp, reg ptr u8[KYBER_INDCPA_MS
lnoiseseed = #protect_ptr(lnoiseseed, ms);

nonce = 0;
sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N] = _poly_getnoise_eta1_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N], lnoiseseed, nonce);
sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N], ms = _poly_getnoise_eta1_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N], lnoiseseed, nonce, ms);

nonce = 4;
ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N] = _poly_getnoise_eta1_4x(ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N], lnoiseseed, nonce);
ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N], ms = _poly_getnoise_eta1_4x(ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N], lnoiseseed, nonce, ms);

sp = __polyvec_ntt(sp);

Expand Down
4 changes: 2 additions & 2 deletions src/crypto_kem/kyber/kyber768/amd64/avx2/kem.jazz
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ export fn jade_kem_kyber_kyber768_amd64_avx2_keypair_derand(reg u64 public_key s

public_key = public_key;
secret_key = secret_key;
ms = #init_msf(); // Protects `__tostack`.
stack_coins = __tostack64u8(stack_coins, coins);
ms = #init_msf();
ms = __crypto_kem_keypair_derand_jazz(public_key, secret_key, stack_coins, ms);
?{}, r = #set0();
return r;
Expand Down Expand Up @@ -39,8 +39,8 @@ export fn jade_kem_kyber_kyber768_amd64_avx2_enc_derand(reg u64 ciphertext share
ciphertext = ciphertext;
shared_secret = shared_secret;
public_key = public_key;
ms = #init_msf(); // Protects `__tostack`.
stack_coins = __tostack32u8(stack_coins, coins);
ms = #init_msf();
ms = __crypto_kem_enc_derand_jazz(ciphertext, shared_secret, public_key, stack_coins, ms);
?{}, r = #set0();
return r;
Expand Down
Loading

0 comments on commit 88aabfb

Please sign in to comment.