Skip to content

Commit

Permalink
crypto: aegis128 - duplicate init() and final() hooks in SIMD code
Browse files Browse the repository at this point in the history
In order to speed up aegis128 processing even more, duplicate the init()
and final() routines as SIMD versions in their entirety. This results
in a 2x speedup on ARM Cortex-A57 for ~1500 byte packets (using AES
instructions).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
  • Loading branch information
Ard Biesheuvel authored and herbertx committed Oct 25, 2019
1 parent 2698bce commit 5282826
Showing 3 changed files with 97 additions and 12 deletions.
38 changes: 26 additions & 12 deletions crypto/aegis128-core.c
Original file line number Diff line number Diff line change
@@ -60,10 +60,16 @@ static bool aegis128_do_simd(void)

bool crypto_aegis128_have_simd(void);
void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg);
void crypto_aegis128_init_simd(struct aegis_state *state,
const union aegis_block *key,
const u8 *iv);
void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size);
void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size);
void crypto_aegis128_final_simd(struct aegis_state *state,
union aegis_block *tag_xor,
u64 assoclen, u64 cryptlen);

static void crypto_aegis128_update(struct aegis_state *state)
{
@@ -395,17 +401,21 @@ static int crypto_aegis128_encrypt(struct aead_request *req)
struct skcipher_walk walk;
struct aegis_state state;

crypto_aegis128_init(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);

skcipher_walk_aead_encrypt(&walk, req, false);
if (aegis128_do_simd())
if (aegis128_do_simd()) {
crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_encrypt_chunk_simd);
else
crypto_aegis128_final_simd(&state, &tag, req->assoclen,
cryptlen);
} else {
crypto_aegis128_init(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_encrypt_chunk);
crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
}

scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
authsize, 1);
@@ -426,17 +436,21 @@ static int crypto_aegis128_decrypt(struct aead_request *req)
scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
authsize, 0);

crypto_aegis128_init(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);

skcipher_walk_aead_decrypt(&walk, req, false);
if (aegis128_do_simd())
if (aegis128_do_simd()) {
crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_decrypt_chunk_simd);
else
crypto_aegis128_final_simd(&state, &tag, req->assoclen,
cryptlen);
} else {
crypto_aegis128_init(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_decrypt_chunk);
crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
}

return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
}
50 changes: 50 additions & 0 deletions crypto/aegis128-neon-inner.c
Original file line number Diff line number Diff line change
@@ -132,6 +132,36 @@ void preload_sbox(void)
:: "r"(crypto_aes_sbox));
}

void crypto_aegis128_init_neon(void *state, const void *key, const void *iv)
{
static const uint8_t const0[] = {
0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
};
static const uint8_t const1[] = {
0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
};
uint8x16_t k = vld1q_u8(key);
uint8x16_t kiv = k ^ vld1q_u8(iv);
struct aegis128_state st = {{
kiv,
vld1q_u8(const1),
vld1q_u8(const0),
k ^ vld1q_u8(const0),
k ^ vld1q_u8(const1),
}};
int i;

preload_sbox();

for (i = 0; i < 5; i++) {
st = aegis128_update_neon(st, k);
st = aegis128_update_neon(st, kiv);
}
aegis128_save_state_neon(st, state);
}

void crypto_aegis128_update_neon(void *state, const void *msg)
{
struct aegis128_state st = aegis128_load_state_neon(state);
@@ -210,3 +240,23 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,

aegis128_save_state_neon(st, state);
}

void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen,
uint64_t cryptlen)
{
struct aegis128_state st = aegis128_load_state_neon(state);
uint8x16_t v;
int i;

preload_sbox();

v = st.v[3] ^ (uint8x16_t)vcombine_u64(vmov_n_u64(8 * assoclen),
vmov_n_u64(8 * cryptlen));

for (i = 0; i < 7; i++)
st = aegis128_update_neon(st, v);

v = vld1q_u8(tag_xor);
v ^= st.v[0] ^ st.v[1] ^ st.v[2] ^ st.v[3] ^ st.v[4];
vst1q_u8(tag_xor, v);
}
21 changes: 21 additions & 0 deletions crypto/aegis128-neon.c
Original file line number Diff line number Diff line change
@@ -8,11 +8,14 @@

#include "aegis.h"

void crypto_aegis128_init_neon(void *state, const void *key, const void *iv);
void crypto_aegis128_update_neon(void *state, const void *msg);
void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
unsigned int size);
void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
unsigned int size);
void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen,
uint64_t cryptlen);

int aegis128_have_aes_insn __ro_after_init;

@@ -25,6 +28,15 @@ bool crypto_aegis128_have_simd(void)
return IS_ENABLED(CONFIG_ARM64);
}

void crypto_aegis128_init_simd(union aegis_block *state,
const union aegis_block *key,
const u8 *iv)
{
kernel_neon_begin();
crypto_aegis128_init_neon(state, key, iv);
kernel_neon_end();
}

void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)
{
kernel_neon_begin();
@@ -47,3 +59,12 @@ void crypto_aegis128_decrypt_chunk_simd(union aegis_block *state, u8 *dst,
crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
kernel_neon_end();
}

void crypto_aegis128_final_simd(union aegis_block *state,
union aegis_block *tag_xor,
u64 assoclen, u64 cryptlen)
{
kernel_neon_begin();
crypto_aegis128_final_neon(state, tag_xor, assoclen, cryptlen);
kernel_neon_end();
}

0 comments on commit 5282826

Please sign in to comment.