AES: optimize AES-GCM

HW acceleration for GCM is now enabled by default

Closes IDF-1443
This commit is contained in:
Marius Vikhammer 2020-03-10 13:53:09 +08:00
parent 37369a8a57
commit 3351376a11
9 changed files with 511 additions and 140 deletions

View file

@ -2,9 +2,10 @@
// AES-CBC hardware throughput (accounts for worst-case performance with PSRAM workaround)
#define IDF_PERFORMANCE_MIN_AES_CBC_THROUGHPUT_MBSEC 8.2
#define IDF_PERFORMANCE_MIN_AES_GCM_THROUGHPUT_MBSEC 0.5
// SHA256 hardware throughput at 240MHz, threshold set lower than worst case
#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 9.0
#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 8.0
// esp_sha() time to process 32KB of input data from RAM
#define IDF_PERFORMANCE_MAX_TIME_SHA1_32KB 5000
#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB 4500

View file

@ -1,12 +1,13 @@
#pragma once
#define IDF_PERFORMANCE_MIN_AES_CBC_THROUGHPUT_MBSEC 14.4
#define IDF_PERFORMANCE_MIN_AES_CBC_THROUGHPUT_MBSEC 43.0
#define IDF_PERFORMANCE_MIN_AES_GCM_THROUGHPUT_MBSEC 2.1
// SHA256 hardware throughput at 240MHz, threshold set lower than worst case
#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 19.8
#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 70.0
// esp_sha() time to process 32KB of input data from RAM
#define IDF_PERFORMANCE_MAX_TIME_SHA1_32KB 1000
#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB 900
#define IDF_PERFORMANCE_MAX_TIME_SHA1_32KB 900
#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB 800
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP 14000
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 100000

View file

@ -197,7 +197,7 @@ menu "mbedTLS"
config MBEDTLS_AES_USE_INTERRUPT
bool "Use interrupt for long AES operations"
depends on MBEDTLS_HARDWARE_AES
depends on IDF_TARGET_ESP32S2 && MBEDTLS_HARDWARE_AES
default y
help
Use an interrupt to coordinate long AES operations.
@ -207,14 +207,15 @@ menu "mbedTLS"
config MBEDTLS_HARDWARE_GCM
bool "Enable partially hardware accelerated GCM"
#depends on IDF_TARGET_ESP32S2 && MBEDTLS_HARDWARE_AES
depends on 0
default n
depends on IDF_TARGET_ESP32S2 && MBEDTLS_HARDWARE_AES
default y
help
Enable partially hardware accelerated GCM.
Enable partially hardware accelerated GCM. GHASH calculation is still done
in software.
Due to hardware limitations, hardware acceleration currently does not
offer any speed boost over software GCM with hardware accelerated AES operations.
If MBEDTLS_HARDWARE_GCM is disabled and MBEDTLS_HARDWARE_AES is enabled then
mbedTLS will still use the hardware accelerated AES block operation, but
on a single block at a time.
config MBEDTLS_HARDWARE_MPI
bool "Enable hardware MPI (bignum) acceleration"

View file

@ -46,11 +46,11 @@
#include "soc/lldesc.h"
#include "esp_heap_caps.h"
#include "sys/param.h"
#include "esp_pm.h"
#include "freertos/FreeRTOS.h"
#include "freertos/semphr.h"
#define AES_BLOCK_BYTES 16
#define IV_WORDS 4
@ -63,13 +63,6 @@
busy-waiting, 30000 bytes is approx 0.5 ms */
#define AES_DMA_INTR_TRIG_LEN 2000
#define ESP_GET_BE32(a) __builtin_bswap32( *(uint32_t*)(a) )
#define ESP_PUT_BE32(a, val) \
do { \
*(uint32_t*)(a) = __builtin_bswap32( (uint32_t)(val) ); \
} while (0)
#define ESP_PUT_BE64(a, val) \
do { \
*(uint64_t*)(a) = __builtin_bswap64( (uint64_t)(val) ); \
@ -90,10 +83,16 @@ typedef enum {
#if defined(CONFIG_MBEDTLS_AES_USE_INTERRUPT)
static SemaphoreHandle_t op_complete_sem;
#if defined(CONFIG_PM_ENABLE)
static esp_pm_lock_handle_t s_pm_cpu_lock;
static esp_pm_lock_handle_t s_pm_sleep_lock;
#endif
#endif
_lock_t crypto_dma_lock;
static _lock_t aes_lock;
static _lock_t s_aes_lock;
static const char *TAG = "esp-aes";
@ -102,10 +101,11 @@ static inline bool valid_key_length(const esp_aes_context *ctx)
return ctx->key_bytes == 128 / 8 || ctx->key_bytes == 192 / 8 || ctx->key_bytes == 256 / 8;
}
void esp_aes_acquire_hardware( void )
{
/* Need to lock DMA since it is shared with SHA block */
_lock_acquire(&aes_lock);
_lock_acquire(&s_aes_lock);
_lock_acquire(&crypto_dma_lock);
/* Enable AES hardware */
@ -119,7 +119,7 @@ void esp_aes_release_hardware( void )
periph_module_disable(PERIPH_AES_DMA_MODULE);
_lock_release(&crypto_dma_lock);
_lock_release(&aes_lock);
_lock_release(&s_aes_lock);
}
@ -234,14 +234,38 @@ static IRAM_ATTR void esp_aes_complete_isr(void *arg)
}
}
static void esp_aes_isr_initialise( void )
/* Prepare interrupt-driven completion signalling for a long AES-DMA operation.
 *
 * Clears and enables the AES interrupt. On first use it creates the completion
 * semaphore and registers esp_aes_complete_isr. With CONFIG_PM_ENABLE it also
 * creates (once) and then takes the CPU-frequency and light-sleep locks; those
 * are released again in esp_aes_dma_wait_complete().
 *
 * Returns ESP_OK on success, ESP_FAIL if any resource could not be set up.
 */
static esp_err_t esp_aes_isr_initialise( void )
{
    REG_WRITE(AES_INT_CLR_REG, 1);
    REG_WRITE(AES_INT_ENA_REG, 1);

    if (op_complete_sem == NULL) {
        op_complete_sem = xSemaphoreCreateBinary();

        if (op_complete_sem == NULL) {
            ESP_LOGE(TAG, "Failed to create intr semaphore");
            return ESP_FAIL;
        }

        /* Without the ISR nothing ever gives the semaphore, so treat a failed
         * allocation as fatal for this call and roll back so a later call retries. */
        if (esp_intr_alloc(ETS_AES_INTR_SOURCE, 0, esp_aes_complete_isr, NULL, NULL) != ESP_OK) {
            ESP_LOGE(TAG, "Failed to allocate AES interrupt");
            vSemaphoreDelete(op_complete_sem);
            op_complete_sem = NULL;
            return ESP_FAIL;
        }
    }

    /* AES is clocked proportionally to CPU clock, take power management lock */
#ifdef CONFIG_PM_ENABLE
    /* Each lock is guarded by its own handle so that a failure creating one
     * does not cause the other to be created a second time on the next call.
     * NOTE(review): assumes esp_pm_lock_create leaves the handle NULL on failure - confirm. */
    if (s_pm_sleep_lock == NULL) {
        if (esp_pm_lock_create(ESP_PM_NO_LIGHT_SLEEP, 0, "aes_sleep", &s_pm_sleep_lock) != ESP_OK) {
            ESP_LOGE(TAG, "Failed to create PM sleep lock");
            return ESP_FAIL;
        }
    }

    if (s_pm_cpu_lock == NULL) {
        if (esp_pm_lock_create(ESP_PM_CPU_FREQ_MAX, 0, "aes_cpu", &s_pm_cpu_lock) != ESP_OK) {
            ESP_LOGE(TAG, "Failed to create PM CPU lock");
            return ESP_FAIL;
        }
    }

    esp_pm_lock_acquire(s_pm_cpu_lock);
    esp_pm_lock_acquire(s_pm_sleep_lock);
#endif

    return ESP_OK;
}
#endif // CONFIG_MBEDTLS_AES_USE_INTERRUPT
@ -257,6 +281,10 @@ static void esp_aes_dma_wait_complete(bool use_intr, lldesc_t *output_desc)
ESP_LOGE("AES", "Timed out waiting for completion of AES Interrupt");
abort();
}
#ifdef CONFIG_PM_ENABLE
esp_pm_lock_release(s_pm_cpu_lock);
esp_pm_lock_release(s_pm_sleep_lock);
#endif // CONFIG_PM_ENABLE
}
#endif
@ -312,6 +340,7 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input,
static int esp_aes_process_dma_ext_ram(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out, bool realloc_input, bool realloc_output)
{
size_t chunk_len;
int ret = 0;
int offset = 0;
unsigned char *input_buf = NULL;
unsigned char *output_buf = NULL;
@ -324,7 +353,8 @@ static int esp_aes_process_dma_ext_ram(esp_aes_context *ctx, const unsigned char
if (input_buf == NULL) {
ESP_LOGE(TAG, "Failed to allocate memory");
return -1;
ret = -1;
goto cleanup;
}
}
@ -333,7 +363,8 @@ static int esp_aes_process_dma_ext_ram(esp_aes_context *ctx, const unsigned char
if (output_buf == NULL) {
ESP_LOGE(TAG, "Failed to allocate memory");
return -1;
ret = -1;
goto cleanup;
}
} else {
output_buf = output;
@ -351,7 +382,8 @@ static int esp_aes_process_dma_ext_ram(esp_aes_context *ctx, const unsigned char
}
if (esp_aes_process_dma(ctx, dma_input, output_buf, chunk_len, stream_out) != 0) {
return -1;
ret = -1;
goto cleanup;
}
if (realloc_output) {
@ -364,6 +396,8 @@ static int esp_aes_process_dma_ext_ram(esp_aes_context *ctx, const unsigned char
offset += chunk_len;
}
cleanup:
if (realloc_input) {
free(input_buf);
}
@ -371,7 +405,7 @@ static int esp_aes_process_dma_ext_ram(esp_aes_context *ctx, const unsigned char
free(output_buf);
}
return 0;
return ret;
}
/* Encrypt/decrypt the input using DMA */
@ -474,7 +508,10 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input,
/* Only use interrupt for long AES operations */
if (len > AES_DMA_INTR_TRIG_LEN) {
use_intr = true;
esp_aes_isr_initialise();
if (esp_aes_isr_initialise() == ESP_FAIL) {
ret = -1;
goto cleanup;
}
} else
#endif
{
@ -483,10 +520,10 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input,
/* Start AES operation */
REG_WRITE(AES_TRIGGER_REG, 1);
esp_aes_dma_wait_complete(use_intr, out_desc_head);
#if (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC)
if (block_bytes > 0) {
if (esp_ptr_external_ram(output)) {
@ -510,6 +547,25 @@ cleanup:
}
/* Check the pointer arguments of an AES call for NULL.
 *
 * The first missing pointer (context, then input, then output) is reported
 * through ESP_LOGE. Returns 0 when all three are non-NULL, -1 otherwise.
 */
static int esp_aes_validate_input(esp_aes_context *ctx, const unsigned char *input,
                                  unsigned char *output )
{
    int status = 0;

    if (ctx == NULL) {
        ESP_LOGE(TAG, "No AES context supplied");
        status = -1;
    } else if (input == NULL) {
        ESP_LOGE(TAG, "No input supplied");
        status = -1;
    } else if (output == NULL) {
        ESP_LOGE(TAG, "No output supplied");
        status = -1;
    }

    return status;
}
/*
* AES-ECB single block encryption
@ -520,6 +576,10 @@ int esp_internal_aes_encrypt( esp_aes_context *ctx,
{
int r;
if (esp_aes_validate_input(ctx, input, output)) {
return -1;
}
if (!valid_key_length(ctx)) {
return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
}
@ -550,6 +610,10 @@ int esp_internal_aes_decrypt( esp_aes_context *ctx,
{
int r;
if (esp_aes_validate_input(ctx, input, output)) {
return -1;
}
if (!valid_key_length(ctx)) {
return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
}
@ -582,6 +646,10 @@ int esp_aes_crypt_ecb( esp_aes_context *ctx,
{
int r;
if (esp_aes_validate_input(ctx, input, output)) {
return -1;
}
if (!valid_key_length(ctx)) {
return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
}
@ -607,11 +675,19 @@ int esp_aes_crypt_cbc( esp_aes_context *ctx,
unsigned char *output )
{
int r = 0;
if (esp_aes_validate_input(ctx, input, output)) {
return -1;
}
if (!iv) {
ESP_LOGE(TAG, "No IV supplied");
return -1;
}
/* For CBC input length should be multiple of
* AES BLOCK BYTES
* */
if ( length % AES_BLOCK_BYTES ) {
if ( (length % AES_BLOCK_BYTES) || (length == 0) ) {
return ERR_ESP_AES_INVALID_INPUT_LENGTH;
}
@ -652,6 +728,16 @@ int esp_aes_crypt_cfb8( esp_aes_context *ctx,
int r = 0;
size_t block_bytes = length - (length % AES_BLOCK_BYTES);
if (esp_aes_validate_input(ctx, input, output)) {
return -1;
}
if (!iv) {
ESP_LOGE(TAG, "No IV supplied");
return -1;
}
if (!valid_key_length(ctx)) {
return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
}
@ -728,12 +814,28 @@ int esp_aes_crypt_cfb128( esp_aes_context *ctx,
uint8_t c;
int r = 0;
size_t stream_bytes = 0;
size_t n = *iv_off;
size_t n;
if (esp_aes_validate_input(ctx, input, output)) {
return -1;
}
if (!iv) {
ESP_LOGE(TAG, "No IV supplied");
return -1;
}
if (!iv_off) {
ESP_LOGE(TAG, "No IV offset supplied");
return -1;
}
if (!valid_key_length(ctx)) {
return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
}
n = *iv_off;
/* First process the *iv_off bytes
* which are pending from the previous call to this API
*/
@ -796,8 +898,25 @@ int esp_aes_crypt_ofb( esp_aes_context *ctx,
unsigned char *output )
{
int r = 0;
size_t n = *iv_off;
size_t n;
size_t stream_bytes = 0;
if (esp_aes_validate_input(ctx, input, output)) {
return -1;
}
if (!iv) {
ESP_LOGE(TAG, "No IV supplied");
return -1;
}
if (!iv_off) {
ESP_LOGE(TAG, "No IV offset supplied");
return -1;
}
n = *iv_off;
/* If there is an offset then use the output of the previous AES block
(the updated IV) to calculate the new output */
while (n > 0 && length > 0) {
@ -841,7 +960,23 @@ int esp_aes_crypt_ctr( esp_aes_context *ctx,
unsigned char *output )
{
int r = 0;
size_t n = *nc_off;
size_t n;
if (esp_aes_validate_input(ctx, input, output)) {
return -1;
}
if (!nonce_counter) {
ESP_LOGE(TAG, "No nonce supplied");
return -1;
}
if (!nc_off) {
ESP_LOGE(TAG, "No nonce offset supplied");
return -1;
}
n = *nc_off;
if (!valid_key_length(ctx)) {
return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
@ -881,7 +1016,7 @@ int esp_aes_crypt_ctr( esp_aes_context *ctx,
return r;
}
static void esp_gcm_ghash(uint8_t *h0, const unsigned char *x, size_t x_len, uint8_t *j0);
static void esp_gcm_ghash(esp_gcm_context *ctx, const unsigned char *x, size_t x_len, uint8_t *z);
/*
* Calculates the Initial Counter Block, J0
@ -899,14 +1034,16 @@ static void esp_gcm_derive_J0(esp_gcm_context *ctx)
memcpy(ctx->J0, ctx->iv, ctx->iv_len);
ctx->J0[AES_BLOCK_BYTES - 1] |= 1;
} else {
/* For IV != 96 bit, J0 = GHASH(IV || 0[s+64] || [len(IV)]64) */
/* For IV != 96 bit, J0 = GHASH(IV || 0[s+64] || [len(IV)]64) */
/* First calculate GHASH on IV */
esp_gcm_ghash(ctx->H, ctx->iv, ctx->iv_len, ctx->J0);
esp_gcm_ghash(ctx, ctx->iv, ctx->iv_len, ctx->J0);
/* Next create 128 bit block which is equal to
64 bit 0 + iv length truncated to 64 bits */
ESP_PUT_BE64(len_buf + 8, ctx->iv_len * 8);
/* Calculate GHASH on last block */
esp_gcm_ghash(ctx->H, len_buf, 16, ctx->J0);
esp_gcm_ghash(ctx, len_buf, 16, ctx->J0);
}
}
@ -942,68 +1079,146 @@ static void xor_data(uint8_t *d, const uint8_t *s)
*dst++ ^= *src++;
}
/* Right shift 128 bits by 1 in Big Endian format.
 *
 * v points to 16 bytes holding a 128-bit value, most significant byte first.
 * The value is shifted in place; the bit shifted out of v[15] is discarded.
 *
 * Works byte by byte, so it needs no 32-bit alignment of v, does not access
 * the buffer through an incompatible pointer type, and never shifts a bit
 * into the sign position of a promoted int.
 */
static void right_shift_be(uint8_t *v)
{
    uint8_t carry = 0;   /* bit shifted out of the previous (more significant) byte */

    for (int i = 0; i < 16; i++) {
        uint8_t shifted_out = (uint8_t)(v[i] & 0x1);

        v[i] = (uint8_t)((v[i] >> 1) | (uint8_t)(carry << 7));
        carry = shifted_out;
    }
}
/* Multiplication in GF(2^128)
* z = x * y
*
* Steps:
* 1. Let x0.x1...x127 denote the sequence of bits in X.
* 2. Let Z0 =[0]128 and V0 = Y.
* 3. For i = 0 to 127, calculate blocks Zi+1 and Vi+1 as follows:
*
* Zi+1 = Zi if [x]i = 0, else Zi+1 = Zi ^ Vi
* Vi+1 = Vi >> 1 if LSB(Vi) = 0, else Vi+1 = (Vi >> 1) ^ R
*
* Note: as per AES-GCM spec 800-38D for Vi+1 calculation LSB(Vi)
* should be check for 1 but this is actually big endian format so
* we need to check MSB(V[15])
/*
* 32-bit integer manipulation macros (big endian)
*/
static void gcm_mult(const uint8_t *x, const uint8_t *y, uint8_t *z)
/* GET_UINT32_BE(n, b, i): load bytes b[i..i+3] as a big-endian 32-bit value
 * and assign it to n.
 * PUT_UINT32_BE(n, b, i): store the low 32 bits of n into b[i..i+3], most
 * significant byte first.
 *
 * Both are wrapped in do { } while (0) so that "MACRO(...);" is a single
 * statement and stays valid inside an un-braced if/else. Arguments are
 * evaluated more than once, so they must not have side effects.
 */
#ifndef GET_UINT32_BE
#define GET_UINT32_BE(n,b,i)                            \
    do {                                                \
        (n) = ( (uint32_t) (b)[(i)    ] << 24 )         \
            | ( (uint32_t) (b)[(i) + 1] << 16 )         \
            | ( (uint32_t) (b)[(i) + 2] <<  8 )         \
            | ( (uint32_t) (b)[(i) + 3]       );        \
    } while (0)
#endif
#ifndef PUT_UINT32_BE
#define PUT_UINT32_BE(n,b,i)                            \
    do {                                                \
        (b)[(i)    ] = (unsigned char) ( (n) >> 24 );   \
        (b)[(i) + 1] = (unsigned char) ( (n) >> 16 );   \
        (b)[(i) + 2] = (unsigned char) ( (n) >>  8 );   \
        (b)[(i) + 3] = (unsigned char) ( (n)       );   \
    } while (0)
#endif
/* Based on MbedTLS's implementation
*
* Precompute small multiples of H, that is set
* HH[i] || HL[i] = H times i,
* where i is seen as a field element as in [MGV], ie high-order bits
* correspond to low powers of P. The result is stored in the same way, that
* is the high-order bit of HH corresponds to P^0 and the low-order bit of HL
* corresponds to P^127.
*/
static int gcm_gen_table( esp_gcm_context *ctx )
{
uint8_t v[16];
int i, j;
uint32_t R = 0x000000E1; /* Field polynomial in Big endian format */
uint64_t hi, lo;
uint64_t vl, vh;
unsigned char *h;
memset(z, 0, 16); /* Z_0 = 0^128 */
memcpy(v, y, 16); /* V_0 = Y */
h = ctx->H;
for (i = 0; i < 16; i++) {
/* Test each bit in a byte of x[i]
* Again as per spec we need to test each bit
* in x from index 0 to 127, however its big
* endian format for each sub byte
*/
for (j = 0; j < 8; j++) {
if (x[i] & (1 << (7 - j))) {
xor_data(z, v);
}
/* https://pdfs.semanticscholar.org/1246/a9ad98dc0421ccfc945e6529c886f23e848d.pdf
* page 9
*/
if (v[15] & 0x1) {
right_shift_be(v);
v[0] ^= R;
} else {
right_shift_be(v);
}
/* pack h as two 64-bits ints, big-endian */
GET_UINT32_BE( hi, h, 0 );
GET_UINT32_BE( lo, h, 4 );
vh = (uint64_t) hi << 32 | lo;
GET_UINT32_BE( hi, h, 8 );
GET_UINT32_BE( lo, h, 12 );
vl = (uint64_t) hi << 32 | lo;
/* 8 = 1000 corresponds to 1 in GF(2^128) */
ctx->HL[8] = vl;
ctx->HH[8] = vh;
/* 0 corresponds to 0 in GF(2^128) */
ctx->HH[0] = 0;
ctx->HL[0] = 0;
for( i = 4; i > 0; i >>= 1 )
{
uint32_t T = ( vl & 1 ) * 0xe1000000U;
vl = ( vh << 63 ) | ( vl >> 1 );
vh = ( vh >> 1 ) ^ ( (uint64_t) T << 32);
ctx->HL[i] = vl;
ctx->HH[i] = vh;
}
for( i = 2; i <= 8; i *= 2 )
{
uint64_t *HiL = ctx->HL + i, *HiH = ctx->HH + i;
vh = *HiH;
vl = *HiL;
for( j = 1; j < i; j++ )
{
HiH[j] = vh ^ ctx->HH[j];
HiL[j] = vl ^ ctx->HL[j];
}
}
return( 0 );
}
/*
* Reduction table for Shoup's method of multiplication:
* last4[x] = x times P^128
* where x and last4[x] are seen as elements of GF(2^128) as in [MGV].
*
* gcm_mult() indexes this table with the four bits it shifts out of the low
* word of its accumulator, then XORs the entry, shifted left by 48, into the
* high word.
*/
static const uint64_t last4[16] =
{
0x0000, 0x1c20, 0x3840, 0x2460,
0x7080, 0x6ca0, 0x48c0, 0x54e0,
0xe100, 0xfd20, 0xd940, 0xc560,
0x9180, 0x8da0, 0xa9c0, 0xb5e0
};
/* Based on MbedTLS's implementation
*
* Sets output to x times H using the precomputed tables.
* x and output are seen as elements of GF(2^128) as in [MGV].
*
* ctx    - GCM context; only its HH[] / HL[] tables (filled by gcm_gen_table())
*          are read here
* x      - 16-byte input block
* output - 16-byte result; written only after all of x has been consumed, so
*          it may be the same buffer as x (esp_gcm_ghash() calls it that way)
*/
static void gcm_mult( esp_gcm_context *ctx, const unsigned char x[16],
unsigned char output[16] )
{
int i = 0;
unsigned char lo, hi, rem;
uint64_t zh, zl;
/* Seed the 128-bit accumulator zh:zl with the table entry for the low
 * nibble of the last byte. */
lo = x[15] & 0xf;
zh = ctx->HH[lo];
zl = ctx->HL[lo];
/* Walk x from byte 15 down to byte 0, folding in the low nibble and then
 * the high nibble of each byte. */
for( i = 15; i >= 0; i-- )
{
lo = x[i] & 0xf;
hi = x[i] >> 4;
/* The low nibble of byte 15 was already used to seed the accumulator. */
if( i != 15 )
{
/* Shift zh:zl right by 4 bits; the 4 bits that fall off select the
 * last4[] term XORed into the top of zh. */
rem = (unsigned char) zl & 0xf;
zl = ( zh << 60 ) | ( zl >> 4 );
zh = ( zh >> 4 );
zh ^= (uint64_t) last4[rem] << 48;
zh ^= ctx->HH[lo];
zl ^= ctx->HL[lo];
}
/* Same shift-and-reduce step, then add the table entry for the high nibble. */
rem = (unsigned char) zl & 0xf;
zl = ( zh << 60 ) | ( zl >> 4 );
zh = ( zh >> 4 );
zh ^= (uint64_t) last4[rem] << 48;
zh ^= ctx->HH[hi];
zl ^= ctx->HL[hi];
}
/* Store zh:zl as 16 big-endian bytes. */
PUT_UINT32_BE( zh >> 32, output, 0 );
PUT_UINT32_BE( zh, output, 4 );
PUT_UINT32_BE( zl >> 32, output, 8 );
PUT_UINT32_BE( zl, output, 12 );
}
@ -1024,14 +1239,16 @@ int esp_aes_gcm_setkey( esp_gcm_context *ctx,
return ( 0 );
}
/* AES-GCM GHASH calculation j0 = GHASH(x) using h0 hash key
/* AES-GCM GHASH calculation z = GHASH(x) using h0 hash key
*/
static void esp_gcm_ghash(uint8_t *h0, const unsigned char *x, size_t x_len, uint8_t *j0)
static void esp_gcm_ghash(esp_gcm_context *ctx, const unsigned char *x, size_t x_len, uint8_t *z)
{
uint8_t y0[AES_BLOCK_BYTES], tmp[AES_BLOCK_BYTES];
uint8_t tmp[AES_BLOCK_BYTES];
memset(tmp, 0, AES_BLOCK_BYTES);
/* GHASH(X) is calculated on input string which is multiple of 128 bits
* If input string bit length is not multiple of 128 bits it needs to
* be padded by 0
@ -1046,12 +1263,12 @@ static void esp_gcm_ghash(uint8_t *h0, const unsigned char *x, size_t x_len, uin
/* If input bit string is >= 128 bits, process full 128 bit blocks */
while (x_len >= AES_BLOCK_BYTES) {
xor_data(j0, x);
gcm_mult(j0, h0, y0);
xor_data(z, x);
gcm_mult(ctx, z, z);
x += AES_BLOCK_BYTES;
x_len -= AES_BLOCK_BYTES;
memcpy(j0, y0, AES_BLOCK_BYTES);
}
/* If input bit string is not multiple of 128 create last 128 bit
@ -1059,14 +1276,12 @@ static void esp_gcm_ghash(uint8_t *h0, const unsigned char *x, size_t x_len, uin
*/
if (x_len) {
memcpy(tmp, x, x_len);
xor_data(j0, tmp);
gcm_mult(j0, h0, y0);
memcpy(j0, y0, AES_BLOCK_BYTES);
xor_data(z, tmp);
gcm_mult(ctx, z, z);
}
}
/* Function to init AES GCM context to zero */
void esp_aes_gcm_init( esp_gcm_context *ctx)
{
@ -1103,6 +1318,22 @@ int esp_aes_gcm_starts( esp_gcm_context *ctx,
return ( MBEDTLS_ERR_GCM_BAD_INPUT );
}
if (!ctx) {
ESP_LOGE(TAG, "No AES context supplied");
return -1;
}
if (!iv) {
ESP_LOGE(TAG, "No IV supplied");
return -1;
}
if ( (aad_len > 0) && !aad) {
ESP_LOGE(TAG, "No aad supplied");
return -1;
}
/* Initialize AES-GCM context */
ctx->iv = iv;
ctx->iv_len = iv_len;
@ -1123,6 +1354,7 @@ int esp_aes_gcm_starts( esp_gcm_context *ctx,
memcpy(ctx->H, (uint8_t *)AES_H_BASE, AES_BLOCK_BYTES);
esp_aes_release_hardware();
gcm_gen_table(ctx);
/* Once H is obtained we need to derive J0 (Initial Counter Block) */
esp_gcm_derive_J0(ctx);
@ -1133,6 +1365,9 @@ int esp_aes_gcm_starts( esp_gcm_context *ctx,
*/
memcpy(ctx->ori_j0, ctx->J0, 16);
esp_gcm_ghash(ctx, ctx->aad, ctx->aad_len, ctx->ghash);
return ( 0 );
}
@ -1143,9 +1378,21 @@ int esp_aes_gcm_update( esp_gcm_context *ctx,
unsigned char *output )
{
size_t nc_off = 0;
uint8_t stream[AES_BLOCK_BYTES] = {0};
uint8_t nonce_counter[AES_BLOCK_BYTES] = {0};
uint8_t gcm_s[AES_BLOCK_BYTES] = {0};
uint8_t stream[AES_BLOCK_BYTES] = {0};
if (!ctx) {
ESP_LOGE(TAG, "No GCM context supplied");
return -1;
}
if (!input) {
ESP_LOGE(TAG, "No input supplied");
return -1;
}
if (!output) {
ESP_LOGE(TAG, "No output supplied");
return -1;
}
if ( output > input && (size_t) ( output - input ) < length ) {
return ( MBEDTLS_ERR_GCM_BAD_INPUT );
@ -1154,17 +1401,12 @@ int esp_aes_gcm_update( esp_gcm_context *ctx,
* calculate GHASH on aad and preincrement the ICB
*/
if (ctx->gcm_state == ESP_AES_GCM_STATE_INIT) {
/* The GHASH calculation is done at multiple stages
* Here we calculate GHASH of AAD and save it
*/
esp_gcm_ghash(ctx->H, ctx->aad, ctx->aad_len, gcm_s);
/* Jo needs to be incremented first time, later the GCTR
/* Jo needs to be incremented first time, later the CTR
* operation will auto update it
*/
increment32_j0(ctx, nonce_counter);
ctx->gcm_state = ESP_AES_GCM_STATE_UPDATE;
} else if (ctx->gcm_state == ESP_AES_GCM_STATE_UPDATE) {
memcpy(gcm_s, ctx->S, AES_BLOCK_BYTES);
memcpy(nonce_counter, ctx->J0, AES_BLOCK_BYTES);
}
@ -1178,12 +1420,11 @@ int esp_aes_gcm_update( esp_gcm_context *ctx,
/* Perform intermediate GHASH on "encrypted" data irrespective of mode */
if (ctx->mode == ESP_AES_DECRYPT) {
esp_gcm_ghash(ctx->H, input, length, gcm_s);
esp_gcm_ghash(ctx, input, length, ctx->ghash);
} else {
esp_gcm_ghash(ctx->H, output, length, gcm_s);
}
esp_gcm_ghash(ctx, output, length, ctx->ghash);
memcpy(ctx->S, gcm_s, AES_BLOCK_BYTES);
}
return 0;
}
@ -1203,10 +1444,10 @@ int esp_aes_gcm_finish( esp_gcm_context *ctx,
/* Calculate final GHASH on aad_len, data length */
ESP_PUT_BE64(len_block, ctx->aad_len * 8);
ESP_PUT_BE64(len_block + 8, ctx->data_len * 8);
esp_gcm_ghash(ctx->H, len_block, AES_BLOCK_BYTES, ctx->S);
esp_gcm_ghash(ctx, len_block, AES_BLOCK_BYTES, ctx->ghash);
/* Tag T = GCTR(J0, S) where T is truncated to tag_len */
esp_aes_crypt_ctr(&ctx->aes_ctx, tag_len, &nc_off, ctx->ori_j0, 0, ctx->S, tag);
/* Tag T = GCTR(J0, GHASH) where T is truncated to tag_len */
esp_aes_crypt_ctr(&ctx->aes_ctx, tag_len, &nc_off, ctx->ori_j0, 0, ctx->ghash, tag);
return 0;
}

View file

@ -43,10 +43,12 @@ typedef enum {
* \brief The GCM context structure.
*/
typedef struct {
uint8_t H[16]; /*!< H */
uint8_t S[16];
uint8_t H[16]; /*!< Initial hash value */
uint8_t ghash[16]; /*!< GHASH value. */
uint8_t J0[16];
uint8_t ori_j0[16];
uint64_t HL[16]; /*!< Precalculated HTable low. */
uint64_t HH[16]; /*!< Precalculated HTable high. */
uint8_t ori_j0[16]; /*!< J0 from first iteration. */
const uint8_t *iv;
size_t iv_len; /*!< The length of IV. */
uint64_t aad_len; /*!< The total length of the additional data. */

View file

@ -105,6 +105,114 @@ TEST_CASE("mbedtls CTR stream test", "[aes]")
free(decryptedtext);
}
/* Encrypts and then decrypts a 100-byte buffer with AES-128-GCM through the
 * streaming API (starts / update / finish), feeding the data in chunks of
 * 16, 32, ... 96 bytes. For every chunk size the ciphertext, the tag and the
 * recovered plaintext must match the reference values, and every mbedtls call
 * must report success.
 */
TEST_CASE("mbedtls GCM stream test", "[aes]")
{
    const unsigned SZ = 100;
    mbedtls_gcm_context ctx;
    uint8_t nonce[16];
    uint8_t key[16];
    uint8_t tag[16];
    mbedtls_cipher_id_t cipher = MBEDTLS_CIPHER_ID_AES;

    /* Cipher produced via this Python:
        import os, binascii
        from cryptography.hazmat.primitives.ciphers.aead import AESGCM

        key = b'\x56' * 16
        iv = b'\x89' * 16
        data = b'\xab' * 100

        aesgcm = AESGCM(key)

        ct = aesgcm.encrypt(iv, data, '')

        ct_arr = ""
        for idx, b in enumerate(ct):
            if idx % 8 == 0:
                ct_arr += '\n'
            ct_arr += "0x{}, ".format(binascii.hexlify(b))
        print(ct_arr)
    */
    const uint8_t expected_cipher[] = {
        0x03, 0x92, 0x13, 0x49, 0x1f, 0x1f, 0x24, 0x41,
        0xe8, 0xeb, 0x89, 0x47, 0x50, 0x0a, 0xce, 0xa3,
        0xc7, 0x1c, 0x10, 0x70, 0xb0, 0x89, 0x82, 0x5e,
        0x0f, 0x4a, 0x23, 0xee, 0xd2, 0xfc, 0xff, 0x45,
        0x61, 0x4c, 0xd1, 0xfb, 0x6d, 0xe2, 0xbe, 0x67,
        0x6f, 0x94, 0x72, 0xa3, 0xe7, 0x04, 0x99, 0xb3,
        0x4a, 0x46, 0xf9, 0x2b, 0xaf, 0xac, 0xa9, 0x0e,
        0x43, 0x7e, 0x8b, 0xc4, 0xbf, 0x49, 0xa4, 0x83,
        0x9c, 0x31, 0x11, 0x1c, 0x09, 0xac, 0x90, 0xdf,
        0x00, 0x34, 0x08, 0xe5, 0x70, 0xa3, 0x7e, 0x4b,
        0x36, 0x48, 0x5a, 0x3f, 0x28, 0xc7, 0x1c, 0xd9,
        0x1b, 0x1b, 0x49, 0x96, 0xe9, 0x7c, 0xea, 0x54,
        0x7c, 0x71, 0x29, 0x0d
    };
    const uint8_t expected_tag[] = {
        0x35, 0x1c, 0x21, 0xc6, 0xbc, 0x6b, 0x18, 0x52,
        0x90, 0xe1, 0xf2, 0x5b, 0xe1, 0xf6, 0x15, 0xee,
    };

    memset(nonce, 0x89, 16);
    memset(key, 0x56, 16);

    // allocate internal memory
    uint8_t *ciphertext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
    uint8_t *plaintext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
    uint8_t *decryptedtext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);

    TEST_ASSERT_NOT_NULL(ciphertext);
    TEST_ASSERT_NOT_NULL(plaintext);
    TEST_ASSERT_NOT_NULL(decryptedtext);

    memset(plaintext, 0xAB, SZ);

    /* Test that all the end results are the same
        no matter how many bytes we encrypt each call
        */
    for (unsigned bytes_to_process = 16; bytes_to_process < SZ; bytes_to_process += 16) {
        memset(nonce, 0x89, 16);
        memset(ciphertext, 0x0, SZ);
        memset(decryptedtext, 0x0, SZ);
        memset(tag, 0x0, 16);

        mbedtls_gcm_init(&ctx);
        TEST_ASSERT_EQUAL(0, mbedtls_gcm_setkey(&ctx, cipher, key, 128));
        TEST_ASSERT_EQUAL(0, mbedtls_gcm_starts( &ctx, MBEDTLS_AES_ENCRYPT, nonce, sizeof(nonce), NULL, 0 ));

        // Encrypt
        for (unsigned idx = 0; idx < SZ; idx += bytes_to_process) {
            // Limit length of last call to avoid exceeding buffer size
            size_t length = (idx + bytes_to_process > SZ) ? (SZ - idx) : bytes_to_process;
            TEST_ASSERT_EQUAL(0, mbedtls_gcm_update(&ctx, length, plaintext + idx, ciphertext + idx ));
        }
        TEST_ASSERT_EQUAL(0, mbedtls_gcm_finish( &ctx, tag, sizeof(tag) ));
        TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_cipher, ciphertext, SZ);
        TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_tag, tag, sizeof(tag));

        // Decrypt
        memset(nonce, 0x89, 16);
        /* Clear the tag so the check below cannot pass on the stale encrypt-side value */
        memset(tag, 0x0, 16);
        mbedtls_gcm_free( &ctx );
        mbedtls_gcm_init(&ctx);
        TEST_ASSERT_EQUAL(0, mbedtls_gcm_setkey(&ctx, cipher, key, 128));
        TEST_ASSERT_EQUAL(0, mbedtls_gcm_starts( &ctx, MBEDTLS_AES_DECRYPT, nonce, sizeof(nonce), NULL, 0 ));

        for (unsigned idx = 0; idx < SZ; idx += bytes_to_process) {
            // Limit length of last call to avoid exceeding buffer size
            size_t length = (idx + bytes_to_process > SZ) ? (SZ - idx) : bytes_to_process;
            TEST_ASSERT_EQUAL(0, mbedtls_gcm_update(&ctx, length, ciphertext + idx, decryptedtext + idx ));
        }
        TEST_ASSERT_EQUAL(0, mbedtls_gcm_finish( &ctx, tag, sizeof(tag) ));
        TEST_ASSERT_EQUAL_HEX8_ARRAY(plaintext, decryptedtext, SZ);
        /* The tag is computed over the ciphertext, so decryption must yield the same tag */
        TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_tag, tag, sizeof(tag));
        mbedtls_gcm_free( &ctx );
    }

    free(plaintext);
    free(ciphertext);
    free(decryptedtext);
}
TEST_CASE("mbedtls OFB stream test", "[aes]")
{
const unsigned SZ = 100;

View file

@ -72,7 +72,6 @@ TEST_CASE("mbedtls AES performance", "[aes]")
TEST_CASE("mbedtls AES GCM performance", "[aes]")
{
const unsigned CALLS = 1;
const unsigned CALL_SZ = 32 * 1024;
mbedtls_gcm_context ctx;
int64_t start, end;
@ -87,42 +86,60 @@ TEST_CASE("mbedtls AES GCM performance", "[aes]")
// allocate internal memory
uint8_t *buf = heap_caps_malloc(CALL_SZ, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL);
TEST_ASSERT_NOT_NULL(buf);
uint8_t aad[16];
memset(aad, 0x22, 16);
mbedtls_gcm_init(&ctx);
mbedtls_gcm_setkey( &ctx, cipher, key, 128);
start = esp_timer_get_time();
for (int c = 0; c < CALLS; c++) {
memset(buf, 0xAA, CALL_SZ);
mbedtls_gcm_crypt_and_tag(&ctx, MBEDTLS_AES_ENCRYPT, CALL_SZ, iv, sizeof(iv), NULL, 0, buf, buf, 16, tag_buf);
}
memset(buf, 0xAA, CALL_SZ);
mbedtls_gcm_crypt_and_tag(&ctx, MBEDTLS_AES_ENCRYPT, CALL_SZ, iv, sizeof(iv), aad, sizeof(aad), buf, buf, 16, tag_buf);
end = esp_timer_get_time();
/* Sanity check: make sure the last ciphertext block matches
what we expect to see.
Last block produced via this Python:
import os, binascii
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
key = b'\x44' * 16
iv = b'\xee' * 16
cipher = Cipher(algorithms.AES(key), modes.GCM(iv), backend=default_backend())
encryptor = cipher.encryptor()
ct = encryptor.update(b'\xaa' * 1 * 32 * 1024) + encryptor.finalize()
print(binascii.hexlify(ct[-16:]))
Last block and tag produced via this Python:
import os, binascii
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
key = b'\x44' * 16
iv = b'\xEE' * 16
data = b'\xAA' * 32 * 1024
aad = b'\x22' * 16
aesgcm = AESGCM(key)
ct = aesgcm.encrypt(iv, data, aad)
*/
const uint8_t expected_last_block[] = {
0x7d, 0x3d, 0x16, 0x84, 0xd0, 0xb4, 0x38, 0x30,
0xd1, 0x24, 0x6f, 0x7e, 0x9a, 0x9c, 0x81, 0x58,
};
TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_last_block, buf + CALL_SZ - 16, 16);
const uint8_t expected_tag[] = {
0x7e, 0x16, 0x04, 0x07, 0x4b, 0x7e, 0x6b, 0xf7,
0x5d, 0xce, 0x9e, 0x7d, 0x3f, 0x85, 0xc5, 0xa5,
};
TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_last_block, buf + CALL_SZ - 16 , 16);
TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_tag, tag_buf, 16);
free(buf);
float usecs = end - start;
// bytes/usec = MB/sec
float mb_sec = (CALL_SZ * CALLS) / usecs;
float mb_sec = CALL_SZ / usecs;
printf("GCM encryption rate %.3fMB/sec\n", mb_sec);
#ifdef CONFIG_MBEDTLS_HARDWARE_GCM
// Don't put a hard limit on software AES performance
TEST_PERFORMANCE_GREATER_THAN(AES_GCM_THROUGHPUT_MBSEC, "%.3fMB/sec", mb_sec);
#endif
}

View file

@ -495,7 +495,7 @@ UT_034:
UT_035:
extends: .unit_test_s2_template
parallel: 28
parallel: 29
tags:
- ESP32S2_IDF
- UT_T1_1

View file

@ -1,4 +1,4 @@
TEST_COMPONENTS=driver esp32s2 spi_flash
TEST_COMPONENTS=driver esp32s2 spi_flash mbedtls
CONFIG_ESP32S2_SPIRAM_SUPPORT=y
CONFIG_IDF_TARGET="esp32s2"
CONFIG_SPIRAM_OCCUPY_NO_HOST=y