Merge branch 'feature/block_sha_fallback' into 'master'

esp32s2 SHA: fallback to hashing block by block for non DMA memory

Closes IDF-1529

See merge request espressif/esp-idf!8293
This commit is contained in:
Angus Gratton 2020-06-25 09:30:39 +08:00
commit 22d9ff5b05
3 changed files with 142 additions and 48 deletions

View file

@ -7,7 +7,7 @@
#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 90.0
// esp_sha() time to process 32KB of input data from RAM
#define IDF_PERFORMANCE_MAX_TIME_SHA1_32KB 900
#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB 800
#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB 900
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP 13500
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 130000

View file

@ -54,6 +54,9 @@
*/
#define SHA_DMA_MAX_BYTES 3968
/* The longest length of a single block is for SHA512 = 128 byte */
#define SHA_MAX_BLK_LEN 128
const static char *TAG = "esp-sha";
/* Return block size (in bytes) for a given SHA type */
@ -196,6 +199,59 @@ int esp_sha_512_t_init_hash(uint16_t t)
return 0;
}
/*
 * Copy one message block from `input` into the SHA text registers that start
 * at SHA_TEXT_BASE, one 32-bit word at a time. The number of words written is
 * block_length(sha_type) / 4.
 *
 * NOTE(review): `input` is read as 32-bit words, so it is presumably expected
 * to be 4-byte aligned - confirm against callers.
 */
static void esp_sha_fill_text_block(esp_sha_type sha_type, const void *input)
{
    /* The destination is memory-mapped hardware: go through a volatile pointer
       so the compiler can neither drop these stores nor move them past the
       barrier below or a later (volatile) register write. */
    volatile uint32_t *reg_addr_buf = (volatile uint32_t *)(SHA_TEXT_BASE);
    const uint32_t *data_words = (const uint32_t *)input;
    /* Loop-invariant, so evaluate it once instead of on every iteration */
    const size_t word_count = block_length(sha_type) / 4;

    /* Fill the data block */
    for (size_t i = 0; i < word_count; i++) {
        reg_addr_buf[i] = data_words[i];
    }
    /* memw: wait for the preceding memory accesses to complete */
    asm volatile ("memw");
}
/*
 * Hash a single SHA block: load it into the text registers, wait for
 * esp_sha_wait_idle() to return, then trigger the hardware.
 * The first block of a message is triggered through SHA_START_REG,
 * every following block through SHA_CONTINUE_REG.
 */
static void esp_sha_block(esp_sha_type sha_type, const void *input, bool is_first_block)
{
    esp_sha_fill_text_block(sha_type, input);
    esp_sha_wait_idle();

    /* Kick off the computation for this block */
    if (!is_first_block) {
        REG_WRITE(SHA_CONTINUE_REG, 1);
        return;
    }
    REG_WRITE(SHA_START_REG, 1);
}
/*
 * Hash the input one block at a time through the text registers (non-DMA mode).
 *
 * If buf_len is non-zero, `buf` is hashed first as a single block. After that,
 * ilen / block_length(sha_type) whole blocks are hashed from `input`; any
 * trailing partial block in `input` is not processed here.
 * Returns once esp_sha_wait_idle() has returned after the last block.
 */
static void esp_sha_block_mode(esp_sha_type sha_type, const uint8_t *input, uint32_t ilen,
                               const uint8_t *buf, uint32_t buf_len, bool is_first_block)
{
    const size_t block_bytes = block_length(sha_type);

    REG_WRITE(SHA_MODE_REG, sha_type);

    /* The separately supplied block, if any, goes in ahead of the input */
    if (buf_len != 0) {
        esp_sha_block(sha_type, buf, is_first_block);
        is_first_block = false;
    }

    /* Walk the input one whole block at a time */
    const uint8_t *cursor = input;
    for (int remaining = ilen / block_bytes; remaining > 0; remaining--) {
        esp_sha_block(sha_type, cursor, is_first_block);
        is_first_block = false;
        cursor += block_bytes;
    }

    esp_sha_wait_idle();
}
static int esp_sha_dma_process(esp_sha_type sha_type, const void *input, uint32_t ilen,
const void *buf, uint32_t buf_len, bool is_first_block);
@ -207,87 +263,65 @@ int esp_sha_dma(esp_sha_type sha_type, const void *input, uint32_t ilen,
const void *buf, uint32_t buf_len, bool is_first_block)
{
int ret = 0;
const void *dma_input;
unsigned char *non_icache_input = NULL;
unsigned char *non_icache_buf = NULL;
int dma_op_num;
size_t dma_max_chunk_len = SHA_DMA_MAX_BYTES;
unsigned char *dma_cap_buf = NULL;
int dma_op_num = ( ilen / (SHA_DMA_MAX_BYTES + 1) ) + 1;
if (buf_len > 128) {
if (buf_len > block_length(sha_type)) {
ESP_LOGE(TAG, "SHA DMA buf_len cannot exceed max size for a single block");
return -1;
}
#if (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC)
if (esp_ptr_external_ram(input) || esp_ptr_external_ram(buf)) {
Cache_WriteBack_All();
/* DMA cannot access memory in the iCache range, hash block by block instead of using DMA */
if (!esp_ptr_dma_ext_capable(input) && !esp_ptr_dma_capable(input) && (ilen != 0)) {
esp_sha_block_mode(sha_type, input, ilen, buf, buf_len, is_first_block);
return 0;
}
#if (CONFIG_ESP32S2_SPIRAM_SUPPORT)
if (esp_ptr_external_ram(input)) {
Cache_WriteBack_Addr((uint32_t)input, ilen);
}
if (esp_ptr_external_ram(buf)) {
Cache_WriteBack_Addr((uint32_t)buf, buf_len);
}
#endif
/* Copy to internal buf if buf is in non DMA capable memory */
if (!esp_ptr_dma_ext_capable(buf) && !esp_ptr_dma_capable(buf) && (buf_len != 0)) {
non_icache_buf = heap_caps_malloc(sizeof(unsigned char) * buf_len, MALLOC_CAP_DMA);
if (non_icache_buf == NULL) {
dma_cap_buf = heap_caps_malloc(sizeof(unsigned char) * buf_len, MALLOC_CAP_DMA);
if (dma_cap_buf == NULL) {
ESP_LOGE(TAG, "Failed to allocate buf memory");
ret = ESP_ERR_NO_MEM;
ret = -1;
goto cleanup;
}
memcpy(non_icache_buf, buf, buf_len);
buf = non_icache_buf;
}
/* DMA cannot access memory in the iCache range, copy data to temporary buffers before transfer */
if (!esp_ptr_dma_ext_capable(input) && !esp_ptr_dma_capable(input) && (ilen != 0)) {
non_icache_input = heap_caps_malloc(sizeof(unsigned char) * MIN(ilen, dma_max_chunk_len), MALLOC_CAP_DMA);
if (non_icache_input == NULL) {
/* Allocate biggest available heap */
size_t max_alloc_len = heap_caps_get_largest_free_block(MALLOC_CAP_DMA);
dma_max_chunk_len = max_alloc_len - max_alloc_len % block_length(sha_type);
non_icache_input = heap_caps_malloc(sizeof(unsigned char) * MIN(ilen, dma_max_chunk_len), MALLOC_CAP_DMA);
if (non_icache_input == NULL) {
ESP_LOGE(TAG, "Failed to allocate input memory");
ret = ESP_ERR_NO_MEM;
goto cleanup;
}
}
memcpy(dma_cap_buf, buf, buf_len);
buf = dma_cap_buf;
}
/* The max amount of blocks in a single hardware operation is 2^6 - 1 = 63
Thus we only do a single DMA input list + dma buf list,
which is max 3968/64 + 64/64 = 63 blocks */
dma_op_num = ( ilen / (dma_max_chunk_len + 1) ) + 1;
for (int i = 0; i < dma_op_num; i++) {
int dma_chunk_len = MIN(ilen, dma_max_chunk_len);
int dma_chunk_len = MIN(ilen, SHA_DMA_MAX_BYTES);
/* Input depends on if it's a temp alloc buffer or supplied by user */
if (non_icache_input != NULL) {
memcpy(non_icache_input, input, dma_chunk_len);
dma_input = non_icache_input;
} else {
dma_input = input;
}
ret = esp_sha_dma_process(sha_type, dma_input, dma_chunk_len, buf, buf_len, is_first_block);
ret = esp_sha_dma_process(sha_type, input, dma_chunk_len, buf, buf_len, is_first_block);
if (ret != 0) {
goto cleanup;
}
is_first_block = false;
ilen -= dma_chunk_len;
input += dma_chunk_len;
// Only append buf to the first operation
buf_len = 0;
is_first_block = false;
}
cleanup:
free(non_icache_input);
free(non_icache_buf);
free(dma_cap_buf);
return ret;
}

View file

@ -305,6 +305,66 @@ TEST_CASE("mbedtls SHA session passed between tasks", "[mbedtls]")
TEST_ASSERT_EQUAL_MEMORY_MESSAGE(sha256_thousand_as, param.result, 32, "SHA256 result from other task");
}
/* 150 bytes of random input used by the SHA-256 test below.
   The data and its digest were produced with this python snippet:
import hashlib
import os, binascii
input = bytearray(os.urandom(150))
arr = ''
for idx, b in enumerate(input):
if idx % 8 == 0:
arr += '\n'
arr += "{}, ".format(hex(b))
digest = hashlib.sha256(input).hexdigest()
*/
const uint8_t test_vector[] = {
    0xe4, 0x1a, 0x1a, 0x30, 0x71, 0xd3, 0x94, 0xb0,
    0xc3, 0x7e, 0x99, 0x9f, 0x1a, 0xde, 0x4a, 0x36,
    0xb1, 0x01, 0x81, 0x2b, 0x41, 0x91, 0x11, 0x7f,
    0xd8, 0xe1, 0xd5, 0xe5, 0x52, 0x6d, 0x92, 0xee,
    0x6c, 0xf7, 0x70, 0xea, 0x3a, 0x0b, 0xc9, 0x97,
    0xc0, 0x12, 0x6f, 0x10, 0x5b, 0x90, 0xd8, 0x52,
    0x91, 0x69, 0xea, 0xc4, 0x1f, 0x0c, 0xcf, 0xc6,
    0xf0, 0x43, 0xc6, 0xa3, 0x1f, 0x46, 0x3c, 0x3d,
    0x25, 0xe5, 0xa8, 0x27, 0x86, 0x85, 0x32, 0x3f,
    0x33, 0xd8, 0x40, 0xc4, 0x41, 0xf6, 0x4b, 0x12,
    0xd8, 0x5e, 0x04, 0x27, 0x42, 0x90, 0x73, 0x04,
    0x08, 0x42, 0xd1, 0x64, 0x0d, 0x84, 0x03, 0x01,
    0x76, 0x88, 0xe4, 0x95, 0xdf, 0xe7, 0x62, 0xb4,
    0xb3, 0xb2, 0x7e, 0x6d, 0x78, 0xca, 0x79, 0x82,
    0xcc, 0xba, 0x22, 0xd2, 0x90, 0x2e, 0xe3, 0xa8,
    0x2a, 0x53, 0x3a, 0xb1, 0x9a, 0x7f, 0xb7, 0x8b,
    0xfa, 0x32, 0x47, 0xc1, 0x5c, 0x06, 0x4f, 0x7b,
    0xcd, 0xb3, 0xf4, 0xf1, 0xd0, 0xb5, 0xbf, 0xfb,
    0x7c, 0xc3, 0xa5, 0xb2, 0xc4, 0xd4,
};
/* Expected SHA-256 digest (32 bytes) that the "input in flash" test compares
   against the hash it computes over test_vector */
const uint8_t test_vector_digest[] = {
0xff, 0x1c, 0x60, 0xcb, 0x21, 0xf0, 0x63, 0x68,
0xb9, 0xfc, 0xfe, 0xad, 0x3e, 0xb0, 0x2e, 0xd1,
0xf9, 0x08, 0x82, 0x82, 0x83, 0x06, 0xc1, 0x8a,
0x98, 0x5d, 0x36, 0xc0, 0xb7, 0xeb, 0x35, 0xe0,
};
/*
 * Hash the file-scope constant test_vector with mbedtls SHA-256 in a single
 * update call and compare the result against test_vector_digest.
 * NOTE(review): going by the test name, test_vector is expected to be placed
 * in flash - confirm against the linker placement of const data.
 */
TEST_CASE("mbedtls SHA, input in flash", "[mbedtls]")
{
    mbedtls_sha256_context sha256_ctx;
    unsigned char sha256[32];

    mbedtls_sha256_init(&sha256_ctx);
    /* Second argument is the is224 flag: false selects SHA-256 */
    TEST_ASSERT_EQUAL(0, mbedtls_sha256_starts_ret(&sha256_ctx, false));
    TEST_ASSERT_EQUAL(0, mbedtls_sha256_update_ret(&sha256_ctx, test_vector, sizeof(test_vector)));
    TEST_ASSERT_EQUAL(0, mbedtls_sha256_finish_ret(&sha256_ctx, sha256));
    /* Every initialised context must be released again */
    mbedtls_sha256_free(&sha256_ctx);

    TEST_ASSERT_EQUAL_MEMORY_MESSAGE(test_vector_digest, sha256, sizeof(sha256), "SHA256 calculation");
}
/* ESP32 does not have SHA512/t functions */
#if !DISABLED_FOR_TARGETS(ESP32)