From b75edc84e30faba558dd3dffe272a782a8d11e07 Mon Sep 17 00:00:00 2001 From: Marius Vikhammer Date: Wed, 8 Apr 2020 16:37:51 +0800 Subject: [PATCH] esp32s2 SHA: fall back to hashing block by block for non-DMA memory Also adds unit test for SHA with input buffer in flash Closes IDF-1529 --- .../include/esp32s2/idf_performance_target.h | 2 +- components/mbedtls/port/esp32s2/sha.c | 128 +++++++++++------- components/mbedtls/test/test_mbedtls_sha.c | 60 ++++++++ 3 files changed, 142 insertions(+), 48 deletions(-) diff --git a/components/idf_test/include/esp32s2/idf_performance_target.h b/components/idf_test/include/esp32s2/idf_performance_target.h index 64ba92930..1f2f6dc66 100644 --- a/components/idf_test/include/esp32s2/idf_performance_target.h +++ b/components/idf_test/include/esp32s2/idf_performance_target.h @@ -7,7 +7,7 @@ #define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 90.0 // esp_sha() time to process 32KB of input data from RAM #define IDF_PERFORMANCE_MAX_TIME_SHA1_32KB 900 -#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB 800 +#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB 900 #define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP 13500 #define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 130000 diff --git a/components/mbedtls/port/esp32s2/sha.c b/components/mbedtls/port/esp32s2/sha.c index fdaf71012..c9f1852e5 100644 --- a/components/mbedtls/port/esp32s2/sha.c +++ b/components/mbedtls/port/esp32s2/sha.c @@ -54,6 +54,9 @@ */ #define SHA_DMA_MAX_BYTES 3968 +/* The longest length of a single block is for SHA512 = 128 bytes */ +#define SHA_MAX_BLK_LEN 128 + const static char *TAG = "esp-sha"; /* Return block size (in bytes) for a given SHA type */ @@ -196,6 +199,59 @@ int esp_sha_512_t_init_hash(uint16_t t) return 0; } +static void esp_sha_fill_text_block(esp_sha_type sha_type, const void *input) +{ + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + uint32_t *data_words = NULL; + + /* Fill the data block */ + data_words = (uint32_t *)(input); + for (int i = 0; i < 
block_length(sha_type) / 4; i++) { + reg_addr_buf[i] = (data_words[i]); + } + asm volatile ("memw"); +} + +/* Hash a single SHA block */ +static void esp_sha_block(esp_sha_type sha_type, const void *input, bool is_first_block) +{ + esp_sha_fill_text_block(sha_type, input); + + esp_sha_wait_idle(); + /* Start hashing */ + if (is_first_block) { + REG_WRITE(SHA_START_REG, 1); + } else { + REG_WRITE(SHA_CONTINUE_REG, 1); + } +} + +/* Hash the input block by block, using non-DMA mode */ +static void esp_sha_block_mode(esp_sha_type sha_type, const uint8_t *input, uint32_t ilen, + const uint8_t *buf, uint32_t buf_len, bool is_first_block) +{ + size_t blk_len = 0; + int num_block = 0; + + blk_len = block_length(sha_type); + + REG_WRITE(SHA_MODE_REG, sha_type); + num_block = ilen / blk_len; + + if (buf_len != 0) { + esp_sha_block(sha_type, buf, is_first_block); + is_first_block = false; + } + + for (int i = 0; i < num_block; i++) { + esp_sha_block(sha_type, input + blk_len*i, is_first_block); + is_first_block = false; + } + + esp_sha_wait_idle(); +} + + static int esp_sha_dma_process(esp_sha_type sha_type, const void *input, uint32_t ilen, const void *buf, uint32_t buf_len, bool is_first_block); @@ -207,87 +263,65 @@ int esp_sha_dma(esp_sha_type sha_type, const void *input, uint32_t ilen, const void *buf, uint32_t buf_len, bool is_first_block) { int ret = 0; - const void *dma_input; - unsigned char *non_icache_input = NULL; - unsigned char *non_icache_buf = NULL; - int dma_op_num; - size_t dma_max_chunk_len = SHA_DMA_MAX_BYTES; + unsigned char *dma_cap_buf = NULL; + int dma_op_num = ( ilen / (SHA_DMA_MAX_BYTES + 1) ) + 1; - if (buf_len > 128) { + if (buf_len > block_length(sha_type)) { ESP_LOGE(TAG, "SHA DMA buf_len cannot exceed max size for a single block"); return -1; } -#if (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC) - if (esp_ptr_external_ram(input) || esp_ptr_external_ram(buf)) { - Cache_WriteBack_All(); + /* DMA cannot access memory in the iCache 
range, hash block by block instead of using DMA */ + if (!esp_ptr_dma_ext_capable(input) && !esp_ptr_dma_capable(input) && (ilen != 0)) { + esp_sha_block_mode(sha_type, input, ilen, buf, buf_len, is_first_block); + return 0; + } + +#if (CONFIG_ESP32S2_SPIRAM_SUPPORT) + if (esp_ptr_external_ram(input)) { + Cache_WriteBack_Addr((uint32_t)input, ilen); + } + if (esp_ptr_external_ram(buf)) { + Cache_WriteBack_Addr((uint32_t)buf, buf_len); } #endif + /* Copy to internal buf if buf is in non DMA capable memory */ if (!esp_ptr_dma_ext_capable(buf) && !esp_ptr_dma_capable(buf) && (buf_len != 0)) { - non_icache_buf = heap_caps_malloc(sizeof(unsigned char) * buf_len, MALLOC_CAP_DMA); - if (non_icache_buf == NULL) { + dma_cap_buf = heap_caps_malloc(sizeof(unsigned char) * buf_len, MALLOC_CAP_DMA); + if (dma_cap_buf == NULL) { ESP_LOGE(TAG, "Failed to allocate buf memory"); - ret = ESP_ERR_NO_MEM; + ret = -1; goto cleanup; } - memcpy(non_icache_buf, buf, buf_len); - buf = non_icache_buf; - } - - /* DMA cannot access memory in the iCache range, copy data to temporary buffers before transfer */ - if (!esp_ptr_dma_ext_capable(input) && !esp_ptr_dma_capable(input) && (ilen != 0)) { - non_icache_input = heap_caps_malloc(sizeof(unsigned char) * MIN(ilen, dma_max_chunk_len), MALLOC_CAP_DMA); - - if (non_icache_input == NULL) { - /* Allocate biggest available heap */ - size_t max_alloc_len = heap_caps_get_largest_free_block(MALLOC_CAP_DMA); - dma_max_chunk_len = max_alloc_len - max_alloc_len % block_length(sha_type); - non_icache_input = heap_caps_malloc(sizeof(unsigned char) * MIN(ilen, dma_max_chunk_len), MALLOC_CAP_DMA); - - if (non_icache_input == NULL) { - ESP_LOGE(TAG, "Failed to allocate input memory"); - ret = ESP_ERR_NO_MEM; - goto cleanup; - } - } + memcpy(dma_cap_buf, buf, buf_len); + buf = dma_cap_buf; } /* The max amount of blocks in a single hardware operation is 2^6 - 1 = 63 Thus we only do a single DMA input list + dma buf list, which is max 3968/64 + 64/64 = 63 blocks 
*/ - dma_op_num = ( ilen / (dma_max_chunk_len + 1) ) + 1; for (int i = 0; i < dma_op_num; i++) { - int dma_chunk_len = MIN(ilen, dma_max_chunk_len); + int dma_chunk_len = MIN(ilen, SHA_DMA_MAX_BYTES); - /* Input depends on if it's a temp alloc buffer or supplied by user */ - if (non_icache_input != NULL) { - memcpy(non_icache_input, input, dma_chunk_len); - dma_input = non_icache_input; - } else { - dma_input = input; - } - - ret = esp_sha_dma_process(sha_type, dma_input, dma_chunk_len, buf, buf_len, is_first_block); - + ret = esp_sha_dma_process(sha_type, input, dma_chunk_len, buf, buf_len, is_first_block); if (ret != 0) { goto cleanup; } - is_first_block = false; ilen -= dma_chunk_len; input += dma_chunk_len; // Only append buf to the first operation buf_len = 0; + is_first_block = false; } cleanup: - free(non_icache_input); - free(non_icache_buf); + free(dma_cap_buf); return ret; } diff --git a/components/mbedtls/test/test_mbedtls_sha.c b/components/mbedtls/test/test_mbedtls_sha.c index 3989d000a..590bf2a16 100644 --- a/components/mbedtls/test/test_mbedtls_sha.c +++ b/components/mbedtls/test/test_mbedtls_sha.c @@ -305,6 +305,66 @@ TEST_CASE("mbedtls SHA session passed between tasks", "[mbedtls]") TEST_ASSERT_EQUAL_MEMORY_MESSAGE(sha256_thousand_as, param.result, 32, "SHA256 result from other task"); } + + + +/* Random input generated and hashed using python: + + import hashlib + import os, binascii + + input = bytearray(os.urandom(150)) + arr = '' + for idx, b in enumerate(input): + if idx % 8 == 0: + arr += '\n' + arr += "{}, ".format(hex(b)) + digest = hashlib.sha256(input).hexdigest() + +*/ +const uint8_t test_vector[] = { + 0xe4, 0x1a, 0x1a, 0x30, 0x71, 0xd3, 0x94, 0xb0, + 0xc3, 0x7e, 0x99, 0x9f, 0x1a, 0xde, 0x4a, 0x36, + 0xb1, 0x1, 0x81, 0x2b, 0x41, 0x91, 0x11, 0x7f, + 0xd8, 0xe1, 0xd5, 0xe5, 0x52, 0x6d, 0x92, 0xee, + 0x6c, 0xf7, 0x70, 0xea, 0x3a, 0xb, 0xc9, 0x97, + 0xc0, 0x12, 0x6f, 0x10, 0x5b, 0x90, 0xd8, 0x52, + 0x91, 0x69, 0xea, 0xc4, 0x1f, 0xc, 0xcf, 
0xc6, + 0xf0, 0x43, 0xc6, 0xa3, 0x1f, 0x46, 0x3c, 0x3d, + 0x25, 0xe5, 0xa8, 0x27, 0x86, 0x85, 0x32, 0x3f, + 0x33, 0xd8, 0x40, 0xc4, 0x41, 0xf6, 0x4b, 0x12, + 0xd8, 0x5e, 0x4, 0x27, 0x42, 0x90, 0x73, 0x4, + 0x8, 0x42, 0xd1, 0x64, 0xd, 0x84, 0x3, 0x1, + 0x76, 0x88, 0xe4, 0x95, 0xdf, 0xe7, 0x62, 0xb4, + 0xb3, 0xb2, 0x7e, 0x6d, 0x78, 0xca, 0x79, 0x82, + 0xcc, 0xba, 0x22, 0xd2, 0x90, 0x2e, 0xe3, 0xa8, + 0x2a, 0x53, 0x3a, 0xb1, 0x9a, 0x7f, 0xb7, 0x8b, + 0xfa, 0x32, 0x47, 0xc1, 0x5c, 0x6, 0x4f, 0x7b, + 0xcd, 0xb3, 0xf4, 0xf1, 0xd0, 0xb5, 0xbf, 0xfb, + 0x7c, 0xc3, 0xa5, 0xb2, 0xc4, 0xd4, +}; + +const uint8_t test_vector_digest[] = { + 0xff, 0x1c, 0x60, 0xcb, 0x21, 0xf0, 0x63, 0x68, + 0xb9, 0xfc, 0xfe, 0xad, 0x3e, 0xb0, 0x2e, 0xd1, + 0xf9, 0x08, 0x82, 0x82, 0x83, 0x06, 0xc1, 0x8a, + 0x98, 0x5d, 0x36, 0xc0, 0xb7, 0xeb, 0x35, 0xe0, +}; + +TEST_CASE("mbedtls SHA, input in flash", "[mbedtls]") +{ + mbedtls_sha256_context sha256_ctx; + unsigned char sha256[32]; + + mbedtls_sha256_init(&sha256_ctx); + + TEST_ASSERT_EQUAL(0, mbedtls_sha256_starts_ret(&sha256_ctx, false)); + TEST_ASSERT_EQUAL(0, mbedtls_sha256_update_ret(&sha256_ctx, test_vector, sizeof(test_vector))); + TEST_ASSERT_EQUAL(0, mbedtls_sha256_finish_ret(&sha256_ctx, sha256)); + + TEST_ASSERT_EQUAL_MEMORY_MESSAGE(test_vector_digest, sha256, 32, "SHA256 calculation"); +} + /* ESP32 do not have SHA512/t functions */ #if !DISABLED_FOR_TARGETS(ESP32)