From 6a6fbde83db96b6e6c3e430c584051f7e28f3142 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Fri, 8 Mar 2019 16:14:15 +1100 Subject: [PATCH 1/3] esp32 hwcrypto: Prevent esp_sha() from disabling interrupts for extended period * Closes https://github.com/espressif/esp-idf/issues/3127 * Closes IDFGH-681 Also reported at https://esp32.com/viewtopic.php?f=13&t=9506 --- components/esp32/hwcrypto/sha.c | 32 +++++++++++++----- components/esp32/test/test_sha.c | 57 ++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 8 deletions(-) create mode 100644 components/esp32/test/test_sha.c diff --git a/components/esp32/hwcrypto/sha.c b/components/esp32/hwcrypto/sha.c index fe42997a1..ac0c8c5e0 100644 --- a/components/esp32/hwcrypto/sha.c +++ b/components/esp32/hwcrypto/sha.c @@ -309,26 +309,42 @@ void esp_sha(esp_sha_type sha_type, const unsigned char *input, size_t ilen, uns SHA_CTX ctx; ets_sha_init(&ctx); - esp_sha_lock_memory_block(); + while(ilen > 0) { size_t chunk_len = (ilen > block_len) ? block_len : ilen; + + // Wait for idle before entering critical section + // (to reduce time spent in it), then check after esp_sha_wait_idle(); + esp_sha_lock_memory_block(); + esp_sha_wait_idle(); + DPORT_STALL_OTHER_CPU_START(); { // This SHA ROM function reads DPORT regs ets_sha_update(&ctx, sha_type, input, chunk_len * 8); } DPORT_STALL_OTHER_CPU_END(); + input += chunk_len; ilen -= chunk_len; + + if (ilen == 0) { + /* Finish the last block before releasing the memory + block lock, as ets_sha_update() may have written data to + the memory block already (partial last block) and hardware + hasn't yet processed it. + */ + DPORT_STALL_OTHER_CPU_START(); + { + // This SHA ROM function also reads DPORT regs + ets_sha_finish(&ctx, sha_type, output); + } + DPORT_STALL_OTHER_CPU_END(); + } + + esp_sha_unlock_memory_block(); } - esp_sha_wait_idle(); - DPORT_STALL_OTHER_CPU_START(); - { - ets_sha_finish(&ctx, sha_type, output); - } - DPORT_STALL_OTHER_CPU_END(); - esp_sha_unlock_memory_block(); esp_sha_unlock_engine(sha_type); } diff --git a/components/esp32/test/test_sha.c b/components/esp32/test/test_sha.c new file mode 100644 index 000000000..5f85c74fd --- /dev/null +++ b/components/esp32/test/test_sha.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include "esp_types.h" +#include "esp_clk.h" + +#include "unity.h" +#include "test_utils.h" +#include "mbedtls/sha1.h" +#include "mbedtls/sha256.h" +#include "mbedtls/sha512.h" +#include "hwcrypto/sha.h" + +/* Note: Most of the SHA functions are called as part of mbedTLS, so +are tested as part of mbedTLS tests. Only esp_sha() is different. +*/ + +TEST_CASE("Test esp_sha() function", "[hw_crypto]") +{ + const void* ptr; + spi_flash_mmap_handle_t handle; + uint8_t sha1_espsha[20] = { 0 }; + uint8_t sha1_mbedtls[20] = { 0 }; + uint8_t sha256_espsha[32] = { 0 }; + uint8_t sha256_mbedtls[32] = { 0 }; + uint8_t sha512_espsha[64] = { 0 }; + uint8_t sha512_mbedtls[64] = { 0 }; + + const size_t LEN = 1024 * 1024; + + /* mmap() 1MB of flash, we don't care what it is really */ + esp_err_t err = spi_flash_mmap(0x0, LEN, SPI_FLASH_MMAP_DATA, &ptr, &handle); + + TEST_ASSERT_EQUAL_HEX32(ESP_OK, err); + TEST_ASSERT_NOT_NULL(ptr); + + /* Compare esp_sha() result to the mbedTLS result, should always be the same */ + + esp_sha(SHA1, ptr, LEN, sha1_espsha); + int r = mbedtls_sha1_ret(ptr, LEN, sha1_mbedtls); + TEST_ASSERT_EQUAL(0, r); + + esp_sha(SHA2_256, ptr, LEN, sha256_espsha); + r = mbedtls_sha256_ret(ptr, LEN, sha256_mbedtls, 0); + TEST_ASSERT_EQUAL(0, r); + + esp_sha(SHA2_512, ptr, LEN, sha512_espsha); + r = mbedtls_sha512_ret(ptr, LEN, sha512_mbedtls, 0); + TEST_ASSERT_EQUAL(0, r); + + TEST_ASSERT_EQUAL_MEMORY_MESSAGE(sha1_espsha, sha1_mbedtls, sizeof(sha1_espsha), "SHA1 results should match"); + + TEST_ASSERT_EQUAL_MEMORY_MESSAGE(sha256_espsha, sha256_mbedtls, sizeof(sha256_espsha), "SHA256 results should match"); + + TEST_ASSERT_EQUAL_MEMORY_MESSAGE(sha512_espsha, sha512_mbedtls, sizeof(sha512_espsha), "SHA512 results should match"); +} + From 1cc726b2a3e824713f51111fc0e5fa2c8b79dc91 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Fri, 8 Mar 2019 16:16:55 +1100 Subject: [PATCH 2/3] secure boot: Use mbedtls_sha256() not esp_sha() Latter is probably compiled into most firmwares already, saves some size. Ref https://github.com/espressif/esp-idf/issues/3127 --- components/bootloader_support/src/secure_boot_signatures.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/bootloader_support/src/secure_boot_signatures.c b/components/bootloader_support/src/secure_boot_signatures.c index ddb7ad73a..b6681bc79 100644 --- a/components/bootloader_support/src/secure_boot_signatures.c +++ b/components/bootloader_support/src/secure_boot_signatures.c @@ -25,7 +25,7 @@ #include "rom/sha.h" typedef SHA_CTX sha_context; #else -#include "hwcrypto/sha.h" +#include "mbedtls/sha256.h" #endif static const char* TAG = "secure_boot"; @@ -57,8 +57,8 @@ esp_err_t esp_secure_boot_verify_signature(uint32_t src_addr, uint32_t length) bootloader_sha256_data(handle, data, length); bootloader_sha256_finish(handle, digest); #else - /* Use thread-safe esp-idf SHA function */ - esp_sha(SHA2_256, data, length, digest); + /* Use thread-safe mbedTLS version */ + mbedtls_sha256_ret(data, length, digest, 0); #endif // Map the signature block and verify the signature From 12bdf8e45b566c013d5aa48e1cd3de34c9d25eab Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Mon, 11 Mar 2019 18:24:32 +1100 Subject: [PATCH 3/3] esp32: Chunk input blocks for esp_sha() function performance, add perf test --- components/esp32/hwcrypto/sha.c | 25 ++++++--- components/esp32/test/test_sha.c | 53 ++++++++++++++++++- components/idf_test/include/idf_performance.h | 3 ++ 3 files changed, 72 insertions(+), 9 deletions(-) diff --git a/components/esp32/hwcrypto/sha.c b/components/esp32/hwcrypto/sha.c index ac0c8c5e0..422065f9c 100644 --- a/components/esp32/hwcrypto/sha.c +++ b/components/esp32/hwcrypto/sha.c @@ -305,30 +305,39 @@ void esp_sha(esp_sha_type sha_type, const unsigned char *input, size_t ilen, uns { size_t block_len = block_length(sha_type); + // Max number of blocks to pass per each call to esp_sha_lock_memory_block() + // (keep low enough to avoid starving interrupt handlers, especially if reading + // data into SHA via flash cache, but high enough that spinlock overhead is low) + const size_t BLOCKS_PER_CHUNK = 100; + const size_t MAX_CHUNK_LEN = BLOCKS_PER_CHUNK * block_len; + esp_sha_lock_engine(sha_type); SHA_CTX ctx; ets_sha_init(&ctx); - while(ilen > 0) { - size_t chunk_len = (ilen > block_len) ? block_len : ilen; + while (ilen > 0) { + size_t chunk_len = (ilen > MAX_CHUNK_LEN) ? MAX_CHUNK_LEN : ilen; // Wait for idle before entering critical section - // (to reduce time spent in it), then check after + // (to reduce time spent in it), then check again after esp_sha_wait_idle(); esp_sha_lock_memory_block(); esp_sha_wait_idle(); DPORT_STALL_OTHER_CPU_START(); - { + while (chunk_len > 0) { // This SHA ROM function reads DPORT regs - ets_sha_update(&ctx, sha_type, input, chunk_len * 8); + // (can accept max one SHA block each call) + size_t update_len = (chunk_len > block_len) ? block_len : chunk_len; + ets_sha_update(&ctx, sha_type, input, update_len * 8); + + input += update_len; + chunk_len -= update_len; + ilen -= update_len; } DPORT_STALL_OTHER_CPU_END(); - input += chunk_len; - ilen -= chunk_len; - if (ilen == 0) { /* Finish the last block before releasing the memory block lock, as ets_sha_update() may have written data to diff --git a/components/esp32/test/test_sha.c b/components/esp32/test/test_sha.c index 5f85c74fd..d64b86760 100644 --- a/components/esp32/test/test_sha.c +++ b/components/esp32/test/test_sha.c @@ -3,6 +3,10 @@ #include #include "esp_types.h" #include "esp_clk.h" +#include "esp_log.h" +#include "esp_timer.h" +#include "esp_heap_caps.h" +#include "idf_performance.h" #include "unity.h" #include "test_utils.h" @@ -15,7 +19,54 @@ are tested as part of mbedTLS tests. Only esp_sha() is different. */ -TEST_CASE("Test esp_sha() function", "[hw_crypto]") +#define TAG "sha_test" + +TEST_CASE("Test esp_sha()", "[hw_crypto]") +{ + const size_t BUFFER_SZ = 32 * 1024 + 6; // NB: not an exact multiple of SHA block size + + int64_t begin, end; + uint32_t us_sha1, us_sha512; + uint8_t sha1_result[20] = { 0 }; + uint8_t sha512_result[64] = { 0 }; + void *buffer = heap_caps_malloc(BUFFER_SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL); + TEST_ASSERT_NOT_NULL(buffer); + memset(buffer, 0xEE, BUFFER_SZ); + + const uint8_t sha1_expected[20] = { 0xc7, 0xbb, 0xd3, 0x74, 0xf2, 0xf6, 0x20, 0x86, + 0x61, 0xf4, 0x50, 0xd5, 0xf5, 0x18, 0x44, 0xcc, + 0x7a, 0xb7, 0xa5, 0x4a }; + const uint8_t sha512_expected[64] = { 0xc7, 0x7f, 0xda, 0x8c, 0xb3, 0x58, 0x14, 0x8a, + 0x52, 0x3b, 0x46, 0x04, 0xc0, 0x85, 0xc5, 0xf0, + 0x46, 0x64, 0x14, 0xd5, 0x96, 0x7a, 0xa2, 0x80, + 0x20, 0x9c, 0x04, 0x27, 0x7d, 0x3b, 0xf9, 0x1f, + 0xb2, 0xa3, 0x45, 0x3c, 0xa1, 0x6a, 0x8d, 0xdd, + 0x35, 0x5e, 0x35, 0x57, 0x76, 0x22, 0x74, 0xd8, + 0x1e, 0x07, 0xc6, 0xa2, 0x9e, 0x3b, 0x65, 0x75, + 0x80, 0x7d, 0xe6, 0x6e, 0x47, 0x61, 0x2c, 0x94 }; + + begin = esp_timer_get_time(); + esp_sha(SHA1, buffer, BUFFER_SZ, sha1_result); + end = esp_timer_get_time(); + TEST_ASSERT_EQUAL_HEX8_ARRAY(sha1_expected, sha1_result, sizeof(sha1_expected)); + us_sha1 = end - begin; + ESP_LOGI(TAG, "esp_sha() 32KB SHA1 in %u us", us_sha1); + + begin = esp_timer_get_time(); + esp_sha(SHA2_512, buffer, BUFFER_SZ, sha512_result); + end = esp_timer_get_time(); + TEST_ASSERT_EQUAL_HEX8_ARRAY(sha512_expected, sha512_result, sizeof(sha512_expected)); + + us_sha512 = end - begin; + ESP_LOGI(TAG, "esp_sha() 32KB SHA512 in %u us", us_sha512); + + free(buffer); + + TEST_PERFORMANCE_LESS_THAN(ESP32_TIME_SHA1_32KB, "%dus", us_sha1); + TEST_PERFORMANCE_LESS_THAN(ESP32_TIME_SHA512_32KB, "%dus", us_sha512); +} + +TEST_CASE("Test esp_sha() function with long input", "[hw_crypto]") { const void* ptr; spi_flash_mmap_handle_t handle; diff --git a/components/idf_test/include/idf_performance.h b/components/idf_test/include/idf_performance.h index 0ba430e76..ad60009f0 100644 --- a/components/idf_test/include/idf_performance.h +++ b/components/idf_test/include/idf_performance.h @@ -22,3 +22,6 @@ // events dispatched per second by event loop library #define IDF_PERFORMANCE_MIN_EVENT_DISPATCH 25000 #define IDF_PERFORMANCE_MIN_EVENT_DISPATCH_PSRAM 21000 +// esp_sha() time to process 32KB of input data from RAM +#define IDF_PERFORMANCE_MAX_ESP32_TIME_SHA1_32KB 5000 +#define IDF_PERFORMANCE_MAX_ESP32_TIME_SHA512_32KB 4500