esp32: Chunk input blocks for esp_sha() function performance, add perf test

This commit is contained in:
Angus Gratton 2019-03-11 18:24:32 +11:00 committed by bot
parent 5c6be8380e
commit 892c2be168
3 changed files with 72 additions and 9 deletions

View file

@ -305,30 +305,39 @@ void esp_sha(esp_sha_type sha_type, const unsigned char *input, size_t ilen, uns
{
size_t block_len = block_length(sha_type);
// Max number of blocks to pass per each call to esp_sha_lock_memory_block()
// (keep low enough to avoid starving interrupt handlers, especially if reading
// data into SHA via flash cache, but high enough that spinlock overhead is low)
const size_t BLOCKS_PER_CHUNK = 100;
const size_t MAX_CHUNK_LEN = BLOCKS_PER_CHUNK * block_len;
esp_sha_lock_engine(sha_type);
SHA_CTX ctx;
ets_sha_init(&ctx);
while(ilen > 0) {
size_t chunk_len = (ilen > block_len) ? block_len : ilen;
while (ilen > 0) {
size_t chunk_len = (ilen > MAX_CHUNK_LEN) ? MAX_CHUNK_LEN : ilen;
// Wait for idle before entering critical section
// (to reduce time spent in it), then check after
// (to reduce time spent in it), then check again after
esp_sha_wait_idle();
esp_sha_lock_memory_block();
esp_sha_wait_idle();
DPORT_STALL_OTHER_CPU_START();
{
while (chunk_len > 0) {
// This SHA ROM function reads DPORT regs
ets_sha_update(&ctx, sha_type, input, chunk_len * 8);
// (can accept max one SHA block each call)
size_t update_len = (chunk_len > block_len) ? block_len : chunk_len;
ets_sha_update(&ctx, sha_type, input, update_len * 8);
input += update_len;
chunk_len -= update_len;
ilen -= update_len;
}
DPORT_STALL_OTHER_CPU_END();
input += chunk_len;
ilen -= chunk_len;
if (ilen == 0) {
/* Finish the last block before releasing the memory
block lock, as ets_sha_update() may have written data to

View file

@ -3,6 +3,10 @@
#include <string.h>
#include "esp_types.h"
#include "esp_clk.h"
#include "esp_log.h"
#include "esp_timer.h"
#include "esp_heap_caps.h"
#include "idf_performance.h"
#include "unity.h"
#include "test_utils.h"
@ -15,7 +19,54 @@
are tested as part of mbedTLS tests. Only esp_sha() is different.
*/
TEST_CASE("Test esp_sha() function", "[hw_crypto]")
#define TAG "sha_test"
TEST_CASE("Test esp_sha()", "[hw_crypto]")
{
const size_t BUFFER_SZ = 32 * 1024 + 6; // NB: not an exact multiple of SHA block size
int64_t begin, end;
uint32_t us_sha1, us_sha512;
uint8_t sha1_result[20] = { 0 };
uint8_t sha512_result[64] = { 0 };
void *buffer = heap_caps_malloc(BUFFER_SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
TEST_ASSERT_NOT_NULL(buffer);
memset(buffer, 0xEE, BUFFER_SZ);
const uint8_t sha1_expected[20] = { 0xc7, 0xbb, 0xd3, 0x74, 0xf2, 0xf6, 0x20, 0x86,
0x61, 0xf4, 0x50, 0xd5, 0xf5, 0x18, 0x44, 0xcc,
0x7a, 0xb7, 0xa5, 0x4a };
const uint8_t sha512_expected[64] = { 0xc7, 0x7f, 0xda, 0x8c, 0xb3, 0x58, 0x14, 0x8a,
0x52, 0x3b, 0x46, 0x04, 0xc0, 0x85, 0xc5, 0xf0,
0x46, 0x64, 0x14, 0xd5, 0x96, 0x7a, 0xa2, 0x80,
0x20, 0x9c, 0x04, 0x27, 0x7d, 0x3b, 0xf9, 0x1f,
0xb2, 0xa3, 0x45, 0x3c, 0xa1, 0x6a, 0x8d, 0xdd,
0x35, 0x5e, 0x35, 0x57, 0x76, 0x22, 0x74, 0xd8,
0x1e, 0x07, 0xc6, 0xa2, 0x9e, 0x3b, 0x65, 0x75,
0x80, 0x7d, 0xe6, 0x6e, 0x47, 0x61, 0x2c, 0x94 };
begin = esp_timer_get_time();
esp_sha(SHA1, buffer, BUFFER_SZ, sha1_result);
end = esp_timer_get_time();
TEST_ASSERT_EQUAL_HEX8_ARRAY(sha1_expected, sha1_result, sizeof(sha1_expected));
us_sha1 = end - begin;
ESP_LOGI(TAG, "esp_sha() 32KB SHA1 in %u us", us_sha1);
begin = esp_timer_get_time();
esp_sha(SHA2_512, buffer, BUFFER_SZ, sha512_result);
end = esp_timer_get_time();
TEST_ASSERT_EQUAL_HEX8_ARRAY(sha512_expected, sha512_result, sizeof(sha512_expected));
us_sha512 = end - begin;
ESP_LOGI(TAG, "esp_sha() 32KB SHA512 in %u us", us_sha512);
free(buffer);
TEST_PERFORMANCE_LESS_THAN(ESP32_TIME_SHA1_32KB, "%dus", us_sha1);
TEST_PERFORMANCE_LESS_THAN(ESP32_TIME_SHA512_32KB, "%dus", us_sha512);
}
TEST_CASE("Test esp_sha() function with long input", "[hw_crypto]")
{
const void* ptr;
spi_flash_mmap_handle_t handle;

View file

@ -30,3 +30,6 @@
// events dispatched per second by event loop library
#define IDF_PERFORMANCE_MIN_EVENT_DISPATCH 25000
#define IDF_PERFORMANCE_MIN_EVENT_DISPATCH_PSRAM 21000
// esp_sha() time to process 32KB of input data from RAM
#define IDF_PERFORMANCE_MAX_ESP32_TIME_SHA1_32KB 5000
#define IDF_PERFORMANCE_MAX_ESP32_TIME_SHA512_32KB 4500