diff --git a/components/esp32/dport_access.c b/components/esp32/dport_access.c index 2281eea34..4e5d5b177 100644 --- a/components/esp32/dport_access.c +++ b/components/esp32/dport_access.c @@ -217,3 +217,22 @@ void IRAM_ATTR esp_dport_access_int_resume(void) #endif } +/** + * @brief Read a sequence of DPORT registers to the buffer, SMP-safe version. + * + * This implementation uses a method of the pre-reading of the APB register + * before reading the register of the DPORT, without stall other CPU. + * There is disable/enable interrupt. + * + * @param[out] buff_out Contains the read data. + * @param[in] address Initial address for reading registers. + * @param[in] num_words The number of words. + */ +void IRAM_ATTR esp_dport_access_read_buffer(uint32_t *buff_out, uint32_t address, uint32_t num_words) +{ + DPORT_INTERRUPT_DISABLE(); + for (uint32_t i = 0; i < num_words; ++i) { + buff_out[i] = DPORT_SEQUENCE_REG_READ(address + i * 4); + } + DPORT_INTERRUPT_RESTORE(); +} diff --git a/components/esp32/hwcrypto/aes.c b/components/esp32/hwcrypto/aes.c index 1e9bdc852..bddaf1fcd 100644 --- a/components/esp32/hwcrypto/aes.c +++ b/components/esp32/hwcrypto/aes.c @@ -53,31 +53,23 @@ void esp_aes_acquire_hardware( void ) /* newlib locks lazy initialize on ESP-IDF */ portENTER_CRITICAL(&aes_spinlock); - DPORT_STALL_OTHER_CPU_START(); - { - /* Enable AES hardware */ - _DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_AES); - /* Clear reset on digital signature & secure boot units, - otherwise AES unit is held in reset also. */ - _DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG, - DPORT_PERI_EN_AES - | DPORT_PERI_EN_DIGITAL_SIGNATURE - | DPORT_PERI_EN_SECUREBOOT); - } - DPORT_STALL_OTHER_CPU_END(); + /* Enable AES hardware */ + DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_AES); + /* Clear reset on digital signature & secure boot units, + otherwise AES unit is held in reset also. 
*/ + DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG, + DPORT_PERI_EN_AES + | DPORT_PERI_EN_DIGITAL_SIGNATURE + | DPORT_PERI_EN_SECUREBOOT); } void esp_aes_release_hardware( void ) { - DPORT_STALL_OTHER_CPU_START(); - { - /* Disable AES hardware */ - _DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_AES); - /* Don't return other units to reset, as this pulls - reset on RSA & SHA units, respectively. */ - _DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_AES); - } - DPORT_STALL_OTHER_CPU_END(); + /* Disable AES hardware */ + DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_AES); + /* Don't return other units to reset, as this pulls + reset on RSA & SHA units, respectively. */ + DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_AES); portEXIT_CRITICAL(&aes_spinlock); } @@ -141,15 +133,8 @@ static inline void esp_aes_block(const void *input, void *output) } DPORT_REG_WRITE(AES_START_REG, 1); - - DPORT_STALL_OTHER_CPU_START(); - { - while (_DPORT_REG_READ(AES_IDLE_REG) != 1) { } - for (int i = 0; i < 4; i++) { - output_words[i] = mem_block[i]; - } - } - DPORT_STALL_OTHER_CPU_END(); + while (DPORT_REG_READ(AES_IDLE_REG) != 1) { } + esp_dport_access_read_buffer(output_words, (uint32_t)&mem_block[0], 4); } /* diff --git a/components/esp32/hwcrypto/sha.c b/components/esp32/hwcrypto/sha.c index 305b476a8..df0086be5 100644 --- a/components/esp32/hwcrypto/sha.c +++ b/components/esp32/hwcrypto/sha.c @@ -159,16 +159,14 @@ static void esp_sha_lock_engine_inner(sha_engine_state *engine) _lock_acquire(&state_change_lock); if (sha_engines_all_idle()) { + /* Enable SHA hardware */ + DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_SHA); + /* also clear reset on secure boot, otherwise SHA is held in reset */ + DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG, + DPORT_PERI_EN_SHA + | DPORT_PERI_EN_SECUREBOOT); DPORT_STALL_OTHER_CPU_START(); - { - /* Enable SHA hardware */ - _DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_SHA); - /* also clear reset on secure boot, 
otherwise SHA is held in reset */ - _DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG, - DPORT_PERI_EN_SHA - | DPORT_PERI_EN_SECUREBOOT); - ets_sha_enable(); - } + ets_sha_enable(); DPORT_STALL_OTHER_CPU_END(); } @@ -191,12 +189,8 @@ void esp_sha_unlock_engine(esp_sha_type sha_type) if (sha_engines_all_idle()) { /* Disable SHA hardware */ /* Don't assert reset on secure boot, otherwise AES is held in reset */ - DPORT_STALL_OTHER_CPU_START(); - { - _DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_SHA); - _DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_SHA); - } - DPORT_STALL_OTHER_CPU_END(); + DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_SHA); + DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_SHA); } _lock_release(&state_change_lock); @@ -206,16 +200,14 @@ void esp_sha_unlock_engine(esp_sha_type sha_type) void esp_sha_wait_idle(void) { - DPORT_STALL_OTHER_CPU_START(); while(1) { - if(_DPORT_REG_READ(SHA_1_BUSY_REG) == 0 - && _DPORT_REG_READ(SHA_256_BUSY_REG) == 0 - && _DPORT_REG_READ(SHA_384_BUSY_REG) == 0 - && _DPORT_REG_READ(SHA_512_BUSY_REG) == 0) { + if(DPORT_REG_READ(SHA_1_BUSY_REG) == 0 + && DPORT_REG_READ(SHA_256_BUSY_REG) == 0 + && DPORT_REG_READ(SHA_384_BUSY_REG) == 0 + && DPORT_REG_READ(SHA_512_BUSY_REG) == 0) { break; } } - DPORT_STALL_OTHER_CPU_END(); } void esp_sha_read_digest_state(esp_sha_type sha_type, void *digest_state) @@ -225,27 +217,23 @@ void esp_sha_read_digest_state(esp_sha_type sha_type, void *digest_state) esp_sha_lock_memory_block(); - DPORT_STALL_OTHER_CPU_START(); // This block reads from DPORT memory (reg_addr_buf) - { - esp_sha_wait_idle(); + esp_sha_wait_idle(); - _DPORT_REG_WRITE(SHA_LOAD_REG(sha_type), 1); - while(_DPORT_REG_READ(SHA_BUSY_REG(sha_type)) == 1) { } - - uint32_t *digest_state_words = (uint32_t *)digest_state; - uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); - if(sha_type == SHA2_384 || sha_type == SHA2_512) { - /* for these ciphers using 64-bit states, swap each pair of words */ - for(int 
i = 0; i < sha_length(sha_type)/4; i += 2) { - digest_state_words[i+1] = reg_addr_buf[i]; - digest_state_words[i]= reg_addr_buf[i+1]; - } - } else { - memcpy(digest_state_words, reg_addr_buf, sha_length(sha_type)); + DPORT_REG_WRITE(SHA_LOAD_REG(sha_type), 1); + while(DPORT_REG_READ(SHA_BUSY_REG(sha_type)) == 1) { } + uint32_t *digest_state_words = (uint32_t *)digest_state; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + if(sha_type == SHA2_384 || sha_type == SHA2_512) { + /* for these ciphers using 64-bit states, swap each pair of words */ + DPORT_INTERRUPT_DISABLE(); // Disable interrupt only on current CPU. + for(int i = 0; i < sha_length(sha_type)/4; i += 2) { + digest_state_words[i+1] = DPORT_SEQUENCE_REG_READ((uint32_t)&reg_addr_buf[i]); + digest_state_words[i] = DPORT_SEQUENCE_REG_READ((uint32_t)&reg_addr_buf[i+1]); } + DPORT_INTERRUPT_RESTORE(); // restore the previous interrupt level + } else { + esp_dport_access_read_buffer(digest_state_words, (uint32_t)&reg_addr_buf[0], sha_length(sha_type)/4); } - DPORT_STALL_OTHER_CPU_END(); - esp_sha_unlock_memory_block(); } diff --git a/components/esp32/include/esp_dport_access.h b/components/esp32/include/esp_dport_access.h index 3acf80688..1a46fa00c 100644 --- a/components/esp32/include/esp_dport_access.h +++ b/components/esp32/include/esp_dport_access.h @@ -26,7 +26,7 @@ void esp_dport_access_stall_other_cpu_end(void); void esp_dport_access_int_init(void); void esp_dport_access_int_pause(void); void esp_dport_access_int_resume(void); - +void esp_dport_access_read_buffer(uint32_t *buff_out, uint32_t address, uint32_t num_words); //This routine does not stop the dport routines in any way that is recoverable. Please //only call in case of panic().
void esp_dport_access_int_abort(void); @@ -34,9 +34,14 @@ void esp_dport_access_int_abort(void); #if defined(BOOTLOADER_BUILD) || defined(CONFIG_FREERTOS_UNICORE) || !defined(ESP_PLATFORM) #define DPORT_STALL_OTHER_CPU_START() #define DPORT_STALL_OTHER_CPU_END() +#define DPORT_STALL_OTHER_CPU_START() +#define DPORT_INTERRUPT_DISABLE() +#define DPORT_INTERRUPT_RESTORE() #else #define DPORT_STALL_OTHER_CPU_START() esp_dport_access_stall_other_cpu_start() #define DPORT_STALL_OTHER_CPU_END() esp_dport_access_stall_other_cpu_end() +#define DPORT_INTERRUPT_DISABLE() unsigned int intLvl = XTOS_SET_INTLEVEL(XCHAL_EXCM_LEVEL) +#define DPORT_INTERRUPT_RESTORE() XTOS_RESTORE_JUST_INTLEVEL(intLvl) #endif #ifdef __cplusplus diff --git a/components/esp32/test/test_dport.c b/components/esp32/test/test_dport.c index 930d8cdce..6b5960bf4 100644 --- a/components/esp32/test/test_dport.c +++ b/components/esp32/test/test_dport.c @@ -6,12 +6,13 @@ #include "freertos/FreeRTOS.h" #include "freertos/task.h" #include "freertos/semphr.h" - +#include "soc/cpu.h" #include "unity.h" - +#include "rom/uart.h" #include "soc/uart_reg.h" #include "soc/dport_reg.h" - +#include "soc/rtc.h" +#define MHZ (1000000) static volatile bool exit_flag; static bool dport_test_result; static bool apb_test_result; @@ -54,40 +55,267 @@ static void accessAPB(void *pvParameters) vTaskDelete(NULL); } -TEST_CASE("access DPORT and APB at same time", "[esp32]") +void run_tasks(const char *task1_description, void (* task1_func)(void *), const char *task2_description, void (* task2_func)(void *), uint32_t delay_ms) { int i; TaskHandle_t th[2]; xSemaphoreHandle exit_sema[2]; for (i=0; i<2; i++) { - exit_sema[i] = xSemaphoreCreateMutex(); - xSemaphoreTake(exit_sema[i], portMAX_DELAY); + if((task1_func != NULL && i == 0) || (task2_func != NULL && i == 1)){ + exit_sema[i] = xSemaphoreCreateMutex(); + xSemaphoreTake(exit_sema[i], portMAX_DELAY); + } } exit_flag = false; #ifndef CONFIG_FREERTOS_UNICORE printf("assign task 
accessing DPORT to core 0 and task accessing APB to core 1\n"); - xTaskCreatePinnedToCore(accessDPORT , "accessDPORT" , 2048, &exit_sema[0], UNITY_FREERTOS_PRIORITY - 1, &th[0], 0); - xTaskCreatePinnedToCore(accessAPB , "accessAPB" , 2048, &exit_sema[1], UNITY_FREERTOS_PRIORITY - 1, &th[1], 1); + if(task1_func != NULL) xTaskCreatePinnedToCore(task1_func, task1_description, 2048, &exit_sema[0], UNITY_FREERTOS_PRIORITY - 1, &th[0], 0); + if(task2_func != NULL) xTaskCreatePinnedToCore(task2_func, task2_description, 2048, &exit_sema[1], UNITY_FREERTOS_PRIORITY - 1, &th[1], 1); #else printf("assign task accessing DPORT and accessing APB\n"); - xTaskCreate(accessDPORT , "accessDPORT" , 2048, &exit_sema[0], UNITY_FREERTOS_PRIORITY - 1, &th[0]); - xTaskCreate(accessAPB , "accessAPB" , 2048, &exit_sema[1], UNITY_FREERTOS_PRIORITY - 1, &th[1]); + if(task1_func != NULL) xTaskCreate(task1_func, task1_description, 2048, &exit_sema[0], UNITY_FREERTOS_PRIORITY - 1, &th[0]); + if(task2_func != NULL) xTaskCreate(task2_func, task2_description, 2048, &exit_sema[1], UNITY_FREERTOS_PRIORITY - 1, &th[1]); #endif - printf("start wait for 10 seconds\n"); - vTaskDelay(10000 / portTICK_PERIOD_MS); + printf("start wait for %d seconds [Test %s and %s]\n", delay_ms/1000, task1_description, task2_description); + vTaskDelay(delay_ms / portTICK_PERIOD_MS); // set exit flag to let thread exit exit_flag = true; for (i=0; i<2; i++) { - xSemaphoreTake(exit_sema[i], portMAX_DELAY); - vSemaphoreDelete(exit_sema[i]); + if ((task1_func != NULL && i == 0) || (task2_func != NULL && i == 1)) { + xSemaphoreTake(exit_sema[i], portMAX_DELAY); + vSemaphoreDelete(exit_sema[i]); + } } - TEST_ASSERT(dport_test_result == true && apb_test_result == true); } +TEST_CASE("access DPORT and APB at same time", "[esp32]") +{ + dport_test_result = false; + apb_test_result = false; + printf("CPU_FREQ = %d MHz\n", rtc_clk_cpu_freq_value(rtc_clk_cpu_freq_get()) / MHZ); + run_tasks("accessDPORT", accessDPORT, "accessAPB", 
accessAPB, 10000); +} + +void run_tasks_with_change_freq_cpu (rtc_cpu_freq_t cpu_freq) +{ + dport_test_result = false; + apb_test_result = false; + rtc_cpu_freq_t cur_freq = rtc_clk_cpu_freq_get(); + uint32_t freq_before_changed = rtc_clk_cpu_freq_value(cur_freq) / MHZ; + uint32_t freq_changed = freq_before_changed; + printf("CPU_FREQ = %d MHz\n", freq_before_changed); + + if (cur_freq != cpu_freq) { + uart_tx_wait_idle(CONFIG_CONSOLE_UART_NUM); + + rtc_clk_cpu_freq_set(cpu_freq); + + const int uart_num = CONFIG_CONSOLE_UART_NUM; + const int uart_baud = CONFIG_CONSOLE_UART_BAUDRATE; + uart_div_modify(uart_num, (rtc_clk_apb_freq_get() << 4) / uart_baud); + + freq_changed = rtc_clk_cpu_freq_value(rtc_clk_cpu_freq_get()) / MHZ; + printf("CPU_FREQ switching to %d MHz\n", freq_changed); + } + run_tasks("accessDPORT", accessDPORT, "accessAPB", accessAPB, 10000 / ((freq_before_changed <= freq_changed) ? 1 : (freq_before_changed / freq_changed))); + + // return old freq. + uart_tx_wait_idle(CONFIG_CONSOLE_UART_NUM); + rtc_clk_cpu_freq_set(cur_freq); + const int uart_num = CONFIG_CONSOLE_UART_NUM; + const int uart_baud = CONFIG_CONSOLE_UART_BAUDRATE; + uart_div_modify(uart_num, (rtc_clk_apb_freq_get() << 4) / uart_baud); +} + +TEST_CASE("access DPORT and APB at same time (Freq CPU and APB = 80 MHz)", "[esp32] [ignore]") +{ + run_tasks_with_change_freq_cpu(RTC_CPU_FREQ_80M); +} + +TEST_CASE("access DPORT and APB at same time (Freq CPU and APB = 40 MHz (XTAL))", "[esp32]") +{ + run_tasks_with_change_freq_cpu(RTC_CPU_FREQ_XTAL); +} + +static uint32_t stall_other_cpu_counter; +static uint32_t pre_reading_apb_counter; +static uint32_t apb_counter; + +static void accessDPORT_stall_other_cpu(void *pvParameters) +{ + xSemaphoreHandle *sema = (xSemaphoreHandle *) pvParameters; + uint32_t dport_date = DPORT_REG_READ(DPORT_DATE_REG); + uint32_t dport_date_cur; + dport_test_result = true; + stall_other_cpu_counter = 0; + // although exit flag is set in another task, checking (exit_flag 
== false) is safe + while (exit_flag == false) { + ++stall_other_cpu_counter; + DPORT_STALL_OTHER_CPU_START(); + dport_date_cur = _DPORT_REG_READ(DPORT_DATE_REG); + DPORT_STALL_OTHER_CPU_END(); + if (dport_date != dport_date_cur) { + dport_test_result = false; + break; + } + } + + xSemaphoreGive(*sema); + vTaskDelete(NULL); +} + +static void accessAPB_measure_performance(void *pvParameters) +{ + xSemaphoreHandle *sema = (xSemaphoreHandle *) pvParameters; + uint32_t uart_date = REG_READ(UART_DATE_REG(0)); + + apb_test_result = true; + apb_counter = 0; + // although exit flag is set in another task, checking (exit_flag == false) is safe + while (exit_flag == false) { + ++apb_counter; + if (uart_date != REG_READ(UART_DATE_REG(0))) { + apb_test_result = false; + break; + } + } + + xSemaphoreGive(*sema); + vTaskDelete(NULL); +} + +static void accessDPORT_pre_reading_apb(void *pvParameters) +{ + xSemaphoreHandle *sema = (xSemaphoreHandle *) pvParameters; + uint32_t dport_date = DPORT_REG_READ(DPORT_DATE_REG); + uint32_t dport_date_cur; + dport_test_result = true; + pre_reading_apb_counter = 0; + // although exit flag is set in another task, checking (exit_flag == false) is safe + while (exit_flag == false) { + ++pre_reading_apb_counter; + dport_date_cur = DPORT_REG_READ(DPORT_DATE_REG); + if (dport_date != dport_date_cur) { + dport_test_result = false; + break; + } + } + + xSemaphoreGive(*sema); + vTaskDelete(NULL); +} + +TEST_CASE("test for DPORT access performance", "[esp32]") +{ + dport_test_result = true; + apb_test_result = true; + typedef struct { + uint32_t dport; + uint32_t apb; + uint32_t summ; + } test_performance_t; + test_performance_t t[5] = {0}; + uint32_t delay_ms = 5000; + + run_tasks("-", NULL, "accessAPB", accessAPB_measure_performance, delay_ms); + t[0].apb = apb_counter; + t[0].dport = 0; + t[0].summ = t[0].apb + t[0].dport; + + run_tasks("accessDPORT_stall_other_cpu", accessDPORT_stall_other_cpu, "-", NULL, delay_ms); + t[1].apb = 0; + t[1].dport =
stall_other_cpu_counter; + t[1].summ = t[1].apb + t[1].dport; + + run_tasks("accessDPORT_pre_reading_apb", accessDPORT_pre_reading_apb, "-", NULL, delay_ms); + t[2].apb = 0; + t[2].dport = pre_reading_apb_counter; + t[2].summ = t[2].apb + t[2].dport; + + run_tasks("accessDPORT_stall_other_cpu", accessDPORT_stall_other_cpu, "accessAPB", accessAPB_measure_performance, delay_ms); + t[3].apb = apb_counter; + t[3].dport = stall_other_cpu_counter; + t[3].summ = t[3].apb + t[3].dport; + + run_tasks("accessDPORT_pre_reading_apb", accessDPORT_pre_reading_apb, "accessAPB", accessAPB_measure_performance, delay_ms); + t[4].apb = apb_counter; + t[4].dport = pre_reading_apb_counter; + t[4].summ = t[4].apb + t[4].dport; + + printf("\nPerformance table: \n" + "The number of simultaneous read operations of the APB and DPORT registers\n" + "by different methods for %d seconds.\n", delay_ms/1000); + printf("+-----------------------+----------+----------+----------+\n"); + printf("| Method read DPORT | DPORT | APB | SUMM |\n"); + printf("+-----------------------+----------+----------+----------+\n"); + printf("|1.Only accessAPB |%10d|%10d|%10d|\n", t[0].dport, t[0].apb, t[0].summ); + printf("|2.Only STALL_OTHER_CPU |%10d|%10d|%10d|\n", t[1].dport, t[1].apb, t[1].summ); + printf("|3.Only PRE_READ_APB_REG|%10d|%10d|%10d|\n", t[2].dport, t[2].apb, t[2].summ); + printf("+-----------------------+----------+----------+----------+\n"); + printf("|4.STALL_OTHER_CPU |%10d|%10d|%10d|\n", t[3].dport, t[3].apb, t[3].summ); + printf("|5.PRE_READ_APB_REG |%10d|%10d|%10d|\n", t[4].dport, t[4].apb, t[4].summ); + printf("+-----------------------+----------+----------+----------+\n"); + printf("| ratio=PRE_READ/STALL |%10f|%10f|%10f|\n", (float)t[4].dport/t[3].dport, (float)t[4].apb/t[3].apb, (float)t[4].summ/t[3].summ); + printf("+-----------------------+----------+----------+----------+\n"); +} + +#define REPEAT_OPS 10000 + +static uint32_t start, end; + +#define BENCHMARK_START() do { \ + 
RSR(CCOUNT, start); \ + } while(0) + +#define BENCHMARK_END(OPERATION) do { \ + RSR(CCOUNT, end); \ + printf("%s took %d cycles/op (%d cycles for %d ops)\n", \ + OPERATION, (end - start)/REPEAT_OPS, \ + (end - start), REPEAT_OPS); \ + } while(0) + +TEST_CASE("BENCHMARK for DPORT access performance", "[freertos]") +{ + BENCHMARK_START(); + for (int i = 0; i < REPEAT_OPS; i++) { + DPORT_STALL_OTHER_CPU_START(); + _DPORT_REG_READ(DPORT_DATE_REG); + DPORT_STALL_OTHER_CPU_END(); + } + BENCHMARK_END("[old]DPORT access STALL OTHER CPU"); + + + BENCHMARK_START(); + for (int i = 0; i < REPEAT_OPS; i++) { + DPORT_REG_READ(DPORT_DATE_REG); + } + BENCHMARK_END("[new]DPORT access PRE-READ APB REG"); + + + BENCHMARK_START(); + for (int i = 0; i < REPEAT_OPS; i++) { + DPORT_SEQUENCE_REG_READ(DPORT_DATE_REG); + } + BENCHMARK_END("[seq]DPORT access PRE-READ APB REG"); + + + BENCHMARK_START(); + for (int i = 0; i < REPEAT_OPS; i++) { + REG_READ(UART_DATE_REG(0)); + } + BENCHMARK_END("REG_READ"); + + + BENCHMARK_START(); + for (int i = 0; i < REPEAT_OPS; i++) { + _DPORT_REG_READ(DPORT_DATE_REG); + } + BENCHMARK_END("_DPORT_REG_READ"); +} diff --git a/components/mbedtls/port/esp_bignum.c b/components/mbedtls/port/esp_bignum.c index f18652c7b..3bfcd6145 100644 --- a/components/mbedtls/port/esp_bignum.c +++ b/components/mbedtls/port/esp_bignum.c @@ -76,17 +76,13 @@ void esp_mpi_acquire_hardware( void ) /* newlib locks lazy initialize on ESP-IDF */ _lock_acquire(&mpi_lock); - DPORT_STALL_OTHER_CPU_START(); - { - _DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_RSA); - /* also clear reset on digital signature, otherwise RSA is held in reset */ - _DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG, - DPORT_PERI_EN_RSA - | DPORT_PERI_EN_DIGITAL_SIGNATURE); + DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_RSA); + /* also clear reset on digital signature, otherwise RSA is held in reset */ + DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG, + DPORT_PERI_EN_RSA + | DPORT_PERI_EN_DIGITAL_SIGNATURE); 
- _DPORT_REG_CLR_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD); - } - DPORT_STALL_OTHER_CPU_END(); + DPORT_REG_CLR_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD); while(DPORT_REG_READ(RSA_CLEAN_REG) != 1); // Note: from enabling RSA clock to here takes about 1.3us @@ -98,15 +94,11 @@ void esp_mpi_acquire_hardware( void ) void esp_mpi_release_hardware( void ) { - DPORT_STALL_OTHER_CPU_START(); - { - _DPORT_REG_SET_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD); + DPORT_REG_SET_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD); - /* don't reset digital signature unit, as this resets AES also */ - _DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_RSA); - _DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_RSA); - } - DPORT_STALL_OTHER_CPU_END(); + /* don't reset digital signature unit, as this resets AES also */ + DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_RSA); + DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_RSA); _lock_release(&mpi_lock); } @@ -183,14 +175,7 @@ static inline int mem_block_to_mpi(mbedtls_mpi *x, uint32_t mem_base, int num_wo MBEDTLS_MPI_CHK( mbedtls_mpi_grow(x, num_words) ); /* Copy data from memory block registers */ - DPORT_STALL_OTHER_CPU_START(); - { - for (size_t i = 0; i < num_words; ++i) { - x->p[i] = _DPORT_REG_READ(mem_base + i * 4); - } - } - DPORT_STALL_OTHER_CPU_END(); - + esp_dport_access_read_buffer(x->p, mem_base, num_words); /* Zero any remaining limbs in the bignum, if the buffer is bigger than num_words */ for(size_t i = num_words; i < x->n; i++) { diff --git a/components/soc/esp32/include/soc/dport_access.h b/components/soc/esp32/include/soc/dport_access.h index 817ac98e7..db3908241 100644 --- a/components/soc/esp32/include/soc/dport_access.h +++ b/components/soc/esp32/include/soc/dport_access.h @@ -18,6 +18,9 @@ #include <stdint.h> #include "esp_attr.h" #include "esp_dport_access.h" +#include "soc.h" +#include "uart_reg.h" +#include "xtensa/xtruntime.h" #ifdef __cplusplus extern "C" { @@ -28,10 +31,29 @@ extern "C" { // The _DPORT_xxx
register read macros access DPORT memory directly (as opposed to // DPORT_REG_READ which applies SMP-safe protections). // -// Use DPORT_REG_READ versions to be SMP-safe in IDF apps. If you want to -// make a sequence of DPORT reads, use DPORT_STALL_OTHER_CPU_START() macro -// explicitly and then use _DPORT_REG_READ macro while other CPU is stalled. -// +// There are several ways to read the DPORT registers: +// 1) Use DPORT_REG_READ versions to be SMP-safe in IDF apps. +// This method uses the pre-read APB implementation(*) without stalling the other CPU. +// This is beneficial for single readings. +// 2) If you want to make a sequence of DPORT reads to buffer, +// use esp_dport_access_read_buffer(buff_out, address, num_words), +// it is the faster method and it doesn't stop the other CPU. +// 3) If you want to make a sequence of DPORT reads, but you don't want to stop the other CPU +// and you want to do it faster, then you need to use DPORT_SEQUENCE_REG_READ(). +// The difference from the first method is that the user must disable interrupts while reading DPORT. +// Note that disabling interrupts is needed only if the chip has two cores. +// 4) If you want to make a sequence of DPORT reads, +// use DPORT_STALL_OTHER_CPU_START() macro explicitly +// and then use _DPORT_REG_READ macro while other CPU is stalled. +// After completing read operations, use DPORT_STALL_OTHER_CPU_END(). +// This method stalls the other CPU while reading DPORT registers. +// Useful for compatibility, as well as for large consecutive readings. +// This method is slower, but must be used if ROM functions or +// other code is called which accesses DPORT without any other workaround. +// *) Pre-reading an APB register before reading the DPORT register +// helps synchronize the operation of the two CPUs, +// so that reading on different CPUs no longer causes random errors in APB register reads. + // _DPORT_REG_WRITE & DPORT_REG_WRITE are equivalent.
#define _DPORT_REG_READ(_r) (*(volatile uint32_t *)(_r)) #define _DPORT_REG_WRITE(_r, _v) (*(volatile uint32_t *)(_r)) = (_v) @@ -39,16 +61,78 @@ extern "C" { // Write value to DPORT register (does not require protecting) #define DPORT_REG_WRITE(_r, _v) _DPORT_REG_WRITE((_r), (_v)) -// Read value from register, SMP-safe version. +/** + * @brief Read value from register, SMP-safe version. + * + * This method uses the pre-reading of the APB register before reading the register of the DPORT. + * This implementation is useful for single reads of DPORT registers without stalling the other CPU. + * Interrupts are disabled and restored around the read. + * + * @param reg Register address + * @return Value + */ static inline uint32_t IRAM_ATTR DPORT_REG_READ(uint32_t reg) { - uint32_t val; +#ifndef CONFIG_FREERTOS_UNICORE + uint32_t apb; + unsigned int intLvl; + __asm__ __volatile__ (\ + "movi %[APB], "XTSTR(0x3ff40078)"\n"\ + "rsil %[LVL], "XTSTR(3)"\n"\ + "l32i %[APB], %[APB], 0\n"\ + "l32i %[REG], %[REG], 0\n"\ + "wsr %[LVL], "XTSTR(PS)"\n"\ + "rsync\n"\ + : [APB]"=a"(apb), [REG]"+a"(reg), [LVL]"=a"(intLvl)\ + : \ + : "memory" \ + ); + return reg; +#else + return _DPORT_REG_READ(reg); +#endif +} - DPORT_STALL_OTHER_CPU_START(); - val = _DPORT_REG_READ(reg); - DPORT_STALL_OTHER_CPU_END(); - - return val; +/** + * @brief Read value from register, NOT SMP-safe version. + * + * This method uses the pre-reading of the APB register before reading the register of the DPORT. + * It does not disable/restore interrupts itself. + * The difference from DPORT_REG_READ() is that the caller must disable interrupts while reading DPORT. + * This implementation is useful for reading DPORT registers in a loop without stalling the other CPU. Note the usage example. + * The recommended way to read registers sequentially without stalling the other CPU + * is to use esp_dport_access_read_buffer(buff_out, address, num_words). It reads the registers into a buffer.
+ * + * \code{c} + * // This example shows how to use it. + * { // Use curly brackets to limit the visibility of variables in macros DPORT_INTERRUPT_DISABLE/RESTORE. + * DPORT_INTERRUPT_DISABLE(); // Disable interrupt only on current CPU. + * for (i = 0; i < max; ++i) { + * array[i] = DPORT_SEQUENCE_REG_READ(Address + i * 4); // reading DPORT registers + * } + * DPORT_INTERRUPT_RESTORE(); // restore the previous interrupt level + * } + * \endcode + * + * @param reg Register address + * @return Value + */ +static inline uint32_t IRAM_ATTR DPORT_SEQUENCE_REG_READ(uint32_t reg) +{ +#ifndef CONFIG_FREERTOS_UNICORE + uint32_t apb; + __asm__ __volatile__ (\ + "movi %[APB], "XTSTR(0x3ff40078)"\n"\ + "l32i %[APB], %[APB], 0\n"\ + "l32i %[REG], %[REG], 0\n"\ + : [APB]"=a"(apb), [REG]"+a"(reg)\ + : \ + : "memory" \ + ); + return reg; +#else + return _DPORT_REG_READ(reg); +#endif } //get bit or get bits from register @@ -93,16 +177,35 @@ static inline uint32_t IRAM_ATTR DPORT_REG_READ(uint32_t reg) #define _DPORT_REG_SET_BIT(_r, _b) _DPORT_REG_WRITE((_r), (_DPORT_REG_READ(_r)|(_b))) #define _DPORT_REG_CLR_BIT(_r, _b) _DPORT_REG_WRITE((_r), (_DPORT_REG_READ(_r) & (~(_b)))) -//read value from register -static inline uint32_t IRAM_ATTR DPORT_READ_PERI_REG(uint32_t addr) +/** + * @brief Read value from register, SMP-safe version. + * + * This method uses the pre-reading of the APB register before reading the register of the DPORT. + * This implementation is useful for single reads of DPORT registers without stalling the other CPU.
+ * + * @param reg Register address + * @return Value + */ +static inline uint32_t IRAM_ATTR DPORT_READ_PERI_REG(uint32_t reg) { - uint32_t val; - - DPORT_STALL_OTHER_CPU_START(); - val = _DPORT_READ_PERI_REG(addr); - DPORT_STALL_OTHER_CPU_END(); - - return val; +#ifndef CONFIG_FREERTOS_UNICORE + uint32_t apb; + unsigned int intLvl; + __asm__ __volatile__ (\ + "movi %[APB], "XTSTR(0x3ff40078)"\n"\ + "rsil %[LVL], "XTSTR(3)"\n"\ + "l32i %[APB], %[APB], 0\n"\ + "l32i %[REG], %[REG], 0\n"\ + "wsr %[LVL], "XTSTR(PS)"\n"\ + "rsync\n"\ + : [APB]"=a"(apb), [REG]"+a"(reg), [LVL]"=a"(intLvl)\ + : \ + : "memory" \ + ); + return reg; +#else + return _DPORT_READ_PERI_REG(reg); +#endif } //write value to register diff --git a/components/spi_flash/flash_mmap.c b/components/spi_flash/flash_mmap.c index 91e213d4d..43fdf6ccd 100644 --- a/components/spi_flash/flash_mmap.c +++ b/components/spi_flash/flash_mmap.c @@ -79,11 +79,10 @@ static void IRAM_ATTR spi_flash_mmap_init() if (s_mmap_page_refcnt[0] != 0) { return; /* mmap data already initialised */ } - - DPORT_STALL_OTHER_CPU_START(); + DPORT_INTERRUPT_DISABLE(); for (int i = 0; i < REGIONS_COUNT * PAGES_PER_REGION; ++i) { - uint32_t entry_pro = DPORT_PRO_FLASH_MMU_TABLE[i]; - uint32_t entry_app = DPORT_APP_FLASH_MMU_TABLE[i]; + uint32_t entry_pro = DPORT_SEQUENCE_REG_READ((uint32_t)&DPORT_PRO_FLASH_MMU_TABLE[i]); + uint32_t entry_app = DPORT_SEQUENCE_REG_READ((uint32_t)&DPORT_APP_FLASH_MMU_TABLE[i]); if (entry_pro != entry_app) { // clean up entries used by boot loader @@ -97,7 +96,7 @@ static void IRAM_ATTR spi_flash_mmap_init() DPORT_APP_FLASH_MMU_TABLE[i] = DPORT_FLASH_MMU_TABLE_INVALID_VAL; } } - DPORT_STALL_OTHER_CPU_END(); + DPORT_INTERRUPT_RESTORE(); } static void IRAM_ATTR get_mmu_region(spi_flash_mmap_memory_t memory, int* out_begin, int* out_size,uint32_t* region_addr) @@ -186,15 +185,15 @@ esp_err_t IRAM_ATTR spi_flash_mmap_pages(int *pages, size_t page_count, spi_flas for (start = region_begin; start < end; ++start) { 
int pageno = 0; int pos; - DPORT_STALL_OTHER_CPU_START(); + DPORT_INTERRUPT_DISABLE(); for (pos = start; pos < start + page_count; ++pos, ++pageno) { - int table_val = (int) DPORT_PRO_FLASH_MMU_TABLE[pos]; + int table_val = (int) DPORT_SEQUENCE_REG_READ((uint32_t)&DPORT_PRO_FLASH_MMU_TABLE[pos]); uint8_t refcnt = s_mmap_page_refcnt[pos]; if (refcnt != 0 && table_val != pages[pageno]) { break; } } - DPORT_STALL_OTHER_CPU_END(); + DPORT_INTERRUPT_RESTORE(); // whole mapping range matched, bail out if (pos - start == page_count) { break; @@ -208,14 +207,16 @@ esp_err_t IRAM_ATTR spi_flash_mmap_pages(int *pages, size_t page_count, spi_flas } else { // set up mapping using pages uint32_t pageno = 0; - DPORT_STALL_OTHER_CPU_START(); + DPORT_INTERRUPT_DISABLE(); for (int i = start; i != start + page_count; ++i, ++pageno) { // sanity check: we won't reconfigure entries with non-zero reference count + uint32_t entry_pro = DPORT_SEQUENCE_REG_READ((uint32_t)&DPORT_PRO_FLASH_MMU_TABLE[i]); + uint32_t entry_app = DPORT_SEQUENCE_REG_READ((uint32_t)&DPORT_APP_FLASH_MMU_TABLE[i]); assert(s_mmap_page_refcnt[i] == 0 || - (DPORT_PRO_FLASH_MMU_TABLE[i] == pages[pageno] && - DPORT_APP_FLASH_MMU_TABLE[i] == pages[pageno])); + (entry_pro == pages[pageno] && + entry_app == pages[pageno])); if (s_mmap_page_refcnt[i] == 0) { - if (DPORT_PRO_FLASH_MMU_TABLE[i] != pages[pageno] || DPORT_APP_FLASH_MMU_TABLE[i] != pages[pageno]) { + if (entry_pro != pages[pageno] || entry_app != pages[pageno]) { DPORT_PRO_FLASH_MMU_TABLE[i] = pages[pageno]; DPORT_APP_FLASH_MMU_TABLE[i] = pages[pageno]; need_flush = true; @@ -223,7 +224,7 @@ esp_err_t IRAM_ATTR spi_flash_mmap_pages(int *pages, size_t page_count, spi_flas } ++s_mmap_page_refcnt[i]; } - DPORT_STALL_OTHER_CPU_END(); + DPORT_INTERRUPT_RESTORE(); LIST_INSERT_HEAD(&s_mmap_entries_head, new_entry, entries); new_entry->page = start; new_entry->count = page_count; @@ -264,7 +265,6 @@ void IRAM_ATTR spi_flash_munmap(spi_flash_mmap_handle_t handle) // for 
each page, decrement reference counter // if reference count is zero, disable MMU table entry to // facilitate debugging of use-after-free conditions - DPORT_STALL_OTHER_CPU_START(); for (int i = it->page; i < it->page + it->count; ++i) { assert(s_mmap_page_refcnt[i] > 0); if (--s_mmap_page_refcnt[i] == 0) { @@ -272,7 +272,6 @@ void IRAM_ATTR spi_flash_munmap(spi_flash_mmap_handle_t handle) DPORT_APP_FLASH_MMU_TABLE[i] = INVALID_ENTRY_VAL; } } - DPORT_STALL_OTHER_CPU_END(); LIST_REMOVE(it, entries); break; } @@ -294,7 +293,7 @@ void spi_flash_mmap_dump() for (int i = 0; i < REGIONS_COUNT * PAGES_PER_REGION; ++i) { if (s_mmap_page_refcnt[i] != 0) { printf("page %d: refcnt=%d paddr=%d\n", - i, (int) s_mmap_page_refcnt[i], DPORT_PRO_FLASH_MMU_TABLE[i]); + i, (int) s_mmap_page_refcnt[i], DPORT_REG_READ((uint32_t)&DPORT_PRO_FLASH_MMU_TABLE[i])); } } } @@ -307,13 +306,13 @@ uint32_t spi_flash_mmap_get_free_pages(spi_flash_mmap_memory_t memory) int region_size; // number of pages to check uint32_t region_addr; // base address of memory region get_mmu_region(memory,&region_begin,&region_size,&region_addr); - DPORT_STALL_OTHER_CPU_START(); + DPORT_INTERRUPT_DISABLE(); for (int i = region_begin; i < region_begin + region_size; ++i) { - if (s_mmap_page_refcnt[i] == 0 && DPORT_PRO_FLASH_MMU_TABLE[i] == INVALID_ENTRY_VAL) { + if (s_mmap_page_refcnt[i] == 0 && DPORT_SEQUENCE_REG_READ((uint32_t)&DPORT_PRO_FLASH_MMU_TABLE[i]) == INVALID_ENTRY_VAL) { count++; } } - DPORT_STALL_OTHER_CPU_END(); + DPORT_INTERRUPT_RESTORE(); return count; } @@ -403,9 +402,7 @@ uint32_t spi_flash_cache2phys(const void *cached) /* cached address was not in IROM or DROM */ return SPI_FLASH_CACHE2PHYS_FAIL; } - DPORT_STALL_OTHER_CPU_START(); - uint32_t phys_page = DPORT_PRO_FLASH_MMU_TABLE[cache_page]; - DPORT_STALL_OTHER_CPU_END(); + uint32_t phys_page = DPORT_REG_READ((uint32_t)&DPORT_PRO_FLASH_MMU_TABLE[cache_page]); if (phys_page == INVALID_ENTRY_VAL) { /* page is not mapped */ return
SPI_FLASH_CACHE2PHYS_FAIL; @@ -432,16 +429,15 @@ const void *spi_flash_phys2cache(uint32_t phys_offs, spi_flash_mmap_memory_t mem base = VADDR1_START_ADDR; page_delta = 64; } - - DPORT_STALL_OTHER_CPU_START(); + DPORT_INTERRUPT_DISABLE(); for (int i = start; i < end; i++) { - if (DPORT_PRO_FLASH_MMU_TABLE[i] == phys_page) { + if (DPORT_SEQUENCE_REG_READ((uint32_t)&DPORT_PRO_FLASH_MMU_TABLE[i]) == phys_page) { i -= page_delta; intptr_t cache_page = base + (SPI_FLASH_MMU_PAGE_SIZE * i); - DPORT_STALL_OTHER_CPU_END(); + DPORT_INTERRUPT_RESTORE(); return (const void *) (cache_page | (phys_offs & (SPI_FLASH_MMU_PAGE_SIZE-1))); } } - DPORT_STALL_OTHER_CPU_END(); + DPORT_INTERRUPT_RESTORE(); return NULL; }