From 301dacfb33f1fccb20143bedc81b1a9189229c8a Mon Sep 17 00:00:00 2001
From: Sachin Parekh
Date: Wed, 26 Feb 2020 20:21:59 +0800
Subject: [PATCH] Exception handlers for LoadStoreError and LoadStoreAlignmentError

Add a configurable option to use IRAM as byte-accessible memory (in
single-core mode) by installing load/store exception handlers that emulate
non-word-aligned and non-word-size IRAM accesses. This allows IRAM to be
used for data in use cases where a certain performance penalty (up to 170
CPU cycles per load or store operation) is acceptable.

An additional configuration option redirects the mbedTLS in/out content
length buffers to IRAM (in single-core mode), which saves 20KB per TLS
connection.
---
 components/esp32/Kconfig | 10 +
 components/freertos/CMakeLists.txt | 4 +
 components/freertos/component.mk | 4 +
 .../test/test_xtensa_loadstore_handler.c | 122 ++++
 .../xtensa/xtensa_loadstore_handler.S | 559 ++++++++++++++++++
 components/freertos/xtensa/xtensa_vectors.S | 28 +
 components/heap/include/esp_heap_caps.h | 3 +
 components/heap/test/test_malloc_caps.c | 25 +
 components/mbedtls/Kconfig | 11 +
 components/mbedtls/port/esp_mem.c | 11 +
 components/soc/src/esp32/soc_memory_layout.c | 20 +-
 tools/ci/config/target-test.yml | 4 +-
 tools/unit-test-app/configs/single_core | 1 +
 13 files changed, 793 insertions(+), 9 deletions(-)
 create mode 100644 components/freertos/test/test_xtensa_loadstore_handler.c
 create mode 100644 components/freertos/xtensa/xtensa_loadstore_handler.S

diff --git a/components/esp32/Kconfig b/components/esp32/Kconfig
index fe8310f8a..3cac1cc54 100644
--- a/components/esp32/Kconfig
+++ b/components/esp32/Kconfig
@@ -700,6 +700,16 @@ menu "ESP32-specific"
            To prevent interrupting DPORT workarounds, need to disable interrupt with a maximum used level in the system.
 
+    config ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY
+        bool "Enable IRAM as 8 bit accessible memory"
+        depends on FREERTOS_UNICORE
+        help
+            If enabled, the application can use IRAM as a byte-accessible region for storing data
+            (note: the IRAM region cannot be used as task stack).
+
+            This is made possible by handling the `LoadStoreError (3)` and `LoadStoreAlignmentError (9)` exceptions.
+            Each unaligned read/write access incurs a penalty of at most 167 CPU cycles.
+
 endmenu  # ESP32-Specific
 
 menu "Power Management"
diff --git a/components/freertos/CMakeLists.txt b/components/freertos/CMakeLists.txt
index 4e4b7ff02..6471d3262 100644
--- a/components/freertos/CMakeLists.txt
+++ b/components/freertos/CMakeLists.txt
@@ -34,6 +34,10 @@ set(private_include_dirs
     xtensa
     .)
 
+if(CONFIG_ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY)
+    list(APPEND srcs "xtensa/xtensa_loadstore_handler.S")
+endif()
+
 # app_trace is required by FreeRTOS headers only when CONFIG_SYSVIEW_ENABLE=y,
 # but requirements can't depend on config options, so always require it.
 idf_component_register(SRCS "${srcs}"
diff --git a/components/freertos/component.mk b/components/freertos/component.mk
index 8a39282d8..fcf27d80a 100644
--- a/components/freertos/component.mk
+++ b/components/freertos/component.mk
@@ -10,5 +10,9 @@ COMPONENT_ADD_INCLUDEDIRS := include xtensa/include
 COMPONENT_PRIV_INCLUDEDIRS := include/freertos xtensa/include/freertos xtensa .
COMPONENT_SRCDIRS += xtensa +ifndef CONFIG_ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY + COMPONENT_OBJEXCLUDE := xtensa/xtensa_loadstore_handler.o +endif + tasks.o event_groups.o timers.o queue.o: CFLAGS += -D_ESP_FREERTOS_INTERNAL COMPONENT_ADD_LDFRAGMENTS += linker.lf diff --git a/components/freertos/test/test_xtensa_loadstore_handler.c b/components/freertos/test/test_xtensa_loadstore_handler.c new file mode 100644 index 000000000..6c386e16c --- /dev/null +++ b/components/freertos/test/test_xtensa_loadstore_handler.c @@ -0,0 +1,122 @@ +/* + Test for LoadStore exception handlers. This test performs unaligned load and store in 32bit aligned addresses +*/ + +#include +#include +#include +#include "esp_system.h" +#include "freertos/xtensa_api.h" +#include "unity.h" + +#ifdef CONFIG_ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY +TEST_CASE("LoadStore Exception handler", "[freertos]") +{ + int32_t val0 = 0xDEADBEEF; + int32_t val1 = 0xBBAA9988; + int32_t val2 = 0x77665544; + int32_t val3 = 0x33221100; + + int8_t val8_0 = val0 & 0xff; + int8_t val8_1 = val1 & 0xff; + int8_t val8_2 = val2 & 0xff; + int8_t val8_3 = val3 & 0xff; + + int16_t val16_0 = val0 & 0xffff; + int16_t val16_1 = val1 & 0xffff; + int16_t val16_2 = val2 & 0xffff; + int16_t val16_3 = val3 & 0xffff; + + uint32_t largest_free = heap_caps_get_largest_free_block(MALLOC_CAP_IRAM_8BIT); + + int8_t *arr = heap_caps_malloc(largest_free * sizeof(int8_t), MALLOC_CAP_IRAM_8BIT); + TEST_ASSERT(arr != NULL); + + int8_t *arr8 = arr; + int16_t *arr16 = (int16_t *)arr; + int32_t *arr32 = (int32_t *)arr; + + for (int i = 0; i < 1024; i++) { + + // LoadStoreError + + uint32_t offset = esp_random() % (largest_free - 20); + uint32_t offset8, offset16, offset32; + + // Get word aligned offset + offset8 = offset & ~3; + offset16 = offset8 / 2; + offset32 = offset8 / 4; + + arr8[offset8] = val8_0; + arr8[offset8+1] = val8_1; + arr8[offset8+2] = val8_2; + arr8[offset8+3] = val8_3; + + // Just to make sure compiler doesn't read stale data + asm volatile("memw\n"); + TEST_ASSERT_EQUAL(val8_0, arr8[offset8]); + TEST_ASSERT_EQUAL(val8_1, arr8[offset8+1]); + TEST_ASSERT_EQUAL(val8_2, arr8[offset8+2]); + TEST_ASSERT_EQUAL(val8_3, arr8[offset8+3]); + + arr16[offset16] = val16_0; + arr16[offset16+1] = val16_1; + arr16[offset16+2] = val16_2; + arr16[offset16+3] = val16_3; + + // Just to make sure compiler doesn't read stale data + asm volatile("memw\n"); + TEST_ASSERT_EQUAL(val16_0, arr16[offset16]); + TEST_ASSERT_EQUAL(val16_1, arr16[offset16+1]); + TEST_ASSERT_EQUAL(val16_2, arr16[offset16+2]); + TEST_ASSERT_EQUAL(val16_3, arr16[offset16+3]); + + // LoadStoreAlignement Error + + // Check that it doesn't write to adjacent bytes + int8_t *ptr8_0 = (void *)&arr8[offset8]; + int8_t *ptr8_1 = (void *)&arr8[offset8] + 5; + int8_t *ptr8_2 = (void *)&arr8[offset8] + 10; + int8_t *ptr8_3 = (void *)&arr8[offset8] + 15; + + *ptr8_0 = 0x73; + *ptr8_1 = 0x73; + *ptr8_2 = 0x73; + *ptr8_3 = 0x73; + + int16_t *ptr16_0 = (void *)&arr16[offset16] + 1; + int16_t *ptr16_1 = (void *)&arr16[offset16] + 3; + + *ptr16_0 = val16_0; + *ptr16_1 = val16_1; + + // Just to make sure compiler doesn't read stale data + asm volatile("memw\n"); + TEST_ASSERT_EQUAL(val16_0, *ptr16_0); + TEST_ASSERT_EQUAL(0x73, *ptr8_0); + TEST_ASSERT_EQUAL(val16_1, *ptr16_1); + TEST_ASSERT_EQUAL(0x73, *ptr8_1); + + int32_t *ptr32_0 = (void *)&arr32[offset32] + 1; + int32_t *ptr32_1 = (void *)&arr32[offset32] + 6; + int32_t *ptr32_2 = (void *)&arr32[offset32] + 11; + *ptr32_0 = val0; + *ptr32_1 = val1; + *ptr32_2 = val2; + + 
// Just to make sure compiler doesn't read stale data + asm volatile ("memw"); + TEST_ASSERT_EQUAL(0x73, *ptr8_0); + TEST_ASSERT_EQUAL(val0, *ptr32_0); + TEST_ASSERT_EQUAL(0x73, *ptr8_1); + TEST_ASSERT_EQUAL(val1, *ptr32_1); + TEST_ASSERT_EQUAL(0x73, *ptr8_2); + TEST_ASSERT_EQUAL(val2, *ptr32_2); + TEST_ASSERT_EQUAL(0x73, *ptr8_3); + } + + TEST_ASSERT_TRUE(heap_caps_check_integrity_all(true)); + heap_caps_free(arr); +} +#endif diff --git a/components/freertos/xtensa/xtensa_loadstore_handler.S b/components/freertos/xtensa/xtensa_loadstore_handler.S new file mode 100644 index 000000000..88fc82c33 --- /dev/null +++ b/components/freertos/xtensa/xtensa_loadstore_handler.S @@ -0,0 +1,559 @@ +/* + Copyright 2019 Espressif Systems (Shanghai) PTE LTD + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + * LoadStoreErrorCause: Occurs when trying to access 32 bit addressable memory region as 8 bit or 16 bit + * LoadStoreAlignmentCause: Occurs when trying to access in an unaligned manner + * + * xxxx xxxx = imm8 field + * yyyy = imm4 field + * ssss = s field + * tttt = t field + * + * 16 0 + * ------------------- + * L32I.N yyyy ssss tttt 1000 + * S32I.N yyyy ssss tttt 1001 + * + * 23 0 + * ----------------------------- + * L8UI xxxx xxxx 0000 ssss tttt 0010 <- LoadStoreError + * L16UI xxxx xxxx 0001 ssss tttt 0010 <- LoadStoreError, LoadStoreAlignment + * L16SI xxxx xxxx 1001 ssss tttt 0010 <- LoadStoreError, LoadStoreAlignment + * L32I xxxx xxxx 0010 ssss tttt 0010 <- LoadStoreAlignment + * + * S8I xxxx xxxx 0100 ssss tttt 0010 <- LoadStoreError + * S16I xxxx xxxx 0101 ssss tttt 0010 <- LoadStoreError, LoadStoreAlignment + * S32I xxxx xxxx 0110 ssss tttt 0010 <- LoadStoreAlignment + * + * ******* UNSUPPORTED ******* + * + * L32E 0000 1001 rrrr ssss tttt 0000 + * S32E 0100 1001 rrrr ssss tttt 0000 + * ----------------------------- + */ + +#include "xtensa_rtos.h" +#include "sdkconfig.h" +#include "soc/soc.h" + +#define LOADSTORE_HANDLER_STACK_SZ 8 + .section .bss, "aw" + .balign 16 +LoadStoreHandlerStack: + .rept LOADSTORE_HANDLER_STACK_SZ + .word 0 + .endr + + +/* LoadStoreErrorCause handler: + * + * Completes 8-bit or 16-bit load/store instructions from 32-bit aligned memory region + * Called from UserExceptionVector if EXCCAUSE is LoadStoreErrorCause + */ + + .global LoadStoreErrorHandler + .section .iram1, "ax" + + .literal_position + + .balign 4 +LoadStoreErrorHandler: + .type LoadStoreErrorHandler, @function + + wsr a0, depc // Save return address in depc + mov a0, sp + movi sp, LoadStoreHandlerStack + s32i a0, sp, 0x04 // Since a0 contains value of a1 + s32i a2, sp, 0x08 + s32i a3, sp, 0x0c + s32i a4, sp, 0x10 + + rsr a0, sar // Save SAR in a0 to restore later + + /* Check whether the address lies in the valid range */ + rsr a3, excvaddr + movi a4, _iram_text_end // End of code section of IRAM + bge a3, a4, 1f + movi a4, SOC_CACHE_APP_LOW // Check if in APP cache region + blt a3, a4, .LS_wrong_opcode + movi a4, SOC_CACHE_APP_HIGH + bge a3, a4, .LS_wrong_opcode + j 2f + +1: + movi a4, SOC_IRAM_HIGH // 
End of IRAM address range + bge a3, a4, .LS_wrong_opcode + +2: + /* Examine the opcode which generated the exception */ + /* Note: Instructions are in this order to avoid pipeline stalls. */ + rsr a2, epc1 + movi a4, ~3 + ssa8l a2 // sar is now correct shift for aligned read + and a2, a2, a4 // a2 now 4-byte aligned address of instruction + l32i a4, a2, 0 + l32i a2, a2, 4 + + src a2, a2, a4 // a2 now instruction that failed + bbci a2, 1, .LS_wrong_opcode + bbsi a2, 14, .LSE_store_op // Store instruction + + /* l8/l16ui/l16si */ + movi a4, ~3 + and a4, a3, a4 // a4 now word aligned read address + + ssa8l a3 // sar is now shift to extract a3's byte + l32i a4, a4, 0 // perform the actual read + srl a4, a4 // shift right correct distance + extui a3, a2, 12, 4 + bnez a3, 1f // l16ui/l16si + extui a4, a4, 0, 8 // mask off bits needed for an l8 + j 2f + +1: + extui a4, a4, 0, 16 + bbci a2, 15, 2f // l16ui + + /* Sign adjustment */ + slli a4, a4, 16 + srai a4, a4, 16 // a4 contains the value + +2: + /* a4 contains the value */ + rsr a3, epc1 + addi a3, a3, 3 + wsr a3, epc1 + wsr a0, sar + rsr a0, excsave1 + + extui a2, a2, 3, 5 + blti a2, 10, .LSE_stack_reg + + movi a3, .LS_jumptable_base + addx8 a2, a2, a3 // a2 is now the address to jump to + l32i a3, sp, 0x0c + jx a2 + +.LSE_stack_reg: + addx2 a2, a2, sp + s32i a4, a2, 0 + + /* Restore all values */ + l32i a4, sp, 0x10 + l32i a3, sp, 0x0c + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + +.LSE_store_op: + s32i a5, a1, 0x14 + s32i a6, a1, 0x18 + + /* a2 -> instruction that caused the error */ + /* a3 -> unaligned address */ + extui a4, a2, 4, 4 + blti a4, 7, 1f + movi a5, .LSE_store_reg + addx8 a5, a4, a5 + jx a5 + +1: + addx4 a4, a4, sp + l32i a4, a4, 0 + +.LSE_store_data: + /* a4 contains the value */ + rsr a6, epc1 + addi a6, a6, 3 + wsr a6, epc1 + + ssa8b a3 + movi a5, -1 + bbsi a2, 12, 1f // s16 + extui a4, a4, 0, 8 + movi a6, 0xff + j 2f +1: + extui a4, a4, 0, 16 + movi a6, 0xffff +2: + sll a4, a4 // shift the value to proper offset + sll a6, a6 + xor a5, a5, a6 // a5 contains the mask + + movi a6, ~3 + and a3, a3, a6 // a3 has the aligned address + l32i a6, a3, 0 // a6 contains the data at the aligned address + and a6, a6, a5 + or a4, a6, a4 + s32i a4, a3, 0 + + /* Restore registers */ + wsr a0, sar + + l32i a6, sp, 0x18 + l32i a5, sp, 0x14 + l32i a4, sp, 0x10 + l32i a3, sp, 0x0c + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rsr a0, excsave1 + + rfe + +.LSE_store_reg: + .org .LSE_store_reg + (7 * 8) + mov a4, a7 + j .LSE_store_data + + .org .LSE_store_reg + (8 * 8) + mov a4, a8 + j .LSE_store_data + + .org .LSE_store_reg + (9 * 8) + mov a4, a9 + j .LSE_store_data + + .org .LSE_store_reg + (10 * 8) + mov a4, a10 + j .LSE_store_data + + .org .LSE_store_reg + (11 * 8) + mov a4, a11 + j .LSE_store_data + + .org .LSE_store_reg + (12 * 8) + mov a4, a12 + j .LSE_store_data + + .org .LSE_store_reg + (13 * 8) + mov a4, a13 + j .LSE_store_data + + .org .LSE_store_reg + (14 * 8) + mov a4, a14 + j .LSE_store_data + + .org .LSE_store_reg + (15 * 8) + mov a4, a15 + j .LSE_store_data + + +/* LoadStoreAlignmentCause handler: + * + * Completes unaligned 16-bit and 32-bit load/store instructions from 32-bit aligned memory region + * Called from UserExceptionVector if EXCCAUSE is LoadStoreAlignmentCause + */ + + .global AlignmentErrorHandler + .section .iram1, "ax" + + .literal_position + + .balign 4 +AlignmentErrorHandler: + .type AlignmentErrorHandler, @function + + wsr a0, depc // Save return address in depc + mov a0, sp + movi sp, LoadStoreHandlerStack + s32i 
a0, sp, 0x04 // Since a0 contains value of a1 + s32i a2, sp, 0x08 + s32i a3, sp, 0x0c + s32i a4, sp, 0x10 + + rsr a0, sar // Save SAR in a0 to restore later + + /* Check whether the address lies in the valid range */ + rsr a3, excvaddr + movi a4, _iram_text_end // End of code section of IRAM + bge a3, a4, 1f + movi a4, SOC_CACHE_APP_LOW // Check if in APP cache region + blt a3, a4, .LS_wrong_opcode + movi a4, SOC_CACHE_APP_HIGH + bge a3, a4, .LS_wrong_opcode + j 2f + +1: + movi a4, SOC_IRAM_HIGH // End of IRAM address range + bge a3, a4, .LS_wrong_opcode + +2: + /* Examine the opcode which generated the exception */ + /* Note: Instructions are in this order to avoid pipeline stalls. */ + rsr a2, epc1 + movi a4, ~3 + ssa8l a2 // sar is now correct shift for aligned read + and a2, a2, a4 // a2 now 4-byte aligned address of instruction + l32i a4, a2, 0 + l32i a2, a2, 4 + + /* a2 has the instruction that caused the error */ + src a2, a2, a4 + extui a4, a2, 0, 4 + addi a4, a4, -9 + beqz a4, .LSA_store_op + bbsi a2, 14, .LSA_store_op + + ssa8l a3 // a3 contains the unaligned address + movi a4, ~3 + and a4, a3, a4 // a4 has the aligned address + l32i a3, a4, 0 + l32i a4, a4, 4 + src a4, a4, a3 + + rsr a3, epc1 + addi a3, a3, 2 + bbsi a2, 3, 1f // l32i.n + bbci a2, 1, .LS_wrong_opcode + addi a3, a3, 1 + + bbsi a2, 13, 1f // l32 + extui a4, a4, 0, 16 + bbci a2, 15, 1f // l16ui + + /* Sign adjustment */ + slli a4, a4, 16 + srai a4, a4, 16 // a4 contains the value + +1: + wsr a3, epc1 + wsr a0, sar + rsr a0, excsave1 + + extui a2, a2, 4, 4 + blti a2, 5, .LSA_stack_reg // a3 contains the target register + + movi a3, .LS_jumptable_base + slli a2, a2, 4 + add a2, a2, a3 // a2 is now the address to jump to + l32i a3, sp, 0x0c + jx a2 + +.LSA_stack_reg: + addx4 a2, a2, sp + s32i a4, a2, 0 + + /* Restore all values */ + l32i a4, sp, 0x10 + l32i a3, sp, 0x0c + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + +/* Store instruction */ +.LSA_store_op: + s32i a5, sp, 0x14 + s32i a6, sp, 0x18 + s32i a7, sp, 0x1c + + /* a2 -> instruction that caused the error */ + /* a3 -> unaligned address */ + extui a4, a2, 4, 4 + blti a4, 8, 1f + movi a5, .LSA_store_reg + addx8 a5, a4, a5 + jx a5 + +1: + addx4 a4, a4, sp + l32i a4, a4, 0 // a4 contains the value + +.LSA_store_data: + movi a6, 0 + + rsr a7, epc1 + addi a7, a7 ,2 + bbsi a2, 3, 1f // s32i.n + bbci a2, 1, .LS_wrong_opcode + + addi a7, a7, 1 + bbsi a2, 13, 1f // s32i + + movi a5, -1 + extui a4, a4, 0, 16 + slli a6, a5, 16 // 0xffff0000 + +1: + wsr a7, epc1 + movi a5, ~3 + and a5, a3, a5 // a5 has the aligned address + + ssa8b a3 + movi a3, -1 + src a7, a6, a3 + src a3, a3, a6 + + /* Store data on lower address */ + l32i a6, a5, 0 + and a6, a6, a7 + sll a7, a4 + or a6, a6, a7 + s32i a6, a5, 0 + + /* Store data on higher address */ + l32i a7, a5, 4 + srl a6, a4 + and a3, a7, a3 + or a3, a3, a6 + s32i a3, a5, 4 + + /* Restore registers */ + wsr a0, sar + rsr a0, excsave1 + + l32i a7, sp, 0x1c + l32i a6, sp, 0x18 + l32i a5, sp, 0x14 + l32i a4, sp, 0x10 + l32i a3, sp, 0x0c + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + +.LSA_store_reg: + .org .LSA_store_reg + (8 * 8) + mov a4, a8 + j .LSA_store_data + + .org .LSA_store_reg + (9 * 8) + mov a4, a9 + j .LSA_store_data + + .org .LSA_store_reg + (10 * 8) + mov a4, a10 + j .LSA_store_data + + .org .LSA_store_reg + (11 * 8) + mov a4, a11 + j .LSA_store_data + + .org .LSA_store_reg + (12 * 8) + mov a4, a12 + j .LSA_store_data + + .org .LSA_store_reg + (13 * 8) + mov a4, a13 + j .LSA_store_data + + .org .LSA_store_reg + (14 * 8) + 
mov a4, a14 + j .LSA_store_data + + .org .LSA_store_reg + (15 * 8) + mov a4, a15 + j .LSA_store_data + +/* + * Common routines for both the exception handlers + */ + .balign 4 +.LS_jumptable: + /* The first 5 entries (80 bytes) of this table are unused (registers + a0..a4 are handled separately above). Rather than have a whole bunch + of wasted space, just pretend that the table starts 80 bytes + earlier in memory. */ + .set .LS_jumptable_base, .LS_jumptable - (16 * 5) + + .org .LS_jumptable_base + (16 * 5) + mov a5, a4 + l32i a4, sp, 0x10 + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + + .org .LS_jumptable_base + (16 * 6) + mov a6, a4 + l32i a4, sp, 0x10 + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + + .org .LS_jumptable_base + (16 * 7) + mov a7, a4 + l32i a4, sp, 0x10 + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + + .org .LS_jumptable_base + (16 * 8) + mov a8, a4 + l32i a4, sp, 0x10 + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + + .org .LS_jumptable_base + (16 * 9) + mov a9, a4 + l32i a4, sp, 0x10 + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + + .org .LS_jumptable_base + (16 * 10) + mov a10, a4 + l32i a4, sp, 0x10 + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + + .org .LS_jumptable_base + (16 * 11) + mov a11, a4 + l32i a4, sp, 0x10 + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + + .org .LS_jumptable_base + (16 * 12) + mov a12, a4 + l32i a4, sp, 0x10 + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + + .org .LS_jumptable_base + (16 * 13) + mov a13, a4 + l32i a4, sp, 0x10 + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + + .org .LS_jumptable_base + (16 * 14) + mov a14, a4 + l32i a4, sp, 0x10 + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + + .org .LS_jumptable_base + (16 * 15) + mov a15, a4 + l32i a4, sp, 0x10 + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rfe + +.LS_wrong_opcode: + /* Reaches here if the address is in invalid range or the opcode isn't supported. + * Restore registers and jump back to _xt_user_exc + */ + wsr a0, sar + l32i a4, sp, 0x10 + l32i a3, sp, 0x0c + l32i a2, sp, 0x08 + l32i a1, sp, 0x04 + rsr a0, depc + ret // Equivalent to jx a0 diff --git a/components/freertos/xtensa/xtensa_vectors.S b/components/freertos/xtensa/xtensa_vectors.S index 278203729..760fc5134 100644 --- a/components/freertos/xtensa/xtensa_vectors.S +++ b/components/freertos/xtensa/xtensa_vectors.S @@ -611,6 +611,11 @@ _UserExceptionVector: -------------------------------------------------------------------------------- */ +#ifdef CONFIG_ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY + .global LoadStoreErrorHandler + .global AlignmentErrorHandler +#endif + .section .iram1,"ax" #if XCHAL_HAVE_WINDOWED @@ -632,6 +637,20 @@ _xt_to_coproc_exc: /* never returns here - call0 is used as a jump (see note at top) */ #endif +#ifdef CONFIG_ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY + .align 4 +_call_loadstore_handler: + call0 LoadStoreErrorHandler + /* This will return only if wrong opcode or address out of range*/ + j .LS_exit + + .align 4 +_call_alignment_handler: + call0 AlignmentErrorHandler + /* This will return only if wrong opcode or address out of range*/ + addi a0, a0, 1 + j .LS_exit +#endif /* -------------------------------------------------------------------------------- @@ -661,6 +680,15 @@ _xt_user_exc: #endif beqi a0, EXCCAUSE_SYSCALL, _xt_to_syscall_exc +#ifdef CONFIG_ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY + beqi a0, EXCCAUSE_LOAD_STORE_ERROR, _call_loadstore_handler + + addi a0, a0, -1 + beqi a0, 8, _call_alignment_handler + addi a0, a0, 1 +.LS_exit: +#endif + /* Handle all other exceptions. 
All can have user-defined handlers. */
    /* NOTE: we'll stay on the user stack for exception handling. */
diff --git a/components/heap/include/esp_heap_caps.h b/components/heap/include/esp_heap_caps.h
index 61db74e1c..6ccbdf943 100644
--- a/components/heap/include/esp_heap_caps.h
+++ b/components/heap/include/esp_heap_caps.h
@@ -16,6 +16,7 @@
 #include
 #include
 #include "multi_heap.h"
+#include
 
 #ifdef __cplusplus
 extern "C" {
@@ -37,6 +38,8 @@ extern "C" {
 #define MALLOC_CAP_SPIRAM (1<<10) ///< Memory must be in SPI RAM
 #define MALLOC_CAP_INTERNAL (1<<11) ///< Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
 #define MALLOC_CAP_DEFAULT (1<<12) ///< Memory can be returned in a non-capability-specific memory allocation (e.g. malloc(), calloc()) call
+#define MALLOC_CAP_IRAM_8BIT (1<<13) ///< Memory must be in IRAM and allow unaligned access
+
 #define MALLOC_CAP_INVALID (1<<31) ///< Memory can't be used / list end marker
 
 /**
diff --git a/components/heap/test/test_malloc_caps.c b/components/heap/test/test_malloc_caps.c
index 19e1d626b..2b069953d 100644
--- a/components/heap/test/test_malloc_caps.c
+++ b/components/heap/test/test_malloc_caps.c
@@ -101,6 +101,31 @@ TEST_CASE("Capabilities allocator test", "[heap]")
     printf("Done.\n");
 }
 
+#ifdef CONFIG_ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY
+TEST_CASE("IRAM_8BIT capability test", "[heap]")
+{
+    uint8_t *ptr;
+    size_t free_size, free_size32, largest_free_size;
+
+    /* need to print something as first printf allocates some heap */
+    printf("IRAM_8BIT capability test\n");
+
+    free_size = heap_caps_get_free_size(MALLOC_CAP_IRAM_8BIT);
+    free_size32 = heap_caps_get_free_size(MALLOC_CAP_32BIT);
+
+    largest_free_size = heap_caps_get_largest_free_block(MALLOC_CAP_IRAM_8BIT);
+
+    ptr = heap_caps_malloc(largest_free_size, MALLOC_CAP_IRAM_8BIT);
+
+    TEST_ASSERT((((int)ptr)&0xFF000000)==0x40000000);
+
+    TEST_ASSERT(heap_caps_get_free_size(MALLOC_CAP_IRAM_8BIT) == (free_size - largest_free_size));
+    TEST_ASSERT(heap_caps_get_free_size(MALLOC_CAP_32BIT) == (free_size32 - largest_free_size));
+
+    free(ptr);
+}
+#endif
+
 TEST_CASE("heap_caps metadata test", "[heap]")
 {
     /* need to print something as first printf allocates some heap */
diff --git a/components/mbedtls/Kconfig b/components/mbedtls/Kconfig
index 039afbc43..99e72dac0 100644
--- a/components/mbedtls/Kconfig
+++ b/components/mbedtls/Kconfig
@@ -13,6 +13,7 @@ menu "mbedTLS"
               behavior in ESP-IDF
             - Custom allocation mode, by overwriting calloc()/free() using
               mbedtls_platform_set_calloc_free() function
+            - Internal IRAM memory wherever applicable, otherwise internal DRAM
 
             Recommended mode here is always internal, since that is most preferred
             from security perspective. But if application requirement does not
@@ -32,6 +33,16 @@ menu "mbedTLS"
         config MBEDTLS_CUSTOM_MEM_ALLOC
             bool "Custom alloc mode"
 
+        config MBEDTLS_IRAM_8BIT_MEM_ALLOC
+            bool "Internal IRAM"
+            depends on ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY
+            help
+                Allows using the IRAM memory region as an 8-bit accessible region.
+
+                TLS input and output buffers will be allocated in the IRAM section, which is
+                32-bit aligned memory. Every unaligned (8-bit or 16-bit) access will result in
+                an exception and incur a penalty of some clock cycles per unaligned read/write.
+ endchoice #MBEDTLS_MEM_ALLOC_MODE config MBEDTLS_SSL_MAX_CONTENT_LEN diff --git a/components/mbedtls/port/esp_mem.c b/components/mbedtls/port/esp_mem.c index c7b8e706f..3ba915ebb 100644 --- a/components/mbedtls/port/esp_mem.c +++ b/components/mbedtls/port/esp_mem.c @@ -25,6 +25,17 @@ IRAM_ATTR void *esp_mbedtls_mem_calloc(size_t n, size_t size) return heap_caps_calloc(n, size, MALLOC_CAP_INTERNAL|MALLOC_CAP_8BIT); #elif CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC return heap_caps_calloc(n, size, MALLOC_CAP_SPIRAM|MALLOC_CAP_8BIT); +#elif CONFIG_MBEDTLS_IRAM_8BIT_MEM_ALLOC +#ifdef CONFIG_MBEDTLS_ASYMMETRIC_CONTENT_LEN + if ((n*size) >= CONFIG_MBEDTLS_SSL_IN_CONTENT_LEN || (n*size) >= CONFIG_MBEDTLS_SSL_OUT_CONTENT_LEN) { +#else + if ((n*size) >= CONFIG_MBEDTLS_SSL_MAX_CONTENT_LEN) { +#endif + return heap_caps_calloc_prefer(n, size, 2, MALLOC_CAP_INTERNAL|MALLOC_CAP_IRAM_8BIT, MALLOC_CAP_INTERNAL|MALLOC_CAP_8BIT); + } else { + return heap_caps_calloc(n, size, MALLOC_CAP_INTERNAL|MALLOC_CAP_8BIT); + } + #else return calloc(n, size); #endif diff --git a/components/soc/src/esp32/soc_memory_layout.c b/components/soc/src/esp32/soc_memory_layout.c index 1a9144fc8..47494d404 100644 --- a/components/soc/src/esp32/soc_memory_layout.c +++ b/components/soc/src/esp32/soc_memory_layout.c @@ -21,6 +21,12 @@ #include "esp_heap_caps.h" #include "sdkconfig.h" +#ifdef CONFIG_ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY +#define MALLOC_IRAM_CAP MALLOC_CAP_EXEC|MALLOC_CAP_32BIT|MALLOC_CAP_IRAM_8BIT +#else +#define MALLOC_IRAM_CAP MALLOC_CAP_EXEC|MALLOC_CAP_32BIT +#endif + /* Memory layout for ESP32 SoC */ /* @@ -45,14 +51,14 @@ const soc_memory_type_desc_t soc_memory_types[] = { //(This DRAM is also the region used by ROM during startup) { "D/IRAM", { 0, MALLOC_CAP_DMA|MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL|MALLOC_CAP_DEFAULT, MALLOC_CAP_32BIT|MALLOC_CAP_EXEC }, true, true}, //Type 2: IRAM - { "IRAM", { MALLOC_CAP_EXEC|MALLOC_CAP_32BIT|MALLOC_CAP_INTERNAL, 0, 0 }, false, false}, + { "IRAM", { MALLOC_CAP_INTERNAL|MALLOC_IRAM_CAP, 0, 0 }, false, false}, //Type 3-8: PID 2-7 IRAM - { "PID2IRAM", { MALLOC_CAP_PID2|MALLOC_CAP_INTERNAL, 0, MALLOC_CAP_EXEC|MALLOC_CAP_32BIT }, false, false}, - { "PID3IRAM", { MALLOC_CAP_PID3|MALLOC_CAP_INTERNAL, 0, MALLOC_CAP_EXEC|MALLOC_CAP_32BIT }, false, false}, - { "PID4IRAM", { MALLOC_CAP_PID4|MALLOC_CAP_INTERNAL, 0, MALLOC_CAP_EXEC|MALLOC_CAP_32BIT }, false, false}, - { "PID5IRAM", { MALLOC_CAP_PID5|MALLOC_CAP_INTERNAL, 0, MALLOC_CAP_EXEC|MALLOC_CAP_32BIT }, false, false}, - { "PID6IRAM", { MALLOC_CAP_PID6|MALLOC_CAP_INTERNAL, 0, MALLOC_CAP_EXEC|MALLOC_CAP_32BIT }, false, false}, - { "PID7IRAM", { MALLOC_CAP_PID7|MALLOC_CAP_INTERNAL, 0, MALLOC_CAP_EXEC|MALLOC_CAP_32BIT }, false, false}, + { "PID2IRAM", { MALLOC_CAP_PID2|MALLOC_CAP_INTERNAL, 0, MALLOC_IRAM_CAP }, false, false}, + { "PID3IRAM", { MALLOC_CAP_PID3|MALLOC_CAP_INTERNAL, 0, MALLOC_IRAM_CAP }, false, false}, + { "PID4IRAM", { MALLOC_CAP_PID4|MALLOC_CAP_INTERNAL, 0, MALLOC_IRAM_CAP }, false, false}, + { "PID5IRAM", { MALLOC_CAP_PID5|MALLOC_CAP_INTERNAL, 0, MALLOC_IRAM_CAP }, false, false}, + { "PID6IRAM", { MALLOC_CAP_PID6|MALLOC_CAP_INTERNAL, 0, MALLOC_IRAM_CAP }, false, false}, + { "PID7IRAM", { MALLOC_CAP_PID7|MALLOC_CAP_INTERNAL, 0, MALLOC_IRAM_CAP }, false, false}, //Type 9-14: PID 2-7 DRAM { "PID2DRAM", { MALLOC_CAP_PID2|MALLOC_CAP_INTERNAL, MALLOC_CAP_8BIT, MALLOC_CAP_32BIT|MALLOC_CAP_DEFAULT }, false, false}, { "PID3DRAM", { MALLOC_CAP_PID3|MALLOC_CAP_INTERNAL, MALLOC_CAP_8BIT, MALLOC_CAP_32BIT|MALLOC_CAP_DEFAULT }, false, false}, 
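For reference (not part of the patch): the allocation strategy used by esp_mbedtls_mem_calloc() above can be reused by any component that wants large buffers placed in byte-accessible IRAM with a DRAM fallback. A minimal sketch, assuming the same heap_caps API that this patch already calls; the wrapper name tls_buffer_calloc is illustrative only:

#include <stddef.h>
#include "esp_heap_caps.h"

/* Prefer byte-accessible IRAM (served via the new load/store handlers);
 * fall back to ordinary internal 8-bit capable memory (DRAM) if no IRAM block fits. */
static void *tls_buffer_calloc(size_t n, size_t size)
{
    return heap_caps_calloc_prefer(n, size, 2,
                                   MALLOC_CAP_INTERNAL | MALLOC_CAP_IRAM_8BIT,
                                   MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
}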
diff --git a/tools/ci/config/target-test.yml b/tools/ci/config/target-test.yml index b17ca38be..5bd4091aa 100644 --- a/tools/ci/config/target-test.yml +++ b/tools/ci/config/target-test.yml @@ -345,7 +345,7 @@ UT_001: UT_002: extends: .unit_test_template - parallel: 12 + parallel: 13 tags: - ESP32_IDF - UT_T1_1 @@ -426,7 +426,7 @@ UT_017: UT_018: extends: .unit_test_template - parallel: 2 + parallel: 3 tags: - ESP32_IDF - UT_T1_1 diff --git a/tools/unit-test-app/configs/single_core b/tools/unit-test-app/configs/single_core index 99c9bb818..2b354bf3c 100644 --- a/tools/unit-test-app/configs/single_core +++ b/tools/unit-test-app/configs/single_core @@ -1,4 +1,5 @@ TEST_COMPONENTS=freertos esp32 esp_timer driver heap pthread soc spi_flash vfs CONFIG_MEMMAP_SMP=n CONFIG_FREERTOS_UNICORE=y +CONFIG_ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY=y CONFIG_ESP32_RTCDATA_IN_FAST_MEM=y
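
With CONFIG_ESP32_IRAM_AS_8BIT_ACCESSIBLE_MEMORY=y (as enabled in the single_core unit-test config above), application code can request byte-accessible IRAM through the new MALLOC_CAP_IRAM_8BIT capability. A minimal usage sketch, mirroring the unit test added by this patch; the function name and buffer size are illustrative and not part of the patch:

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "esp_heap_caps.h"

static void iram_byte_buffer_demo(void)
{
    /* Allocate a buffer in IRAM, a region that is normally only word-accessible. */
    uint8_t *buf = heap_caps_malloc(1024, MALLOC_CAP_IRAM_8BIT);
    if (buf == NULL) {
        printf("IRAM_8BIT allocation failed\n");
        return;
    }

    /* Byte-wise stores/loads would normally raise LoadStoreError; with this patch
     * they are completed by LoadStoreErrorHandler at a cost of up to ~170 cycles each. */
    memset(buf, 0xA5, 1024);
    buf[1] = 0x42;

    /* Unaligned 16-bit access is completed by AlignmentErrorHandler. */
    uint16_t *half = (uint16_t *)&buf[1];
    *half = 0x1234;

    printf("buf[1]=0x%02x *half=0x%04x\n", buf[1], *half);
    heap_caps_free(buf);
}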