Merge branch 'feature/interrupt_overhead_improvement' into 'master'
feature/interrupt overhead improvement Closes IDF-248 See merge request espressif/esp-idf!6328
This commit is contained in:
commit
a7aea56977
7 changed files with 211 additions and 41 deletions
|
@ -5,7 +5,7 @@ if(IDF_TARGET STREQUAL "esp32")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
idf_component_register(SRC_DIRS ${src_dirs}
|
idf_component_register(SRC_DIRS ${srcdirs}
|
||||||
INCLUDE_DIRS .
|
INCLUDE_DIRS .
|
||||||
REQUIRES unity test_utils
|
REQUIRES unity test_utils
|
||||||
)
|
)
|
73
components/freertos/test/test_isr_latency.c
Normal file
73
components/freertos/test/test_isr_latency.c
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
#include <esp_types.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "freertos/FreeRTOS.h"
|
||||||
|
#include "freertos/task.h"
|
||||||
|
#include "freertos/semphr.h"
|
||||||
|
#include "freertos/queue.h"
|
||||||
|
#include "freertos/xtensa_api.h"
|
||||||
|
#include "esp_intr_alloc.h"
|
||||||
|
#include "xtensa/hal.h"
|
||||||
|
#include "unity.h"
|
||||||
|
#include "soc/cpu.h"
|
||||||
|
#include "test_utils.h"
|
||||||
|
|
||||||
|
/* Bit position shifted into the mask passed to xt_set_intset()/xt_set_intclear().
 * NOTE(review): presumably the CPU interrupt number of the level-1 software
 * interrupt allocated via ETS_INTERNAL_SW0_INTR_SOURCE -- confirm. */
#define SW_ISR_LEVEL_1 7
|
||||||
|
|
||||||
|
/* Given from software_isr, taken by test_task once per loop iteration. */
static SemaphoreHandle_t sync;
|
||||||
|
/* Given by test_task when the measurement is finished, taken by the test case. */
static SemaphoreHandle_t end_sema;
|
||||||
|
/* Cycle counter value stamped by test_task right before raising the interrupt. */
static uint32_t cycle_before_trigger;
|
||||||
|
/* Cycle counter value stamped by software_isr as its last statement. */
static uint32_t cycle_before_exit;
|
||||||
|
/* Accumulated (then averaged) cycles from trigger to the first ISR statement. */
static uint32_t delta_enter_cycles = 0;
|
||||||
|
/* Accumulated (then averaged) cycles from the end of the ISR back into test_task. */
static uint32_t delta_exit_cycles = 0;
|
||||||
|
|
||||||
|
static void software_isr(void *arg) {
|
||||||
|
(void)arg;
|
||||||
|
BaseType_t yield;
|
||||||
|
delta_enter_cycles += portGET_RUN_TIME_COUNTER_VALUE() - cycle_before_trigger;
|
||||||
|
|
||||||
|
xt_set_intclear(1 << SW_ISR_LEVEL_1);
|
||||||
|
|
||||||
|
xSemaphoreGiveFromISR(sync, &yield);
|
||||||
|
if(yield) {
|
||||||
|
portYIELD_FROM_ISR();
|
||||||
|
}
|
||||||
|
|
||||||
|
cycle_before_exit = portGET_RUN_TIME_COUNTER_VALUE();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_task(void *arg) {
|
||||||
|
(void)arg;
|
||||||
|
|
||||||
|
intr_handle_t handle;
|
||||||
|
|
||||||
|
esp_err_t err = esp_intr_alloc(ETS_INTERNAL_SW0_INTR_SOURCE, ESP_INTR_FLAG_LEVEL1, &software_isr, NULL, &handle);
|
||||||
|
TEST_ASSERT_EQUAL_HEX32(ESP_OK, err);
|
||||||
|
|
||||||
|
for(int i = 0;i < 10000; i++) {
|
||||||
|
cycle_before_trigger = portGET_RUN_TIME_COUNTER_VALUE();
|
||||||
|
xt_set_intset(1 << SW_ISR_LEVEL_1);
|
||||||
|
xSemaphoreTake(sync, portMAX_DELAY);
|
||||||
|
delta_exit_cycles += portGET_RUN_TIME_COUNTER_VALUE() - cycle_before_exit;
|
||||||
|
}
|
||||||
|
|
||||||
|
delta_enter_cycles /= 10000;
|
||||||
|
delta_exit_cycles /= 10000;
|
||||||
|
|
||||||
|
esp_intr_free(handle);
|
||||||
|
xSemaphoreGive(end_sema);
|
||||||
|
vTaskDelete(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Runs test_task pinned to core 0 at the highest priority, waits for it to
 * finish and checks the averaged ISR entry/exit cycle counts against the
 * IDF_PERFORMANCE_MAX_ISR_* thresholds.
 */
TEST_CASE("isr latency test", "[freertos] [ignore]")
{
    sync = xSemaphoreCreateBinary();
    TEST_ASSERT(sync != NULL);
    end_sema = xSemaphoreCreateBinary();
    TEST_ASSERT(end_sema != NULL);

    /* Without the task nobody ever gives end_sema, so fail here instead of
     * blocking forever on the take below. */
    BaseType_t created = xTaskCreatePinnedToCore(test_task, "tst" , 4096, NULL, configMAX_PRIORITIES - 1, NULL, 0);
    TEST_ASSERT_EQUAL_HEX32(pdPASS, created);

    BaseType_t result = xSemaphoreTake(end_sema, portMAX_DELAY);
    TEST_ASSERT_EQUAL_HEX32(pdTRUE, result);

    /* test_task has freed its interrupt and no longer touches the semaphores:
     * release them before the performance checks so a failed check does not
     * leak them. */
    vSemaphoreDelete(end_sema);
    end_sema = NULL;
    vSemaphoreDelete(sync);
    sync = NULL;

    TEST_PERFORMANCE_LESS_THAN(ISR_ENTER_CYCLES, "%d cycles" ,delta_enter_cycles);
    TEST_PERFORMANCE_LESS_THAN(ISR_EXIT_CYCLES, "%d cycles" ,delta_exit_cycles);
}
|
88
components/freertos/xt_asm_utils.h
Normal file
88
components/freertos/xt_asm_utils.h
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2017, Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Copyright 2015-2018 Espressif Systems (Shanghai) PTE LTD
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* File adapted to use on IDF FreeRTOS component, extracted
|
||||||
|
* originally from zephyr RTOS code base:
|
||||||
|
* https://github.com/zephyrproject-rtos/zephyr/blob/dafd348/arch/xtensa/include/xtensa-asm2-s.h
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __XT_ASM_UTILS_H
|
||||||
|
#define __XT_ASM_UTILS_H
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SPILL_ALL_WINDOWS
|
||||||
|
*
|
||||||
|
* Spills all windowed registers (i.e. registers not visible as
|
||||||
|
* A0-A15) to their ABI-defined spill regions on the stack.
|
||||||
|
*
|
||||||
|
* Unlike the Xtensa HAL implementation, this code requires that the
|
||||||
|
* EXCM and WOE bit be enabled in PS, and relies on repeated hardware
|
||||||
|
* exception handling to do the register spills. The trick is to do a
|
||||||
|
* noop write to the high registers, which the hardware will trap
|
||||||
|
* (into an overflow exception) in the case where those registers are
|
||||||
|
* already used by an existing call frame. Then it rotates the window
|
||||||
|
* and repeats until all but the A0-A3 registers of the original frame
|
||||||
|
* are guaranteed to be spilled, eventually rotating back around into
|
||||||
|
* the original frame. Advantages:
|
||||||
|
*
|
||||||
|
* - Vastly smaller code size
|
||||||
|
*
|
||||||
|
* - More easily maintained if changes are needed to window over/underflow
|
||||||
|
* exception handling.
|
||||||
|
*
|
||||||
|
* - Requires no scratch registers to do its work, so can be used safely in any
|
||||||
|
* context.
|
||||||
|
*
|
||||||
|
* - If the WOE bit is not enabled (for example, in code written for
|
||||||
|
* the CALL0 ABI), this becomes a silent noop and operates compatibly.
|
||||||
|
*
|
||||||
|
* - Hilariously it's ACTUALLY FASTER than the HAL routine. And not
|
||||||
|
* just a little bit, it's MUCH faster. With a mostly full register
|
||||||
|
* file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
|
||||||
|
* registers with this vs. 279 (!) to do it with
|
||||||
|
* xthal_spill_windows().
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Each "and aN, aN, aN" below is the no-op register write described in the
 * comment above: it changes nothing, but touching the register lets the
 * hardware raise a window overflow exception when it belongs to a live frame.
 * Every write is followed by a window rotation; the rotation amounts add up
 * to 16 (3+3+3+3+4) for 64 address registers and to 8 (3+3+2) for 32.
 * NOTE(review): assumes one rotw step covers 4 address registers, so the sum
 * is exactly one full trip back to the original frame -- confirm against the
 * Xtensa ISA reference.
 */
.macro SPILL_ALL_WINDOWS
|
||||||
|
#if XCHAL_NUM_AREGS == 64
|
||||||
|
and a12, a12, a12
|
||||||
|
rotw 3
|
||||||
|
and a12, a12, a12
|
||||||
|
rotw 3
|
||||||
|
and a12, a12, a12
|
||||||
|
rotw 3
|
||||||
|
and a12, a12, a12
|
||||||
|
rotw 3
|
||||||
|
and a12, a12, a12
|
||||||
|
rotw 4
|
||||||
|
#elif XCHAL_NUM_AREGS == 32
|
||||||
|
and a12, a12, a12
|
||||||
|
rotw 3
|
||||||
|
and a12, a12, a12
|
||||||
|
rotw 3
|
||||||
|
/* Final step touches a4 (not a12) and rotates by 2 to complete the 8 steps. */
and a4, a4, a4
|
||||||
|
rotw 2
|
||||||
|
#else
|
||||||
|
/* Only 32- and 64-entry register files are handled. */
#error Unrecognized XCHAL_NUM_AREGS
|
||||||
|
#endif
|
||||||
|
.endm
|
||||||
|
|
||||||
|
#endif
|
|
@ -51,6 +51,7 @@ NOERROR: .error "C preprocessor needed for this file: make sure its filename\
|
||||||
|
|
||||||
#include "xtensa_rtos.h"
|
#include "xtensa_rtos.h"
|
||||||
#include "xtensa_context.h"
|
#include "xtensa_context.h"
|
||||||
|
#include "xt_asm_utils.h"
|
||||||
|
|
||||||
#ifdef XT_USE_OVLY
|
#ifdef XT_USE_OVLY
|
||||||
#include <xtensa/overlay_os_asm.h>
|
#include <xtensa/overlay_os_asm.h>
|
||||||
|
@ -58,8 +59,6 @@ NOERROR: .error "C preprocessor needed for this file: make sure its filename\
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
|
|
||||||
_xt_context_save
|
_xt_context_save
|
||||||
|
@ -97,8 +96,8 @@ Exit conditions:
|
||||||
.align 4
|
.align 4
|
||||||
.literal_position
|
.literal_position
|
||||||
.align 4
|
.align 4
|
||||||
_xt_context_save:
|
|
||||||
|
|
||||||
|
_xt_context_save:
|
||||||
s32i a2, sp, XT_STK_A2
|
s32i a2, sp, XT_STK_A2
|
||||||
s32i a3, sp, XT_STK_A3
|
s32i a3, sp, XT_STK_A3
|
||||||
s32i a4, sp, XT_STK_A4
|
s32i a4, sp, XT_STK_A4
|
||||||
|
@ -143,49 +142,15 @@ _xt_context_save:
|
||||||
mov a9, a0 /* preserve ret addr */
|
mov a9, a0 /* preserve ret addr */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef __XTENSA_CALL0_ABI__
|
|
||||||
/*
|
|
||||||
To spill the reg windows, temp. need pre-interrupt stack ptr and a4-15.
|
|
||||||
Need to save a9,12,13 temporarily (in frame temps) and recover originals.
|
|
||||||
Interrupts need to be disabled below XCHAL_EXCM_LEVEL and window overflow
|
|
||||||
and underflow exceptions disabled (assured by PS.EXCM == 1).
|
|
||||||
*/
|
|
||||||
s32i a12, sp, XT_STK_TMP0 /* temp. save stuff in stack frame */
|
s32i a12, sp, XT_STK_TMP0 /* temp. save stuff in stack frame */
|
||||||
s32i a13, sp, XT_STK_TMP1
|
s32i a13, sp, XT_STK_TMP1
|
||||||
s32i a9, sp, XT_STK_TMP2
|
s32i a9, sp, XT_STK_TMP2
|
||||||
|
|
||||||
/*
|
|
||||||
Save the overlay state if we are supporting overlays. Since we just saved
|
|
||||||
three registers, we can conveniently use them here. Note that as of now,
|
|
||||||
overlays only work for windowed calling ABI.
|
|
||||||
*/
|
|
||||||
#ifdef XT_USE_OVLY
|
|
||||||
l32i a9, sp, XT_STK_PC /* recover saved PC */
|
|
||||||
_xt_overlay_get_state a9, a12, a13
|
|
||||||
s32i a9, sp, XT_STK_OVLY /* save overlay state */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
l32i a12, sp, XT_STK_A12 /* recover original a9,12,13 */
|
l32i a12, sp, XT_STK_A12 /* recover original a9,12,13 */
|
||||||
l32i a13, sp, XT_STK_A13
|
l32i a13, sp, XT_STK_A13
|
||||||
l32i a9, sp, XT_STK_A9
|
l32i a9, sp, XT_STK_A9
|
||||||
addi sp, sp, XT_STK_FRMSZ /* restore the interruptee's SP */
|
|
||||||
call0 xthal_window_spill_nw /* preserves only a4,5,8,9,12,13 */
|
|
||||||
addi sp, sp, -XT_STK_FRMSZ
|
|
||||||
l32i a12, sp, XT_STK_TMP0 /* recover stuff from stack frame */
|
|
||||||
l32i a13, sp, XT_STK_TMP1
|
|
||||||
l32i a9, sp, XT_STK_TMP2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if XCHAL_EXTRA_SA_SIZE > 0
|
#if XCHAL_EXTRA_SA_SIZE > 0
|
||||||
/*
|
|
||||||
NOTE: Normally the xthal_save_extra_nw macro only affects address
|
|
||||||
registers a2-a5. It is theoretically possible for Xtensa processor
|
|
||||||
designers to write TIE that causes more address registers to be
|
|
||||||
affected, but it is generally unlikely. If that ever happens,
|
|
||||||
more registers need to be saved/restored around this macro invocation.
|
|
||||||
Here we assume a9,12,13 are preserved.
|
|
||||||
Future Xtensa tools releases might limit the regs that can be affected.
|
|
||||||
*/
|
|
||||||
addi a2, sp, XT_STK_EXTRA /* where to save it */
|
addi a2, sp, XT_STK_EXTRA /* where to save it */
|
||||||
# if XCHAL_EXTRA_SA_ALIGN > 16
|
# if XCHAL_EXTRA_SA_ALIGN > 16
|
||||||
movi a3, -XCHAL_EXTRA_SA_ALIGN
|
movi a3, -XCHAL_EXTRA_SA_ALIGN
|
||||||
|
@ -194,6 +159,38 @@ _xt_context_save:
|
||||||
call0 xthal_save_extra_nw /* destroys a0,2,3,4,5 */
|
call0 xthal_save_extra_nw /* destroys a0,2,3,4,5 */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef __XTENSA_CALL0_ABI__
|
||||||
|
#ifdef XT_USE_OVLY
|
||||||
|
l32i a9, sp, XT_STK_PC /* recover saved PC */
|
||||||
|
_xt_overlay_get_state a9, a12, a13
|
||||||
|
s32i a9, sp, XT_STK_OVLY /* save overlay state */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
rsr a2, PS /* We need to enable window exceptions to */
|
||||||
|
movi a3, PS_INTLEVEL_MASK /* perform spill registers*/
|
||||||
|
and a2, a2, a3
|
||||||
|
bnez a2, _not_l1
|
||||||
|
rsr a2, PS
|
||||||
|
movi a3, PS_INTLEVEL(1) /* For some curious reason the level 1 interrupts */
|
||||||
|
or a2, a2, a3 /* dont set the intlevel correctly on PS, we need to */
|
||||||
|
wsr a2, PS /* do this manually */
|
||||||
|
_not_l1:
|
||||||
|
rsr a2, PS /* finally unmask the window exceptions */
|
||||||
|
movi a3, ~(PS_EXCM_MASK)
|
||||||
|
and a2, a2, a3
|
||||||
|
wsr a2, PS
|
||||||
|
rsync
|
||||||
|
|
||||||
|
addi sp, sp, XT_STK_FRMSZ /* go back to spill register region */
|
||||||
|
SPILL_ALL_WINDOWS /* place the live register windows there */
|
||||||
|
addi sp, sp, -XT_STK_FRMSZ /* return the current stack pointer and proceed with context save*/
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
l32i a12, sp, XT_STK_TMP0 /* restore the temp saved registers */
|
||||||
|
l32i a13, sp, XT_STK_TMP1 /* our return address is there */
|
||||||
|
l32i a9, sp, XT_STK_TMP2
|
||||||
|
|
||||||
#if XCHAL_EXTRA_SA_SIZE > 0 || !defined(__XTENSA_CALL0_ABI__)
|
#if XCHAL_EXTRA_SA_SIZE > 0 || !defined(__XTENSA_CALL0_ABI__)
|
||||||
mov a0, a9 /* retrieve ret addr */
|
mov a0, a9 /* retrieve ret addr */
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -32,6 +32,10 @@
|
||||||
#define IDF_PERFORMANCE_MAX_ESP32_CYCLES_PER_SQRT 140
|
#define IDF_PERFORMANCE_MAX_ESP32_CYCLES_PER_SQRT 140
|
||||||
// SHA256 hardware throughput at 240MHz, threshold set lower than worst case
|
// SHA256 hardware throughput at 240MHz, threshold set lower than worst case
|
||||||
#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 9.0
|
#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 9.0
|
||||||
|
#define IDF_PERFORMANCE_MAX_SPILL_REG_CYCLES 150
|
||||||
|
#define IDF_PERFORMANCE_MAX_ISR_ENTER_CYCLES 290
|
||||||
|
#define IDF_PERFORMANCE_MAX_ISR_EXIT_CYCLES 565
|
||||||
|
|
||||||
|
|
||||||
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP 19000
|
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP 19000
|
||||||
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 180000
|
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 180000
|
||||||
|
|
|
@ -27,6 +27,14 @@
|
||||||
|
|
||||||
#define UNITY_EXCLUDE_TIME_H
|
#define UNITY_EXCLUDE_TIME_H
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @note For some reason setjmp does not work with
|
||||||
|
* unity. Since it is only used on test entry and
|
||||||
|
* exit, disabling it should not impact the rest of the test
|
||||||
|
* framework, so we disable it here.
|
||||||
|
*/
|
||||||
|
#define UNITY_EXCLUDE_SETJMP_H
|
||||||
|
|
||||||
void unity_flush(void);
|
void unity_flush(void);
|
||||||
void unity_putc(int c);
|
void unity_putc(int c);
|
||||||
void unity_gets(char* dst, size_t len);
|
void unity_gets(char* dst, size_t len);
|
||||||
|
|
|
@ -307,7 +307,7 @@ example_test_012:
|
||||||
|
|
||||||
UT_001:
|
UT_001:
|
||||||
extends: .unit_test_template
|
extends: .unit_test_template
|
||||||
parallel: 28
|
parallel: 30
|
||||||
tags:
|
tags:
|
||||||
- ESP32_IDF
|
- ESP32_IDF
|
||||||
- UT_T1_1
|
- UT_T1_1
|
||||||
|
@ -316,7 +316,7 @@ UT_001:
|
||||||
|
|
||||||
UT_002:
|
UT_002:
|
||||||
extends: .unit_test_template
|
extends: .unit_test_template
|
||||||
parallel: 9
|
parallel: 11
|
||||||
tags:
|
tags:
|
||||||
- ESP32_IDF
|
- ESP32_IDF
|
||||||
- UT_T1_1
|
- UT_T1_1
|
||||||
|
@ -466,7 +466,7 @@ UT_034:
|
||||||
|
|
||||||
UT_035:
|
UT_035:
|
||||||
extends: .unit_test_template
|
extends: .unit_test_template
|
||||||
parallel: 17
|
parallel: 20
|
||||||
tags:
|
tags:
|
||||||
- ESP32S2BETA_IDF
|
- ESP32S2BETA_IDF
|
||||||
- UT_T1_1
|
- UT_T1_1
|
||||||
|
|
Loading…
Reference in a new issue