Merge branch 'feature/interrupt_overhead_improvement' into 'master'

feature/interrupt overhead improvement

Closes IDF-248

See merge request espressif/esp-idf!6328
This commit is contained in:
Angus Gratton 2019-12-05 10:21:46 +08:00
commit a7aea56977
7 changed files with 211 additions and 41 deletions

View file

@ -5,7 +5,7 @@ if(IDF_TARGET STREQUAL "esp32")
endif()
idf_component_register(SRC_DIRS ${src_dirs}
idf_component_register(SRC_DIRS ${srcdirs}
INCLUDE_DIRS .
REQUIRES unity test_utils
)

View file

@ -0,0 +1,73 @@
#include <esp_types.h>
#include <stdio.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/semphr.h"
#include "freertos/queue.h"
#include "freertos/xtensa_api.h"
#include "esp_intr_alloc.h"
#include "xtensa/hal.h"
#include "unity.h"
#include "soc/cpu.h"
#include "test_utils.h"
/* Bit index of the software interrupt used by this test; it is shifted into
 * the masks handed to xt_set_intset() and xt_set_intclear(). */
#define SW_ISR_LEVEL_1 7
/* Given by software_isr() on every interrupt, taken by test_task(). */
static SemaphoreHandle_t sync;
/* Given by test_task() once the measurement loop is done, taken by the test case. */
static SemaphoreHandle_t end_sema;
/* Counter value sampled by test_task() right before it raises the interrupt. */
static uint32_t cycle_before_trigger;
/* Counter value sampled as the last statement of software_isr(). */
static uint32_t cycle_before_exit;
/* Running sum of (ISR entry - trigger) deltas; test_task() turns it into an average. */
static uint32_t delta_enter_cycles = 0;
/* Running sum of (task resumed - ISR exit sample) deltas; test_task() turns it into an average. */
static uint32_t delta_exit_cycles = 0;
static void software_isr(void *arg) {
(void)arg;
BaseType_t yield;
delta_enter_cycles += portGET_RUN_TIME_COUNTER_VALUE() - cycle_before_trigger;
xt_set_intclear(1 << SW_ISR_LEVEL_1);
xSemaphoreGiveFromISR(sync, &yield);
if(yield) {
portYIELD_FROM_ISR();
}
cycle_before_exit = portGET_RUN_TIME_COUNTER_VALUE();
}
/*
 * Measurement task.
 *
 * Installs software_isr() on the internal software interrupt source, raises
 * that interrupt ISR_LATENCY_ITERATIONS times and waits for the handler to
 * give `sync` after each one. When the loop is done, delta_enter_cycles and
 * delta_exit_cycles hold the per-interrupt averages. The task then frees the
 * interrupt, signals `end_sema` and deletes itself.
 *
 * arg: unused.
 */
static void test_task(void *arg)
{
    (void)arg;

    /* Number of interrupts raised; also the divisor used for the averages,
     * so the two can never get out of sync. */
    enum { ISR_LATENCY_ITERATIONS = 10000 };

    intr_handle_t handle;
    esp_err_t err = esp_intr_alloc(ETS_INTERNAL_SW0_INTR_SOURCE, ESP_INTR_FLAG_LEVEL1, &software_isr, NULL, &handle);
    TEST_ASSERT_EQUAL_HEX32(ESP_OK, err);

    /* The accumulators are file-scope statics: reset them so that running the
     * test again in the same boot does not start from the previous averages. */
    delta_enter_cycles = 0;
    delta_exit_cycles = 0;

    for (int i = 0; i < ISR_LATENCY_ITERATIONS; i++) {
        cycle_before_trigger = portGET_RUN_TIME_COUNTER_VALUE();
        xt_set_intset(1 << SW_ISR_LEVEL_1);
        /* Blocks until software_isr() has run and given the semaphore. */
        xSemaphoreTake(sync, portMAX_DELAY);
        delta_exit_cycles += portGET_RUN_TIME_COUNTER_VALUE() - cycle_before_exit;
    }

    delta_enter_cycles /= ISR_LATENCY_ITERATIONS;
    delta_exit_cycles /= ISR_LATENCY_ITERATIONS;

    esp_intr_free(handle);
    xSemaphoreGive(end_sema);
    vTaskDelete(NULL);
}
/*
 * Runs test_task() pinned to core 0 at the highest priority, waits for it to
 * finish and compares the averaged ISR enter/exit cycle counts against the
 * ISR_ENTER_CYCLES / ISR_EXIT_CYCLES performance limits.
 */
TEST_CASE("isr latency test", "[freertos] [ignore]")
{
    sync = xSemaphoreCreateBinary();
    TEST_ASSERT(sync != NULL);
    end_sema = xSemaphoreCreateBinary();
    TEST_ASSERT(end_sema != NULL);

    /* Without this check a failed task creation would leave the test blocked
     * forever on end_sema below. */
    BaseType_t created = xTaskCreatePinnedToCore(test_task, "tst" , 4096, NULL, configMAX_PRIORITIES - 1, NULL, 0);
    TEST_ASSERT_EQUAL(pdPASS, created);

    BaseType_t result = xSemaphoreTake(end_sema, portMAX_DELAY);
    TEST_ASSERT_EQUAL_HEX32(pdTRUE, result);

    /* test_task() does not touch either semaphore after giving end_sema, so
     * they can be released here. Done before the performance checks so that
     * a failing check does not leak them. */
    vSemaphoreDelete(end_sema);
    end_sema = NULL;
    vSemaphoreDelete(sync);
    sync = NULL;

    TEST_PERFORMANCE_LESS_THAN(ISR_ENTER_CYCLES, "%d cycles" ,delta_enter_cycles);
    TEST_PERFORMANCE_LESS_THAN(ISR_EXIT_CYCLES, "%d cycles" ,delta_exit_cycles);
}

View file

@ -0,0 +1,88 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
/* Copyright 2015-2018 Espressif Systems (Shanghai) PTE LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* File adapted to use on IDF FreeRTOS component, extracted
* originally from zephyr RTOS code base:
* https://github.com/zephyrproject-rtos/zephyr/blob/dafd348/arch/xtensa/include/xtensa-asm2-s.h
*/
#ifndef __XT_ASM_UTILS_H
#define __XT_ASM_UTILS_H
/*
* SPILL_ALL_WINDOWS
*
* Spills all windowed registers (i.e. registers not visible as
* A0-A15) to their ABI-defined spill regions on the stack.
*
* Unlike the Xtensa HAL implementation, this code requires that the
* EXCM and WOE bit be enabled in PS, and relies on repeated hardware
* exception handling to do the register spills. The trick is to do a
* noop write to the high registers, which the hardware will trap
* (into an overflow exception) in the case where those registers are
* already used by an existing call frame. Then it rotates the window
* and repeats until all but the A0-A3 registers of the original frame
* are guaranteed to be spilled, eventually rotating back around into
* the original frame. Advantages:
*
* NOTE(review): the caller in _xt_context_save clears PS.EXCM right
* before invoking this macro ("unmask the window exceptions"), so
* "EXCM ... enabled" above presumably means "window exceptions not
* masked by EXCM" - confirm against the Xtensa ISA reference.
*
* - Vastly smaller code size
*
* - More easily maintained if changes are needed to window over/underflow
* exception handling.
*
* - Requires no scratch registers to do its work, so can be used safely in any
* context.
*
* - If the WOE bit is not enabled (for example, in code written for
* the CALL0 ABI), this becomes a silent noop and operates compatibly.
*
* - Hilariously it's ACTUALLY FASTER than the HAL routine. And not
* just a little bit, it's MUCH faster. With a mostly full register
* file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
* registers with this vs. 279 (!) to do it with
* xthal_spill_windows().
*/
.macro SPILL_ALL_WINDOWS
#if XCHAL_NUM_AREGS == 64
/* 64 address registers: the rotw arguments add up to 3+3+3+3+4 = 16
* (= 64 / 4), i.e. one full trip around the register file. */
and a12, a12, a12 /* noop write to a12, see the description above */
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 4 /* last step back into the original frame */
#elif XCHAL_NUM_AREGS == 32
/* 32 address registers: the rotw arguments add up to 3+3+2 = 8
* (= 32 / 4), i.e. one full trip around the register file. */
and a12, a12, a12 /* noop write to a12, see the description above */
rotw 3
and a12, a12, a12
rotw 3
and a4, a4, a4 /* only a short step is left, so the write targets a4 here */
rotw 2 /* last step back into the original frame */
#else
#error Unrecognized XCHAL_NUM_AREGS
#endif
.endm
#endif

View file

@ -51,6 +51,7 @@ NOERROR: .error "C preprocessor needed for this file: make sure its filename\
#include "xtensa_rtos.h"
#include "xtensa_context.h"
#include "xt_asm_utils.h"
#ifdef XT_USE_OVLY
#include <xtensa/overlay_os_asm.h>
@ -58,8 +59,6 @@ NOERROR: .error "C preprocessor needed for this file: make sure its filename\
.text
/*******************************************************************************
_xt_context_save
@ -97,8 +96,8 @@ Exit conditions:
.align 4
.literal_position
.align 4
_xt_context_save:
_xt_context_save:
s32i a2, sp, XT_STK_A2
s32i a3, sp, XT_STK_A3
s32i a4, sp, XT_STK_A4
@ -143,49 +142,15 @@ _xt_context_save:
mov a9, a0 /* preserve ret addr */
#endif
#ifndef __XTENSA_CALL0_ABI__
/*
To spill the reg windows, temp. need pre-interrupt stack ptr and a4-15.
Need to save a9,12,13 temporarily (in frame temps) and recover originals.
Interrupts need to be disabled below XCHAL_EXCM_LEVEL and window overflow
and underflow exceptions disabled (assured by PS.EXCM == 1).
*/
s32i a12, sp, XT_STK_TMP0 /* temp. save stuff in stack frame */
s32i a13, sp, XT_STK_TMP1
s32i a9, sp, XT_STK_TMP2
/*
Save the overlay state if we are supporting overlays. Since we just saved
three registers, we can conveniently use them here. Note that as of now,
overlays only work for windowed calling ABI.
*/
#ifdef XT_USE_OVLY
l32i a9, sp, XT_STK_PC /* recover saved PC */
_xt_overlay_get_state a9, a12, a13
s32i a9, sp, XT_STK_OVLY /* save overlay state */
#endif
l32i a12, sp, XT_STK_A12 /* recover original a9,12,13 */
l32i a13, sp, XT_STK_A13
l32i a9, sp, XT_STK_A9
addi sp, sp, XT_STK_FRMSZ /* restore the interruptee's SP */
call0 xthal_window_spill_nw /* preserves only a4,5,8,9,12,13 */
addi sp, sp, -XT_STK_FRMSZ
l32i a12, sp, XT_STK_TMP0 /* recover stuff from stack frame */
l32i a13, sp, XT_STK_TMP1
l32i a9, sp, XT_STK_TMP2
#endif
#if XCHAL_EXTRA_SA_SIZE > 0
/*
NOTE: Normally the xthal_save_extra_nw macro only affects address
registers a2-a5. It is theoretically possible for Xtensa processor
designers to write TIE that causes more address registers to be
affected, but it is generally unlikely. If that ever happens,
more registers need to be saved/restored around this macro invocation.
Here we assume a9,12,13 are preserved.
Future Xtensa tools releases might limit the regs that can be affected.
*/
addi a2, sp, XT_STK_EXTRA /* where to save it */
# if XCHAL_EXTRA_SA_ALIGN > 16
movi a3, -XCHAL_EXTRA_SA_ALIGN
@ -194,6 +159,38 @@ _xt_context_save:
call0 xthal_save_extra_nw /* destroys a0,2,3,4,5 */
#endif
#ifndef __XTENSA_CALL0_ABI__
#ifdef XT_USE_OVLY
l32i a9, sp, XT_STK_PC /* recover saved PC */
_xt_overlay_get_state a9, a12, a13
s32i a9, sp, XT_STK_OVLY /* save overlay state */
#endif
/*
Spill the live register windows with SPILL_ALL_WINDOWS. The macro relies on
window exceptions, so PS is adjusted first. a2 and a3 are used as scratch
registers throughout this sequence.
*/
rsr a2, PS /* We need to enable window exceptions to */
movi a3, PS_INTLEVEL_MASK /* spill the registers. Start by isolating PS.INTLEVEL. */
and a2, a2, a3
bnez a2, _not_l1 /* INTLEVEL is already non-zero: leave it untouched */
rsr a2, PS
movi a3, PS_INTLEVEL(1) /* For some curious reason the level 1 interrupts */
or a2, a2, a3 /* don't set the intlevel correctly in PS, so we need to */
wsr a2, PS /* do this manually. */
/*
NOTE(review): presumably this is because level 1 interrupts are flagged
through PS.EXCM instead of PS.INTLEVEL - confirm against the Xtensa ISA
reference.
*/
_not_l1:
rsr a2, PS /* finally unmask the window exceptions by clearing PS.EXCM */
movi a3, ~(PS_EXCM_MASK)
and a2, a2, a3
wsr a2, PS
rsync
addi sp, sp, XT_STK_FRMSZ /* go back to the spill register region */
SPILL_ALL_WINDOWS /* place the live register windows there */
addi sp, sp, -XT_STK_FRMSZ /* restore the current stack pointer and proceed with the context save */
#endif
l32i a12, sp, XT_STK_TMP0 /* restore the temp saved registers */
l32i a13, sp, XT_STK_TMP1 /* our return address is there */
l32i a9, sp, XT_STK_TMP2
#if XCHAL_EXTRA_SA_SIZE > 0 || !defined(__XTENSA_CALL0_ABI__)
mov a0, a9 /* retrieve ret addr */
#endif

View file

@ -32,6 +32,10 @@
#define IDF_PERFORMANCE_MAX_ESP32_CYCLES_PER_SQRT 140
// SHA256 hardware throughput at 240MHz, threshold set lower than worst case
#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 9.0
// Upper limit, in CPU cycles, for spilling the register windows
#define IDF_PERFORMANCE_MAX_SPILL_REG_CYCLES 150
// Upper limits, in CPU cycles, for ISR entry and exit latency; presumably the
// values checked by TEST_PERFORMANCE_LESS_THAN(ISR_ENTER_CYCLES / ISR_EXIT_CYCLES, ...)
// in the "isr latency test" - confirm against the macro definition
#define IDF_PERFORMANCE_MAX_ISR_ENTER_CYCLES 290
#define IDF_PERFORMANCE_MAX_ISR_EXIT_CYCLES 565
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP 19000
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 180000

View file

@ -27,6 +27,14 @@
#define UNITY_EXCLUDE_TIME_H
/**
* @note For some reason setjmp does not work with
* unity. Since setjmp is only used on test entry and
* exit, disabling it should not impact the rest of
* the test framework, so we disable it here.
*/
#define UNITY_EXCLUDE_SETJMP_H
void unity_flush(void);
void unity_putc(int c);
void unity_gets(char* dst, size_t len);

View file

@ -307,7 +307,7 @@ example_test_012:
UT_001:
extends: .unit_test_template
parallel: 28
parallel: 30
tags:
- ESP32_IDF
- UT_T1_1
@ -316,7 +316,7 @@ UT_001:
UT_002:
extends: .unit_test_template
parallel: 9
parallel: 11
tags:
- ESP32_IDF
- UT_T1_1
@ -466,7 +466,7 @@ UT_034:
UT_035:
extends: .unit_test_template
parallel: 17
parallel: 20
tags:
- ESP32S2BETA_IDF
- UT_T1_1