Merge branch 'feature/faster_task_switching' into 'master'

Optimize task switching

Converting xPortGetCoreID to inline assembly: task switch time drops from 32 to 25 µs.

Spinlock optimizations: another 1.5 µs saved.

See merge request !139
Jeroen Domburg 2016-10-20 13:50:43 +08:00
commit ede25dbc17
7 changed files with 67 additions and 89 deletions

@@ -192,8 +192,14 @@ void vPortEndScheduler( void ) PRIVILEGED_FUNCTION;
#endif
/* Multi-core: get current core ID */
int xPortGetCoreID( void );
inline uint32_t xPortGetCoreID() {
int id;
asm volatile(
"rsr.prid %0\n"
" extui %0,%0,13,1"
:"=r"(id));
return id;
}
#ifdef __cplusplus
}
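The new inline routine reads the Xtensa PRID special register and extracts bit 13, which holds the core number (0 = PRO_CPU, 1 = APP_CPU). A minimal C sketch of the same extraction, not part of this change (core_id_from_prid is a hypothetical helper; prid stands for the raw register value):

#include <stdint.h>

/* Equivalent of "rsr.prid" followed by "extui ...,13,1": take bit 13 of the processor ID. */
static inline uint32_t core_id_from_prid(uint32_t prid)
{
    return (prid >> 13) & 1;  /* 0 = PRO_CPU, 1 = APP_CPU */
}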

@@ -225,6 +225,26 @@ static inline unsigned portENTER_CRITICAL_NESTED() { unsigned state = XTOS_SET_I
#define portCLEAR_INTERRUPT_MASK_FROM_ISR(state) portEXIT_CRITICAL_NESTED(state)
/*
* Wrapper for the Xtensa compare-and-set instruction. This subroutine atomically compares
* *addr to compare and, if they match, writes *set to *addr. The old value of *addr is
* returned in *set.
*
* Warning: From the ISA docs: in some (unspecified) cases, the s32c1i instruction may return the
* *bitwise inverse* of the old memory value if the memory wasn't written. This doesn't seem to
* happen on the ESP32, though. (If it did, it would show up immediately, because the magic value
* wouldn't match.)
*/
inline void uxPortCompareSet(volatile uint32_t *addr, uint32_t compare, uint32_t *set) {
__asm__ __volatile__(
"WSR %2,SCOMPARE1 \n" //initialize SCOMPARE1 with the compare value
"ISYNC \n" //wait for the write to SCOMPARE1 to complete
"S32C1I %0, %1, 0 \n" //store *set to *addr iff *addr equals SCOMPARE1; %0 receives the old *addr
:"=r"(*set)
:"r"(addr), "r"(compare), "0"(*set)
);
}
/*-----------------------------------------------------------*/
/* Architecture specifics. */
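The in/out parameter changes how callers use the wrapper: the value to store is passed in through *set, and after the call *set holds the previous contents of *addr. A usage sketch mirroring the lock-acquire path further down in this diff (portMUX_* constants as defined elsewhere in this port):

/* Try to take a free mux for this core; afterwards res holds the old mux value. */
uint32_t res = (xPortGetCoreID() << portMUX_VAL_SHIFT) | portMUX_MAGIC_VAL;
uxPortCompareSet(&mux->mux, portMUX_FREE_VAL, &res);
if (res == portMUX_FREE_VAL) {
    /* The mux was free and is now owned by this core. */
}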

@@ -322,12 +322,7 @@ STRUCT_END(XtSolFrame)
#ifdef __ASSEMBLER__
.macro getcoreid reg
rsr.prid \reg
bbci \reg,1,1f
movi \reg,1
j 2f
1:
movi \reg,0
2:
extui \reg,\reg,13,1
.endm
#endif

@@ -76,7 +76,6 @@ inline static void panicPutHex(int a) { }
inline static void panicPutDec(int a) { }
#endif
int xPortGetCoreID();
void __attribute__((weak)) vApplicationStackOverflowHook( TaskHandle_t xTask, signed char *pcTaskName ) {
panicPutStr("***ERROR*** A stack overflow in task ");

@@ -253,28 +253,6 @@ void vPortAssertIfInISR()
configASSERT(port_interruptNesting[xPortGetCoreID()]==0)
}
/*
* Wrapper for the Xtensa compare-and-set instruction. This subroutine will atomically compare
* *mux to compare, and if it's the same, will set *mux to set. It will return the old value
* of *addr.
*
* Warning: From the ISA docs: in some (unspecified) cases, the s32c1i instruction may return the
* *bitwise inverse* of the old mem if the mem wasn't written. This doesn't seem to happen on the
* ESP32, though. (Would show up directly if it did because the magic wouldn't match.)
*/
uint32_t uxPortCompareSet(volatile uint32_t *mux, uint32_t compare, uint32_t set)
{
__asm__ __volatile__ (
"WSR %2,SCOMPARE1 \n" //initialize SCOMPARE1
"ISYNC \n" //wait sync
"S32C1I %0, %1, 0 \n" //store id into the lock, if the lock is the same as comparel. Otherwise, no write-access
:"=r"(set) \
:"r"(mux), "r"(compare), "0"(set) \
);
return set;
}
/*
* For kernel use: Initialize a per-CPU mux. Mux will be initialized unlocked.
*/
@@ -310,7 +288,8 @@ void vPortCPUAcquireMutex(portMUX_TYPE *mux) {
irqStatus=portENTER_CRITICAL_NESTED();
do {
//Lock mux if it's currently unlocked
res=uxPortCompareSet(&mux->mux, portMUX_FREE_VAL, (xPortGetCoreID()<<portMUX_VAL_SHIFT)|portMUX_MAGIC_VAL);
res=(xPortGetCoreID()<<portMUX_VAL_SHIFT)|portMUX_MAGIC_VAL;
uxPortCompareSet(&mux->mux, portMUX_FREE_VAL, &res);
//If it wasn't free and we're the owner of the lock, we are locking recursively.
if ( (res != portMUX_FREE_VAL) && (((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT) == xPortGetCoreID()) ) {
//Mux was already locked by us. Just bump the recurse count by one.
@@ -362,29 +341,33 @@ portBASE_TYPE vPortCPUReleaseMutex(portMUX_TYPE *mux) {
if ( (mux->mux & portMUX_MAGIC_MASK) != portMUX_MAGIC_VAL ) ets_printf("ERROR: vPortCPUReleaseMutex: mux %p is uninitialized (0x%X)!\n", mux, mux->mux);
#endif
//Unlock mux if it's currently locked with a recurse count of 0
res=uxPortCompareSet(&mux->mux, (xPortGetCoreID()<<portMUX_VAL_SHIFT)|portMUX_MAGIC_VAL, portMUX_FREE_VAL);
res=portMUX_FREE_VAL;
uxPortCompareSet(&mux->mux, (xPortGetCoreID()<<portMUX_VAL_SHIFT)|portMUX_MAGIC_VAL, &res);
if ( res == portMUX_FREE_VAL ) {
if ( ((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT) == xPortGetCoreID() ) {
//Lock is valid, we can return safely. Just need to check if it's a recursive lock; if so we need to decrease the refcount.
if ( ((res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT)!=0) {
//We locked this, but the reccount isn't zero. Decrease refcount and continue.
recCnt=(res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT;
recCnt--;
#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG_RECURSIVE
ets_printf("Recursive unlock: recCnt=%d last locked %s line %d, curr %s line %d\n", recCnt, lastLockedFn, lastLockedLine, fnName, line);
#endif
mux->mux=portMUX_MAGIC_VAL|(recCnt<<portMUX_CNT_SHIFT)|(xPortGetCoreID()<<portMUX_VAL_SHIFT);
}
} else if ( res == portMUX_FREE_VAL ) {
#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG
ets_printf("ERROR: vPortCPUReleaseMutex: mux %p was already unlocked!\n", mux);
ets_printf("Last non-recursive unlock %s line %d, curr unlock %s line %d\n", lastLockedFn, lastLockedLine, fnName, line);
#endif
ret=pdFALSE;
} else if ( ((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT) != xPortGetCoreID() ) {
} else {
#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG
ets_printf("ERROR: vPortCPUReleaseMutex: mux %p wasn't locked by this core (%d) but by core %d (ret=%x, mux=%x).\n", mux, xPortGetCoreID(), ((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT), res, mux->mux);
ets_printf("Last non-recursive lock %s line %d\n", lastLockedFn, lastLockedLine);
ets_printf("Called by %s line %d\n", fnName, line);
#endif
ret=pdFALSE;
} else if ( ((res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT)!=0) {
//We locked this, but the reccount isn't zero. Decrease refcount and continue.
recCnt=(res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT;
recCnt--;
#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG_RECURSIVE
ets_printf("Recursive unlock: recCnt=%d last locked %s line %d, curr %s line %d\n", recCnt, lastLockedFn, lastLockedLine, fnName, line);
#endif
mux->mux=portMUX_MAGIC_VAL|(recCnt<<portMUX_CNT_SHIFT)|(xPortGetCoreID()<<portMUX_VAL_SHIFT);
}
portEXIT_CRITICAL_NESTED(irqStatus);
return ret;
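After this change the unlock path checks ownership first, then the already-free case, and treats anything else as a lock held by the other core. A compact sketch of that ordering, not the literal post-merge source (res is the old mux value left behind by uxPortCompareSet):

if (((res & portMUX_VAL_MASK) >> portMUX_VAL_SHIFT) == xPortGetCoreID()) {
    /* We held the lock. The compare-and-set released it unless the recursion
       count was non-zero, in which case the count is decremented instead. */
} else if (res == portMUX_FREE_VAL) {
    ret = pdFALSE;  /* error: the mux was already unlocked */
} else {
    ret = pdFALSE;  /* error: the mux is held by the other core */
}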

@@ -51,18 +51,6 @@ port_switch_flag:
.text
/* C function to get proc ID.*/
.global xPortGetCoreID
.type xPortGetCoreID,@function
.align 4
xPortGetCoreID:
ENTRY(16)
getcoreid a2
RET(16)
/*
*******************************************************************************
* _frxt_setup_switch
@@ -81,9 +69,8 @@ _frxt_setup_switch:
ENTRY(16)
getcoreid a3
slli a3, a3, 2
movi a2, port_switch_flag
add a2, a2, a3
addx4 a2, a3, a2
movi a3, 1
s32i a3, a2, 0
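The hunk above and several hunks below replace the "shift the core ID left by two, then add it to the array base" sequence with a single addx4, which computes base + 4*index in one instruction. An illustrative C equivalent (per_core_entry and per_core_array are hypothetical names):

#include <stdint.h>

/* addx4 aR, aS, aT computes aR = aS*4 + aT. With aS holding the core ID and aT the base of a
   per-core array of 32-bit words, that is plain array indexing: */
static inline uint32_t *per_core_entry(uint32_t *per_core_array, uint32_t core_id)
{
    return &per_core_array[core_id];  /* byte address = base + core_id * 4 */
}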
@@ -128,12 +115,11 @@ _frxt_int_enter:
Manage nesting directly rather than call the generic IntEnter()
(in windowed ABI we can't call a C function here anyway because PS.EXCM is still set).
*/
getcoreid a3
slli a4, a3, 2 /* a4 = cpuid * 4 */
getcoreid a4
movi a2, port_xSchedulerRunning
add a2, a2, a4
addx4 a2, a4, a2
movi a3, port_interruptNesting
add a3, a3, a4
addx4 a3, a4, a3
l32i a2, a2, 0 /* a2 = port_xSchedulerRunning */
beqz a2, 1f /* scheduler not running, no tasks */
l32i a2, a3, 0 /* a2 = port_interruptNesting */
@@ -142,14 +128,13 @@ _frxt_int_enter:
bnei a2, 1, .Lnested /* !=0 before incr, so nested */
movi a2, pxCurrentTCB
add a2, a2, a4
addx4 a2, a4, a2
l32i a2, a2, 0 /* a2 = current TCB */
beqz a2, 1f
s32i a1, a2, TOPOFSTACK_OFFS /* pxCurrentTCB->pxTopOfStack = SP */
movi a1, port_IntStackTop /* a1 = top of intr stack */
movi a2, configISR_STACK_SIZE
getcoreid a3
mull a2, a3, a2
mull a2, a4, a2
add a1, a1, a2 /* for current proc */
.Lnested:
@@ -177,12 +162,11 @@ _frxt_int_enter:
.align 4
_frxt_int_exit:
getcoreid a3
slli a4, a3, 2 /* a4 is core * 4 */
getcoreid a4
movi a2, port_xSchedulerRunning
add a2, a2, a4
addx4 a2, a4, a2
movi a3, port_interruptNesting
add a3, a3, a4
addx4 a3, a4, a3
rsil a0, XCHAL_EXCM_LEVEL /* lock out interrupts */
l32i a2, a2, 0 /* a2 = port_xSchedulerRunning */
beqz a2, .Lnoswitch /* scheduler not running, no tasks */
@@ -192,13 +176,13 @@ _frxt_int_exit:
bnez a2, .Lnesting /* !=0 after decr so still nested */
movi a2, pxCurrentTCB
add a2, a2, a4
addx4 a2, a4, a2
l32i a2, a2, 0 /* a2 = current TCB */
beqz a2, 1f /* no task ? go to dispatcher */
l32i a1, a2, TOPOFSTACK_OFFS /* SP = pxCurrentTCB->pxTopOfStack */
movi a2, port_switch_flag /* address of switch flag */
add a2, a2, a4 /* point to flag for this cpu */
addx4 a2, a4, a2 /* point to flag for this cpu */
l32i a3, a2, 0 /* a3 = port_switch_flag */
beqz a3, .Lnoswitch /* flag = 0 means no switch reqd */
movi a3, 0
@@ -404,14 +388,12 @@ _frxt_dispatch:
call0 vTaskSwitchContext // Get next TCB to resume
movi a2, pxCurrentTCB
getcoreid a3
slli a3, a3, 2
add a2, a2, a3
addx4 a2, a3, a2
#else
call4 vTaskSwitchContext // Get next TCB to resume
movi a2, pxCurrentTCB
getcoreid a3
slli a3, a3, 2
add a2, a2, a3
addx4 a2, a3, a2
#endif
l32i a3, a2, 0
l32i sp, a3, TOPOFSTACK_OFFS /* SP = next_TCB->pxTopOfStack; */
@@ -451,8 +433,7 @@ _frxt_dispatch:
/* Restore CPENABLE from task's co-processor save area. */
movi a3, pxCurrentTCB /* cp_state = */
getcoreid a2
slli a2, a2, 2
add a3, a2, a3
addx4 a3, a2, a3
l32i a3, a3, 0
l32i a2, a3, CP_TOPOFSTACK_OFFS /* StackType_t *pxStack; */
l16ui a3, a2, XT_CPENABLE /* CPENABLE = cp_state->cpenable; */
@@ -541,8 +522,7 @@ vPortYield:
movi a2, pxCurrentTCB
getcoreid a3
slli a3, a3, 2
add a2, a2, a3
addx4 a2, a3, a2
l32i a2, a2, 0 /* a2 = pxCurrentTCB */
movi a3, 0
s32i a3, sp, XT_SOL_EXIT /* 0 to flag as solicited frame */
@@ -593,8 +573,7 @@ vPortYieldFromInt:
/* Save CPENABLE in task's co-processor save area, and clear CPENABLE. */
movi a3, pxCurrentTCB /* cp_state = */
getcoreid a2
slli a2, a2, 2
add a3, a2, a3
addx4 a3, a2, a3
l32i a3, a3, 0
l32i a2, a3, CP_TOPOFSTACK_OFFS
@@ -637,18 +616,17 @@ _frxt_task_coproc_state:
/* We can use a3 as a scratchpad, the instances of code calling XT_RTOS_CP_STATE don't seem to need it saved. */
getcoreid a3
slli a3, a3, 2 /* a3=coreid*4 */
movi a15, port_xSchedulerRunning /* if (port_xSchedulerRunning */
add a15, a15, a3
addx4 a15, a3,a15
l32i a15, a15, 0
beqz a15, 1f
movi a15, port_interruptNesting /* && port_interruptNesting == 0 */
add a15, a15, a3
addx4 a15, a3, a15
l32i a15, a15, 0
bnez a15, 1f
movi a15, pxCurrentTCB
add a15, a3, a15
addx4 a15, a3, a15
l32i a15, a15, 0 /* && pxCurrentTCB != 0) { */

@@ -904,16 +904,13 @@ _xt_coproc_exc:
core we're running on now. */
movi a2, pxCurrentTCB
getcoreid a3
slli a3, a3, 2
add a2, a2, a3
addx4 a2, a3, a2
l32i a2, a2, 0 /* a2 = start of pxCurrentTCB[cpuid] */
addi a2, a2, TASKTCB_XCOREID_OFFSET /* offset to xCoreID in tcb struct */
getcoreid a3
s32i a3, a2, 0 /* store current cpuid */
/* Grab correct xt_coproc_owner_sa for this core */
getcoreid a2
movi a3, XCHAL_CP_MAX << 2
movi a2, XCHAL_CP_MAX << 2
mull a2, a2, a3
movi a3, _xt_coproc_owner_sa /* a3 = base of owner array */
add a3, a3, a2
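The final hunk computes this core's slice of the coprocessor owner array; the stride is XCHAL_CP_MAX entries of 4 bytes per core (XCHAL_CP_MAX << 2 bytes, multiplied by the core ID). A hedged C sketch of the address computation (the array name comes from the diff; the element type is assumed to be a 32-bit word):

/* Select this core's slice of the coprocessor owner save-area table. */
extern uint32_t _xt_coproc_owner_sa[];  /* laid out as [core][XCHAL_CP_MAX] */
uint32_t *owner = &_xt_coproc_owner_sa[core_id * XCHAL_CP_MAX];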