From d4c82606fb4ba5e2c168f083a280b0176c549e34 Mon Sep 17 00:00:00 2001 From: Felipe Neves Date: Wed, 6 Nov 2019 15:08:24 +0800 Subject: [PATCH 1/4] components/freertos: added and enabled the optimized task selection FreeRTOS has a platform-dependent configuration option that enables task selection in an optimized way. Provided the platform-dependent functions in order to allow the scheduler to use the optimized algorithms by telling the port layer where to find the bit-scan instruction, i.e. NSAU. This closes IDF-1116 components/freertos: added option to disable the optimized scheduler --- components/freertos/Kconfig | 8 ++++++ .../include/freertos/FreeRTOSConfig.h | 8 ++++++ .../freertos/include/freertos/portmacro.h | 26 +++++++++++++++++++ components/freertos/tasks.c | 17 ++++++++++-- 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/components/freertos/Kconfig b/components/freertos/Kconfig index 198722331..06f84e70b 100644 --- a/components/freertos/Kconfig +++ b/components/freertos/Kconfig @@ -39,6 +39,14 @@ menu "FreeRTOS" endchoice + config FREERTOS_OPTIMIZED_SCHEDULER + bool "Enable FreeRTOS pĺatform optimized scheduler" + default y + help + On most platforms there are instructions can speedup the ready task + searching. Enabling this option the FreeRTOS with this instructions + support will be built + config FREERTOS_HZ int "Tick rate (Hz)" range 1 1000 diff --git a/components/freertos/include/freertos/FreeRTOSConfig.h b/components/freertos/include/freertos/FreeRTOSConfig.h index af4bf29b1..06c10b9db 100644 --- a/components/freertos/include/freertos/FreeRTOSConfig.h +++ b/components/freertos/include/freertos/FreeRTOSConfig.h @@ -72,6 +72,10 @@ #include "sdkconfig.h" +/* enable use of optimized task selection by the scheduler */ +#ifdef CONFIG_FREERTOS_OPTIMIZED_SCHEDULER +#define configUSE_PORT_OPTIMISED_TASK_SELECTION 1 +#endif /* ESP31 and ESP32 are dualcore processors. 
*/ #ifndef CONFIG_FREERTOS_UNICORE @@ -172,11 +176,15 @@ int xt_clock_freq(void) __attribute__((deprecated)); //#define configCPU_CLOCK_HZ 80000000 /* This has impact on speed of search for highest priority */ +#ifdef configUSE_PORT_OPTIMISED_TASK_SELECTION #ifdef SMALL_TEST #define configMAX_PRIORITIES ( 7 ) #else #define configMAX_PRIORITIES ( 25 ) #endif +#else +#define configMAX_PRIORITIES ( 32 ) +#endif #ifndef CONFIG_APPTRACE_ENABLE #define configMINIMAL_STACK_SIZE 768 diff --git a/components/freertos/include/freertos/portmacro.h b/components/freertos/include/freertos/portmacro.h index 010f60f25..eaa2d6603 100644 --- a/components/freertos/include/freertos/portmacro.h +++ b/components/freertos/include/freertos/portmacro.h @@ -469,6 +469,32 @@ void vApplicationSleep( TickType_t xExpectedIdleTime ); #define portSUPPRESS_TICKS_AND_SLEEP( idleTime ) vApplicationSleep( idleTime ) +/*-----------------------------------------------------------*/ + +/* Architecture specific optimisations. */ +#ifndef configUSE_PORT_OPTIMISED_TASK_SELECTION + #define configUSE_PORT_OPTIMISED_TASK_SELECTION 1 +#endif + +#if configUSE_PORT_OPTIMISED_TASK_SELECTION == 1 + +/* Check the configuration. */ +#if( configMAX_PRIORITIES > 32 ) + #error configUSE_PORT_OPTIMISED_TASK_SELECTION can only be set to 1 when configMAX_PRIORITIES is less than or equal to 32. It is very rare that a system requires more than 10 to 15 difference priorities as tasks that share a priority will time slice. +#endif + +/* Store/clear the ready priorities in a bit map. 
*/ +#define portRECORD_READY_PRIORITY( uxPriority, uxReadyPriorities ) ( uxReadyPriorities ) |= ( 1UL << ( uxPriority ) ) +#define portRESET_READY_PRIORITY( uxPriority, uxReadyPriorities ) ( uxReadyPriorities ) &= ~( 1UL << ( uxPriority ) ) + +/*-----------------------------------------------------------*/ + +#define portGET_HIGHEST_PRIORITY( uxTopPriority, uxReadyPriorities ) uxTopPriority = ( 31 - __builtin_clz( ( uxReadyPriorities ) ) ) + +#endif /* configUSE_PORT_OPTIMISED_TASK_SELECTION */ + +/*-----------------------------------------------------------*/ + // porttrace #if configUSE_TRACE_FACILITY_2 #include "porttrace.h" diff --git a/components/freertos/tasks.c b/components/freertos/tasks.c index 564b4aa4b..06e681f7f 100644 --- a/components/freertos/tasks.c +++ b/components/freertos/tasks.c @@ -2783,9 +2783,15 @@ void vTaskSwitchContext( void ) #endif unsigned portBASE_TYPE foundNonExecutingWaiter = pdFALSE, ableToSchedule = pdFALSE, resetListHead; - portBASE_TYPE uxDynamicTopReady = uxTopReadyPriority; unsigned portBASE_TYPE holdTop=pdFALSE; +#if configUSE_PORT_OPTIMISED_TASK_SELECTION == 1 + portBASE_TYPE uxDynamicTopReady; + portGET_HIGHEST_PRIORITY( uxDynamicTopReady, uxTopReadyPriority ); + portBASE_TYPE uxCopyOfTopReadyPrio = uxDynamicTopReady; +#else + portBASE_TYPE uxDynamicTopReady = uxTopReadyPriority; +#endif /* * ToDo: This scheduler doesn't correctly implement the round-robin scheduling as done in the single-core * FreeRTOS stack when multiple tasks have the same priority and are all ready; it just keeps grabbing the @@ -2861,7 +2867,14 @@ void vTaskSwitchContext( void ) } } while ((ableToSchedule == pdFALSE) && (pxTCB != pxRefTCB)); } else { - if (!holdTop) --uxTopReadyPriority; + if (!holdTop) { + #if configUSE_PORT_OPTIMISED_TASK_SELECTION == 1 + portRESET_READY_PRIORITY( uxCopyOfTopReadyPrio,uxTopReadyPriority ); + portGET_HIGHEST_PRIORITY( uxCopyOfTopReadyPrio,uxTopReadyPriority ); + #else + uxTopReadyPriority--; + #endif + } } 
--uxDynamicTopReady; } From 77bf1ff1c0f841218ab51403d6075a3dbd3689ed Mon Sep 17 00:00:00 2001 From: Felipe Neves Date: Wed, 6 Nov 2019 16:59:16 +0800 Subject: [PATCH 2/4] freertos/tests: added test to evaluate scheduling time freertos/Kconfig: fix trailing space on optimized scheduler option freertos/tests: moved test context variables inside of test task. The public variables used by the scheduling time test are now packed into a structure allocated on the test case task stack and passed to the tasks as arguments, reducing RAM consumption. --- components/freertos/Kconfig | 4 +- .../freertos/include/freertos/portmacro.h | 4 -- .../test/test_freertos_scheduling_time.c | 58 +++++++++++++++++++ components/idf_test/include/idf_performance.h | 2 + 4 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 components/freertos/test/test_freertos_scheduling_time.c diff --git a/components/freertos/Kconfig b/components/freertos/Kconfig index 06f84e70b..102ed95df 100644 --- a/components/freertos/Kconfig +++ b/components/freertos/Kconfig @@ -44,8 +44,8 @@ menu "FreeRTOS" default y help On most platforms there are instructions can speedup the ready task - searching. Enabling this option the FreeRTOS with this instructions - support will be built + searching. Enabling this option the FreeRTOS with this instructions + support will be built. config FREERTOS_HZ int "Tick rate (Hz)" diff --git a/components/freertos/include/freertos/portmacro.h b/components/freertos/include/freertos/portmacro.h index eaa2d6603..b96bd8ff2 100644 --- a/components/freertos/include/freertos/portmacro.h +++ b/components/freertos/include/freertos/portmacro.h @@ -472,10 +472,6 @@ void vApplicationSleep( TickType_t xExpectedIdleTime ); /*-----------------------------------------------------------*/ /* Architecture specific optimisations. 
*/ -#ifndef configUSE_PORT_OPTIMISED_TASK_SELECTION - #define configUSE_PORT_OPTIMISED_TASK_SELECTION 1 -#endif - #if configUSE_PORT_OPTIMISED_TASK_SELECTION == 1 /* Check the configuration. */ diff --git a/components/freertos/test/test_freertos_scheduling_time.c b/components/freertos/test/test_freertos_scheduling_time.c new file mode 100644 index 000000000..68f846800 --- /dev/null +++ b/components/freertos/test/test_freertos_scheduling_time.c @@ -0,0 +1,58 @@ +#include +#include +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/semphr.h" +#include "freertos/queue.h" +#include "freertos/xtensa_api.h" +#include "esp_intr_alloc.h" +#include "xtensa/hal.h" +#include "unity.h" +#include "soc/cpu.h" +#include "test_utils.h" + +typedef struct { + uint32_t noof_runs; + SemaphoreHandle_t end_sema; + uint32_t before_sched; + uint32_t cycles_to_sched; +} test_context_t; + +static void test_task_1(void *arg) { + test_context_t *context = (test_context_t *)arg; + + for(context->noof_runs = 0 ;context->noof_runs < 10000; ) { + context->before_sched = portGET_RUN_TIME_COUNTER_VALUE(); + vPortYield(); + } + + vTaskDelete(NULL); +} + +static void test_task_2(void *arg) { + test_context_t *context = (test_context_t *)arg; + + for( ; context->noof_runs < 10000; context->noof_runs++) { + context->cycles_to_sched += (portGET_RUN_TIME_COUNTER_VALUE() - context->before_sched); + vPortYield(); + } + + context->cycles_to_sched /= 10000; + xSemaphoreGive(context->end_sema); + vTaskDelete(NULL); +} + +TEST_CASE("scheduling time test", "[freertos]") +{ + test_context_t context; + + context.end_sema = xSemaphoreCreateBinary(); + TEST_ASSERT(context.end_sema != NULL); + + xTaskCreatePinnedToCore(test_task_1, "test1" , 4096, &context, 1, NULL,1); + xTaskCreatePinnedToCore(test_task_2, "test2" , 4096, &context, 1, NULL,1); + + BaseType_t result = xSemaphoreTake(context.end_sema, portMAX_DELAY); + TEST_ASSERT_EQUAL_HEX32(pdTRUE, result); + 
TEST_PERFORMANCE_LESS_THAN(SCHEDULING_TIME , "scheduling time %d cycles" ,context.cycles_to_sched); +} \ No newline at end of file diff --git a/components/idf_test/include/idf_performance.h b/components/idf_test/include/idf_performance.h index ac3d1e97f..f5ddd9a14 100644 --- a/components/idf_test/include/idf_performance.h +++ b/components/idf_test/include/idf_performance.h @@ -76,3 +76,5 @@ #endif //CONFIG_IDF_TARGET_ESP32S2BETA +//time to perform the task selection plus context switch (from task) +#define IDF_PERFORMANCE_MAX_SCHEDULING_TIME 4500 From 2c612ec1dcdb2507a1e9aecd915a05980de037ad Mon Sep 17 00:00:00 2001 From: Felipe Neves Date: Fri, 8 Nov 2019 13:27:02 +0800 Subject: [PATCH 3/4] components/freertos: using the optimized task selection on esp32s2beta components/freertos: cleaned up multicore option scheduler. components/freertos: more cleanup and test optimization to present realistic results components/freertos: remove unused macros of optimized task selection when multicore is used --- .../include/freertos/FreeRTOSConfig.h | 4 --- components/freertos/tasks.c | 32 +++++++++---------- .../test/test_freertos_scheduling_time.c | 19 +++++++---- components/idf_test/include/idf_performance.h | 2 +- 4 files changed, 29 insertions(+), 28 deletions(-) diff --git a/components/freertos/include/freertos/FreeRTOSConfig.h b/components/freertos/include/freertos/FreeRTOSConfig.h index 06c10b9db..35370dd36 100644 --- a/components/freertos/include/freertos/FreeRTOSConfig.h +++ b/components/freertos/include/freertos/FreeRTOSConfig.h @@ -176,15 +176,11 @@ int xt_clock_freq(void) __attribute__((deprecated)); //#define configCPU_CLOCK_HZ 80000000 /* This has impact on speed of search for highest priority */ -#ifdef configUSE_PORT_OPTIMISED_TASK_SELECTION #ifdef SMALL_TEST #define configMAX_PRIORITIES ( 7 ) #else #define configMAX_PRIORITIES ( 25 ) #endif -#else -#define configMAX_PRIORITIES ( 32 ) -#endif #ifndef CONFIG_APPTRACE_ENABLE #define configMINIMAL_STACK_SIZE 768 
diff --git a/components/freertos/tasks.c b/components/freertos/tasks.c index 06e681f7f..1f89457ef 100644 --- a/components/freertos/tasks.c +++ b/components/freertos/tasks.c @@ -368,7 +368,7 @@ PRIVILEGED_DATA static volatile BaseType_t xSwitchingContext[ portNUM_PROCESSORS \ /* listGET_OWNER_OF_NEXT_ENTRY indexes through the list, so the tasks of \ the same priority get an equal share of the processor time. */ \ - listGET_OWNER_OF_NEXT_ENTRY( xTaskGetCurrentTaskHandle(), &( pxReadyTasksLists[ uxTopReadyPriority ] ) ); \ + listGET_OWNER_OF_NEXT_ENTRY( pxCurrentTCB[xPortGetCoreID()], &( pxReadyTasksLists[ uxTopReadyPriority ] ) ); \ } /* taskSELECT_HIGHEST_PRIORITY_TASK */ /*-----------------------------------------------------------*/ @@ -397,7 +397,7 @@ PRIVILEGED_DATA static volatile BaseType_t xSwitchingContext[ portNUM_PROCESSORS /* Find the highest priority queue that contains ready tasks. */ \ portGET_HIGHEST_PRIORITY( uxTopPriority, uxTopReadyPriority ); \ configASSERT( listCURRENT_LIST_LENGTH( &( pxReadyTasksLists[ uxTopPriority ] ) ) > 0 ); \ - listGET_OWNER_OF_NEXT_ENTRY( xTaskGetCurrentTaskHandle(), &( pxReadyTasksLists[ uxTopPriority ] ) ); \ + listGET_OWNER_OF_NEXT_ENTRY( pxCurrentTCB[xPortGetCoreID()], &( pxReadyTasksLists[ uxTopPriority ] ) ); \ } /* taskSELECT_HIGHEST_PRIORITY_TASK() */ /*-----------------------------------------------------------*/ @@ -2723,7 +2723,7 @@ void vTaskSwitchContext( void ) //Theoretically, this is only called from either the tick interrupt or the crosscore interrupt, so disabling //interrupts shouldn't be necessary anymore. Still, for safety we'll leave it in for now. 
int irqstate=portENTER_CRITICAL_NESTED(); - tskTCB * pxTCB; + if( uxSchedulerSuspended[ xPortGetCoreID() ] != ( UBaseType_t ) pdFALSE ) { /* The scheduler is currently suspended - do not allow a context @@ -2782,16 +2782,12 @@ void vTaskSwitchContext( void ) vPortCPUAcquireMutex( &xTaskQueueMutex ); #endif +#if !CONFIG_FREERTOS_UNICORE unsigned portBASE_TYPE foundNonExecutingWaiter = pdFALSE, ableToSchedule = pdFALSE, resetListHead; unsigned portBASE_TYPE holdTop=pdFALSE; + tskTCB * pxTCB; -#if configUSE_PORT_OPTIMISED_TASK_SELECTION == 1 - portBASE_TYPE uxDynamicTopReady; - portGET_HIGHEST_PRIORITY( uxDynamicTopReady, uxTopReadyPriority ); - portBASE_TYPE uxCopyOfTopReadyPrio = uxDynamicTopReady; -#else portBASE_TYPE uxDynamicTopReady = uxTopReadyPriority; -#endif /* * ToDo: This scheduler doesn't correctly implement the round-robin scheduling as done in the single-core * FreeRTOS stack when multiple tasks have the same priority and are all ready; it just keeps grabbing the @@ -2867,18 +2863,19 @@ void vTaskSwitchContext( void ) } } while ((ableToSchedule == pdFALSE) && (pxTCB != pxRefTCB)); } else { - if (!holdTop) { - #if configUSE_PORT_OPTIMISED_TASK_SELECTION == 1 - portRESET_READY_PRIORITY( uxCopyOfTopReadyPrio,uxTopReadyPriority ); - portGET_HIGHEST_PRIORITY( uxCopyOfTopReadyPrio,uxTopReadyPriority ); - #else - uxTopReadyPriority--; - #endif - } + if (!holdTop) uxTopReadyPriority--; } --uxDynamicTopReady; } +#else + //For Unicore targets we can keep the current FreeRTOS O(1) + //Scheduler. 
I hope to optimize better the scheduler for + //Multicore settings -- This will involve to create a per + //affinity ready task list which will impact hugely on + //tasks module + taskSELECT_HIGHEST_PRIORITY_TASK(); +#endif traceTASK_SWITCHED_IN(); xSwitchingContext[ xPortGetCoreID() ] = pdFALSE; @@ -2890,6 +2887,7 @@ void vTaskSwitchContext( void ) vPortCPUReleaseMutex( &xTaskQueueMutex ); #endif + #if CONFIG_FREERTOS_WATCHPOINT_END_OF_STACK vPortSetStackWatchpoint(pxCurrentTCB[xPortGetCoreID()]->pxStack); #endif diff --git a/components/freertos/test/test_freertos_scheduling_time.c b/components/freertos/test/test_freertos_scheduling_time.c index 68f846800..1670f520e 100644 --- a/components/freertos/test/test_freertos_scheduling_time.c +++ b/components/freertos/test/test_freertos_scheduling_time.c @@ -12,16 +12,16 @@ #include "test_utils.h" typedef struct { - uint32_t noof_runs; SemaphoreHandle_t end_sema; uint32_t before_sched; uint32_t cycles_to_sched; + TaskHandle_t t1_handle; } test_context_t; static void test_task_1(void *arg) { test_context_t *context = (test_context_t *)arg; - for(context->noof_runs = 0 ;context->noof_runs < 10000; ) { + for( ;; ) { context->before_sched = portGET_RUN_TIME_COUNTER_VALUE(); vPortYield(); } @@ -31,13 +31,15 @@ static void test_task_1(void *arg) { static void test_task_2(void *arg) { test_context_t *context = (test_context_t *)arg; + uint32_t accumulator = 0; - for( ; context->noof_runs < 10000; context->noof_runs++) { - context->cycles_to_sched += (portGET_RUN_TIME_COUNTER_VALUE() - context->before_sched); + for(int i = 0; i < 10000; i++) { + accumulator += (portGET_RUN_TIME_COUNTER_VALUE() - context->before_sched); vPortYield(); } - context->cycles_to_sched /= 10000; + context->cycles_to_sched = accumulator / 10000; + vTaskDelete(context->t1_handle); xSemaphoreGive(context->end_sema); vTaskDelete(NULL); } @@ -49,8 +51,13 @@ TEST_CASE("scheduling time test", "[freertos]") context.end_sema = xSemaphoreCreateBinary(); 
TEST_ASSERT(context.end_sema != NULL); - xTaskCreatePinnedToCore(test_task_1, "test1" , 4096, &context, 1, NULL,1); +#if !CONFIG_FREERTOS_UNICORE + xTaskCreatePinnedToCore(test_task_1, "test1" , 4096, &context, 1, &context.t1_handle,1); xTaskCreatePinnedToCore(test_task_2, "test2" , 4096, &context, 1, NULL,1); +#else + xTaskCreatePinnedToCore(test_task_1, "test1" , 4096, &context, 1, &context.t1_handle,0); + xTaskCreatePinnedToCore(test_task_2, "test2" , 4096, &context, 1, NULL,0); +#endif BaseType_t result = xSemaphoreTake(context.end_sema, portMAX_DELAY); TEST_ASSERT_EQUAL_HEX32(pdTRUE, result); diff --git a/components/idf_test/include/idf_performance.h b/components/idf_test/include/idf_performance.h index f5ddd9a14..0aea1b350 100644 --- a/components/idf_test/include/idf_performance.h +++ b/components/idf_test/include/idf_performance.h @@ -77,4 +77,4 @@ #endif //CONFIG_IDF_TARGET_ESP32S2BETA //time to perform the task selection plus context switch (from task) -#define IDF_PERFORMANCE_MAX_SCHEDULING_TIME 4500 +#define IDF_PERFORMANCE_MAX_SCHEDULING_TIME 1500 From 5d7564a03953ae8ed28239a172fc2a29225f91b9 Mon Sep 17 00:00:00 2001 From: Felipe Neves Date: Mon, 11 Nov 2019 09:54:15 +0800 Subject: [PATCH 4/4] freertos/Kconfig: make optimized task selection dependent on FREERTOS_UNICORE option freertos: fix decrement loop of high priority task selection --- components/freertos/Kconfig | 1 + components/freertos/include/freertos/portmacro.h | 2 +- components/freertos/tasks.c | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/components/freertos/Kconfig b/components/freertos/Kconfig index 102ed95df..86544ab91 100644 --- a/components/freertos/Kconfig +++ b/components/freertos/Kconfig @@ -41,6 +41,7 @@ menu "FreeRTOS" config FREERTOS_OPTIMIZED_SCHEDULER bool "Enable FreeRTOS pĺatform optimized scheduler" + depends on FREERTOS_UNICORE default y help On most platforms there are instructions can speedup the ready task diff --git 
a/components/freertos/include/freertos/portmacro.h b/components/freertos/include/freertos/portmacro.h index b96bd8ff2..f544c2d76 100644 --- a/components/freertos/include/freertos/portmacro.h +++ b/components/freertos/include/freertos/portmacro.h @@ -476,7 +476,7 @@ void vApplicationSleep( TickType_t xExpectedIdleTime ); /* Check the configuration. */ #if( configMAX_PRIORITIES > 32 ) - #error configUSE_PORT_OPTIMISED_TASK_SELECTION can only be set to 1 when configMAX_PRIORITIES is less than or equal to 32. It is very rare that a system requires more than 10 to 15 difference priorities as tasks that share a priority will time slice. + #error configUSE_PORT_OPTIMISED_TASK_SELECTION can only be set to 1 when configMAX_PRIORITIES is less than or equal to 32. It is very rare that a system requires more than 10 to 15 different priorities as tasks that share a priority will time slice. #endif /* Store/clear the ready priorities in a bit map. */ diff --git a/components/freertos/tasks.c b/components/freertos/tasks.c index 1f89457ef..f34227d3b 100644 --- a/components/freertos/tasks.c +++ b/components/freertos/tasks.c @@ -2782,7 +2782,7 @@ void vTaskSwitchContext( void ) vPortCPUAcquireMutex( &xTaskQueueMutex ); #endif -#if !CONFIG_FREERTOS_UNICORE +#if !configUSE_PORT_OPTIMISED_TASK_SELECTION unsigned portBASE_TYPE foundNonExecutingWaiter = pdFALSE, ableToSchedule = pdFALSE, resetListHead; unsigned portBASE_TYPE holdTop=pdFALSE; tskTCB * pxTCB; @@ -2863,7 +2863,7 @@ void vTaskSwitchContext( void ) } } while ((ableToSchedule == pdFALSE) && (pxTCB != pxRefTCB)); } else { - if (!holdTop) uxTopReadyPriority--; + if (!holdTop) --uxTopReadyPriority; } --uxDynamicTopReady; }