cbb84e8f5e
1. Clarify THREADPTR calculation in FreeRTOS code, explaining where the constant 0x10 offset comes from. 2. On the ESP32-S2, the .flash.rodata section had a different default alignment (8 bytes instead of 16), which resulted in a different offset of the TLS sections. Unfortunately I haven’t found a way to query section alignment from C code, or to use a constant value to define section alignment in the linker script. The linker scripts are modified to force a fixed 16-byte alignment for .flash.rodata on the ESP32 and ESP32-S2beta. Note that the base address of .flash.rodata was already 16-byte aligned, so this has not changed the actual memory layout of the application. Full explanation of the calculation below. Assume we have the TLS template section base address (tls_section_vma), the address of a TLS variable in the template (address), and the final relocation value (offset). The linker calculates: offset = address - tls_section_vma + align_up(TCB_SIZE, alignment). At run time, the TLS section gets copied from _thread_local_start (in .rodata) to task_thread_local_start. Let’s assume that the address of a variable in the runtime TLS section is runtime_address. Access to this address will happen by calculating THREADPTR + offset. So, by a series of substitutions: THREADPTR + offset = runtime_address; THREADPTR = runtime_address - offset; THREADPTR = runtime_address - (address - tls_section_vma + align_up(TCB_SIZE, alignment)); THREADPTR = (runtime_address - address) + tls_section_vma - align_up(TCB_SIZE, alignment). The difference between runtime_address and address is the same as the difference between task_thread_local_start and _thread_local_start. And tls_section_vma is the address of the .rodata section, i.e. _rodata_start. So we arrive at THREADPTR = task_thread_local_start - _thread_local_start + _rodata_start - align_up(TCB_SIZE, alignment).
The idea with TCB_SIZE being added to the THREADPTR when computing the relocation was to let the OS save the TCB pointer in the THREADPTR register. The location of the run-time TLS section was assumed to be immediately after the TCB, aligned to whatever the section alignment was. However, in our case the problem is that the run-time TLS section is stored not next to the TCB, but at the top of the stack. Plus, even if it was stored next to the TCB, the size of a FreeRTOS TCB is not equal to 8 bytes (TCB_SIZE hardcoded in the linker). So we have to calculate THREADPTR in a slightly obscure way, to compensate for these differences. Closes IDF-1239
111 lines
3.7 KiB
C
111 lines
3.7 KiB
C
/*
 Test for thread local storage support.
*/

#include <inttypes.h>
#include <string.h>
#include <esp_types.h>

#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "unity.h"
#include "test_utils.h"
#include "sdkconfig.h"

/*
 * Thread-local variables exercised by the test task.
 *
 * They cover several sizes and kinds of object (scalar, array, struct) and
 * both explicitly initialized and uninitialized variables. The test task
 * asserts that the uninitialized ones read as zero and the initialized ones
 * read as 55 and 44 on its first iteration.
 */
static __thread int tl_test_var1;            /* no initializer */
static __thread uint8_t tl_test_var2 = 55;   /* explicit initializer */
static __thread uint16_t tl_test_var3 = 44;  /* explicit initializer */
static __thread uint8_t tl_test_arr_var[10]; /* no initializer */
static __thread struct test_tls_var {
    int f32;
    uint8_t f8;
    uint16_t f16;
    uint8_t farr[10];
} tl_test_struct_var;                        /* no initializer */

static void task_test_tls(void *arg)
|
|
{
|
|
bool *running = (bool *)arg;
|
|
uint32_t tp = (uint32_t)-1;
|
|
int test_var1_old = 0;
|
|
uint8_t test_var2_old = 0;
|
|
uint16_t test_var3_old = 0;
|
|
int f32_old = 0;
|
|
uint8_t f8_old = 0;
|
|
uint16_t f16_old = 0;
|
|
|
|
asm volatile ("rur.threadptr %0":"=r"(tp));
|
|
for (int i = 0; i < 5; i++) {
|
|
printf("Task[%x]: var = 0x%x 0x%x\n", tp, tl_test_var1, tl_test_var2);
|
|
if (i == 0) {
|
|
TEST_ASSERT_EQUAL(0, tl_test_var1);
|
|
TEST_ASSERT_EQUAL(55, tl_test_var2);
|
|
TEST_ASSERT_EQUAL(44, tl_test_var3);
|
|
for (int k = 0; k < sizeof(tl_test_arr_var); k++) {
|
|
TEST_ASSERT_EQUAL(0, tl_test_arr_var[k]);
|
|
}
|
|
TEST_ASSERT_EQUAL(0, tl_test_struct_var.f32);
|
|
TEST_ASSERT_EQUAL(0, tl_test_struct_var.f8);
|
|
TEST_ASSERT_EQUAL(0, tl_test_struct_var.f16);
|
|
for (int k = 0; k < sizeof(tl_test_struct_var.farr); k++) {
|
|
TEST_ASSERT_EQUAL(0, tl_test_struct_var.farr[k]);
|
|
}
|
|
} else {
|
|
TEST_ASSERT_EQUAL(test_var1_old+1, tl_test_var1);
|
|
TEST_ASSERT_EQUAL(test_var2_old+1, tl_test_var2);
|
|
TEST_ASSERT_EQUAL(test_var3_old+1, tl_test_var3);
|
|
for (int k = 0; k < sizeof(tl_test_arr_var); k++) {
|
|
TEST_ASSERT_EQUAL(i-1, tl_test_arr_var[k]);
|
|
}
|
|
TEST_ASSERT_EQUAL(f32_old+1, tl_test_struct_var.f32);
|
|
TEST_ASSERT_EQUAL(f8_old+1, tl_test_struct_var.f8);
|
|
TEST_ASSERT_EQUAL(f16_old+1, tl_test_struct_var.f16);
|
|
for (int k = 0; k < sizeof(tl_test_struct_var.farr); k++) {
|
|
TEST_ASSERT_EQUAL(i-1, tl_test_struct_var.farr[k]);
|
|
}
|
|
}
|
|
test_var1_old = tl_test_var1;
|
|
test_var2_old = tl_test_var2;
|
|
test_var3_old = tl_test_var3;
|
|
f32_old = tl_test_struct_var.f32;
|
|
f8_old = tl_test_struct_var.f8;
|
|
f16_old = tl_test_struct_var.f16;
|
|
tl_test_var1++;
|
|
tl_test_var2++;
|
|
tl_test_var3++;
|
|
memset(tl_test_arr_var, i, sizeof(tl_test_arr_var));
|
|
tl_test_struct_var.f32++;
|
|
tl_test_struct_var.f8++;
|
|
tl_test_struct_var.f16++;
|
|
memset(tl_test_struct_var.farr, i, sizeof(tl_test_struct_var.farr));
|
|
vTaskDelay(10);
|
|
}
|
|
|
|
if (running) {
|
|
*running = false;
|
|
vTaskDelete(NULL);
|
|
}
|
|
}
|
|
|
|
/*
 * Runs task_test_tls in two tasks: one created with a dynamically allocated
 * stack and pinned to core 0, and one created with a static stack/TCB and
 * pinned to core 1 (core 0 when CONFIG_FREERTOS_UNICORE is set). Each task
 * gets its own completion flag; the test waits until both are cleared.
 */
TEST_CASE("TLS test", "[freertos]")
{
    /* Enum constant rather than a const variable, so s_stack is a fixed-size array and not a VLA. */
    enum { TLS_TASK_STACK_SIZE = 3072 };
    StackType_t s_stack[TLS_TASK_STACK_SIZE]; /* with 8KB test task stack (default) this test still has ~3KB headroom */
    StaticTask_t s_task;
    bool running[2] = {true, true};
#if CONFIG_FREERTOS_UNICORE == 0
    int other_core = 1;
#else
    int other_core = 0;
#endif

    /*
     * No task has been started yet, so failing the test right here cannot
     * leave a task behind that still points at this function's locals.
     */
    BaseType_t created = xTaskCreatePinnedToCore((TaskFunction_t)&task_test_tls, "task_test_tls", TLS_TASK_STACK_SIZE, &running[0],
                                                 UNITY_FREERTOS_PRIORITY, NULL, 0);
    TEST_ASSERT_EQUAL(pdPASS, created);

    TaskHandle_t static_task = xTaskCreateStaticPinnedToCore((TaskFunction_t)&task_test_tls, "task_test_tls", TLS_TASK_STACK_SIZE, &running[1],
                                                             UNITY_FREERTOS_PRIORITY, s_stack, &s_task, other_core);
    if (static_task == NULL) {
        /*
         * The second task was not created and will never clear its flag.
         * Clear it here so the wait loop still terminates once the first
         * task, which holds a pointer to running[0], has finished.
         */
        running[1] = false;
    }

    while (running[0] || running[1]) {
        vTaskDelay(10);
    }
    vTaskDelay(10); /* Make sure idle task can clean up s_task, before it goes out of scope */

    /* Report a failed static creation only now, after the first task is done with running[0]. */
    TEST_ASSERT_NOT_NULL(static_task);
}