components/esp32: add inter-processor call API and implement spi_flash through it

With this change, flash operations can run on both cores. NVS and WiFi stack can also run in dual core mode now.
2016-09-12 18:54:45 +08:00 · 2016-09-12 18:54:45 +08:00 · e9f2645b21
parent 1c6859573b
commit e9f2645b21
5 changed files with 321 additions and 85 deletions
--- a/components/esp32/cpu_start.c
+++ b/components/esp32/cpu_start.c
@ -36,6 +36,8 @@
 #include "esp_spi_flash.h"
 #include "nvs_flash.h"
 #include "esp_event.h"
+#include "esp_spi_flash.h"
+#include "esp_ipc.h"

 static void IRAM_ATTR user_start_cpu0(void);
 static void IRAM_ATTR call_user_start_cpu1();
@ -180,37 +182,39 @@ void IRAM_ATTR call_user_start_cpu1() {
 	user_start_cpu1();
 }

-extern volatile int port_xSchedulerRunning;
-extern int xPortStartScheduler();
+extern volatile int port_xSchedulerRunning[2];

-void user_start_cpu1(void) {
-	//Wait for the freertos initialization is finished on CPU0
-	while (port_xSchedulerRunning == 0) ;
-	ets_printf("Core0 started initializing FreeRTOS. Jumping to scheduler.\n");
-	//Okay, start the scheduler!
+void IRAM_ATTR user_start_cpu1(void) {
+	// Wait for FreeRTOS initialization to finish on PRO CPU
+	while (port_xSchedulerRunning[0] == 0) {
+	    ;
+	}
+	ets_printf("Starting scheduler on APP CPU.\n");
+	// Start the scheduler on APP CPU
 	xPortStartScheduler();
 }

 extern void (*__init_array_start)(void);
 extern void (*__init_array_end)(void);

-extern esp_err_t app_main();
 static void do_global_ctors(void) {
    void (**p)(void);
    for(p = &__init_array_start; p != &__init_array_end; ++p)
        (*p)();
 }

+extern esp_err_t app_main();

 void user_start_cpu0(void) {
 	ets_setup_syscalls();
 	do_global_ctors();
+	esp_ipc_init();
+	spi_flash_init();

 #if CONFIG_WIFI_ENABLED
-    ets_printf("nvs_flash_init\n");
    esp_err_t ret = nvs_flash_init(5, 3);
    if (ret != ESP_OK) {
-        ets_printf("nvs_flash_init fail, ret=%d\n", ret);
+        printf("nvs_flash_init failed, ret=%d\n", ret);
    }

    system_init();
@ -227,6 +231,7 @@ void user_start_cpu0(void) {
 	app_main();
 #endif

+	ets_printf("Starting scheduler on PRO CPU.\n");
 	vTaskStartScheduler();
 }

--- a/components/esp32/include/esp_err.h
+++ b/components/esp32/include/esp_err.h
@ -27,7 +27,10 @@ typedef int32_t esp_err_t;
 #define ESP_OK          0
 #define ESP_FAIL        -1

-#define ESP_ERR_NO_MEM  0x101
+#define ESP_ERR_NO_MEM          0x101
+#define ESP_ERR_INVALID_ARG     0x102
+#define ESP_ERR_INVALID_STATE   0x103
+

 #ifdef __cplusplus
 }
--- a/components/esp32/include/esp_ipc.h
+++ b/components/esp32/include/esp_ipc.h
@ -0,0 +1,84 @@
+// Copyright 2015-2016 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef __ESP_IPC_H__
+#define __ESP_IPC_H__
+
+#include <esp_err.h>
+
+typedef void (*esp_ipc_func_t)(void* arg);
+
+/**
+ * @brief Inter-processor call APIs
+ *
+ * FreeRTOS provides several APIs which can be used to communicate between
+ * different tasks, including tasks running on different CPUs.
+ * This module provides additional APIs to run some code on the other CPU.
+ */
+
+
+/**
+ * @brief Initialize inter-processor call module.
+ *
+ * This function starts two tasks, one on each CPU. These tasks are started
+ * with high priority. They are normally inactive, waiting until one of
+ * the esp_ipc_call* functions is used. One of these tasks will be
+ * woken up to execute the callback provided to esp_ipc_call or
+ * esp_ipc_call_blocking.
+ */
+void esp_ipc_init();
+
+
+/**
+ * @brief Execute function on the given CPU
+ *
+ * This will wake a high-priority task on CPU indicated by cpu_id argument,
+ * and run func(arg) in the context of that task.
+ * This function returns as soon as the high-priority task is woken up.
+ * If another IPC call is already being executed, this function will also wait
+ * for it to complete.
+ *
+ * In single-core mode, returns ESP_ERR_INVALID_ARG for cpu_id 1.
+ *
+ * @param cpu_id CPU where function should be executed (0 or 1)
+ * @param func pointer to a function which should be executed
+ * @param arg arbitrary argument to be passed into function
+ *
+ * @return ESP_ERR_INVALID_ARG if cpu_id is invalid
+ *         ESP_ERR_INVALID_STATE if the FreeRTOS scheduler is not running
+ *         ESP_OK otherwise
+ */
+esp_err_t esp_ipc_call(uint32_t cpu_id, esp_ipc_func_t func, void* arg);
+
+
+/**
+ * @brief Execute function on the given CPU and wait for it to finish
+ *
+ * This will wake a high-priority task on CPU indicated by cpu_id argument,
+ * and run func(arg) in the context of that task.
+ * This function waits for func to return.
+ *
+ * In single-core mode, returns ESP_ERR_INVALID_ARG for cpu_id 1.
+ *
+ * @param cpu_id CPU where function should be executed (0 or 1)
+ * @param func pointer to a function which should be executed
+ * @param arg arbitrary argument to be passed into function
+ *
+ * @return ESP_ERR_INVALID_ARG if cpu_id is invalid
+ *         ESP_ERR_INVALID_STATE if the FreeRTOS scheduler is not running
+ *         ESP_OK otherwise
+ */
+esp_err_t esp_ipc_call_blocking(uint32_t cpu_id, esp_ipc_func_t func, void* arg);
+
+
+
+#endif /* __ESP_IPC_H__ */
--- a/components/esp32/ipc.c
+++ b/components/esp32/ipc.c
@ -0,0 +1,117 @@
+// Copyright 2015-2016 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "esp_err.h"
+#include "esp_ipc.h"
+#include "esp_attr.h"
+
+#include "freertos/FreeRTOS.h"
+#include "freertos/task.h"
+#include "freertos/semphr.h"
+
+
+static TaskHandle_t s_ipc_tasks[portNUM_PROCESSORS];         // Two high priority tasks, one for each CPU
+static SemaphoreHandle_t s_ipc_mutex;                        // This mutex is used as a global lock for esp_ipc_* APIs
+static SemaphoreHandle_t s_ipc_sem[portNUM_PROCESSORS];      // Two semaphores used to wake each of s_ipc_tasks
+static SemaphoreHandle_t s_ipc_ack;                          // Semaphore used to acknowledge that task was woken up,
+                                                             //   or function has finished running
+static volatile esp_ipc_func_t s_func;                       // Function which should be called by high priority task
+static void * volatile s_func_arg;                           // Argument to pass into s_func
+typedef enum {
+    IPC_WAIT_FOR_START,
+    IPC_WAIT_FOR_END
+} esp_ipc_wait_t;
+
+static volatile esp_ipc_wait_t s_ipc_wait;                   // This variable tells high priority task when it should give
+                                                             //   s_ipc_ack semaphore: before s_func is called, or
+                                                             //   after it returns
+
+/**
+ * Body of the per-CPU IPC task.
+ *
+ * Blocks on s_ipc_sem[cpuid]. Each time that semaphore is given, the task
+ * runs the function published in s_func with s_func_arg, and gives s_ipc_ack
+ * either before the call (IPC_WAIT_FOR_START) or after it returns
+ * (IPC_WAIT_FOR_END).
+ *
+ * @param arg  index of the CPU this task was created for, passed as a
+ *             pointer-sized integer
+ */
+static void IRAM_ATTR ipc_task(void* arg)
+{
+    const uint32_t cpuid = (uint32_t) arg;
+    assert(cpuid == xPortGetCoreID());
+    while (true) {
+        // Wait for IPC to be initiated.
+        // This will be indicated by giving the semaphore corresponding to
+        // this CPU.
+        if (xSemaphoreTake(s_ipc_sem[cpuid], portMAX_DELAY) != pdTRUE) {
+            // TODO: when can this happen?
+            abort();
+        }
+
+        // Snapshot the whole request before acknowledging it. For
+        // IPC_WAIT_FOR_START the caller releases s_ipc_mutex as soon as
+        // s_ipc_ack is given, so another caller may overwrite s_func,
+        // s_func_arg and s_ipc_wait while func is still running here.
+        // Re-reading s_ipc_wait after the call could then give s_ipc_ack
+        // a second time on behalf of a request that has not completed.
+        const esp_ipc_func_t func = s_func;
+        void* const func_arg = s_func_arg;
+        const esp_ipc_wait_t wait_for = s_ipc_wait;
+
+        if (wait_for == IPC_WAIT_FOR_START) {
+            xSemaphoreGive(s_ipc_ack);
+        }
+        (*func)(func_arg);
+        if (wait_for == IPC_WAIT_FOR_END) {
+            xSemaphoreGive(s_ipc_ack);
+        }
+    }
+    // TODO: currently this is unreachable code. Introduce esp_ipc_uninit
+    // function which will signal to both tasks that they can shut down.
+    // Not critical at this point, we don't have a use case for stopping
+    // IPC yet.
+    // Also need to delete the semaphore here.
+    vTaskDelete(NULL);
+}
+
+/**
+ * Create the IPC synchronization objects and start one IPC task per CPU.
+ *
+ * Creates the global API mutex, the acknowledge semaphore, and for each of
+ * the portNUM_PROCESSORS CPUs a wake-up semaphore plus a task pinned to that
+ * CPU running ipc_task at priority configMAX_PRIORITIES - 1.
+ *
+ * Aborts if any object or task cannot be created: the IPC tasks and the
+ * esp_ipc_call* functions use these handles without further checks.
+ */
+void esp_ipc_init()
+{
+    s_ipc_mutex = xSemaphoreCreateMutex();
+    s_ipc_ack = xSemaphoreCreateBinary();
+    if (s_ipc_mutex == NULL || s_ipc_ack == NULL) {
+        abort();
+    }
+    const char* task_names[2] = {"ipc0", "ipc1"};
+    for (int i = 0; i < portNUM_PROCESSORS; ++i) {
+        s_ipc_sem[i] = xSemaphoreCreateBinary();
+        if (s_ipc_sem[i] == NULL) {
+            abort();
+        }
+        // The CPU index is passed both as the task argument and as the core
+        // to pin the task to; ipc_task asserts that the two agree.
+        if (xTaskCreatePinnedToCore(ipc_task, task_names[i], XT_STACK_MIN_SIZE, (void*) i,
+                                    configMAX_PRIORITIES - 1, &s_ipc_tasks[i], i) != pdPASS) {
+            abort();
+        }
+    }
+}
+
+/**
+ * Common implementation of esp_ipc_call and esp_ipc_call_blocking.
+ *
+ * Publishes func/arg and the requested wait mode, wakes the IPC task of the
+ * target CPU and blocks until that task gives s_ipc_ack. The whole sequence
+ * runs under s_ipc_mutex, so only one request is in flight at a time.
+ *
+ * @param cpu_id    CPU on which func should run; must be < portNUM_PROCESSORS
+ * @param func      function to execute in the context of the IPC task
+ * @param arg       argument passed to func
+ * @param wait_for  IPC_WAIT_FOR_START to return once the IPC task has picked
+ *                  up the request, IPC_WAIT_FOR_END to return only after
+ *                  func has returned
+ *
+ * @return ESP_ERR_INVALID_ARG if cpu_id is out of range
+ *         ESP_ERR_INVALID_STATE if the scheduler is not running
+ *         ESP_OK otherwise
+ */
+static esp_err_t esp_ipc_call_and_wait(uint32_t cpu_id, esp_ipc_func_t func, void* arg, esp_ipc_wait_t wait_for)
+{
+    if (cpu_id >= portNUM_PROCESSORS) {
+        return ESP_ERR_INVALID_ARG;
+    }
+    if (xTaskGetSchedulerState() != taskSCHEDULER_RUNNING) {
+        return ESP_ERR_INVALID_STATE;
+    }
+
+    xSemaphoreTake(s_ipc_mutex, portMAX_DELAY);
+
+    s_func = func;
+    s_func_arg = arg;
+    // Honour the mode requested by the caller; hard-coding
+    // IPC_WAIT_FOR_START here made esp_ipc_call_blocking return before
+    // func had finished.
+    s_ipc_wait = wait_for;
+    xSemaphoreGive(s_ipc_sem[cpu_id]);
+    xSemaphoreTake(s_ipc_ack, portMAX_DELAY);
+    xSemaphoreGive(s_ipc_mutex);
+    return ESP_OK;
+}
+
+// Run func(arg) on cpu_id; returns once the IPC task has started handling it.
+esp_err_t esp_ipc_call(uint32_t cpu_id, esp_ipc_func_t func, void* arg)
+{
+    return esp_ipc_call_and_wait(cpu_id, func, arg, IPC_WAIT_FOR_START);
+}
+
+// Run func(arg) on cpu_id; returns only after func has returned.
+esp_err_t esp_ipc_call_blocking(uint32_t cpu_id, esp_ipc_func_t func, void* arg)
+{
+    return esp_ipc_call_and_wait(cpu_id, func, arg, IPC_WAIT_FOR_END);
+}
+
--- a/components/spi_flash/esp_spi_flash.c
+++ b/components/spi_flash/esp_spi_flash.c
@ -12,16 +12,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#include <stdlib.h>
+#include <assert.h>
 #include <freertos/FreeRTOS.h>
 #include <freertos/task.h>
 #include <freertos/semphr.h>
-
-#include <esp_spi_flash.h>
 #include <rom/spi_flash.h>
 #include <rom/cache.h>
-#include <esp_attr.h>
+#include <soc/soc.h>
 #include <soc/dport_reg.h>
 #include "sdkconfig.h"
+#include "esp_ipc.h"
+#include "esp_attr.h"
+#include "esp_spi_flash.h"
+

 /*
    Driver for SPI flash read/write/erase operations
@ -42,7 +46,7 @@
    this flag to be set. Once the flag is set, it disables cache on CPU A and
    starts flash operation.

-    While flash operation is running, interrupts can still run on CPU B. 
+    While flash operation is running, interrupts can still run on CPU B.
    We assume that all interrupt code is placed into RAM.

    Once flash operation is complete, function on CPU A sets another flag,
@ -58,93 +62,94 @@
 */

 static esp_err_t spi_flash_translate_rc(SpiFlashOpResult rc);
-extern void Cache_Flush(int);
+static void IRAM_ATTR spi_flash_disable_cache(uint32_t cpuid, uint32_t* saved_state);
+static void IRAM_ATTR spi_flash_restore_cache(uint32_t cpuid, uint32_t saved_state);
+
+static uint32_t s_flash_op_cache_state[2];
+
+#ifndef CONFIG_FREERTOS_UNICORE
+// API lock taken for the duration of every flash operation
+static SemaphoreHandle_t s_flash_op_mutex;
+// Handshake flags between the CPU performing the flash operation and
+// spi_flash_op_block_func running on the other CPU. Both are polled in
+// empty busy loops while being written from the other CPU, so they must be
+// volatile; otherwise the compiler may read them once and spin forever.
+static volatile bool s_flash_op_can_start = false;
+static volatile bool s_flash_op_complete = false;
+#endif //CONFIG_FREERTOS_UNICORE
+

 #ifndef CONFIG_FREERTOS_UNICORE

-static TaskHandle_t s_flash_op_tasks[2];
-static SemaphoreHandle_t s_flash_op_mutex;
-static SemaphoreHandle_t s_flash_op_sem[2];
-static bool s_flash_op_can_start = false;
-static bool s_flash_op_complete = false;
-
-
-// Task whose duty is to block other tasks from running on a given CPU
-static void IRAM_ATTR spi_flash_op_block_task(void* arg)
+static void IRAM_ATTR spi_flash_op_block_func(void* arg)
 {
+    // Disable scheduler on this CPU
+    vTaskSuspendAll();
    uint32_t cpuid = (uint32_t) arg;
-    while (true) {
-        // Wait for flash operation to be initiated.
-        // This will be indicated by giving the semaphore corresponding to
-        // this CPU.
-        if (xSemaphoreTake(s_flash_op_sem[cpuid], portMAX_DELAY) != pdTRUE) {
-            // TODO: when can this happen?
-            abort();
-        }
-        // Disable cache on this CPU
-        Cache_Read_Disable(cpuid);
-        // Signal to the flash API function that flash operation can start
-        s_flash_op_can_start = true;
-        while (!s_flash_op_complete) {
-            // until we have a way to use interrupts for inter-CPU communication,
-            // busy loop here and wait for the other CPU to finish flash operation
-        }
-        // Flash operation is complete, re-enable cache
-        Cache_Read_Enable(cpuid);
+    // Disable cache so that flash operation can start
+    spi_flash_disable_cache(cpuid, &s_flash_op_cache_state[cpuid]);
+    s_flash_op_can_start = true;
+    while (!s_flash_op_complete) {
+        // until we have a way to use interrupts for inter-CPU communication,
+        // busy loop here and wait for the other CPU to finish flash operation
    }
-    // TODO: currently this is unreachable code. Introduce spi_flash_uninit 
-    // function which will signal to both tasks that they can shut down.
-    // Not critical at this point, we don't have a use case for stopping
-    // SPI flash driver yet.
-    // Also need to delete the semaphore here.
-    vTaskDelete(NULL);
+    // Flash operation is complete, re-enable cache
+    spi_flash_restore_cache(cpuid, s_flash_op_cache_state[cpuid]);
+    // Re-enable scheduler
+    xTaskResumeAll();
 }

 void spi_flash_init()
 {
-    s_flash_op_can_start = false;
-    s_flash_op_complete = false;
-    s_flash_op_sem[0] = xSemaphoreCreateBinary();
-    s_flash_op_sem[1] = xSemaphoreCreateBinary();
    s_flash_op_mutex = xSemaphoreCreateMutex();
-    // Start two tasks, one on each CPU, with max priorities
-    // TODO: optimize stack usage. Stack size 512 is too small.
-    xTaskCreatePinnedToCore(spi_flash_op_block_task, "flash_op_pro", 1024, (void*) 0,
-                            configMAX_PRIORITIES - 1, &s_flash_op_tasks[0], 0);
-    xTaskCreatePinnedToCore(spi_flash_op_block_task, "flash_op_app", 1024, (void*) 1,
-                            configMAX_PRIORITIES - 1, &s_flash_op_tasks[1], 1);
 }

 static void IRAM_ATTR spi_flash_disable_interrupts_caches_and_other_cpu()
 {
    // Take the API lock
    xSemaphoreTake(s_flash_op_mutex, portMAX_DELAY);
+
    const uint32_t cpuid = xPortGetCoreID();
-    uint32_t other_cpuid = !cpuid;
-    s_flash_op_can_start = false;
-    s_flash_op_complete = false;
-    // Signal to the spi_flash_op_block_task on the other CPU that we need it to
-    // disable cache there and block other tasks from executing.
-    xSemaphoreGive(s_flash_op_sem[other_cpuid]);
-    while (!s_flash_op_can_start) {
-        // Busy loop and wait for spi_flash_op_block_task to take the semaphore on the
-        // other CPU.
+    const uint32_t other_cpuid = !cpuid;
+
+    if (xTaskGetSchedulerState() == taskSCHEDULER_NOT_STARTED) {
+        // Scheduler hasn't been started yet, so we don't need to worry
+        // about cached code running on the APP CPU.
+        spi_flash_disable_cache(other_cpuid, &s_flash_op_cache_state[other_cpuid]);
+    } else {
+        // Ask the IPC task on the other CPU to run spi_flash_op_block_func,
+        // which disables cache there and blocks other tasks from executing.
+        s_flash_op_can_start = false;
+        s_flash_op_complete = false;
+        esp_ipc_call(other_cpuid, &spi_flash_op_block_func, (void*) other_cpuid);
+        while (!s_flash_op_can_start) {
+            // Busy loop and wait for spi_flash_op_block_func to disable cache
+            // on the other CPU
+        }
+        // Disable scheduler on CPU cpuid
+        vTaskSuspendAll();
+        // This is guaranteed to run on CPU <cpuid> because the other CPU is now
+        // occupied by highest priority task
+        assert(xPortGetCoreID() == cpuid);
    }
-    vTaskSuspendAll();
    // Disable cache on this CPU as well
-    Cache_Read_Disable(cpuid);
+    spi_flash_disable_cache(cpuid, &s_flash_op_cache_state[cpuid]);
 }

 static void IRAM_ATTR spi_flash_enable_interrupts_caches_and_other_cpu()
 {
-    uint32_t cpuid = xPortGetCoreID();
-    // Signal to spi_flash_op_block_task that flash operation is complete
-    s_flash_op_complete = true;
+    const uint32_t cpuid = xPortGetCoreID();
+    const uint32_t other_cpuid = !cpuid;
+
    // Re-enable cache on this CPU
-    Cache_Read_Enable(cpuid);
+    spi_flash_restore_cache(cpuid, s_flash_op_cache_state[cpuid]);
+
+    if (xTaskGetSchedulerState() == taskSCHEDULER_NOT_STARTED) {
+        // Scheduler is not running yet — just re-enable cache on APP CPU
+        spi_flash_restore_cache(other_cpuid, s_flash_op_cache_state[other_cpuid]);
+    } else {
+        // Signal to spi_flash_op_block_func that flash operation is complete
+        s_flash_op_complete = true;
+        // Resume tasks on the current CPU
+        xTaskResumeAll();
+    }
    // Release API lock
    xSemaphoreGive(s_flash_op_mutex);
-    xTaskResumeAll();
 }

 #else  // CONFIG_FREERTOS_UNICORE
@ -157,14 +162,12 @@ void spi_flash_init()
 static void IRAM_ATTR spi_flash_disable_interrupts_caches_and_other_cpu()
 {
    vTaskSuspendAll();
-    Cache_Read_Disable(0);
-    Cache_Read_Disable(1);
+    spi_flash_disable_cache(0, &s_flash_op_cache_state[0]);
 }

 static void IRAM_ATTR spi_flash_enable_interrupts_caches_and_other_cpu()
 {
-    Cache_Read_Enable(0);
-    Cache_Read_Enable(1);
+    spi_flash_restore_cache(0, s_flash_op_cache_state[0]);
    xTaskResumeAll();
 }

@ -179,8 +182,6 @@ esp_err_t IRAM_ATTR spi_flash_erase_sector(uint16_t sec)
    if (rc == SPI_FLASH_RESULT_OK) {
        rc = SPIEraseSector(sec);
    }
-    Cache_Flush(0);
-    Cache_Flush(1);
    spi_flash_enable_interrupts_caches_and_other_cpu();
    return spi_flash_translate_rc(rc);
 }
@ -193,8 +194,6 @@ esp_err_t IRAM_ATTR spi_flash_write(uint32_t dest_addr, const uint32_t *src, uin
    if (rc == SPI_FLASH_RESULT_OK) {
        rc = SPIWrite(dest_addr, src, (int32_t) size);
    }
-    Cache_Flush(0);
-    Cache_Flush(1);
    spi_flash_enable_interrupts_caches_and_other_cpu();
    return spi_flash_translate_rc(rc);
 }
@ -204,8 +203,6 @@ esp_err_t IRAM_ATTR spi_flash_read(uint32_t src_addr, uint32_t *dest, uint32_t s
    spi_flash_disable_interrupts_caches_and_other_cpu();
    SpiFlashOpResult rc;
    rc = SPIRead(src_addr, dest, (int32_t) size);
-    Cache_Flush(0);
-    Cache_Flush(1);
    spi_flash_enable_interrupts_caches_and_other_cpu();
    return spi_flash_translate_rc(rc);
 }
@ -222,3 +219,33 @@ static esp_err_t spi_flash_translate_rc(SpiFlashOpResult rc)
        return ESP_ERR_FLASH_OP_FAIL;
    }
 }
+/*
+ * Disable the flash cache of one CPU and report the state needed to undo it.
+ *
+ * cpuid        0 selects the PRO_* cache registers, any other value the
+ *              APP_* ones
+ * saved_state  receives the field read from <PRO|APP>_CACHE_CTRL1_REG with
+ *              mask 0x1f, shift 0; pass it to spi_flash_restore_cache later
+ *
+ * NOTE(review): the meaning of the individual register fields comes from the
+ * DPORT macros, which are not visible here - confirm against the register
+ * definitions.
+ */
+static void IRAM_ATTR spi_flash_disable_cache(uint32_t cpuid, uint32_t* saved_state)
+{
+    uint32_t ret = 0;
+    if (cpuid == 0) {
+        ret |= GET_PERI_REG_BITS2(PRO_CACHE_CTRL1_REG, 0x1f, 0); // remember the CTRL1 field before touching the cache
+        while (GET_PERI_REG_BITS2(PRO_DCACHE_DBUG_REG0, DPORT_PRO_CACHE_STATE, DPORT_PRO_CACHE_STATE_S) != 1) {
+            ; // spin until the cache state field reads 1 (presumably "idle" - TODO confirm)
+        }
+        SET_PERI_REG_BITS(PRO_CACHE_CTRL_REG, 1, 0, DPORT_PRO_CACHE_ENABLE_S); // write 0 to the enable field
+    } else {
+        ret |= GET_PERI_REG_BITS2(APP_CACHE_CTRL1_REG, 0x1f, 0); // same sequence using the APP_* registers
+        while (GET_PERI_REG_BITS2(APP_DCACHE_DBUG_REG0, DPORT_APP_CACHE_STATE, DPORT_APP_CACHE_STATE_S) != 1) {
+            ;
+        }
+        SET_PERI_REG_BITS(APP_CACHE_CTRL_REG, 1, 0, DPORT_APP_CACHE_ENABLE_S);
+    }
+    *saved_state = ret;
+}
+/*
+ * Counterpart of spi_flash_disable_cache: write 1 to the cache enable field
+ * of the selected CPU, then write saved_state back into the 0x1f field of
+ * its CACHE_CTRL1 register.
+ *
+ * cpuid        0 selects the PRO_* cache registers, any other value the
+ *              APP_* ones
+ * saved_state  value previously produced by spi_flash_disable_cache for the
+ *              same cpuid
+ */
+static void IRAM_ATTR spi_flash_restore_cache(uint32_t cpuid, uint32_t saved_state)
+{
+    if (cpuid == 0) {
+        SET_PERI_REG_BITS(PRO_CACHE_CTRL_REG, 1, 1, DPORT_PRO_CACHE_ENABLE_S); // enable field := 1
+        SET_PERI_REG_BITS(PRO_CACHE_CTRL1_REG, 0x1f, saved_state, 0); // put the saved CTRL1 field back
+    } else {
+        SET_PERI_REG_BITS(APP_CACHE_CTRL_REG, 1, 1, DPORT_APP_CACHE_ENABLE_S);
+        SET_PERI_REG_BITS(APP_CACHE_CTRL1_REG, 0x1f, saved_state, 0);
+    }
+}