From d288a3d05345dc44a36401338b4e629bf880879e Mon Sep 17 00:00:00 2001 From: Deomid Ryabkov Date: Fri, 9 Dec 2016 17:29:31 +0000 Subject: [PATCH] Remove alignment reqs from spi_flash_{read,write} --- components/esp32/include/rom/spi_flash.h | 2 +- components/spi_flash/flash_ops.c | 267 ++++++++++++------- components/spi_flash/include/esp_spi_flash.h | 5 - components/spi_flash/test/test_mmap.c | 4 +- components/spi_flash/test/test_read_write.c | 207 ++++++++++++++ 5 files changed, 377 insertions(+), 108 deletions(-) create mode 100644 components/spi_flash/test/test_read_write.c diff --git a/components/esp32/include/rom/spi_flash.h b/components/esp32/include/rom/spi_flash.h index b1e4a7c93..44098475d 100644 --- a/components/esp32/include/rom/spi_flash.h +++ b/components/esp32/include/rom/spi_flash.h @@ -3,7 +3,7 @@ // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at - +// // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software diff --git a/components/spi_flash/flash_ops.c b/components/spi_flash/flash_ops.c index 101013eaa..993e68a57 100644 --- a/components/spi_flash/flash_ops.c +++ b/components/spi_flash/flash_ops.c @@ -16,6 +16,7 @@ #include #include #include +#include // For MIN/MAX(a, b) #include #include @@ -114,8 +115,7 @@ esp_err_t IRAM_ATTR spi_flash_erase_range(uint32_t start_addr, uint32_t size) rc = SPIEraseBlock(sector / sectors_per_block); sector += sectors_per_block; COUNTER_ADD_BYTES(erase, sectors_per_block * SPI_FLASH_SEC_SIZE); - } - else { + } else { rc = SPIEraseSector(sector); ++sector; COUNTER_ADD_BYTES(erase, SPI_FLASH_SEC_SIZE); @@ -127,95 +127,99 @@ esp_err_t IRAM_ATTR spi_flash_erase_range(uint32_t start_addr, uint32_t size) return spi_flash_translate_rc(rc); } -esp_err_t IRAM_ATTR spi_flash_write(size_t dest_addr, const void *src, size_t size) +esp_err_t IRAM_ATTR spi_flash_write(size_t dst, const void *srcv, size_t size) { - // Destination alignment is also checked in ROM code, but we can give - // better error code here - // TODO: add handling of unaligned destinations - uint8_t *temp_write_buf = NULL; - uint8_t pad_head = 0; - uint8_t pad_end = 0; - SpiFlashOpResult rc; // Out of bound writes are checked in ROM code, but we can give better // error code here - if (dest_addr + size > g_rom_flashchip.chip_size) { + if (dst + size > g_rom_flashchip.chip_size) { return ESP_ERR_INVALID_SIZE; } - - while(size >= 1024) { - // max need pad byte num for 1024 is 4 - temp_write_buf = (uint8_t*)malloc(1024 + 4); - if(temp_write_buf == NULL) { - return ESP_ERR_NO_MEM; - } - - if(dest_addr%4 != 0) { - pad_head = dest_addr%4; - pad_end = 4 - pad_head; - } - memset(temp_write_buf,0xFF,pad_head); - memcpy(temp_write_buf + pad_head ,src,1024); - memset(temp_write_buf + pad_head + 1024, 0xFF,pad_end); - COUNTER_START(); - spi_flash_disable_interrupts_caches_and_other_cpu(); - rc = spi_flash_unlock(); - if (rc == SPI_FLASH_RESULT_OK) { - rc = SPIWrite((uint32_t) (dest_addr - pad_head), (const uint32_t*) temp_write_buf, (int32_t) (1024 + pad_head + pad_end)); - COUNTER_ADD_BYTES(write, 1024 + pad_head + pad_end); - } - COUNTER_STOP(write); - spi_flash_enable_interrupts_caches_and_other_cpu(); - if(rc != ESP_OK) { - free(temp_write_buf); - temp_write_buf = NULL; - return spi_flash_translate_rc(rc); - } - - free(temp_write_buf); - temp_write_buf = NULL; - size -= 1024; - dest_addr += 1024; - src = (uint8_t*)src + 1024; + if (size == 0) { + return ESP_OK; } - if(size > 0) { - // max need pad byte num for rand size is 6 - temp_write_buf = (uint8_t*)malloc(size + 6); - if(temp_write_buf == NULL) { - return ESP_ERR_NO_MEM; - } - if(dest_addr%4 != 0) { - pad_head = dest_addr%4; - } - if ((pad_head + size)%4 != 0){ - pad_end = 4 - (pad_head + size) % 4; - } - memset(temp_write_buf,0xFF,pad_head); - memcpy(temp_write_buf + pad_head, src, size); - memset(temp_write_buf + pad_head + size, 0xFF,pad_end); - COUNTER_START(); + + SpiFlashOpResult rc = SPI_FLASH_RESULT_OK; + COUNTER_START(); + const char *srcc = (const char *) srcv; + /* + * Large operations are split into (up to) 3 parts: + * - Left padding: 4 bytes up to the first 4-byte aligned destination offset. + * - Middle part + * - Right padding: 4 bytes from the last 4-byte aligned offset covered. + */ + size_t left_off = dst & ~3U; + size_t left_size = MIN(((dst + 3) & ~3U) - dst, size); + size_t mid_off = left_size; + size_t mid_size = (size - left_size) & ~3U; + size_t right_off = left_size + mid_size; + size_t right_size = size - mid_size - left_size; + rc = spi_flash_unlock(); + if (rc != SPI_FLASH_RESULT_OK) { + goto out; + } + if (left_size > 0) { + uint32_t t = 0xffffffff; + memcpy(((uint8_t *) &t) + (dst - left_off), srcc, left_size); spi_flash_disable_interrupts_caches_and_other_cpu(); - rc = spi_flash_unlock(); - if (rc == SPI_FLASH_RESULT_OK) { - rc = SPIWrite((uint32_t) (dest_addr - pad_head), (const uint32_t*) temp_write_buf, (int32_t) (size + pad_head + pad_end)); - COUNTER_ADD_BYTES(write, size + pad_head + pad_end); - } - COUNTER_STOP(write); + rc = SPIWrite(left_off, &t, 4); spi_flash_enable_interrupts_caches_and_other_cpu(); - if(rc != ESP_OK) { - free(temp_write_buf); - temp_write_buf = NULL; - return spi_flash_translate_rc(rc); + if (rc != SPI_FLASH_RESULT_OK) { + goto out; } - - free(temp_write_buf); - temp_write_buf = NULL; - size = 0; - dest_addr += size; - src = (uint8_t*)src + size; - return spi_flash_translate_rc(rc); - } - return spi_flash_translate_rc(SPI_FLASH_RESULT_OK); - + COUNTER_ADD_BYTES(write, 4); + } + if (mid_size > 0) { + /* If src buffer is 4-byte aligned as well and is not in a region that + * requires cache access to be enabled, we can write it all at once. */ +#ifdef ESP_PLATFORM + bool in_dram = ((uintptr_t) srcc >= 0x3FFAE000 && + (uintptr_t) srcc < 0x40000000); +#else + bool in_dram = true; +#endif + if (in_dram && (((uintptr_t) srcc) + mid_off) % 4 == 0) { + spi_flash_disable_interrupts_caches_and_other_cpu(); + rc = SPIWrite(dst + mid_off, (const uint32_t *) (srcc + mid_off), mid_size); + spi_flash_enable_interrupts_caches_and_other_cpu(); + if (rc != SPI_FLASH_RESULT_OK) { + goto out; + } + COUNTER_ADD_BYTES(write, mid_size); + } else { + /* + * Otherwise, unlike for read, we cannot manipulate data in the + * user-provided buffer, so we write in 32 byte blocks. + */ + while (mid_size > 0) { + uint32_t t[8]; + uint32_t write_size = MIN(mid_size, sizeof(t)); + memcpy(t, srcc + mid_off, write_size); + spi_flash_disable_interrupts_caches_and_other_cpu(); + rc = SPIWrite(dst + mid_off, t, write_size); + spi_flash_enable_interrupts_caches_and_other_cpu(); + if (rc != SPI_FLASH_RESULT_OK) { + goto out; + } + COUNTER_ADD_BYTES(write, write_size); + mid_size -= write_size; + mid_off += write_size; + } + } + } + if (right_size > 0) { + uint32_t t = 0xffffffff; + memcpy(&t, srcc + right_off, right_size); + spi_flash_disable_interrupts_caches_and_other_cpu(); + rc = SPIWrite(dst + right_off, &t, 4); + spi_flash_enable_interrupts_caches_and_other_cpu(); + if (rc != SPI_FLASH_RESULT_OK) { + goto out; + } + COUNTER_ADD_BYTES(write, 4); + } +out: + COUNTER_STOP(write); + return spi_flash_translate_rc(rc); } esp_err_t IRAM_ATTR spi_flash_write_encrypted(size_t dest_addr, const void *src, size_t size) @@ -254,30 +258,93 @@ esp_err_t IRAM_ATTR spi_flash_write_encrypted(size_t dest_addr, const void *src, return spi_flash_translate_rc(rc); } -esp_err_t IRAM_ATTR spi_flash_read(size_t src_addr, void *dest, size_t size) +esp_err_t IRAM_ATTR spi_flash_read(size_t src, void *dstv, size_t size) { - // TODO: replace this check with code which deals with unaligned destinations - if (((ptrdiff_t)dest % 4) != 0) { - return ESP_ERR_INVALID_ARG; - } - // Source alignment is also checked in ROM code, but we can give - // better error code here - // TODO: add handling of unaligned destinations - if (src_addr % 4 != 0) { - return ESP_ERR_INVALID_ARG; - } - if (size % 4 != 0) { - return ESP_ERR_INVALID_SIZE; - } // Out of bound reads are checked in ROM code, but we can give better // error code here - if (src_addr + size > g_rom_flashchip.chip_size) { + if (src + size > g_rom_flashchip.chip_size) { return ESP_ERR_INVALID_SIZE; } + if (size == 0) { + return ESP_OK; + } + + SpiFlashOpResult rc = SPI_FLASH_RESULT_OK; COUNTER_START(); spi_flash_disable_interrupts_caches_and_other_cpu(); - SpiFlashOpResult rc = SPIRead((uint32_t) src_addr, (uint32_t*) dest, (int32_t) size); - COUNTER_ADD_BYTES(read, size); + /* To simplify boundary checks below, we handle small reads separately. */ + if (size < 16) { + uint32_t t[6]; /* Enough for 16 bytes + 4 on either side for padding. */ + uint32_t read_src = src & ~3U; + uint32_t left_off = src & 3U; + uint32_t read_size = (left_off + size + 3) & ~3U; + rc = SPIRead(read_src, t, read_size); + if (rc != SPI_FLASH_RESULT_OK) { + goto out; + } + COUNTER_ADD_BYTES(read, read_size); + memcpy(dstv, ((char *) t) + left_off, size); + goto out; + } + char *dstc = (char *) dstv; + intptr_t dsti = (intptr_t) dstc; + /* + * Large operations are split into (up to) 3 parts: + * - The middle part: from the first 4-aligned position in src to the first + * 4-aligned position in dst. + */ + size_t src_mid_off = (src % 4 == 0 ? 0 : 4 - (src % 4)); + size_t dst_mid_off = (dsti % 4 == 0 ? 0 : 4 - (dsti % 4)); + size_t mid_size = (size - MAX(src_mid_off, dst_mid_off)) & ~3U; + /* + * - Once the middle part is in place, src_mid_off bytes from the preceding + * 4-aligned source location are added on the left. + */ + size_t pad_left_src = src & ~3U; + size_t pad_left_size = src_mid_off; + /* + * - Finally, the right part is added: from the end of the middle part to + * the end. Depending on the alignment of source and destination, this may + * be a 4 or 8 byte read from pad_right_src. + */ + size_t pad_right_src = (src + pad_left_size + mid_size) & ~3U; + size_t pad_right_off = (pad_right_src - src); + size_t pad_right_size = (size - pad_right_off); + if (mid_size > 0) { + rc = SPIRead(src + src_mid_off, (uint32_t *) (dstc + dst_mid_off), mid_size); + if (rc != SPI_FLASH_RESULT_OK) { + goto out; + } + COUNTER_ADD_BYTES(read, mid_size); + /* + * If offsets in src and dst are different, perform an in-place shift + * to put destination data into its final position. + * Note that the shift can be left (src_mid_off < dst_mid_off) or right. + */ + if (src_mid_off != dst_mid_off) { + memmove(dstc + src_mid_off, dstc + dst_mid_off, mid_size); + } + } + if (pad_left_size > 0) { + uint32_t t; + rc = SPIRead(pad_left_src, &t, 4); + if (rc != SPI_FLASH_RESULT_OK) { + goto out; + } + COUNTER_ADD_BYTES(read, 4); + memcpy(dstc, ((uint8_t *) &t) + (4 - pad_left_size), pad_left_size); + } + if (pad_right_size > 0) { + uint32_t t[2]; + int32_t read_size = (pad_right_size <= 4 ? 4 : 8); + rc = SPIRead(pad_right_src, t, read_size); + if (rc != SPI_FLASH_RESULT_OK) { + goto out; + } + COUNTER_ADD_BYTES(read, read_size); + memcpy(dstc + pad_right_off, t, pad_right_size); + } +out: spi_flash_enable_interrupts_caches_and_other_cpu(); COUNTER_STOP(read); return spi_flash_translate_rc(rc); @@ -301,7 +368,7 @@ static esp_err_t spi_flash_translate_rc(SpiFlashOpResult rc) static inline void dump_counter(spi_flash_counter_t* counter, const char* name) { ESP_LOGI(TAG, "%s count=%8d time=%8dms bytes=%8d\n", name, - counter->count, counter->time, counter->bytes); + counter->count, counter->time, counter->bytes); } const spi_flash_counters_t* spi_flash_get_counters() diff --git a/components/spi_flash/include/esp_spi_flash.h b/components/spi_flash/include/esp_spi_flash.h index 496c44bb3..91675088a 100644 --- a/components/spi_flash/include/esp_spi_flash.h +++ b/components/spi_flash/include/esp_spi_flash.h @@ -75,8 +75,6 @@ esp_err_t spi_flash_erase_range(size_t start_address, size_t size); /** * @brief Write data to Flash. * - * @note Address in flash, dest, has to be 4-byte aligned. - * This is a temporary limitation which will be removed. * @note If source address is in DROM, this function will return * ESP_ERR_INVALID_ARG. * @@ -110,9 +108,6 @@ esp_err_t spi_flash_write_encrypted(size_t dest, const void *src, size_t size); /** * @brief Read data from Flash. * - * @note Both src and dest have to be 4-byte aligned. - * This is a temporary limitation which will be removed. - * * @param src source address of the data in Flash. * @param dest pointer to the destination buffer * @param size length of data diff --git a/components/spi_flash/test/test_mmap.c b/components/spi_flash/test/test_mmap.c index 28d9ae6dd..d3d16802e 100644 --- a/components/spi_flash/test/test_mmap.c +++ b/components/spi_flash/test/test_mmap.c @@ -10,8 +10,8 @@ uint32_t buffer[1024]; -static const uint32_t start = 0x200000; -static const uint32_t end = 0x300000; +static const uint32_t start = 0x100000; +static const uint32_t end = 0x200000; diff --git a/components/spi_flash/test/test_read_write.c b/components/spi_flash/test/test_read_write.c new file mode 100644 index 000000000..c9067463e --- /dev/null +++ b/components/spi_flash/test/test_read_write.c @@ -0,0 +1,207 @@ +// Copyright 2010-2016 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Test for spi_flash_{read,write}. + +#include +#include +#include +#include +#include + +#include +#include +#include +#include "../cache_utils.h" +#include "soc/timer_group_struct.h" +#include "soc/timer_group_reg.h" + +/* Base offset in flash for tests. */ +#define FLASH_BASE 0x120000 + +#ifndef CONFIG_SPI_FLASH_MINIMAL_TEST +#define CONFIG_SPI_FLASH_MINIMAL_TEST 1 +#endif + +static void fill(char *dest, int32_t start, int32_t len) +{ + for (int32_t i = 0; i < len; i++) { + *(dest + i) = (char) (start + i); + } +} + +static int cmp_or_dump(const void *a, const void *b, size_t len) +{ + int r = memcmp(a, b, len); + if (r != 0) { + for (int i = 0; i < len; i++) { + fprintf(stderr, "%02x", ((unsigned char *) a)[i]); + } + fprintf(stderr, "\n"); + for (int i = 0; i < len; i++) { + fprintf(stderr, "%02x", ((unsigned char *) b)[i]); + } + fprintf(stderr, "\n"); + } + return r; +} + +static void IRAM_ATTR test_read(int src_off, int dst_off, int len) +{ + uint32_t src_buf[16]; + char dst_buf[64], dst_gold[64]; + fprintf(stderr, "src=%d dst=%d len=%d\n", src_off, dst_off, len); + memset(src_buf, 0xAA, sizeof(src_buf)); + fill(((char *) src_buf) + src_off, src_off, len); + ESP_ERROR_CHECK(spi_flash_erase_sector((FLASH_BASE + src_off) / SPI_FLASH_SEC_SIZE)); + spi_flash_disable_interrupts_caches_and_other_cpu(); + SpiFlashOpResult rc = SPIWrite(FLASH_BASE, src_buf, sizeof(src_buf)); + spi_flash_enable_interrupts_caches_and_other_cpu(); + TEST_ASSERT_EQUAL_INT(rc, SPI_FLASH_RESULT_OK); + memset(dst_buf, 0x55, sizeof(dst_buf)); + memset(dst_gold, 0x55, sizeof(dst_gold)); + fill(dst_gold + dst_off, src_off, len); + + ESP_ERROR_CHECK(spi_flash_read(FLASH_BASE + src_off, dst_buf + dst_off, len)); + TEST_ASSERT_EQUAL_INT(cmp_or_dump(dst_buf, dst_gold, sizeof(dst_buf)), 0); +} + +TEST_CASE("Test spi_flash_read", "[spi_flash_read]") +{ +#if CONFIG_SPI_FLASH_MINIMAL_TEST + test_read(0, 0, 0); + test_read(0, 0, 4); + test_read(0, 0, 16); + test_read(0, 0, 64); + test_read(0, 0, 1); + test_read(0, 1, 1); + test_read(1, 0, 1); + test_read(1, 1, 1); + test_read(1, 1, 2); + test_read(1, 1, 3); + test_read(1, 1, 4); + test_read(1, 1, 5); + test_read(3, 2, 5); + test_read(0, 0, 17); + test_read(0, 1, 17); + test_read(1, 0, 17); + test_read(1, 1, 17); + test_read(1, 1, 18); + test_read(1, 1, 19); + test_read(1, 1, 20); + test_read(1, 1, 21); + test_read(3, 2, 21); + test_read(4, 4, 60); + test_read(59, 0, 5); + test_read(60, 0, 4); + test_read(60, 0, 3); + test_read(60, 0, 2); + test_read(63, 0, 1); + test_read(64, 0, 0); + test_read(59, 59, 5); + test_read(60, 60, 4); + test_read(60, 60, 3); + test_read(60, 60, 2); + test_read(63, 63, 1); + test_read(64, 64, 0); +#else + /* This will run a more thorough test but will slam flash pretty hard. */ + for (int src_off = 1; src_off < 16; src_off++) { + for (int dst_off = 0; dst_off < 16; dst_off++) { + for (int len = 0; len < 32; len++) { + test_read(dst_off, src_off, len); + } + } + } +#endif +} + +static void IRAM_ATTR test_write(int dst_off, int src_off, int len) +{ + char src_buf[64], dst_gold[64]; + uint32_t dst_buf[16]; + fprintf(stderr, "dst=%d src=%d len=%d\n", dst_off, src_off, len); + memset(src_buf, 0x55, sizeof(src_buf)); + fill(src_buf + src_off, src_off, len); + // Fills with 0xff + ESP_ERROR_CHECK(spi_flash_erase_sector((FLASH_BASE + dst_off) / SPI_FLASH_SEC_SIZE)); + memset(dst_gold, 0xff, sizeof(dst_gold)); + if (len > 0) { + int pad_left_off = (dst_off & ~3U); + memset(dst_gold + pad_left_off, 0xff, 4); + if (dst_off + len > pad_left_off + 4 && (dst_off + len) % 4 != 0) { + int pad_right_off = ((dst_off + len) & ~3U); + memset(dst_gold + pad_right_off, 0xff, 4); + } + fill(dst_gold + dst_off, src_off, len); + } + ESP_ERROR_CHECK(spi_flash_write(FLASH_BASE + dst_off, src_buf + src_off, len)); + spi_flash_disable_interrupts_caches_and_other_cpu(); + SpiFlashOpResult rc = SPIRead(FLASH_BASE, dst_buf, sizeof(dst_buf)); + spi_flash_enable_interrupts_caches_and_other_cpu(); + TEST_ASSERT_EQUAL_INT(rc, SPI_FLASH_RESULT_OK); + TEST_ASSERT_EQUAL_INT(cmp_or_dump(dst_buf, dst_gold, sizeof(dst_buf)), 0); +} + +TEST_CASE("Test spi_flash_write", "[spi_flash_write]") +{ +#if CONFIG_SPI_FLASH_MINIMAL_TEST + test_write(0, 0, 0); + test_write(0, 0, 4); + test_write(0, 0, 16); + test_write(0, 0, 64); + test_write(0, 0, 1); + test_write(0, 1, 1); + test_write(1, 0, 1); + test_write(1, 1, 1); + test_write(1, 1, 2); + test_write(1, 1, 3); + test_write(1, 1, 4); + test_write(1, 1, 5); + test_write(3, 2, 5); + test_write(4, 4, 60); + test_write(59, 0, 5); + test_write(60, 0, 4); + test_write(60, 0, 3); + test_write(60, 0, 2); + test_write(63, 0, 1); + test_write(64, 0, 0); + test_write(59, 59, 5); + test_write(60, 60, 4); + test_write(60, 60, 3); + test_write(60, 60, 2); + test_write(63, 63, 1); + test_write(64, 64, 0); +#else + /* This will run a more thorough test but will slam flash pretty hard. */ + for (int dst_off = 1; dst_off < 16; dst_off++) { + for (int src_off = 0; src_off < 16; src_off++) { + for (int len = 0; len < 16; len++) { + test_write(dst_off, src_off, len); + } + } + } +#endif + /* + * Test writing from ROM, IRAM and caches. We don't know what exactly will be + * written, we're testing that there's no crash here. + * + * NB: At the moment these only support aligned addresses, because memcpy + * is not aware of the 32-but load requirements for these regions. + */ + ESP_ERROR_CHECK(spi_flash_write(FLASH_BASE, (char *) 0x40000000, 16)); + ESP_ERROR_CHECK(spi_flash_write(FLASH_BASE, (char *) 0x40070000, 16)); + ESP_ERROR_CHECK(spi_flash_write(FLASH_BASE, (char *) 0x40078000, 16)); + ESP_ERROR_CHECK(spi_flash_write(FLASH_BASE, (char *) 0x40080000, 16)); +}