/* Customer ID=11656; Build=0x5f626;
   Copyright (c) 2008-2009 by Tensilica Inc.  ALL RIGHTS RESERVED.
   These coded instructions, statements, and computer programs are the
   copyrighted works and confidential proprietary information of
   Tensilica Inc.  They may not be modified, copied, reproduced,
   distributed, or disclosed to third parties in any manner, medium, or
   form, in whole or in part, without the prior written consent of
   Tensilica Inc.  */

#ifndef _XMP_LIBRARY_H
#define _XMP_LIBRARY_H

#ifdef __cplusplus
extern "C" {
#endif

#include <stdio.h>
#include <stdlib.h>
#include <xtensa/config/core.h>
#if XCHAL_HAVE_RELEASE_SYNC
#include <xtensa/tie/xt_sync.h>
#endif
#if XCHAL_HAVE_EXTERN_REGS
#include <xtensa/tie/xt_externalregisters.h>
#endif
#include <xtensa/hal.h>
#include <xtensa/core-macros.h>
#include "xtensa/system/mpsystem.h"

/* W A R N I N G:

   xmp library clients should treat all data structures in this file as
   opaque.  They are public only to enable users to declare them
   statically.
*/

/* -------------------------------------------------------------------------
   When using XMP on cache-incoherent systems, these macros help ensure
   that you are not reading stale data, and that the data you write
   makes it all the way back to main memory.
*/

#if !XCHAL_DCACHE_IS_COHERENT
#define XMP_WRITE_BACK_ELEMENT(x) \
  xthal_dcache_region_writeback((void *)x, sizeof(*x))
#define XMP_INVALIDATE_ELEMENT(x) \
  xthal_dcache_region_invalidate((void *)x, sizeof(*x))
#define XMP_WRITE_BACK_INVALIDATE_ELEMENT(x) \
  xthal_dcache_region_writeback_inv((void *)x, sizeof(*x))
#define XMP_WRITE_BACK_ARRAY(x) \
  xthal_dcache_region_writeback((void *)x, sizeof(x))
#define XMP_INVALIDATE_ARRAY(x) \
  xthal_dcache_region_invalidate((void *)x, sizeof(x))
#define XMP_WRITE_BACK_INVALIDATE_ARRAY(x) \
  xthal_dcache_region_writeback_inv((void *)x, sizeof(x))
#define XMP_WRITE_BACK_ARRAY_ELEMENTS(x, num_elements) \
  xthal_dcache_region_writeback((void *)x, sizeof(*x) * num_elements)
#define XMP_INVALIDATE_ARRAY_ELEMENTS(x, num_elements) \
  xthal_dcache_region_invalidate((void *)x, sizeof(*x) * num_elements)
#define XMP_WRITE_BACK_INVALIDATE_ARRAY_ELEMENTS(x, num_elements) \
  xthal_dcache_region_writeback_inv((void *)x, sizeof(*x) * num_elements)
#else
#define XMP_WRITE_BACK_ELEMENT(x)
#define XMP_INVALIDATE_ELEMENT(x)
#define XMP_WRITE_BACK_INVALIDATE_ELEMENT(x)
#define XMP_WRITE_BACK_ARRAY(x)
#define XMP_INVALIDATE_ARRAY(x)
#define XMP_WRITE_BACK_INVALIDATE_ARRAY(x)
#define XMP_WRITE_BACK_ARRAY_ELEMENTS(x, num_elements)
#define XMP_INVALIDATE_ARRAY_ELEMENTS(x, num_elements)
#define XMP_WRITE_BACK_INVALIDATE_ARRAY_ELEMENTS(x, num_elements)
#endif

/* -------------------------------------------------------------------------
   Initialization, error codes, constants, and housekeeping

   Every core should call xmp_init with the number of cores in the
   system.  xmp_init must be called before you use any global
   synchronization primitive or shared data.  Further, before you use a
   dynamically allocated synchronization primitive, you must both have
   called xmp_init (which sets up interrupt handlers and interrupt
   routing) and initialize the primitive itself with the appropriate
   xmp_*_init function.

   The second parameter sets the interprocessor interrupt routing.
   Passing zero instructs the library to use the default routing, which
   is suitable for most users.
*/

extern void xmp_init (int num_cores, unsigned int interrupt_routing);

/* If you want finer-grained control than xmp_init provides, you can
   call the functions below individually; however, this is less
   convenient and requires a deeper understanding of the library's
   internals.  Don't use them directly unless you have a good reason.
*/
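/* Example (an illustrative sketch, not part of the library): typical
   start-up on each core of a four-core system, plus cache maintenance
   on a cache-incoherent configuration.  NUM_CORES, shared_flags, and
   the <xmp-library.h> header name are assumptions made for this
   example only.

     #include <xmp-library.h>

     #define NUM_CORES 4

     unsigned char shared_flags[NUM_CORES];   // placed in shared memory

     int main (void)
     {
       xmp_init (NUM_CORES, 0);        // 0 = default interrupt routing

       shared_flags[xmp_prid ()] = 1;  // publish this core's flag ...
       XMP_WRITE_BACK_ARRAY (shared_flags);   // ... out to main memory

       XMP_INVALIDATE_ARRAY (shared_flags);   // drop stale cached copies
       // ... now safe to read the other cores' flags ...

       xmp_end ();
       return 0;
     }
*/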
extern void xmp_unpack_shared (void);
extern void xmp_route_interrupts (unsigned int routing);
#if XCHAL_HAVE_MP_INTERRUPTS
extern void xmp_enable_ipi_interrupts (void);
/* Turns off certain things enabled by xmp_init.  */
extern void xmp_disable_ipi_interrupts (void);
#endif
extern void xmp_end (void);

/* Only valid after xmp_init.  */
extern int xmp_num_cores (void);

/* How many cycles should a core wait before rechecking a
   synchronization variable?  Higher values reduce memory transactions,
   but also increase the latency of returning from synchronization.
*/
extern void xmp_spin_wait_set_cycles (unsigned int limit);

/* If you would prefer to supply your own spin-wait function--for
   example, one that puts the core to sleep--declare a function of this
   type, then register it by calling xmp_spin_wait_set_function.
*/
typedef void (*xmp_spin_wait_function_t)(void);
extern void xmp_spin_wait_set_function (xmp_spin_wait_function_t func);
extern void xmp_spin (void);

#define XMP_NO_OWNER        0x07
#define XMP_MUTEX_DESTROYED 0xFE
#define XMP_ERROR_FATAL     0xFD
#define XMP_MAX_CORES       0x4

static inline unsigned int
xmp_prid (void)
{
#if XCHAL_HAVE_PRID
  return XT_RSR_PRID() & 0xFF;
#else
  return 0;
#endif
}

/* -------------------------------------------------------------------------
   Tracing

   A core must set a trace file if it wants any synchronization tracing
   to occur.  Sharing file descriptors among cores is very messy, so
   don't do it.  This, unfortunately, means that two cores contending
   for a mutex cannot trace to the same file.

   Any object (except the atomic integer) can have tracing turned off
   or on.
*/

extern void xmp_set_trace_file (FILE * file);
extern void xmp_trace (const char * fmt, ...);

/* -------------------------------------------------------------------------
   Memory Allocation Functions

   These do what you would expect, only from shared memory instead of
   private memory.
*/

#if XCHAL_DCACHE_IS_COHERENT
extern void * xmp_malloc (size_t size);
extern void * xmp_calloc (size_t nmemb, size_t size);
extern void xmp_free (void * ptr);
#endif

extern void * xmp_sbrk (int size);

/* -------------------------------------------------------------------------
   xmp_atomic_int_t

   The most basic synchronization primitive in the xmp library.  Atomic
   ints are sharable among processors, and even among interrupt levels
   on the same processor.  However, their semantics are fairly
   rudimentary.  All other primitives are built on these; changing this
   implementation therefore affects every other primitive.
*/
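/* Example (an illustrative sketch, not part of the library): replacing
   the default busy-wait used while a core spins on a synchronization
   variable.  my_low_power_wait and configure_spinning are hypothetical
   names used only in this example.

     static void my_low_power_wait (void)
     {
       // A real implementation might wait for an interrupt here
       // instead of burning cycles in a tight loop.
     }

     void configure_spinning (void)
     {
       xmp_spin_wait_set_cycles (256);   // recheck roughly every 256 cycles
       xmp_spin_wait_set_function (my_low_power_wait);
     }
*/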
typedef unsigned int xmp_atomic_int_t;

static inline xmp_atomic_int_t
xmp_coherent_l32ai (xmp_atomic_int_t * address)
{
  XMP_INVALIDATE_ELEMENT (address);
  return XT_L32AI (address, 0);
}

static inline void
xmp_coherent_s32ri (xmp_atomic_int_t value, xmp_atomic_int_t * address)
{
  XT_S32RI (value, address, 0);
  XMP_WRITE_BACK_ELEMENT (address);
}

#define XMP_ATOMIC_INT_INITIALIZER(value) (value)

/* xmp_atomic_int_init - Initialize an int prior to use

   Nonsynchronizing, Nonblocking

   Usage:
     integer - points to an uninitialized integer
     value   - initial value

   On exit: initialized to the given value

   Errors: none
*/
static inline void
xmp_atomic_int_init (xmp_atomic_int_t * integer, int value)
{
  xmp_coherent_s32ri (value, integer);
}

/* xmp_atomic_int_value - Read the value

   Nonsynchronizing, Nonblocking

   Usage:
     integer - points to an int

   Returns: the value
*/
static inline int
xmp_atomic_int_value (xmp_atomic_int_t * integer)
{
  return xmp_coherent_l32ai (integer);
}

/* xmp_atomic_int_conditional_increment - Conditionally increment integer

   Synchronizing, nonblocking

   Usage:
     integer - points to an initialized integer
     amount  - how much to increment
     prev    - believed current value of the integer, e.g.:
                 prev = xmp_atomic_int_value (integer);
                 val = xmp_atomic_int_conditional_increment (integer, 1, prev);

   Returns: the value the integer held just before this call.  The user
   should check whether it matches prev; if it does, the update was
   successful.
*/

#define USE_ASSEMBLY_IMPLEMENTATION 0

static inline int
xmp_atomic_int_conditional_increment (xmp_atomic_int_t * integer,
                                      int amount, int prev)
{
  int val;
  int saved;

#if USE_ASSEMBLY_IMPLEMENTATION
  /* %0 = prev
     %1 = saved
     %2 = atomic integer pointer
     %3 = amount */
  asm volatile ("wsr.scompare1 %0\n"
                "mov %1, %0\n"
                "add %0, %0, %3\n"
                "s32c1i %0, %2, 0\n"
                : "+&a" (prev), "+&a" (saved)
                : "a" (integer), "a" (amount));
  return prev;
#else
  XT_WSR_SCOMPARE1 (prev);
  val = prev + amount;
  saved = val;
  /* Store val only if the location still holds prev; either way, val
     is reloaded with the location's value from just before the
     operation.  */
  XT_S32C1I (val, integer, 0);
  return val;
#endif
}

/* xmp_atomic_int_increment - Increment integer

   Synchronizing, blocking

   Usage:
     integer - points to an initialized integer
     amount  - how much to increment

   Returns: new value of integer
*/
static inline int
xmp_atomic_int_increment (xmp_atomic_int_t * integer, int amount)
{
  int val;
  int saved;

#if USE_ASSEMBLY_IMPLEMENTATION
  /* %0 = val
     %1 = saved
     %2 = atomic integer pointer
     %3 = amount */
  asm volatile ("l32ai %0, %2, 0\n"
                "1:\n"
                "wsr.scompare1 %0\n"
                "mov %1, %0\n"
                "add %0, %0, %3\n"
                "s32c1i %0, %2, 0\n"
                "bne %0, %1, 1b\n"
                : "+&a" (val), "+&a" (saved)
                : "a" (integer), "a" (amount));
#else
  /* Accurately naming "val" is tricky: sometimes it holds the value we
     want to store, and sometimes the value currently at the location.  */

  /* Load the location's current value.  */
  val = xmp_coherent_l32ai (integer);

  do {
    XT_WSR_SCOMPARE1 (val);
    saved = val;
    /* Change it to what we would like to store there.  */
    val = val + amount;
    /* Possibly store the new value, but reload the location's current
       value no matter what.  */
    XT_S32C1I (val, integer, 0);
    if (val != saved)
      xmp_spin ();
  } while (val != saved);
#endif
  return val + amount;
}
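/* Example (an illustrative sketch, not part of the library): a shared
   event counter.  event_count, record_event, and try_record_event are
   hypothetical names used only in this example.

     xmp_atomic_int_t event_count = XMP_ATOMIC_INT_INITIALIZER (0);

     void record_event (void)
     {
       // Blocking form: retries internally until the increment succeeds.
       xmp_atomic_int_increment (&event_count, 1);
     }

     int try_record_event (void)
     {
       // Nonblocking form: a single compare-and-swap attempt.
       int prev = xmp_atomic_int_value (&event_count);
       int seen = xmp_atomic_int_conditional_increment (&event_count, 1, prev);
       return seen == prev;           // nonzero => update succeeded
     }
*/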
/* xmp_atomic_int_conditional_set - Set the value of an atomic integer

   Synchronizing, nonblocking

   Usage:
     integer - points to an initialized integer
     from    - believed current value of the integer, e.g.:
                 prev = xmp_atomic_int_value (integer);
                 val = xmp_atomic_int_conditional_set (integer, prev, 1);
     to      - new value

   Returns: the value the integer held just before this call.  The user
   should check whether it matches from; if it does, the update was
   successful.
*/
static inline int
xmp_atomic_int_conditional_set (xmp_atomic_int_t * integer, int from, int to)
{
  int val;
  /* Don't even try to update if the integer's value isn't what we
     think it should be.  This avoids acquiring the cache line for
     writing, and therefore avoids bus transactions when several cores
     contend.  */
  val = xmp_coherent_l32ai (integer);
  if (val == from) {
    XT_WSR_SCOMPARE1 (from);
    val = to;
    /* Possibly store to, but reload the location's current value no
       matter what.  */
    XT_S32C1I (val, integer, 0);
  }
  return val;
}

/* Macros to implement trivial spin locks.  These are very primitive,
   but can be useful when you don't need the higher-overhead
   synchronization.  To use an xmp_atomic_int_t as a trivial spin lock,
   initialize it to zero first.
*/

#define XMP_SIMPLE_SPINLOCK_ACQUIRE(atomic_int_ptr)                     \
  { while (xmp_atomic_int_conditional_set (atomic_int_ptr, 0,           \
                                           xmp_prid () + 1) != 0)       \
      xmp_spin (); }

#define XMP_SIMPLE_SPINLOCK_RELEASE(atomic_int_ptr)                     \
  { while (xmp_atomic_int_conditional_set (atomic_int_ptr,              \
                                           xmp_prid () + 1, 0)          \
           != xmp_prid () + 1)                                          \
      xmp_spin (); }

#define XMP_SIMPLE_SPINLOCK_OWNER(atomic_int_ptr) \
  (xmp_atomic_int_value (atomic_int_ptr) - 1)
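/* Example (an illustrative sketch, not part of the library): guarding
   a short critical section with a trivial spin lock.  table_lock and
   update_table are hypothetical names used only in this example.

     xmp_atomic_int_t table_lock = XMP_ATOMIC_INT_INITIALIZER (0);

     void update_table (void)
     {
       XMP_SIMPLE_SPINLOCK_ACQUIRE (&table_lock);
       // ... touch the shared data; keep this section short ...
       XMP_SIMPLE_SPINLOCK_RELEASE (&table_lock);
     }
*/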
/* -------------------------------------------------------------------------
   xmp_mutex_t

   An even higher-level data structure to enforce mutual exclusion
   between cores.  A core that waits on a mutex might sleep with a
   waiti and be woken by an interrupt.

   Mutexes can be normal or recursive.  For a normal mutex, a core
   attempting to acquire a mutex it already holds will deadlock.  For a
   recursive mutex, a core will succeed in acquiring a mutex it already
   holds, and must release it as many times as it acquired it.

   Mutexes are not sharable between interrupt levels, because ownership
   is tracked by core, not by thread.

   Like all xmp data structures, an object of type xmp_mutex_t should
   be treated by the programmer as opaque.  It is public in this header
   file only so that mutexes can be declared statically.

   For configurations with 16-byte cache lines, this layout puts the
   most frequently used and modified data in the first line.
*/

#if XCHAL_DCACHE_IS_COHERENT
typedef struct xmp_mutex_t {
  xmp_atomic_int_t qlock;
  unsigned int qhead;
  unsigned int qtail;
  unsigned char queue[XMP_MAX_CORES];
  unsigned short held;
  unsigned char owner;
  unsigned char recursive : 1;
  unsigned char trace : 1;
  unsigned char system : 1;
  unsigned char unused : 5;
  const char * name;
} xmp_mutex_t __attribute__ ((aligned (XMP_MAX_DCACHE_LINESIZE)));

#define XMP_MUTEX_INITIALIZER(name)                                     \
  { 0, 0, -1, {XMP_NO_OWNER, XMP_NO_OWNER, XMP_NO_OWNER, XMP_NO_OWNER}, \
    0, XMP_NO_OWNER, XMP_MUTEX_FLAG_NORMAL, 0, 0, 0, name }

#define XMP_RECURSIVE_MUTEX_INITIALIZER(name)                           \
  { 0, 0, -1, {XMP_NO_OWNER, XMP_NO_OWNER, XMP_NO_OWNER, XMP_NO_OWNER}, \
    0, XMP_NO_OWNER, XMP_MUTEX_FLAG_RECURSIVE, 0, 0, 0, name }

#define XMP_MUTEX_FLAG_NORMAL    0
#define XMP_MUTEX_FLAG_RECURSIVE 1

#define XMP_MUTEX_ACQUIRE_FAILED      -1
#define XMP_MUTEX_ERROR_DESTROY_OWNED -2
#define XMP_MUTEX_ERROR_NOT_OWNED     -3
#define XMP_MUTEX_ERROR_ALREADY_OWNED -4

/* xmp_mutex_init

   Nonsynchronizing, Nonblocking

   Usage:
     mutex     - points to an uninitialized mutex
     name      - name if you want one, NULL if not
     recursive - nonzero to use recursive semantics

   Returns zero on success (always succeeds).
*/
extern int xmp_mutex_init (xmp_mutex_t * mutex, const char * name,
                           unsigned int recursive);

/* xmp_mutex_destroy

   Synchronizing - will fail if the mutex is held by anyone, including
   the current processor
   Nonblocking

   Usage:
     mutex - points to a mutex

   Returns zero on success, nonzero if the mutex is held.
*/
extern int xmp_mutex_destroy (xmp_mutex_t * mutex);

/* xmp_mutex_lock    - Synchronizing, blocking
   xmp_mutex_trylock - Synchronizing, nonblocking

   Usage:
     mutex - points to a mutex

   Returns zero on success.
*/
extern int xmp_mutex_lock (xmp_mutex_t * mutex);
extern int xmp_mutex_trylock (xmp_mutex_t * mutex);

/* xmp_mutex_unlock

   Synchronizing, Nonblocking

   Usage:
     mutex - points to a mutex

   Returns zero on success - the mutex is released
           nonzero on failure - the mutex is owned by another core; the
           return value is the prid of the processor that does own it.
           Note that by the time this function returns, the owner of
           the mutex may have changed.
*/
extern int xmp_mutex_unlock (xmp_mutex_t * mutex);

/* xmp_mutex_name

   Nonsynchronizing, Nonblocking

   Usage:
     mutex - points to a mutex

   Returns the name of the given mutex, which may be NULL.
*/
const char * xmp_mutex_name (const xmp_mutex_t * mutex);

/* xmp_mutex_trace_on
   xmp_mutex_trace_off

   Nonsynchronizing, Nonblocking

   Turn tracing for the mutex on and off.  These functions are only
   present in the debug version of the library.
*/
extern void xmp_mutex_trace_on (xmp_mutex_t * mutex);
extern void xmp_mutex_trace_off (xmp_mutex_t * mutex);

/* -------------------------------------------------------------------------
   xmp_condition_t

   Condition variables following Mesa semantics.  Condition variables
   are not sharable among interrupt levels.
*/

typedef struct xmp_condition_t {
  unsigned int qhead;
  unsigned int qtail;
  unsigned char queue[XMP_MAX_CORES];
  unsigned int waiting[XMP_MAX_CORES];
  unsigned char trace : 1;
  unsigned char unused : 7;
  const char * name;
} xmp_condition_t __attribute__ ((aligned (XMP_MAX_DCACHE_LINESIZE)));

#define XMP_CONDITION_INITIALIZER(name)                              \
  { 0, -1, {XMP_NO_OWNER, XMP_NO_OWNER, XMP_NO_OWNER, XMP_NO_OWNER}, \
    {0, 0, 0, 0}, 0, 0, name }

/* xmp_condition_init - Initialize a condition variable

   Nonsynchronizing, Nonblocking

   Usage:
     condition - pointer to an xmp_condition_t
     name      - name if you want one, NULL if not

   On exit: condition initialized

   Errors: none
*/
extern int xmp_condition_init (xmp_condition_t * condition, const char * name);
extern int xmp_condition_destroy (xmp_condition_t * condition);

/* xmp_condition_wait - Wait on a condition variable

   Synchronizing, blocking

   Usage:
     condition - pointer to an xmp_condition_t
     mutex     - pointer to an xmp_mutex_t already acquired by the
                 calling core

   Errors: if the mutex isn't held by this core
*/
extern int xmp_condition_wait (xmp_condition_t * condition,
                               xmp_mutex_t * mutex);

/* xmp_condition_signal - Signal the first (if any) core waiting on a
   condition variable.  You must hold the mutex you passed to
   xmp_condition_wait before calling this function.

   Synchronizing, nonblocking

   Usage:
     condition - pointer to an xmp_condition_t

   Errors: none
*/
extern int xmp_condition_signal (xmp_condition_t * condition);
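/* Example (an illustrative sketch, not part of the library): a
   single-slot mailbox built from a statically initialized mutex and
   condition variable; available only on cache-coherent configurations.
   The mbox_* names are hypothetical and exist only for this example.
   Mesa semantics require re-testing the predicate in a while loop
   after every wake-up.

     xmp_mutex_t mbox_mutex = XMP_MUTEX_INITIALIZER ("mbox");
     xmp_condition_t mbox_nonempty = XMP_CONDITION_INITIALIZER ("nonempty");
     int mbox_full = 0;
     int mbox_value;

     int mbox_receive (void)
     {
       int v;
       xmp_mutex_lock (&mbox_mutex);
       while (!mbox_full)                       // not "if": Mesa semantics
         xmp_condition_wait (&mbox_nonempty, &mbox_mutex);
       v = mbox_value;
       mbox_full = 0;
       xmp_mutex_unlock (&mbox_mutex);
       return v;
     }

     void mbox_send (int v)
     {
       xmp_mutex_lock (&mbox_mutex);
       mbox_value = v;
       mbox_full = 1;
       xmp_condition_signal (&mbox_nonempty);   // still holding the mutex
       xmp_mutex_unlock (&mbox_mutex);
     }
*/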
/* xmp_condition_broadcast - Signal all cores waiting on a condition
   variable.  You must hold the mutex you passed to xmp_condition_wait
   before calling this function.

   Synchronizing, nonblocking

   Usage:
     condition - pointer to an xmp_condition_t

   Errors: none
*/
extern int xmp_condition_broadcast (xmp_condition_t * condition);

static inline const char *
xmp_condition_name (const xmp_condition_t * condition)
{
  return condition->name;
}

/* xmp_condition_trace_on
   xmp_condition_trace_off

   Nonsynchronizing, Nonblocking

   Turn statistics and tracing for the condition variable on and off.
   For tracing, you must also set a trace file for the core.  These
   functions are only present in the debug version of the library.
*/
extern void xmp_condition_trace_on (xmp_condition_t * condition);
extern void xmp_condition_trace_off (xmp_condition_t * condition);
#endif /* XCHAL_DCACHE_IS_COHERENT */

/* -------------------------------------------------------------------------
   xmp_barrier_t

   Classic barriers that stop any core from continuing until a
   specified number of cores reach the barrier.  Once the barrier
   allows cores through, it resets and will stop cores from progressing
   again.  Barriers are not sharable among interrupt levels.
*/

typedef struct xmp_barrier_t {
  xmp_atomic_int_t count;
  xmp_atomic_int_t state;
  xmp_atomic_int_t sleeping;
  unsigned short num_cores;
  unsigned short trace : 1;
  unsigned short system : 1;
  const char * name;
} xmp_barrier_t __attribute__ ((aligned (XMP_MAX_DCACHE_LINESIZE)));

#define XMP_BARRIER_INITIALIZER(number, name) \
  { 0, 0, 0, number, 0, 0, name }

/* xmp_barrier_init - Initialize a barrier

   Nonsynchronizing, Nonblocking

   Usage:
     barrier   - pointer to an xmp_barrier_t
     num_cores - number of cores that must arrive at the barrier before
                 any are allowed through

   On exit: barrier initialized

   Always returns zero.

   Errors: none
*/
extern int xmp_barrier_init (xmp_barrier_t * barrier, int num_cores,
                             const char * name);

/* xmp_barrier_wait - Wait on a barrier

   Synchronizing, blocking

   Usage:
     barrier - pointer to an xmp_barrier_t

   On exit: enough cores (as determined at the barrier's
   initialization) have reached the barrier.

   Errors: none
*/
extern int xmp_barrier_wait (xmp_barrier_t * barrier);

static inline const char *
xmp_barrier_name (const xmp_barrier_t * barrier)
{
  return barrier->name;
}

/* xmp_barrier_trace_on
   xmp_barrier_trace_off

   Nonsynchronizing, Nonblocking

   Turn tracing for the barrier on and off.  For tracing, you must also
   set a trace file for the core.  These functions are only present in
   the debug version of the library.
*/
extern void xmp_barrier_trace_on (xmp_barrier_t * barrier);
extern void xmp_barrier_trace_off (xmp_barrier_t * barrier);

/* -------------------------------------------------------------------------
   Portions of the library that are internal, but belong here for
   convenience.
*/

extern xmp_atomic_int_t _ResetSync;

static inline void
xmp_initial_sync (int num_cores)
{
  xmp_atomic_int_increment (&_ResetSync, 1);
  while (xmp_coherent_l32ai (&_ResetSync) != num_cores)
    xmp_spin ();
}

#ifdef __cplusplus
}
#endif

#endif /* _XMP_LIBRARY_H */
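/* Example (an illustrative sketch, not part of the library): using a
   barrier to keep all cores in lockstep between phases of a
   computation.  NUM_CORES, phase_barrier, and the do_phase_* functions
   are hypothetical names used only in this example.

     #define NUM_CORES 4

     xmp_barrier_t phase_barrier =
       XMP_BARRIER_INITIALIZER (NUM_CORES, "phase");

     void run_phases (void)
     {
       do_phase_one ();
       xmp_barrier_wait (&phase_barrier);  // all cores finish phase one ...
       do_phase_two ();                    // ... before any starts phase two
     }
*/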