linux/linux-5.4.31/arch/mips/include/asm/barrier.h

/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
 */
#ifndef __ASM_BARRIER_H
#define __ASM_BARRIER_H

#include <asm/addrspace.h>

/*
 * Sync types defined by the MIPS architecture (document MD00087 table 6.5)
 * These values are used with the sync instruction to perform memory barriers.
 * Types of ordering guarantees available through the SYNC instruction:
 * - Completion Barriers
 * - Ordering Barriers
 * As compared to the completion barrier, the ordering barrier is a
 * lighter-weight operation as it does not require the specified instructions
 * before the SYNC to be already completed. Instead it only requires that those
 * specified instructions which are subsequent to the SYNC in the instruction
 * stream are never re-ordered for processing ahead of the specified
 * instructions which are before the SYNC in the instruction stream.
 * This potentially reduces how many cycles the barrier instruction must stall
 * before it completes.
 * Implementations that do not use any of the non-zero values of stype to define
 * different barriers, such as ordering barriers, must make those stype values
 * act the same as stype zero.
 */

/*
 * Completion barriers:
 * - Every synchronizable specified memory instruction (loads or stores or both)
 *   that occurs in the instruction stream before the SYNC instruction must be
 *   already globally performed before any synchronizable specified memory
 *   instructions that occur after the SYNC are allowed to be performed, with
 *   respect to any other processor or coherent I/O module.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 *
 * - A stype value of zero will always be defined such that it performs the most
 *   complete set of synchronization operations that are defined. This means
 *   stype zero always does a completion barrier that affects both loads and
 *   stores preceding the SYNC instruction and both loads and stores that are
 *   subsequent to the SYNC instruction. Non-zero values of stype may be defined
 *   by the architecture or specific implementations to perform synchronization
 *   behaviors that are less complete than that of stype zero. If an
 *   implementation does not use one of these non-zero values to define a
 *   different synchronization behavior, then that non-zero value of stype must
 *   act the same as stype zero completion barrier. This allows software written
 *   for an implementation with a lighter-weight barrier to work on another
 *   implementation which only implements the stype zero completion barrier.
 *
 * - A completion barrier is required, potentially in conjunction with SSNOP (in
 *   Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),
 *   to guarantee that memory reference results are visible across operating
 *   mode changes. For example, a completion barrier is required on some
 *   implementations on entry to and exit from Debug Mode to guarantee that
 *   memory effects are handled correctly.
 */

/*
 * stype 0 - A completion barrier that affects preceding loads and stores and
 * subsequent loads and stores.
 *
 * This is the most complete barrier the SYNC instruction offers; any non-zero
 * stype that an implementation does not define must behave like this one.
 *
 * Older instructions which must reach the load/store ordering point before the
 * SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC instruction
 * completes: Loads, Stores
 */
#define STYPE_SYNC 0x0

/*
 * Ordering barriers:
 * - Every synchronizable specified memory instruction (loads or stores or both)
 *   that occurs in the instruction stream before the SYNC instruction must
 *   reach a stage in the load/store datapath after which no instruction
 *   re-ordering is possible before any synchronizable specified memory
 *   instruction which occurs after the SYNC instruction in the instruction
 *   stream reaches the same stage in the load/store datapath.
 *
 * - If any memory instruction before the SYNC instruction in program order,
 *   generates a memory request to the external memory and any memory
 *   instruction after the SYNC instruction in program order also generates a
 *   memory request to external memory, the memory request belonging to the
 *   older instruction must be globally performed before the time the memory
 *   request belonging to the younger instruction is globally performed.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 */

/*
 * stype 0x10 - An ordering barrier that affects preceding loads and stores and
 * subsequent loads and stores.
 *
 * Unlike stype 0 it only constrains ordering: older accesses need not be
 * globally performed by the time the SYNC completes (hence "N/A" below).
 *
 * Older instructions which must reach the load/store ordering point before the
 * SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC instruction
 * completes: N/A
 */
#define STYPE_SYNC_MB 0x10

/*
 * stype 0x14 - A completion barrier specific to global invalidations
 *
 * When a sync instruction of this type completes, any preceding GINVI or GINVT
 * operation has been globalized & completed on all coherent CPUs. Anything
 * that the GINV* instruction should invalidate will have been invalidated on
 * all coherent CPUs when this instruction completes. It is implementation
 * specific whether the GINV* instructions themselves will ensure completion,
 * or this sync type will.
 *
 * In systems implementing global invalidates (i.e. with Config5.GI == 2 or 3)
 * this sync type also requires that previous SYNCI operations have completed.
 *
 * Used as the immediate operand of the SYNC issued by sync_ginv().
 */
#define STYPE_GINV	0x14

/*
 * __sync() - full memory barrier built on the plain SYNC instruction.
 *
 * With CONFIG_CPU_HAS_SYNC the macro emits a single "sync" with no stype
 * operand. The assembler state is saved/restored with .set push/.set pop and
 * the ISA level is switched to mips2 in between, presumably so the assembler
 * accepts the instruction whatever ISA level the kernel is built for.
 *
 * Without CONFIG_CPU_HAS_SYNC no instruction is emitted, but the macro still
 * expands to an empty asm with a "memory" clobber. fast_mb(), fast_rmb() and
 * fast_wmb() (and therefore rmb()/wmb()) expand to __sync(), and a memory
 * barrier must at the very least keep the compiler from caching or moving
 * memory accesses across it; an empty statement would silently drop that
 * guarantee.
 */
#ifdef CONFIG_CPU_HAS_SYNC
#define __sync()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		".set	mips2\n\t"		\
		"sync\n\t"			\
		".set	pop"			\
		: /* no output */		\
		: /* no input */		\
		: "memory")
#else
#define __sync()	__asm__ __volatile__("" : : : "memory")
#endif

/*
 * __fast_iob() - issue a dummy word load from the address CKSEG1.
 *
 * The loaded value is discarded (the destination is register $0) and the load
 * is followed by a nop. ".set noreorder" keeps the assembler from rearranging
 * the two instructions, and the "memory" clobber makes the statement a
 * compiler barrier as well.
 *
 * NOTE(review): CKSEG1 comes from <asm/addrspace.h>; it is presumably the base
 * of the uncached kernel segment, so the load goes out to the bus - confirm
 * against that header.
 */
#define __fast_iob()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		"lw	$0,%0\n\t"		\
		"nop\n\t"			\
		".set	pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1)		\
		: "memory")
/*
 * Select the building blocks (fast_wmb/fast_rmb/fast_mb/fast_iob) that the
 * generic mb()/rmb()/wmb()/iob() macros further down are defined in terms of.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
/*
 * Octeon: write barriers use the Octeon-specific SYNCW instruction, issued
 * twice in a row; ".set arch=octeon" makes the assembler accept it.
 */
# define OCTEON_SYNCW_STR	".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
# define __syncw()	__asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")

/*
 * On Octeon the read barrier is a compiler barrier only and the I/O barrier
 * emits nothing; the full barrier is still a real SYNC.
 */
# define fast_wmb()	__syncw()
# define fast_rmb()	barrier()
# define fast_mb()	__sync()
# define fast_iob()	do { } while (0)
#else /* ! CONFIG_CPU_CAVIUM_OCTEON */
/* Everything else: read, write and full barriers are all a plain SYNC. */
# define fast_wmb()	__sync()
# define fast_rmb()	__sync()
# define fast_mb()	__sync()
# ifdef CONFIG_SGI_IP28
/*
 * SGI IP28: dummy load, SYNC, then a second dummy load, all from the fixed
 * address CKSEG1ADDR(0x1fa00004); both loaded values are discarded into $0.
 * NOTE(review): the significance of that address is not visible here.
 */
#  define fast_iob()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		"lw	$0,%0\n\t"		\
		"sync\n\t"			\
		"lw	$0,%0\n\t"		\
		".set	pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
		: "memory")
# else
/* Default I/O barrier: a SYNC followed by the dummy load of __fast_iob(). */
#  define fast_iob()				\
	do {					\
		__sync();			\
		__fast_iob();			\
	} while (0)
# endif
#endif /* CONFIG_CPU_CAVIUM_OCTEON */

/*
 * Mandatory barriers.
 *
 * With CONFIG_CPU_HAS_WB both the full barrier and the I/O barrier are
 * implemented by wbflush() from <asm/wbflush.h>; otherwise they map onto the
 * fast_* variants chosen above. wmb() and rmb() always use the fast_*
 * variants, whichever branch is taken.
 */
#ifdef CONFIG_CPU_HAS_WB

#include <asm/wbflush.h>

#define mb()		wbflush()
#define iob()		wbflush()

#else /* !CONFIG_CPU_HAS_WB */

#define mb()		fast_mb()
#define iob()		fast_iob()

#endif /* !CONFIG_CPU_HAS_WB */

#define wmb()		fast_wmb()
#define rmb()		fast_rmb()

/*
 * SMP barriers (the __smp_*() hooks consumed by <asm-generic/barrier.h>,
 * which is included at the end of this file).
 *
 * With CONFIG_WEAK_ORDERING real barrier instructions are emitted: on Octeon
 * SYNC for the full barrier, SYNCW for writes and only a compiler barrier for
 * reads; on every other CPU a bare "sync" for all three. Without
 * CONFIG_WEAK_ORDERING all three are compiler barriers only.
 */
#if defined(CONFIG_WEAK_ORDERING)
# ifdef CONFIG_CPU_CAVIUM_OCTEON
#  define __smp_mb()	__sync()
#  define __smp_rmb()	barrier()
#  define __smp_wmb()	__syncw()
# else
#  define __smp_mb()	__asm__ __volatile__("sync" : : :"memory")
#  define __smp_rmb()	__asm__ __volatile__("sync" : : :"memory")
#  define __smp_wmb()	__asm__ __volatile__("sync" : : :"memory")
# endif
#else
#define __smp_mb()	barrier()
#define __smp_rmb()	barrier()
#define __smp_wmb()	barrier()
#endif

/*
 * When LL/SC does imply order, it must also be a compiler barrier to prevent
 * the compiler from reordering where the CPU will not. When it does not imply
 * order, the compiler is also free to reorder across the LL/SC loop and
 * ordering will be done by smp_llsc_mb() and friends.
 *
 * Hence the two configurations below:
 *  - CONFIG_WEAK_REORDERING_BEYOND_LLSC on SMP: smp_llsc_mb() emits a "sync"
 *    with a "memory" clobber, and __LLSC_CLOBBER is left empty.
 *  - otherwise: smp_llsc_mb() emits nothing, __WEAK_LLSC_MB is a blank line
 *    and __LLSC_CLOBBER is "memory".
 *
 * NOTE(review): __WEAK_LLSC_MB and __LLSC_CLOBBER are not used in this file;
 * presumably they are pasted into the LL/SC asm sequences (instruction text
 * and clobber list respectively) elsewhere - confirm against the users.
 */
#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
#define __WEAK_LLSC_MB		"	sync	\n"
#define smp_llsc_mb()		__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
#define __LLSC_CLOBBER
#else
#define __WEAK_LLSC_MB		"		\n"
#define smp_llsc_mb()		do { } while (0)
#define __LLSC_CLOBBER		"memory"
#endif

/*
 * Barrier placed before an LL/SC sequence, plus nudge_writes().
 *
 * Octeon: the pre-LL/SC barrier is a write barrier, and nudge_writes() is a
 * single SYNCW. Everything else: the pre-LL/SC barrier is smp_llsc_mb() and
 * nudge_writes() is a full mb().
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
#define smp_mb__before_llsc() smp_wmb()
#define __smp_mb__before_llsc() __smp_wmb()
/* Cause previous writes to become visible on all CPUs as soon as possible */
#define nudge_writes() __asm__ __volatile__(".set push\n\t"		\
					    ".set arch=octeon\n\t"	\
					    "syncw\n\t"			\
					    ".set pop" : : : "memory")
#else
#define smp_mb__before_llsc() smp_llsc_mb()
#define __smp_mb__before_llsc() smp_llsc_mb()
#define nudge_writes() mb()
#endif

/*
 * Barriers around atomic operations, expressed via the LL/SC barriers above:
 * the "before" barrier is whatever precedes an LL/SC sequence, the "after"
 * barrier is smp_llsc_mb().
 */
#define __smp_mb__before_atomic()	__smp_mb__before_llsc()
#define __smp_mb__after_atomic()	smp_llsc_mb()

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target, for which we also use loongson_llsc_mb()
 *    defined below.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * The above described cases cause an error in the cache coherence protocol;
 * such that the Invalidate of a competing LL-SC goes 'missing' and SC
 * erroneously observes its core still has Exclusive state and lets the SC
 * proceed.
 *
 * Therefore the error only occurs on SMP systems.
 */
/*
 * loongson_llsc_mb() - SYNC used to work around the Loongson 3 LL/SC erratum.
 *
 * Emits a "sync" (with a "memory" clobber) when the workarounds are enabled
 * and expands to an empty statement otherwise.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
#define loongson_llsc_mb()	__asm__ __volatile__("sync" : : :"memory")
#else
#define loongson_llsc_mb()	do { } while (0)
#endif

/*
 * sync_ginv() - issue a SYNC whose stype is STYPE_GINV, the completion barrier
 * for global invalidations. The stype is passed as an immediate operand.
 */
static inline void sync_ginv(void)
{
	asm volatile("sync\t%0"
		     : /* no output */
		     : "i" (STYPE_GINV));
}

#include <asm-generic/barrier.h>

#endif /* __ASM_BARRIER_H */
1 2024-01-30 10:43:28 +00:00			`/*`
			`* This file is subject to the terms and conditions of the GNU General Public`
			`* License. See the file "COPYING" in the main directory of this archive`
			`* for more details.`
			`*`
			`* Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)`
			`*/`
			`#ifndef __ASM_BARRIER_H`
			`#define __ASM_BARRIER_H`

			`#include <asm/addrspace.h>`

			`/*`
			`* Sync types defined by the MIPS architecture (document MD00087 table 6.5)`
			`* These values are used with the sync instruction to perform memory barriers.`
			`* Types of ordering guarantees available through the SYNC instruction:`
			`* - Completion Barriers`
			`* - Ordering Barriers`
			`* As compared to the completion barrier, the ordering barrier is a`
			`* lighter-weight operation as it does not require the specified instructions`
			`* before the SYNC to be already completed. Instead it only requires that those`
			`* specified instructions which are subsequent to the SYNC in the instruction`
			`* stream are never re-ordered for processing ahead of the specified`
			`* instructions which are before the SYNC in the instruction stream.`
			`* This potentially reduces how many cycles the barrier instruction must stall`
			`* before it completes.`
			`* Implementations that do not use any of the non-zero values of stype to define`
			`* different barriers, such as ordering barriers, must make those stype values`
			`* act the same as stype zero.`
			`*/`

			`/*`
			`* Completion barriers:`
			`* - Every synchronizable specified memory instruction (loads or stores or both)`
			`* that occurs in the instruction stream before the SYNC instruction must be`
			`* already globally performed before any synchronizable specified memory`
			`* instructions that occur after the SYNC are allowed to be performed, with`
			`* respect to any other processor or coherent I/O module.`
			`*`
			`* - The barrier does not guarantee the order in which instruction fetches are`
			`* performed.`
			`*`
			`* - A stype value of zero will always be defined such that it performs the most`
			`* complete set of synchronization operations that are defined. This means`
			`* stype zero always does a completion barrier that affects both loads and`
			`* stores preceding the SYNC instruction and both loads and stores that are`
			`* subsequent to the SYNC instruction. Non-zero values of stype may be defined`
			`* by the architecture or specific implementations to perform synchronization`
			`* behaviors that are less complete than that of stype zero. If an`
			`* implementation does not use one of these non-zero values to define a`
			`* different synchronization behavior, then that non-zero value of stype must`
			`* act the same as stype zero completion barrier. This allows software written`
			`* for an implementation with a lighter-weight barrier to work on another`
			`* implementation which only implements the stype zero completion barrier.`
			`*`
			`* - A completion barrier is required, potentially in conjunction with SSNOP (in`
			`* Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),`
			`* to guarantee that memory reference results are visible across operating`
			`* mode changes. For example, a completion barrier is required on some`
			`* implementations on entry to and exit from Debug Mode to guarantee that`
			`* memory effects are handled correctly.`
			`*/`

			`/*`
			`* stype 0 - A completion barrier that affects preceding loads and stores and`
			`* subsequent loads and stores.`
			`* Older instructions which must reach the load/store ordering point before the`
			`* SYNC instruction completes: Loads, Stores`
			`* Younger instructions which must reach the load/store ordering point only`
			`* after the SYNC instruction completes: Loads, Stores`
			`* Older instructions which must be globally performed when the SYNC instruction`
			`* completes: Loads, Stores`
			`*/`
			`#define STYPE_SYNC 0x0`

			`/*`
			`* Ordering barriers:`
			`* - Every synchronizable specified memory instruction (loads or stores or both)`
			`* that occurs in the instruction stream before the SYNC instruction must`
			`* reach a stage in the load/store datapath after which no instruction`
			`* re-ordering is possible before any synchronizable specified memory`
			`* instruction which occurs after the SYNC instruction in the instruction`
			`* stream reaches the same stage in the load/store datapath.`
			`*`
			`* - If any memory instruction before the SYNC instruction in program order,`
			`* generates a memory request to the external memory and any memory`
			`* instruction after the SYNC instruction in program order also generates a`
			`* memory request to external memory, the memory request belonging to the`
			`* older instruction must be globally performed before the time the memory`
			`* request belonging to the younger instruction is globally performed.`
			`*`
			`* - The barrier does not guarantee the order in which instruction fetches are`
			`* performed.`
			`*/`

			`/*`
			`* stype 0x10 - An ordering barrier that affects preceding loads and stores and`
			`* subsequent loads and stores.`
			`* Older instructions which must reach the load/store ordering point before the`
			`* SYNC instruction completes: Loads, Stores`
			`* Younger instructions which must reach the load/store ordering point only`
			`* after the SYNC instruction completes: Loads, Stores`
			`* Older instructions which must be globally performed when the SYNC instruction`
			`* completes: N/A`
			`*/`
			`#define STYPE_SYNC_MB 0x10`

			`/*`
			`* stype 0x14 - A completion barrier specific to global invalidations`
			`*`
			`* When a sync instruction of this type completes any preceding GINVI or GINVT`
			`* operation has been globalized & completed on all coherent CPUs. Anything`
			`* that the GINV* instruction should invalidate will have been invalidated on`
			`* all coherent CPUs when this instruction completes. It is implementation`
			`* specific whether the GINV* instructions themselves will ensure completion,`
			`* or this sync type will.`
			`*`
			`* In systems implementing global invalidates (ie. with Config5.GI == 2 or 3)`
			`* this sync type also requires that previous SYNCI operations have completed.`
			`*/`
			`#define STYPE_GINV 0x14`

			`#ifdef CONFIG_CPU_HAS_SYNC`
			`#define __sync() \`
			`__asm__ __volatile__( \`
			`".set push\n\t" \`
			`".set noreorder\n\t" \`
			`".set mips2\n\t" \`
			`"sync\n\t" \`
			`".set pop" \`
			`: /* no output */ \`
			`: /* no input */ \`
			`: "memory")`
			`#else`
			`#define __sync() do { } while(0)`
			`#endif`

			`#define __fast_iob() \`
			`__asm__ __volatile__( \`
			`".set push\n\t" \`
			`".set noreorder\n\t" \`
			`"lw $0,%0\n\t" \`
			`"nop\n\t" \`
			`".set pop" \`
			`: /* no output */ \`
			`: "m" (*(int *)CKSEG1) \`
			`: "memory")`
			`#ifdef CONFIG_CPU_CAVIUM_OCTEON`
			`# define OCTEON_SYNCW_STR ".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"`
			`# define __syncw() __asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")`

			`# define fast_wmb() __syncw()`
			`# define fast_rmb() barrier()`
			`# define fast_mb() __sync()`
			`# define fast_iob() do { } while (0)`
			`#else /* ! CONFIG_CPU_CAVIUM_OCTEON */`
			`# define fast_wmb() __sync()`
			`# define fast_rmb() __sync()`
			`# define fast_mb() __sync()`
			`# ifdef CONFIG_SGI_IP28`
			`# define fast_iob() \`
			`__asm__ __volatile__( \`
			`".set push\n\t" \`
			`".set noreorder\n\t" \`
			`"lw $0,%0\n\t" \`
			`"sync\n\t" \`
			`"lw $0,%0\n\t" \`
			`".set pop" \`
			`: /* no output */ \`
			`: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \`
			`: "memory")`
			`# else`
			`# define fast_iob() \`
			`do { \`
			`__sync(); \`
			`__fast_iob(); \`
			`} while (0)`
			`# endif`
			`#endif /* CONFIG_CPU_CAVIUM_OCTEON */`

			`#ifdef CONFIG_CPU_HAS_WB`

			`#include <asm/wbflush.h>`

			`#define mb() wbflush()`
			`#define iob() wbflush()`

			`#else /* !CONFIG_CPU_HAS_WB */`

			`#define mb() fast_mb()`
			`#define iob() fast_iob()`

			`#endif /* !CONFIG_CPU_HAS_WB */`

			`#define wmb() fast_wmb()`
			`#define rmb() fast_rmb()`

			`#if defined(CONFIG_WEAK_ORDERING)`
			`# ifdef CONFIG_CPU_CAVIUM_OCTEON`
			`# define __smp_mb() __sync()`
			`# define __smp_rmb() barrier()`
			`# define __smp_wmb() __syncw()`
			`# else`
			`# define __smp_mb() __asm__ __volatile__("sync" : : :"memory")`
			`# define __smp_rmb() __asm__ __volatile__("sync" : : :"memory")`
			`# define __smp_wmb() __asm__ __volatile__("sync" : : :"memory")`
			`# endif`
			`#else`
			`#define __smp_mb() barrier()`
			`#define __smp_rmb() barrier()`
			`#define __smp_wmb() barrier()`
			`#endif`

			`/*`
			`* When LL/SC does imply order, it must also be a compiler barrier to avoid the`
			`* compiler from reordering where the CPU will not. When it does not imply`
			`* order, the compiler is also free to reorder across the LL/SC loop and`
			`* ordering will be done by smp_llsc_mb() and friends.`
			`*/`
			`#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)`
			`#define __WEAK_LLSC_MB " sync \n"`
			`#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")`
			`#define __LLSC_CLOBBER`
			`#else`
			`#define __WEAK_LLSC_MB " \n"`
			`#define smp_llsc_mb() do { } while (0)`
			`#define __LLSC_CLOBBER "memory"`
			`#endif`

			`#ifdef CONFIG_CPU_CAVIUM_OCTEON`
			`#define smp_mb__before_llsc() smp_wmb()`
			`#define __smp_mb__before_llsc() __smp_wmb()`
			`/* Cause previous writes to become visible on all CPUs as soon as possible */`
			`#define nudge_writes() __asm__ __volatile__(".set push\n\t" \`
			`".set arch=octeon\n\t" \`
			`"syncw\n\t" \`
			`".set pop" : : : "memory")`
			`#else`
			`#define smp_mb__before_llsc() smp_llsc_mb()`
			`#define __smp_mb__before_llsc() smp_llsc_mb()`
			`#define nudge_writes() mb()`
			`#endif`

			`#define __smp_mb__before_atomic() __smp_mb__before_llsc()`
			`#define __smp_mb__after_atomic() smp_llsc_mb()`

			`/*`
			`* Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,`
			`* store or prefetch) in between an LL & SC can cause the SC instruction to`
			`* erroneously succeed, breaking atomicity. Whilst it's unusual to write code`
			`* containing such sequences, this bug bites harder than we might otherwise`
			`* expect due to reordering & speculation:`
			`*`
			`* 1) A memory access appearing prior to the LL in program order may actually`
			`* be executed after the LL - this is the reordering case.`
			`*`
			`* In order to avoid this we need to place a memory barrier (ie. a SYNC`
			`* instruction) prior to every LL instruction, in between it and any earlier`
			`* memory access instructions.`
			`*`
			`* This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.`
			`*`
			`* 2) If a conditional branch exists between an LL & SC with a target outside`
			`* of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()`
			`* or similar, then misprediction of the branch may allow speculative`
			`* execution of memory accesses from outside of the LL-SC loop.`
			`*`
			`* In order to avoid this we need a memory barrier (ie. a SYNC instruction)`
			`* at each affected branch target, for which we also use loongson_llsc_mb()`
			`* defined below.`
			`*`
			`* This case affects all current Loongson 3 CPUs.`
			`*`
			`* The above described cases cause an error in the cache coherence protocol;`
			`* such that the Invalidate of a competing LL-SC goes 'missing' and SC`
			`* erroneously observes its core still has Exclusive state and lets the SC`
			`* proceed.`
			`*`
			`* Therefore the error only occurs on SMP systems.`
			`*/`
			`#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */`
			`#define loongson_llsc_mb() __asm__ __volatile__("sync" : : :"memory")`
			`#else`
			`#define loongson_llsc_mb() do { } while (0)`
			`#endif`

			`static inline void sync_ginv(void)`
			`{`
			`asm volatile("sync\t%0" :: "i"(STYPE_GINV));`
			`}`

			`#include <asm-generic/barrier.h>`

			`#endif /* __ASM_BARRIER_H */`