/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __MIPS_ASM_SYNC_H__
#define __MIPS_ASM_SYNC_H__

/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 *   1) Completion barriers, which ensure that a memory operation has actually
 *      completed & often involve stalling the CPU pipeline to do so.
 *
 *   2) Ordering barriers, which only ensure that affected memory operations
 *      won't be reordered in the CPU pipeline in a manner that violates the
 *      restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 *   a) Ordering barriers only require memory access instructions which precede
 *      them in program order (older instructions) to reach a point in the
 *      load/store datapath beyond which reordering is not possible before
 *      allowing memory access instructions which follow them (younger
 *      instructions) to be performed.  That is, older instructions don't
 *      actually need to complete - they just need to get far enough that all
 *      other coherent CPUs will observe their completion before they observe
 *      the effects of younger instructions.
 *
 *   b) Multiple variants of ordering barrier are provided which allow the
 *      effects to be restricted to different combinations of older or younger
 *      loads or stores. By way of example, if we only care that stores older
 *      than a barrier are observed prior to stores that are younger than a
 *      barrier & don't care about the ordering of loads then the 'wmb'
 *      ordering barrier can be used. Limiting the barrier's effects to stores
 *      allows loads to continue unaffected & potentially allows the CPU to
 *      make progress faster than if younger loads had to wait for older stores
 *      to complete.
 */
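
/*
 * As a minimal illustration of the 'wmb' case above (a sketch, not taken
 * from any particular caller), on a weakly ordered CPU:
 *
 *	sw	t0, 0(a0)	# store A
 *	sync	0x4		# 'wmb' ordering barrier
 *	sw	t1, 0(a1)	# store B
 *
 * guarantees that other coherent CPUs observe store A before store B, whilst
 * loads on either side of the barrier remain free to be reordered around it.
 */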

/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 */
#define __SYNC_none -1

/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full 0x00

/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 */
#define __SYNC_aq   __SYNC_full
#define __SYNC_rl   __SYNC_full
#define __SYNC_mb   __SYNC_full

/*
 * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
 * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb __SYNC_none
# define __SYNC_wmb 0x04
#else
# define __SYNC_rmb __SYNC_full
# define __SYNC_wmb __SYNC_full
#endif

/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt operation
 * has been performed by all coherent CPUs, we must issue a sync instruction of
 * this type. Once this instruction graduates all coherent CPUs will have
 * observed the invalidation.
 */
#define __SYNC_ginv 0x14
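
/*
 * For example, a caller that needs a preceding ginvi or ginvt to be visible
 * to all coherent CPUs might follow it with (a sketch of typical usage, not
 * a copy of a specific in-tree user):
 *
 *	asm volatile(__SYNC(ginv, always) ::: "memory");
 */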

/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always   (1 << 0)

/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering   (1 << 1)
#else
# define __SYNC_weak_ordering   0
#endif

/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc   (1 << 2)
#else
# define __SYNC_weak_llsc   0
#endif
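
/*
 * For example, a barrier required only after an LL/SC sequence on such
 * systems could be written as (a sketch of typical usage):
 *
 *	asm volatile(__SYNC(mb, weak_llsc) ::: "memory");
 *
 * which emits no sync instruction at all when
 * CONFIG_WEAK_REORDERING_BEYOND_LLSC is disabled.
 */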

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * In both of the above cases the bug manifests as an error in the cache
 * coherence protocol: the Invalidate from a competing LL/SC sequence goes
 * 'missing', so the SC erroneously observes that its core still holds the
 * cache line in the Exclusive state & allows the SC to succeed.
 *
 * The error therefore only occurs on SMP systems.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war   (1 << 31)
#else
# define __SYNC_loongson3_war   0
#endif
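
/*
 * As a sketch of the placement described above (illustrative only, not a
 * copy of an in-tree user), an inline asm LL/SC loop guarded against both
 * cases might look like:
 *
 *	"	" __SYNC(full, loongson3_war) "		\n"
 *	"1:	ll	%[old], %[mem]			\n"
 *	"	bne	%[old], %[cmp], 2f		\n"
 *	"	 move	%[tmp], %[new]			\n"
 *	"	sc	%[tmp], %[mem]			\n"
 *	"	beqz	%[tmp], 1b			\n"
 *	"	 nop					\n"
 *	"2:	" __SYNC(full, loongson3_war) "		\n"
 *
 * ie. a barrier before the LL to handle case 1, & another at label 2 (the
 * target of the branch out of the loop) to handle case 2.
 */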

/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 * Note that this expression is evaluated by the assembler (not the compiler),
 * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)   (1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)   1
#endif
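
/*
 * Given the assembler's -1-for-true convention, on Octeon this works out as:
 *
 *	__SYNC_rpt(__SYNC_wmb)      == 1 - (-1) == 2 repetitions
 *	__SYNC_rpt(any other type)  == 1 -   0  == 1 repetition
 */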

/*
 * The main event. Here we actually emit a sync instruction of a given type, if
 * reason is non-zero.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)            \
    .if (( _type ) != -1) && ( _reason );       \
    .set    push;                       \
    .set    MIPS_ISA_LEVEL_RAW;             \
    .rept   __SYNC_rpt(_type);              \
    sync    _type;                      \
    .endr;                          \
    .set    pop;                        \
    .else;                          \
    _else;                          \
    .endif
#else
# define ____SYNC(_type, _reason, _else)
#endif
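
/*
 * As an illustration (assuming CONFIG_CPU_HAS_SYNC=y), a use with a non-zero
 * reason such as ____SYNC(__SYNC_full, __SYNC_always, ) boils down to:
 *
 *	.set	push
 *	.set	MIPS_ISA_LEVEL_RAW
 *	sync	0x00
 *	.set	pop
 *
 * whilst a zero _reason selects the .else branch & emits only _else, which
 * may be empty.
 */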

/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code.
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)                \
    ____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)                \
    __stringify(____SYNC(type, reason, else))
#endif

#define __SYNC(type, reason)                    \
    ___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)             \
    ___SYNC(__SYNC_##type, __SYNC_##reason, else)
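
/*
 * Typical usage (a sketch, not tied to any particular caller): in C the
 * expansion is a string of assembler statements suitable for pasting into
 * inline assembly,
 *
 *	asm volatile(__SYNC(mb, always) ::: "memory");
 *
 * whilst in assembly files the macro can be used directly as a statement:
 *
 *	__SYNC(full, always)
 */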

#endif /* __MIPS_ASM_SYNC_H__ */