/* $NetBSD: lock_stubs.s,v 1.10 2021/08/25 13:28:51 thorpej Exp $ */

/*-
 * Copyright (c) 2007, 2021 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran, and by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"

#include <machine/asm.h>

__KERNEL_RCSID(0, "$NetBSD: lock_stubs.s,v 1.10 2021/08/25 13:28:51 thorpej Exp $");

#include "assym.h"

#if defined(MULTIPROCESSOR)
/*
 * These 'unop' insns will be patched with 'mb' insns at run-time if
 * the system has more than one processor.
 */
#define	MB(label)	label:	unop
#else
#define	MB(label)	/* nothing */
#endif

#if !defined(LOCKDEBUG)

/*
 * void mutex_enter(kmutex_t *mtx);
 */
LEAF(mutex_enter, 1)
	LDGP(pv)
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
1:
	mov	v0, t1
	ldq_l	t2, 0(a0)
	bne	t2, 2f
	stq_c	t1, 0(a0)
	beq	t1, 3f
	MB(.L_mutex_enter_mb_1)
	RET
2:
	lda	t12, mutex_vector_enter
	jmp	(t12)
3:
	br	1b
	END(mutex_enter)

/*
 * void mutex_exit(kmutex_t *mtx);
 */
LEAF(mutex_exit, 1)
	LDGP(pv)
	MB(.L_mutex_exit_mb_1)
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	mov	zero, t3
1:
	ldq_l	t2, 0(a0)
	cmpeq	v0, t2, t2
	beq	t2, 2f
	stq_c	t3, 0(a0)
	beq	t3, 3f
	RET
2:
	lda	t12, mutex_vector_exit
	jmp	(t12)
3:
	br	1b
	END(mutex_exit)

#if 0 /* XXX disabled for now XXX */
/*
 * void mutex_spin_enter(kmutex_t *mtx);
 */
LEAF(mutex_spin_enter, 1);
	LDGP(pv)

	/*
	 * STEP 1: Perform the MUTEX_SPIN_SPLRAISE() function.
	 * (see sys/kern/kern_mutex.c)
	 *
	 *	s = splraise(mtx->mtx_ipl);
	 *	if (curcpu->ci_mtx_count-- == 0)
	 *		curcpu->ci_mtx_oldspl = s;
	 */

	call_pal PAL_OSF1_rdps		/* clobbers v0, t0, t8..t11 */
					/* v0 = cur_ipl */
#ifdef __BWX__
	mov	a0, a1			/* a1 = mtx */
	ldbu	a0, MUTEX_IPL(a0)	/* a0 = new_ipl */
	mov	v0, a4			/* save cur_ipl in a4 */
#else
	mov	a0, a1			/* a1 = mtx */
	ldq_u	a2, MUTEX_IPL(a0)
	mov	v0, a4			/* save cur_ipl in a4 */
	extbl	a2, MUTEX_IPL, a0	/* a0 = new_ipl */
#endif /* __BWX__ */
	cmplt	v0, a0, a3		/* a3 = (cur_ipl < new_ipl) */
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	mov	v0, a5			/* save curlwp in a5 */
	/*
	 * The forward-branch over the SWPIPL call is correctly predicted
	 * not-taken by the CPU because it's rare for a code path to acquire
	 * 2 spin mutexes.
	 */
	beq	a3, 1f			/* no? -> skip... */
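	/*
	 * PAL_OSF1_swpipl takes the new IPL in a0 (which is why new_ipl
	 * was extracted into a0 above) and returns the previous IPL in v0.
	 */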
	call_pal PAL_OSF1_swpipl	/* clobbers v0, t0, t8..t11 */
	/*
	 * v0 returns the old_ipl, which will be the same as the
	 * cur_ipl we squirreled away in a4 earlier.
	 */
1:
	/*
	 * curlwp->l_cpu is now stable.  Update the counter and
	 * stash the old_ipl.  Just in case it's not clear what's
	 * going on, we:
	 *
	 *	- Load previous value of mtx_oldspl into t1.
	 *	- Conditionally move old_ipl into t1 if mtx_count == 0.
	 *	- Store t1 back to mtx_oldspl; if mtx_count != 0,
	 *	  the store is redundant, but it's faster than a forward
	 *	  branch.
	 */
	ldq	a3, L_CPU(a5)		/* a3 = curlwp->l_cpu (curcpu) */
	ldl	t0, CPU_INFO_MTX_COUNT(a3)
	ldl	t1, CPU_INFO_MTX_OLDSPL(a3)
	cmoveq	t0, a4, t1		/* mtx_count == 0? -> t1 = old_ipl */
	subl	t0, 1, t2		/* mtx_count-- */
	stl	t1, CPU_INFO_MTX_OLDSPL(a3)
	stl	t2, CPU_INFO_MTX_COUNT(a3)

	/*
	 * STEP 2: __cpu_simple_lock_try(&mtx->mtx_lock)
	 */
	ldl_l	t0, MUTEX_SIMPLELOCK(a1)
	ldiq	t1, __SIMPLELOCK_LOCKED
	bne	t0, 2f			/* contended */
	stl_c	t1, MUTEX_SIMPLELOCK(a1)
	beq	t1, 2f			/* STL_C failed; consider contended */
	MB(.L_mutex_spin_enter_mb_1)
	RET
2:
	mov	a1, a0			/* restore first argument */
	lda	pv, mutex_spin_retry
	jmp	(pv)
	END(mutex_spin_enter)

/*
 * void mutex_spin_exit(kmutex_t *mtx);
 */
LEAF(mutex_spin_exit, 1)
	LDGP(pv);
	MB(.L_mutex_spin_exit_mb_1)

	/*
	 * STEP 1: __cpu_simple_unlock(&mtx->mtx_lock)
	 */
	stl	zero, MUTEX_SIMPLELOCK(a0)

	/*
	 * STEP 2: Perform the MUTEX_SPIN_SPLRESTORE() function.
	 * (see sys/kern/kern_mutex.c)
	 *
	 *	s = curcpu->ci_mtx_oldspl;
	 *	if (++curcpu->ci_mtx_count == 0)
	 *		splx(s);
	 */

	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	ldq	a3, L_CPU(v0)		/* a3 = curlwp->l_cpu (curcpu) */
	ldl	t0, CPU_INFO_MTX_COUNT(a3)
	ldl	a0, CPU_INFO_MTX_OLDSPL(a3)
	addl	t0, 1, t2		/* mtx_count++ */
	stl	t2, CPU_INFO_MTX_COUNT(a3)
	/*
	 * The forward-branch over the SWPIPL call is correctly predicted
	 * not-taken by the CPU because it's rare for a code path to acquire
	 * 2 spin mutexes.
	 */
	bne	t2, 1f			/* t2 != 0?  Skip... */
	call_pal PAL_OSF1_swpipl	/* clobbers v0, t0, t8..t11 */
1:
	RET
	END(mutex_spin_exit)
#endif /* XXX disabled for now XXX */

/*
 * void rw_enter(krwlock_t *rwl, krw_t op);
 *
 *	Acquire one hold on a RW lock.
 */
LEAF(rw_enter, 2)
	LDGP(pv)

	/*
	 * RW_READER == 0 (we have a compile-time assert in machdep.c
	 * to ensure this).
	 *
	 * Acquire for read is the most common case.
	 */
	bne	a1, 3f

	/* Acquiring for read. */
1:
	ldq_l	t0, 0(a0)
	and	t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1
	addq	t0, RW_READ_INCR, t2
	bne	t1, 4f			/* contended */
	stq_c	t2, 0(a0)
	beq	t2, 2f			/* STQ_C failed; retry */
	MB(.L_rw_enter_mb_1)
	RET
2:
	br	1b

	/* Acquiring for write. */
3:
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	ldq_l	t0, 0(a0)
	or	v0, RW_WRITE_LOCKED, t2
	bne	t0, 4f			/* contended */
	stq_c	t2, 0(a0)
	beq	t2, 4f			/* STQ_C failed; consider it contended */
	MB(.L_rw_enter_mb_2)
	RET
4:
	lda	pv, rw_vector_enter
	jmp	(pv)
	END(rw_enter)

/*
 * int rw_tryenter(krwlock_t *rwl, krw_t op);
 *
 *	Try to acquire one hold on a RW lock.
 */
LEAF(rw_tryenter, 2)
	LDGP(pv)

	/* See above. */
	bne	a1, 3f

	/* Acquiring for read. */
1:
	ldq_l	t0, 0(a0)
	and	t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1
	addq	t0, RW_READ_INCR, v0
	bne	t1, 4f			/* contended */
	stq_c	v0, 0(a0)
	beq	v0, 2f			/* STQ_C failed; retry */
	MB(.L_rw_tryenter_mb_1)
	RET	/* v0 contains non-zero LOCK_FLAG from STQ_C */
2:
	br	1b

	/* Acquiring for write. */
3:
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
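	/*
	 * v0 now holds curlwp.  A write-held lock word is curlwp with
	 * RW_WRITE_LOCKED set in its low bits; the LL/SC sequence below
	 * builds that value and stores it only if the lock word is
	 * currently zero (unowned).
	 */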
	ldq_l	t0, 0(a0)
	or	v0, RW_WRITE_LOCKED, v0
	bne	t0, 4f			/* contended */
	stq_c	v0, 0(a0)
	/*
	 * v0 now contains the LOCK_FLAG value from STQ_C, which is either
	 * 0 for failure, or non-zero for success.  In either case, v0's
	 * value is correct.  Go ahead and perform the memory barrier even
	 * in the failure case because we expect it to be rare and it saves
	 * a branch-not-taken instruction in the success case.
	 */
	MB(.L_rw_tryenter_mb_2)
	RET
4:
	mov	zero, v0		/* return 0 (failure) */
	RET
	END(rw_tryenter)

/*
 * void rw_exit(krwlock_t *rwl);
 *
 *	Release one hold on a RW lock.
 */
LEAF(rw_exit, 1)
	LDGP(pv)
	MB(.L_rw_exit_mb_1)

	/*
	 * Check for write-lock release, and get the owner/count field
	 * on its own for sanity-checking against expected values.
	 */
	ldq	a1, 0(a0)
	and	a1, RW_WRITE_LOCKED, t1
	srl	a1, RW_READ_COUNT_SHIFT, a2
	bne	t1, 3f

	/*
	 * Releasing a read-lock.  Make sure the count is non-zero.
	 * If it is zero, take the slow path where the juicy diagnostic
	 * checks are located.
	 */
	beq	a2, 4f

	/*
	 * We do the following trick to check to see if we're releasing
	 * the last read-count and there are waiters:
	 *
	 *	1. Set v0 to 1.
	 *	2. Shift the new read count into t1.
	 *	3. Conditionally move t1 to v0 based on low-bit-set of t0
	 *	   (RW_HAS_WAITERS).  If RW_HAS_WAITERS is not set, then
	 *	   the move will not take place, and v0 will remain 1.
	 *	   Otherwise, v0 will contain the updated read count.
	 *	4. Jump to slow path if v0 == 0.
	 */
1:
	ldq_l	t0, 0(a0)
	ldiq	v0, 1
	subq	t0, RW_READ_INCR, t2
	srl	t2, RW_READ_COUNT_SHIFT, t1
	cmovlbs	t0, t1, v0
	beq	v0, 4f
	stq_c	t2, 0(a0)
	beq	t2, 2f			/* STQ_C failed; try again */
	RET
2:
	br	1b

	/*
	 * Releasing a write-lock.  Make sure the owner field points
	 * to our LWP.  If it does not, take the slow path where the
	 * juicy diagnostic checks are located.  a2 contains the owner
	 * field shifted down.  Shift it back up to compare to curlwp;
	 * this conveniently discards the bits we don't want to compare.
	 */
3:
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	sll	a2, RW_READ_COUNT_SHIFT, a2
	mov	zero, t2		/* fast-path write-unlock stores NULL */
	cmpeq	v0, a2, v0		/* v0 = (owner == curlwp) */
	ldq_l	t0, 0(a0)
	beq	v0, 4f			/* owner field mismatch; need slow path */
	blbs	t0, 4f			/* RW_HAS_WAITERS set; need slow-path */
	stq_c	t2, 0(a0)
	beq	t2, 4f			/* STQ_C failed; need slow-path */
	RET
4:
	lda	pv, rw_vector_exit
	jmp	(pv)
	END(rw_exit)

#endif /* !LOCKDEBUG */

#if defined(MULTIPROCESSOR)
/*
 * Table of locations to patch with MB instructions on multiprocessor
 * systems.
 */
	.section ".rodata"
	.globl	lock_stub_patch_table
lock_stub_patch_table:
#if !defined(LOCKDEBUG)
	.quad	.L_mutex_enter_mb_1
	.quad	.L_mutex_exit_mb_1
#if 0 /* XXX disabled for now XXX */
	.quad	.L_mutex_spin_enter_mb_1
	.quad	.L_mutex_spin_exit_mb_1
#endif /* XXX disabled for now XXX */
	.quad	.L_rw_enter_mb_1
	.quad	.L_rw_enter_mb_2
	.quad	.L_rw_tryenter_mb_1
	.quad	.L_rw_tryenter_mb_2
	.quad	.L_rw_exit_mb_1
#endif /* ! LOCKDEBUG */
	.quad	0			/* NULL terminator */
#endif /* MULTIPROCESSOR */
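
/*
 * For illustration only: a rough C sketch of how an MD consumer of
 * lock_stub_patch_table might apply the MB patches at boot when more
 * than one CPU is present.  The actual patching code lives elsewhere
 * in the alpha MD sources; the function name below is hypothetical,
 * and 0x63ff4000 is assumed to be the encoding of the Alpha 'mb'
 * instruction.
 *
 *	extern uint64_t lock_stub_patch_table[];	// NULL-terminated
 *
 *	static void
 *	patch_lock_stubs(void)		// hypothetical name
 *	{
 *		uint64_t *entry;
 *
 *		for (entry = lock_stub_patch_table; *entry != 0; entry++) {
 *			// Overwrite the 'unop' placeholder with 'mb'.
 *			*(uint32_t *)(uintptr_t)*entry = 0x63ff4000;
 *		}
 *
 *		// The I-stream must then be synchronized (IMB) before
 *		// the patched stubs are executed.
 *	}
 */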