/* $NetBSD: cpuswitch.S,v 1.41 2003/11/15 08:44:18 scw Exp $ */

/*-
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 1994-1998 Mark Brinicombe.
 * Copyright (c) 1994 Brini.
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Brini.
 * 4. The name of the company nor the name of the author may be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * cpu switching functions
 */

#include "opt_sched.h"

#include <machine/acle-compat.h>
#include <machine/asm.h>
#include <machine/asmacros.h>
#include <machine/armreg.h>
#include <machine/vfp.h>

__FBSDID("$FreeBSD$");
#if __ARM_ARCH >= 6 && defined(SMP)
#define GET_PCPU(tmp, tmp2) \
	mrc	p15, 0, tmp, c0, c0, 5;	\
	and	tmp, tmp, #0xf;		\
	ldr	tmp2, .Lcurpcpu+4;	\
	mul	tmp, tmp, tmp2;		\
	ldr	tmp2, .Lcurpcpu;	\
	add	tmp, tmp, tmp2;
#else
#define GET_PCPU(tmp, tmp2) \
	ldr	tmp, .Lcurpcpu
#endif
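
/*
 * How GET_PCPU works (explanatory sketch): on SMP the low four bits of
 * MPIDR (CP15 c0, c0, 5) give this core's index; it is scaled by the
 * per-CPU structure size stored at .Lcurpcpu+4 and added to the base of
 * the __pcpu[] array to form this CPU's pcpu pointer.  On UP there is
 * only __pcpu[0], so a single load suffices.
 */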

	.fpu	vfp			/* allow VFP instructions */

.Lcurpcpu:
	.word	_C_LABEL(__pcpu)
	.word	PCPU_SIZE
.Lblocked_lock:
	.word	_C_LABEL(blocked_lock)

#ifndef ARM_NEW_PMAP

#define	DOMAIN_CLIENT	0x01

.Lcpufuncs:
	.word	_C_LABEL(cpufuncs)

/*
 * cpu_throw(oldtd, newtd)
 *
 * Remove current thread state, then select the next thread to run
 * and load its state.
 *
 * r0 = oldtd
 * r1 = newtd
 */
ENTRY(cpu_throw)
	mov	r5, r1			/* r5 = newtd */

#ifdef VFP				/* This thread is dying, disable */
	bl	_C_LABEL(vfp_discard)	/* VFP without preserving state. */
#endif

	ldr	r7, [r5, #(TD_PCB)]	/* r7 = new thread's PCB */

	/* Switch to lwp0 context */

	ldr	r9, .Lcpufuncs
#if !defined(CPU_ARM11) && !defined(CPU_CORTEXA) && !defined(CPU_MV_PJ4B) && !defined(CPU_KRAIT)
	mov	lr, pc
	ldr	pc, [r9, #CF_IDCACHE_WBINV_ALL]
#endif
	ldr	r0, [r7, #(PCB_PL1VEC)]
	ldr	r1, [r7, #(PCB_DACR)]

	/*
	 * r0 = Pointer to L1 slot for vector_page (or NULL)
	 * r1 = lwp0's DACR
	 */

	/*
	 * Ensure the vector table is accessible by fixing up lwp0's L1
	 */
	cmp	r0, #0			/* No need to fixup vector table? */
	ldrne	r3, [r0]		/* But if yes, fetch current value */
	ldrne	r2, [r7, #(PCB_L1VEC)]	/* Fetch new vector_page value */
	mcr	p15, 0, r1, c3, c0, 0	/* Update DACR for lwp0's context */
	cmpne	r3, r2			/* Stuffing the same value? */
	strne	r2, [r0]		/* Store if not. */

#ifdef PMAP_INCLUDE_PTE_SYNC
	/*
	 * Need to sync the cache to make sure that last store is
	 * visible to the MMU.
	 */
	movne	r1, #4			/* size of the updated word */
	movne	lr, pc
	ldrne	pc, [r9, #CF_DCACHE_WB_RANGE]
#endif /* PMAP_INCLUDE_PTE_SYNC */

	/*
	 * Note: We don't do the same optimisation as cpu_switch() with
	 * respect to avoiding flushing the TLB if we're switching to
	 * the same L1 since this process' VM space may be about to go
	 * away, so we don't want *any* turds left in the TLB.
	 */

	/* Switch the memory to the new process */
	ldr	r0, [r7, #(PCB_PAGEDIR)]
	mov	lr, pc
	ldr	pc, [r9, #CF_CONTEXT_SWITCH]

	GET_PCPU(r6, r4)
	/* Hook in a new pcb */
	str	r7, [r6, #PC_CURPCB]
	/* We have a new curthread now so make a note of it */
	str	r5, [r6, #PC_CURTHREAD]
#if __ARM_ARCH >= 6
	mcr	p15, 0, r5, c13, c0, 4	/* set curthread in TPIDRPRW */
#endif
	/* Set the new tls */
	ldr	r6, [r5, #(TD_MD + MD_TP)]
#if __ARM_ARCH >= 6
	mcr	p15, 0, r6, c13, c0, 3	/* set TLS register (TPIDRURO) */
#else
	ldr	r4, =ARM_TP_ADDRESS
	str	r6, [r4]		/* ARM_TP */
	ldr	r6, [r5, #(TD_MD + MD_RAS_START)]
	str	r6, [r4, #4]		/* ARM_RAS_START */
	ldr	r6, [r5, #(TD_MD + MD_RAS_END)]
	str	r6, [r4, #8]		/* ARM_RAS_END */
#endif
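
/*
 * Background on the pre-ARMv6 path above (sketch; the offsets are the
 * ones used in this file): without a TLS register, the kernel exports the
 * thread pointer and the current restartable atomic sequence (RAS) bounds
 * through a fixed, globally readable page at ARM_TP_ADDRESS.  Userland
 * reads the thread pointer from the first word, and the RAS start/end
 * words let an interrupted atomic sequence be restarted across a switch.
 */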
	/* Restore all the saved registers and exit */
	add	r3, r7, #PCB_R4
	ldmia	r3, {r4-r12, sp, pc}
END(cpu_throw)

/*
 * cpu_switch(oldtd, newtd, lock)
 *
 * Save the current thread state, then select the next thread to run
 * and load its state.
 *
 * r0 = oldtd
 * r1 = newtd
 * r2 = lock (new lock for old thread)
 */
ENTRY(cpu_switch)
	/* Interrupts are disabled. */

	/* Save all the registers in the old thread's pcb. */
	ldr	r3, [r0, #(TD_PCB)]
	add	r3, #(PCB_R4)
	stmia	r3, {r4-r12, sp, lr, pc}
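
/*
 * Layout note on the save/restore pair (inferred from the register lists
 * used in this file): the stmia above stores twelve words in register
 * order: r4-r12, sp, lr, pc.  The restore paths use
 * "ldmia r3, {r4-r12, sp, pc}", a list that omits lr, so the word saved
 * from lr is loaded into pc and a thread switched back in resumes at
 * cpu_switch()'s return address with its callee-saved registers intact.
 * The trailing saved-pc word is only consumed by code that inspects the
 * pcb layout in memory (e.g. debuggers and stack unwinders).
 */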
	mov	r6, r2			/* Save the mutex */

	/* rem: r0 = old lwp */
	/* rem: interrupts are disabled */

	/* Process is now on a processor. */
	/* We have a new curthread now so make a note of it */
	GET_PCPU(r7, r2)
	str	r1, [r7, #PC_CURTHREAD]
#if __ARM_ARCH >= 6
	mcr	p15, 0, r1, c13, c0, 4	/* set curthread in TPIDRPRW */
#endif

	/* Hook in a new pcb */
	ldr	r2, [r1, #TD_PCB]
	str	r2, [r7, #PC_CURPCB]

	/* Stage two: Save old context */

	/* Get the user structure for the old thread. */
	ldr	r2, [r0, #(TD_PCB)]
	mov	r4, r0			/* Save the old thread. */

#if __ARM_ARCH >= 6
	/*
	 * Set new tp. No need to store the old one first, userland can't
	 * change it directly on armv6.
	 */
	ldr	r9, [r1, #(TD_MD + MD_TP)]
	mcr	p15, 0, r9, c13, c0, 3	/* set TLS register (TPIDRURO) */
#else
	/* Store the old tp; userland can change it on armv4. */
	ldr	r3, =ARM_TP_ADDRESS
	ldr	r9, [r3]
	str	r9, [r0, #(TD_MD + MD_TP)]
	ldr	r9, [r3, #4]
	str	r9, [r0, #(TD_MD + MD_RAS_START)]
	ldr	r9, [r3, #8]
	str	r9, [r0, #(TD_MD + MD_RAS_END)]

	/* Set the new tp */
	ldr	r9, [r1, #(TD_MD + MD_TP)]
	str	r9, [r3]
	ldr	r9, [r1, #(TD_MD + MD_RAS_START)]
	str	r9, [r3, #4]
	ldr	r9, [r1, #(TD_MD + MD_RAS_END)]
	str	r9, [r3, #8]
#endif

	/* Get the user structure for the new process in r9 */
	ldr	r9, [r1, #(TD_PCB)]

	/* rem: r2 = old PCB */
	/* rem: r9 = new PCB */
	/* rem: interrupts are enabled */
#ifdef VFP
	fmrx	r0, fpexc		/* If the VFP is enabled */
	tst	r0, #(VFPEXC_EN)	/* the current thread has */
	movne	r1, #1			/* used it, so go save */
	addne	r0, r2, #(PCB_VFPSTATE)	/* the state into the PCB */
	blne	_C_LABEL(vfp_store)	/* and disable the VFP. */
#endif

	/* r0-r3 now free! */
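
/*
 * Lazy VFP switching in a nutshell (explanatory note; the trap-based
 * re-enable lives elsewhere): the VFP state is written back, and the
 * unit disabled, only when FPEXC.EN shows that the outgoing thread
 * actually used the VFP.  A thread that never touches the VFP pays
 * nothing on switch, and an incoming thread that does use it has its
 * state reloaded on first use.
 */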

	/* Third phase: restore saved context */

	/* rem: r2 = old PCB */
	/* rem: r9 = new PCB */

	ldr	r5, [r9, #(PCB_DACR)]	/* r5 = new DACR */
	mov	r2, #DOMAIN_CLIENT
	cmp	r5, r2, lsl #(PMAP_DOMAIN_KERNEL * 2)	/* Sw to kernel thread? */
	beq	.Lcs_context_switched	/* Yup. Don't flush cache */
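
/*
 * Why this works (DACR encoding refresher): the DACR holds a two-bit
 * access field per MMU domain, so "client" access for domain N is
 * DOMAIN_CLIENT << (N * 2).  A DACR that grants client access to
 * PMAP_DOMAIN_KERNEL alone can only belong to a kernel thread, which
 * has no userland mappings worth flushing.
 */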
	mrc	p15, 0, r0, c3, c0, 0	/* r0 = old DACR */

	/*
	 * Get the new L1 table pointer into r11.  If we're switching to
	 * an LWP with the same address space as the outgoing one, we can
	 * skip the cache purge and the TTB load.
	 *
	 * To avoid data dep stalls that would happen anyway, we try
	 * to get some useful work done in the meantime.
	 */
	mrc	p15, 0, r10, c2, c0, 0	/* r10 = old L1 */
	ldr	r11, [r9, #(PCB_PAGEDIR)]	/* r11 = new L1 */

	teq	r10, r11		/* Same L1? */
	cmpeq	r0, r5			/* Same DACR? */
	beq	.Lcs_context_switched	/* yes! */
#if !defined(CPU_ARM11) && !defined(CPU_CORTEXA) && !defined(CPU_MV_PJ4B) && !defined(CPU_KRAIT)
	/*
	 * Definitely need to flush the cache.
	 */
	ldr	r1, .Lcpufuncs
	mov	lr, pc
	ldr	pc, [r1, #CF_IDCACHE_WBINV_ALL]
#endif
.Lcs_cache_purge_skipped:
	/* rem: r9 = new PCB */
	/* rem: r10 = old L1 */
	/* rem: r11 = new L1 */

	ldr	r7, [r9, #(PCB_PL1VEC)]

	/*
	 * Ensure the vector table is accessible by fixing up the L1
	 */
	cmp	r7, #0			/* No need to fixup vector table? */
	ldrne	r2, [r7]		/* But if yes, fetch current value */
	ldrne	r0, [r9, #(PCB_L1VEC)]	/* Fetch new vector_page value */
	mcr	p15, 0, r5, c3, c0, 0	/* Update DACR for new context */
	cmpne	r2, r0			/* Stuffing the same value? */
#ifndef PMAP_INCLUDE_PTE_SYNC
	strne	r0, [r7]		/* Nope, update it */
#else
	beq	.Lcs_same_vector
	str	r0, [r7]		/* Otherwise, update it */

	/*
	 * Need to sync the cache to make sure that last store is
	 * visible to the MMU.
	 */
	ldr	r2, .Lcpufuncs
	mov	r0, r7
	mov	r1, #4
	mov	lr, pc
	ldr	pc, [r2, #CF_DCACHE_WB_RANGE]

.Lcs_same_vector:
#endif /* PMAP_INCLUDE_PTE_SYNC */

	cmp	r10, r11		/* Switching to the same L1? */
	ldr	r10, .Lcpufuncs
	beq	.Lcs_same_l1		/* Yup. */

	/*
	 * Do a full context switch, including full TLB flush.
	 */
	mov	r0, r11
	mov	lr, pc
	ldr	pc, [r10, #CF_CONTEXT_SWITCH]

	b	.Lcs_context_switched

	/*
	 * We're switching to a different process in the same L1.
	 * In this situation, we only need to flush the TLB for the
	 * vector_page mapping, and even then only if r7 is non-NULL.
	 */
.Lcs_same_l1:
	cmp	r7, #0
	movne	r0, #0			/* We *know* vector_page's VA is 0x0 */
	movne	lr, pc
	ldrne	pc, [r10, #CF_TLB_FLUSHID_SE]

.Lcs_context_switched:

	/* Release the old thread */
	str	r6, [r4, #TD_LOCK]
#if defined(SCHED_ULE) && defined(SMP)
	ldr	r6, .Lblocked_lock
	GET_CURTHREAD_PTR(r3)
1:
	ldr	r4, [r3, #TD_LOCK]
	cmp	r4, r6
	beq	1b
#endif
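
/*
 * Why the spin above exists (SCHED_ULE handoff, to the best of our
 * reading): when ULE migrates a thread between CPUs, its td_lock is
 * temporarily pointed at blocked_lock until the old CPU has finished
 * switching away from it.  Spinning here until td_lock moves off
 * blocked_lock ensures we never run a thread whose context is still
 * being saved on another CPU.
 */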

	/* XXXSCW: Safe to re-enable FIQs here */

	/* rem: r9 = new PCB */

	/* Restore all the saved registers and exit */
	add	r3, r9, #PCB_R4
	ldmia	r3, {r4-r12, sp, pc}
END(cpu_switch)

#else /* !ARM_NEW_PMAP */

#include <machine/sysreg.h>

ENTRY(cpu_context_switch)	/* QQQ: What about a macro instead of a function? */
	DSB
	mcr	CP15_TTBR0(r0)		/* set the new TTB */
	ISB
	mov	r0, #(CPU_ASID_KERNEL)
	mcr	CP15_TLBIASID(r0)	/* flush non-global TLB entries */
	/*
	 * Flush the entire Branch Target Cache because the branch predictor
	 * is not architecturally invisible.  See ARM Architecture Reference
	 * Manual ARMv7-A and ARMv7-R edition, page B2-1264(65), the "Branch
	 * predictors" and "Requirements for branch predictor maintenance
	 * operations" sections.
	 *
	 * QQQ: The predictor is virtually addressed and holds virtual target
	 * addresses, so if a mapping is changed the predictor cache must be
	 * flushed.  The flush is part of the full I-cache invalidation that
	 * is always performed when a code mapping is changed.  So this is the
	 * only place in the kernel where a standalone predictor flush must be
	 * executed (except for the self-modifying code case).
	 */
	mcr	CP15_BPIALL		/* and flush entire Branch Target Cache */
	DSB
	RET
END(cpu_context_switch)
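
/*
 * Usage note (a sketch of the ASID scheme this relies on; the details
 * live in the pmap code): user mappings are tagged with a per-process
 * ASID while permanent kernel mappings are global, so after retargeting
 * TTBR0 only the non-global entries created under CPU_ASID_KERNEL must
 * be invalidated.  Entries tagged with other processes' ASIDs simply
 * stop matching and need no explicit flush on switch.
 */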

/*
 * cpu_throw(oldtd, newtd)
 *
 * Remove current thread state, then select the next thread to run
 * and load its state.
 *
 * r0 = oldtd
 * r1 = newtd
 */
ENTRY(cpu_throw)
	mov	r10, r0			/* r10 = oldtd */
	mov	r11, r1			/* r11 = newtd */

#ifdef VFP				/* This thread is dying, disable */
	bl	_C_LABEL(vfp_discard)	/* VFP without preserving state. */
#endif

	GET_PCPU(r8, r9)		/* r8 = current pcpu */
	ldr	r4, [r8, #PC_CPUID]	/* r4 = current cpu id */

	cmp	r10, #0			/* old thread? */
	beq	2f			/* no, skip */

	/* Remove this CPU from the active list. */
	ldr	r5, [r8, #PC_CURPMAP]
	mov	r0, #(PM_ACTIVE)
	add	r5, r0			/* r5 = old pm_active */

	/* Compute position and mask. */
#if _NCPUWORDS > 1
	lsr	r0, r4, #3
	bic	r0, #3
	add	r5, r0			/* r5 = position in old pm_active */
	mov	r2, #1
	and	r0, r4, #31
	lsl	r2, r0			/* r2 = mask */
#else
	mov	r2, #1
	lsl	r2, r4			/* r2 = mask */
#endif
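
/*
 * The computation above, in C terms (illustrative only):
 *
 *	word = &pm_active[cpuid / 32];	// lsr #3 + bic #3 = byte offset
 *	mask = 1 << (cpuid % 32);	// bit within that 32-bit word
 *
 * On single-word cpuset configurations the word index is always 0 and
 * only the mask needs computing.
 */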
	/* Clear cpu from old active list. */
#ifdef SMP
1:
	ldrex	r0, [r5]
	bic	r0, r2
	strex	r1, r0, [r5]
	teq	r1, #0
	bne	1b
#else
	ldr	r0, [r5]
	bic	r0, r2
	str	r0, [r5]
#endif

2:
	cmp	r11, #0			/* new thread? */
	beq	badsw1			/* no, panic */

	ldr	r7, [r11, #(TD_PCB)]	/* r7 = new PCB */

	/*
	 * Registers at this point
	 *   r4  = current cpu id
	 *   r7  = new PCB
	 *   r8  = current pcpu
	 *   r11 = newtd
	 */

	/* MMU switch to new thread. */
	ldr	r0, [r7, #(PCB_PAGEDIR)]
	cmp	r0, #0			/* valid paging base? */
	beq	badsw4			/* no, panic */
	bl	_C_LABEL(cpu_context_switch)

	/*
	 * Set new PMAP as current one.
	 * Insert cpu to new active list.
	 */
	ldr	r6, [r11, #(TD_PROC)]	/* newtd->proc */
	ldr	r6, [r6, #(P_VMSPACE)]	/* newtd->proc->vmspace */
	add	r6, #VM_PMAP		/* newtd->proc->vmspace->pmap */
	str	r6, [r8, #PC_CURPMAP]	/* store to curpmap */

	mov	r0, #(PM_ACTIVE)
	add	r6, r0			/* r6 = new pm_active */

	/* Compute position and mask. */
#if _NCPUWORDS > 1
	lsr	r0, r4, #3
	bic	r0, #3
	add	r6, r0			/* r6 = position in new pm_active */
	mov	r2, #1
	and	r0, r4, #31
	lsl	r2, r0			/* r2 = mask */
#else
	mov	r2, #1
	lsl	r2, r4			/* r2 = mask */
#endif
	/* Set cpu to new active list. */
#ifdef SMP
1:
	ldrex	r0, [r6]
	orr	r0, r2
	strex	r1, r0, [r6]
	teq	r1, #0
	bne	1b
#else
	ldr	r0, [r6]
	orr	r0, r2
	str	r0, [r6]
#endif

	/*
	 * Registers at this point.
	 *   r7  = new PCB
	 *   r8  = current pcpu
	 *   r11 = newtd
	 * They must match the ones at the sw1 label!
	 */
	b	sw1	/* share new thread init with cpu_switch() */

/*
 * cpu_switch(oldtd, newtd, lock)
 *
 * Save the current thread state, then select the next thread to run
 * and load its state.
 *
 * r0 = oldtd
 * r1 = newtd
 * r2 = lock (new lock for old thread)
 */
ENTRY(cpu_switch)
	/* Interrupts are disabled. */

	cmp	r0, #0			/* old thread? */
	beq	badsw2			/* no, panic */

	/* Save all the registers in the old thread's pcb. */
	ldr	r3, [r0, #(TD_PCB)]
	add	r3, #(PCB_R4)
	stmia	r3, {r4-r12, sp, lr, pc}

	cmp	r1, #0			/* new thread? */
	beq	badsw3			/* no, panic */

	/*
	 * Save arguments.  Note that we can now use r0-r14 until
	 * it is time to restore them for the new thread.  However,
	 * some registers are not safe over a function call.
	 */
	mov	r9, r2			/* r9 = lock */
	mov	r10, r0			/* r10 = oldtd */
	mov	r11, r1			/* r11 = newtd */

	GET_PCPU(r8, r3)		/* r8 = current PCPU */
	ldr	r7, [r11, #(TD_PCB)]	/* r7 = newtd->td_pcb */

#ifdef VFP
	ldr	r3, [r10, #(TD_PCB)]
	fmrx	r0, fpexc		/* If the VFP is enabled */
	tst	r0, #(VFPEXC_EN)	/* the current thread has */
	movne	r1, #1			/* used it, so go save */
	addne	r0, r3, #(PCB_VFPSTATE)	/* the state into the PCB */
	blne	_C_LABEL(vfp_store)	/* and disable the VFP. */
#endif

	/*
	 * MMU switch.  If we're switching to a thread with the same
	 * address space as the outgoing one, we can skip the MMU switch.
	 */
	mrc	CP15_TTBR0(r1)		/* r1 = old TTB */
	ldr	r0, [r7, #(PCB_PAGEDIR)]	/* r0 = new TTB */
	cmp	r0, r1			/* Switching to the same TTB? */
	beq	sw0			/* same TTB, skip */

	cmp	r0, #0			/* valid new TTB? */
	beq	badsw4			/* no, panic */

	bl	cpu_context_switch	/* new TTB as argument */

	/*
	 * Registers at this point
	 *   r7  = new PCB
	 *   r8  = current pcpu
	 *   r9  = lock
	 *   r10 = oldtd
	 *   r11 = newtd
	 */

	/*
	 * Set new PMAP as current one.
	 * Update active list on PMAPs.
	 */
	ldr	r6, [r11, #TD_PROC]	/* newtd->proc */
	ldr	r6, [r6, #P_VMSPACE]	/* newtd->proc->vmspace */
	add	r6, #VM_PMAP		/* newtd->proc->vmspace->pmap */

	ldr	r5, [r8, #PC_CURPMAP]	/* get old curpmap */
	str	r6, [r8, #PC_CURPMAP]	/* and save new one */

	mov	r0, #(PM_ACTIVE)
	add	r5, r0			/* r5 = old pm_active */
	add	r6, r0			/* r6 = new pm_active */

	/* Compute position and mask. */
	ldr	r4, [r8, #PC_CPUID]
#if _NCPUWORDS > 1
	lsr	r0, r4, #3
	bic	r0, #3
	add	r5, r0			/* r5 = position in old pm_active */
	add	r6, r0			/* r6 = position in new pm_active */
	mov	r2, #1
	and	r0, r4, #31
	lsl	r2, r0			/* r2 = mask */
#else
	mov	r2, #1
	lsl	r2, r4			/* r2 = mask */
#endif
	/* Clear cpu from old active list. */
#ifdef SMP
1:
	ldrex	r0, [r5]
	bic	r0, r2
	strex	r1, r0, [r5]
	teq	r1, #0
	bne	1b
#else
	ldr	r0, [r5]
	bic	r0, r2
	str	r0, [r5]
#endif
	/* Set cpu to new active list. */
#ifdef SMP
1:
	ldrex	r0, [r6]
	orr	r0, r2
	strex	r1, r0, [r6]
	teq	r1, #0
	bne	1b
#else
	ldr	r0, [r6]
	orr	r0, r2
	str	r0, [r6]
#endif

sw0:
	/*
	 * Registers at this point
	 *   r7  = new PCB
	 *   r8  = current pcpu
	 *   r9  = lock
	 *   r10 = oldtd
	 *   r11 = newtd
	 */

	/* Change the old thread lock. */
	add	r5, r10, #TD_LOCK
	DMB
1:
	ldrex	r0, [r5]
	strex	r1, r9, [r5]
	teq	r1, #0
	bne	1b
	DMB

sw1:
	clrex
	/*
	 * Registers at this point
	 *   r7  = new PCB
	 *   r8  = current pcpu
	 *   r11 = newtd
	 */
#if defined(SMP) && defined(SCHED_ULE)
	/*
	 * i386 and amd64 do the blocked lock test only for SMP and SCHED_ULE
	 * QQQ: What does it mean in reality and why is it done?
	 */
	ldr	r6, =blocked_lock
1:
	ldr	r3, [r11, #TD_LOCK]	/* atomic write regular read */
	cmp	r3, r6
	beq	1b
#endif
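
/*
 * A plausible answer to the QQQ above (same reasoning as the old-pmap
 * path): with SCHED_ULE on SMP, a thread being migrated has its td_lock
 * temporarily set to blocked_lock while its previous CPU is still inside
 * cpu_switch(); spinning until td_lock changes guarantees the thread's
 * saved context is fully written back before we load it here.
 */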

	/* Set the new tls */
	ldr	r0, [r11, #(TD_MD + MD_TP)]
	mcr	CP15_TPIDRURO(r0)	/* write tls thread reg 2 */

	/* We have a new curthread now so make a note of it */
	str	r11, [r8, #PC_CURTHREAD]
	mcr	CP15_TPIDRPRW(r11)

	/* store pcb in per cpu structure */
	str	r7, [r8, #PC_CURPCB]
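
/*
 * Register convention note: TPIDRURO (the user read-only thread ID
 * register) carries the TLS pointer that userland reads, while TPIDRPRW
 * is accessible only at PL1 and caches the curthread pointer so the
 * kernel can fetch it without a pcpu memory access.
 */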

	/*
	 * Restore all saved registers and return.  Note that some saved
	 * registers can be changed when either cpu_fork(), cpu_set_upcall(),
	 * cpu_set_fork_handler(), or makectx() was called.
	 */
	add	r3, r7, #(PCB_R4)
	ldmia	r3, {r4-r12, sp, pc}
END(cpu_switch)

badsw1:
	ldr	r0, =sw1_panic_str
	bl	_C_LABEL(panic)
1:	nop
	b	1b

badsw2:
	ldr	r0, =sw2_panic_str
	bl	_C_LABEL(panic)
1:	nop
	b	1b

badsw3:
	ldr	r0, =sw3_panic_str
	bl	_C_LABEL(panic)
1:	nop
	b	1b

badsw4:
	ldr	r0, =sw4_panic_str
	bl	_C_LABEL(panic)
1:	nop
	b	1b

sw1_panic_str:
	.asciz	"cpu_throw: no newthread supplied.\n"
sw2_panic_str:
	.asciz	"cpu_switch: no curthread supplied.\n"
sw3_panic_str:
	.asciz	"cpu_switch: no newthread supplied.\n"
sw4_panic_str:
	.asciz	"cpu_switch: new pagedir is NULL.\n"

#endif /* !ARM_NEW_PMAP */

ENTRY(savectx)
	stmfd	sp!, {lr}

	/* Store all the registers in the thread's pcb */
	add	r3, r0, #(PCB_R4)
	stmia	r3, {r4-r12, sp, lr, pc}

#ifdef VFP
	fmrx	r2, fpexc		/* If the VFP is enabled */
	tst	r2, #(VFPEXC_EN)	/* the current thread has */
	movne	r1, #1			/* used it, so go save */
	addne	r0, r0, #(PCB_VFPSTATE)	/* the state into the PCB */
	blne	_C_LABEL(vfp_store)	/* and disable the VFP. */
#endif

	ldmfd	sp!, {pc}
END(savectx)

ENTRY(fork_trampoline)
	STOP_UNWINDING	/* EABI: Don't unwind beyond the thread entry point. */
	mov	fp, #0	/* OABI: Stack traceback via fp stops here. */
	mov	r2, sp	/* r2 = frame (the new thread's trapframe) */
	mov	r1, r5	/* r1 = arg */
	mov	r0, r4	/* r0 = callout function */
	ldr	lr, =swi_exit		/* Go finish forking, then return */
	b	_C_LABEL(fork_exit)	/* to userland via swi_exit code. */
END(fork_trampoline)
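
/*
 * How we get here (sketch, pieced together from the code above and the
 * pcb bookkeeping done by cpu_fork()/cpu_set_fork_handler()): the child's
 * pcb is arranged so that the first cpu_switch() to it "returns" into
 * fork_trampoline with the callout function and its argument in the
 * callee-saved registers r4/r5, which survive the switch.  fork_exit()
 * runs the callout, and because lr was pointed at swi_exit, the new
 * thread finally leaves the kernel through the normal SWI return path.
 */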