2 * Copyright (c) 2018-2019 The FreeBSD Foundation
3 * Copyright (c) 2003 Peter Wemm.
4 * Copyright (c) 1993 The Regents of the University of California.
7 * Portions of this software were developed by
8 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
9 * the FreeBSD Foundation.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 #include <machine/asmacros.h>
41 #include <machine/specialreg.h>
42 #include <machine/pmap.h>
50 * void bzero(void *buf, u_int len)
72 movq $PAGE_SIZE/8,%rcx
100 * bcopy(src, dst, cnt)
102 * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
/*
 * Overlap-safe copy. The visible lines show: an overlap test, a forward
 * quadword copy (count >> 3) with a byte tail (count & 7), and a backward
 * path for the overlapping src < dst case. Branch/label/rep lines between
 * these instructions are not visible in this excerpt.
 */
111 cmpq %rcx,%rax /* overlapping && src < dst? */
114 shrq $3,%rcx /* copy by 64-bit words */
118 andq $7,%rcx /* any bytes left? */
/* backward path: advance pointers past the region so the copy can run
 * downward without clobbering unread source bytes */
126 addq %rcx,%rdi /* copy backwards */
130 andq $7,%rcx /* any fractional bytes? */
134 movq %rdx,%rcx /* copy remainder by 32-bit words */
146 * Note: memcpy does not support overlapping copies
/*
 * Forward-only copy: quadwords first (count >> 3), then the remaining
 * 0-7 bytes (count & 7). The rep-string instructions between these two
 * lines are not visible in this excerpt.
 */
152 shrq $3,%rcx /* copy by 64-bit words */
156 andq $7,%rcx /* any bytes left? */
164 * pagecopy(%rdi=from, %rsi=to)
/*
 * Copy one page using non-temporal stores (movnti) so the destination
 * does not pollute the cache; prefetchnta pulls source lines in ahead
 * of the copy. %rax starts at -PAGE_SIZE, so indexing (%rdi,%rax) walks
 * the page from its start using a negative offset; the loop counter
 * update and branch are not visible in this excerpt.
 */
168 movq $-PAGE_SIZE,%rax
173 prefetchnta (%rdi,%rax)
/* unrolled 4x: 32 bytes per visible iteration, indexed by %rdx */
177 movq (%rdi,%rdx),%rax
178 movnti %rax,(%rsi,%rdx)
179 movq 8(%rdi,%rdx),%rax
180 movnti %rax,8(%rsi,%rdx)
181 movq 16(%rdi,%rdx),%rax
182 movnti %rax,16(%rsi,%rdx)
183 movq 24(%rdi,%rdx),%rax
184 movnti %rax,24(%rsi,%rdx)
/* fillw: fill cnt 16-bit words at base with pat (body not visible) */
192 /* fillw(pat, base, cnt) */
193 /* %rdi,%rsi, %rdx */
205 /*****************************************************************************/
206 /* copyout and fubyte family */
207 /*****************************************************************************/
209 * Access user memory from inside the kernel. These routines should be
210 * the only places that do this.
212 * These routines set curpcb->pcb_onfault for the time they execute. When a
213 * protection violation occurs inside the functions, the trap handler
214 * returns to *curpcb->pcb_onfault instead of the function.
218 * copyout(from_kernel, to_user, len)
/* arm the fault handler: a user-page fault inside this routine resumes
 * at copyout_fault instead of crashing the kernel */
223 movq PCPU(CURPCB),%rax
224 movq $copyout_fault,PCB_ONFAULT(%rax)
225 testq %rdx,%rdx /* anything to do? */
229 * Check explicitly for non-user addresses. This check is essential
230 * because it prevents usermode from writing into the kernel. We do
231 * not verify anywhere else that the user did not specify a rogue
235 * First, prevent address wrapping.
241 * XXX STOP USING VM_MAXUSER_ADDRESS.
242 * It is an end address, not a max, so every time it is used correctly it
243 * looks like there is an off by one error, and of course it caused an off
244 * by one error in several places.
246 movq $VM_MAXUSER_ADDRESS,%rcx
251 /* bcopy(%rsi, %rdi, %rdx) */
/* success tail: clear pcb_onfault (presumably %rax holds 0 here — the
 * zeroing instruction is not visible in this excerpt; verify) */
264 movq PCPU(CURPCB),%rdx
265 movq %rax,PCB_ONFAULT(%rdx)
/* copyout_fault: clear pcb_onfault; the EFAULT return that presumably
 * follows is not visible in this excerpt */
271 movq PCPU(CURPCB),%rdx
272 movq $0,PCB_ONFAULT(%rdx)
279 * copyin(from_user, to_kernel, len)
/* arm the fault handler for the duration of the user access */
284 movq PCPU(CURPCB),%rax
285 movq $copyin_fault,PCB_ONFAULT(%rax)
286 testq %rdx,%rdx /* anything to do? */
290 * make sure address is valid
295 movq $VM_MAXUSER_ADDRESS,%rcx
/* bulk copy: 8-byte units first, then the 0-7 byte remainder */
302 shrq $3,%rcx /* copy longword-wise */
306 andb $7,%cl /* copy remaining bytes */
/* success tail: clear pcb_onfault (presumably %rax holds 0 here — the
 * zeroing instruction is not visible in this excerpt; verify) */
312 movq PCPU(CURPCB),%rdx
313 movq %rax,PCB_ONFAULT(%rdx)
/* copyin_fault: clear pcb_onfault; error return not visible here */
319 movq PCPU(CURPCB),%rdx
320 movq $0,PCB_ONFAULT(%rdx)
327 * casueword32. Compare and set user integer. Returns -1 on fault,
328 * 0 if access was successful. Old value is written to *oldp.
329 * dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
/* arm the shared fusufault handler before touching user memory */
333 movq PCPU(CURPCB),%r8
334 movq $fusufault,PCB_ONFAULT(%r8)
/* bound: dst must leave room for a full 4-byte access below the user
 * address limit */
336 movq $VM_MAXUSER_ADDRESS-4,%rax
337 cmpq %rax,%rdi /* verify address is valid */
340 movl %esi,%eax /* old */
/* cmpxchg compares %eax with (%rdi); a lock prefix presumably precedes
 * this on the missing line — TODO confirm */
344 cmpxchgl %ecx,(%rdi) /* new = %ecx */
347 * The old value is in %eax. If the store succeeded it will be the
348 * value we expected (old) from before the store, otherwise it will
349 * be the current value. Save %eax into %esi to prepare the return
/* disarm the fault handler (value written from %rax; the preceding
 * setup of %rax is not visible here) */
354 movq %rax,PCB_ONFAULT(%r8)
357 * Access the oldp after the pcb_onfault is cleared, to correctly
358 * catch corrupted pointer.
360 movl %esi,(%rdx) /* oldp = %rdx */
366 * casueword. Compare and set user long. Returns -1 on fault,
367 * 0 if access was successful. Old value is written to *oldp.
368 * dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
372 movq PCPU(CURPCB),%r8
373 movq $fusufault,PCB_ONFAULT(%r8)
/* NOTE(review): bound is VM_MAXUSER_ADDRESS-4, but cmpxchgq below
 * performs an 8-byte access — looks like it should be -8; verify
 * against upstream. */
375 movq $VM_MAXUSER_ADDRESS-4,%rax
376 cmpq %rax,%rdi /* verify address is valid */
379 movq %rsi,%rax /* old */
383 cmpxchgq %rcx,(%rdi) /* new = %rcx */
386 * The old value is in %rax. If the store succeeded it will be the
387 * value we expected (old) from before the store, otherwise it will
388 * be the current value.
/* disarm the fault handler; the store of the old value to *oldp that
 * presumably follows is not visible in this excerpt */
392 movq %rax,PCB_ONFAULT(%r8)
399 * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
400 * byte from user memory.
401 * addr = %rdi, valp = %rsi
/* fueword: 8-byte fetch; bound leaves room for the full access */
407 movq PCPU(CURPCB),%rcx
408 movq $fusufault,PCB_ONFAULT(%rcx)
410 movq $VM_MAXUSER_ADDRESS-8,%rax
411 cmpq %rax,%rdi /* verify address is valid */
/* disarm the fault handler after the user load (load not visible) */
416 movq %rax,PCB_ONFAULT(%rcx)
/* fueword32: same pattern with a 4-byte bound */
425 movq PCPU(CURPCB),%rcx
426 movq $fusufault,PCB_ONFAULT(%rcx)
428 movq $VM_MAXUSER_ADDRESS-4,%rax
429 cmpq %rax,%rdi /* verify address is valid */
434 movq %rax,PCB_ONFAULT(%rcx)
441 * fuswintr() and suswintr() are specialized variants of fuword16() and
442 * suword16(), respectively. They are called from the profiling code,
443 * potentially at interrupt time. If they fail, that's okay; good things
444 * will happen later. They always fail for now, until the trap code is
445 * able to deal with this.
/* fuword16: 2-byte user fetch with the standard onfault protocol */
456 movq PCPU(CURPCB),%rcx
457 movq $fusufault,PCB_ONFAULT(%rcx)
459 movq $VM_MAXUSER_ADDRESS-2,%rax
464 movq $0,PCB_ONFAULT(%rcx)
/* fubyte: 1-byte user fetch */
471 movq PCPU(CURPCB),%rcx
472 movq $fusufault,PCB_ONFAULT(%rcx)
474 movq $VM_MAXUSER_ADDRESS-1,%rax
479 movq $0,PCB_ONFAULT(%rcx)
/* fusufault: shared fault handler for the fu*/su* family; clears
 * pcb_onfault (presumably with %rax pre-set on a missing line) and
 * returns the error — the return path is not visible here */
486 movq PCPU(CURPCB),%rcx
488 movq %rax,PCB_ONFAULT(%rcx)
494 * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
496 * addr = %rdi, value = %rsi
/* suword: 8-byte user store; bound allows the full access width */
501 movq PCPU(CURPCB),%rcx
502 movq $fusufault,PCB_ONFAULT(%rcx)
504 movq $VM_MAXUSER_ADDRESS-8,%rax
505 cmpq %rax,%rdi /* verify address validity */
/* disarm the fault handler after the store (store not visible here) */
510 movq PCPU(CURPCB),%rcx
511 movq %rax,PCB_ONFAULT(%rcx)
/* suword32: 4-byte variant */
519 movq PCPU(CURPCB),%rcx
520 movq $fusufault,PCB_ONFAULT(%rcx)
522 movq $VM_MAXUSER_ADDRESS-4,%rax
523 cmpq %rax,%rdi /* verify address validity */
528 movq PCPU(CURPCB),%rcx
529 movq %rax,PCB_ONFAULT(%rcx)
/* suword16: 2-byte variant */
536 movq PCPU(CURPCB),%rcx
537 movq $fusufault,PCB_ONFAULT(%rcx)
539 movq $VM_MAXUSER_ADDRESS-2,%rax
540 cmpq %rax,%rdi /* verify address validity */
545 movq PCPU(CURPCB),%rcx /* restore trashed register */
546 movq %rax,PCB_ONFAULT(%rcx)
/* subyte: 1-byte variant */
553 movq PCPU(CURPCB),%rcx
554 movq $fusufault,PCB_ONFAULT(%rcx)
556 movq $VM_MAXUSER_ADDRESS-1,%rax
557 cmpq %rax,%rdi /* verify address validity */
563 movq PCPU(CURPCB),%rcx /* restore trashed register */
564 movq %rax,PCB_ONFAULT(%rcx)
570 * copyinstr(from, to, maxlen, int *lencopied)
571 * %rdi, %rsi, %rdx, %rcx
573 * copy a string from 'from' to 'to', stop when a 0 character is reached.
574 * return ENAMETOOLONG if string is longer than maxlen, and
575 * EFAULT on protection violations. If lencopied is non-zero,
576 * return the actual length in *lencopied.
/* stash args that the copy loop will clobber */
580 movq %rdx,%r8 /* %r8 = maxlen */
581 movq %rcx,%r9 /* %r9 = *len */
/* rep-string convention wants %rsi = source, %rdi = destination */
582 xchgq %rdi,%rsi /* %rdi = from, %rsi = to */
583 movq PCPU(CURPCB),%rcx
584 movq $cpystrflt,PCB_ONFAULT(%rcx)
586 movq $VM_MAXUSER_ADDRESS,%rax
588 /* make sure 'from' is within bounds */
592 /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
609 /* Success -- 0 byte reached */
614 /* rdx is zero - return ENAMETOOLONG or EFAULT */
/* distinguish the two exhaustion causes: stopping exactly at the user
 * address limit means the fault case (EFAULT), otherwise the string was
 * simply too long */
615 movq $VM_MAXUSER_ADDRESS,%rax
619 movq $ENAMETOOLONG,%rax
626 /* set *lencopied and return %eax */
627 movq PCPU(CURPCB),%rcx
628 movq $0,PCB_ONFAULT(%rcx)
640 * copystr(from, to, maxlen, int *lencopied)
641 * %rdi, %rsi, %rdx, %rcx
/*
 * Kernel-to-kernel string copy: same contract as copyinstr but with no
 * user-address bounds check or onfault protocol. Only fragments of the
 * body are visible in this excerpt.
 */
645 movq %rdx,%r8 /* %r8 = maxlen */
657 /* Success -- 0 byte reached */
662 /* rdx is zero -- return ENAMETOOLONG */
663 movq $ENAMETOOLONG,%rax
669 /* set *lencopied and return %rax */
678 * Handling of special amd64 registers and descriptor tables etc
680 /* void lgdt(struct region_descriptor *rdp); */
682 /* reload the descriptor table */
685 /* flush the prefetch q */
/* reloading %fs only changes the selector; on amd64 the 64-bit FS base
 * lives in an MSR, hence the warning below */
692 movl %eax,%fs /* Beware, use wrmsr to set 64 bit base */
696 /* reload code selector by turning return into intersegmental return */
704 /*****************************************************************************/
705 /* setjump, longjump */
706 /*****************************************************************************/
/*
 * setjmp(%rdi = jmp_buf): save the SysV callee-saved registers, the
 * stack pointer, and the return address into the buffer, then return 0.
 */
709 movq %rbx,0(%rdi) /* save rbx */
710 movq %rsp,8(%rdi) /* save rsp */
711 movq %rbp,16(%rdi) /* save rbp */
712 movq %r12,24(%rdi) /* save r12 */
713 movq %r13,32(%rdi) /* save r13 */
714 movq %r14,40(%rdi) /* save r14 */
715 movq %r15,48(%rdi) /* save r15 */
/* the return address sits at (%rsp) on entry */
716 movq 0(%rsp),%rdx /* get rta */
717 movq %rdx,56(%rdi) /* save rip */
718 xorl %eax,%eax /* return(0); */
/*
 * longjmp(%rdi = jmp_buf): restore the registers saved by setjmp and
 * rewrite the return address on the stack so that ret resumes after the
 * original setjmp call. The xorl/incl sequence producing the return
 * value 1 is split here (incl presumably follows on a missing line).
 */
723 movq 0(%rdi),%rbx /* restore rbx */
724 movq 8(%rdi),%rsp /* restore rsp */
725 movq 16(%rdi),%rbp /* restore rbp */
726 movq 24(%rdi),%r12 /* restore r12 */
727 movq 32(%rdi),%r13 /* restore r13 */
728 movq 40(%rdi),%r14 /* restore r14 */
729 movq 48(%rdi),%r15 /* restore r15 */
730 movq 56(%rdi),%rdx /* get rta */
731 movq %rdx,0(%rsp) /* put in return frame */
732 xorl %eax,%eax /* return(1); */
738 * Support for reading MSRs in the safe manner.
741 /* int rdmsr_safe(u_int msr, uint64_t *data) */
/* arm msr_onfault so a #GP from an invalid MSR is survivable */
743 movq PCPU(CURPCB),%r8
744 movq $msr_onfault,PCB_ONFAULT(%r8)
746 rdmsr /* Read MSR pointed by %ecx. Returns
747 hi byte in edx, lo in %eax */
/* assemble the 64-bit value: high half shifted up, low half
 * zero-extended (movl to the same register clears bits 63:32) */
748 salq $32,%rdx /* sign-shift %rdx left */
749 movl %eax,%eax /* zero-extend %eax -> %rax */
/* disarm the fault handler (store of %rax; setup not visible here) */
753 movq %rax,PCB_ONFAULT(%r8)
758 * Support for writing MSRs in the safe manner.
761 /* int wrmsr_safe(u_int msr, uint64_t data) */
/* arm msr_onfault so a #GP from an invalid MSR/value is survivable */
763 movq PCPU(CURPCB),%r8
764 movq $msr_onfault,PCB_ONFAULT(%r8)
769 wrmsr /* Write MSR pointed by %ecx. Accepts
770 hi byte in edx, lo in %eax. */
771 movq %rax,PCB_ONFAULT(%r8)
777 * MSR operations fault handler
/* msr_onfault: clear pcb_onfault; the EFAULT return that presumably
 * follows is not visible in this excerpt */
781 movq $0,PCB_ONFAULT(%r8)
787 * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
788 * Invalidates address space addressed by ucr3, then returns to kcr3.
789 * Done in assembler to ensure no other memory accesses happen while
/* switching %cr3 twice back-to-back; interrupt masking around this
 * pair is presumably on the missing lines — verify */
793 ENTRY(pmap_pti_pcid_invalidate)
796 movq %rdi,%cr3 /* to user page table */
797 movq %rsi,%cr3 /* back to kernel */
802 * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
803 * Invalidates virtual address va in address space ucr3, then returns to kcr3.
806 ENTRY(pmap_pti_pcid_invlpg)
809 movq %rdi,%cr3 /* to user page table */
/* the invlpg on va (%rdx) presumably sits between the two %cr3 loads
 * on a missing line */
811 movq %rsi,%cr3 /* back to kernel */
816 * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
818 * Invalidates virtual addresses between sva and eva in address space ucr3,
819 * then returns to kcr3.
822 ENTRY(pmap_pti_pcid_invlrng)
825 movq %rdi,%cr3 /* to user page table */
/* the invlpg loop over [sva, eva) presumably occupies the missing
 * lines between the two %cr3 loads */
830 movq %rsi,%cr3 /* back to kernel */
/*
 * Assembler macros building a counted sequence of labelled calls
 * (used for speculation-barrier call/return stuffing). Only the macro
 * headers and one expansion line are visible; the bodies are on
 * missing lines.
 */
835 .macro ibrs_seq_label l
838 .macro ibrs_call_label l
841 .macro ibrs_seq count
844 ibrs_call_label %(ll)
852 /* all callers already saved %rax, %rdx, and %rcx */
853 ENTRY(handle_ibrs_entry)
/* no-op unless the IBRS mitigation is administratively enabled */
854 cmpb $0,hw_ibrs_active(%rip)
856 movl $MSR_IA32_SPEC_CTRL,%ecx
/* set IBRS|STIBP in the 64-bit MSR value split across %edx:%eax
 * (the rdmsr/wrmsr pair is on missing lines) */
858 orl $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
859 orl $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx
/* remember that SPEC_CTRL was set so the exit path knows to clear it */
861 movb $1,PCPU(IBPB_SET)
862 testl $CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
866 END(handle_ibrs_entry)
868 ENTRY(handle_ibrs_exit)
/* only undo what handle_ibrs_entry set on this CPU */
869 cmpb $0,PCPU(IBPB_SET)
871 movl $MSR_IA32_SPEC_CTRL,%ecx
/* clear IBRS|STIBP in the %edx:%eax MSR image (rdmsr/wrmsr on
 * missing lines) */
873 andl $~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
874 andl $~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
876 movb $0,PCPU(IBPB_SET)
878 END(handle_ibrs_exit)
880 /* registers-neutral version, but needs stack */
/* same as handle_ibrs_exit, but preserves %rax/%rcx/%rdx by spilling
 * them to the stack (the push/pop pairs are on missing lines) */
881 ENTRY(handle_ibrs_exit_rs)
882 cmpb $0,PCPU(IBPB_SET)
887 movl $MSR_IA32_SPEC_CTRL,%ecx
889 andl $~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
890 andl $~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
895 movb $0,PCPU(IBPB_SET)
897 END(handle_ibrs_exit_rs)
/* MDS mitigation dispatch targets: the void handler does nothing
 * (mitigation off / not needed); the verw handler presumably flushes
 * CPU buffers via verw — its body is on missing lines */
901 ENTRY(mds_handler_void)
903 END(mds_handler_void)
905 ENTRY(mds_handler_verw)
911 END(mds_handler_verw)
/* MDS buffer-clearing sequence for Ivy Bridge-class CPUs: saves %xmm0
 * to per-CPU scratch, then overwrites internal buffers with
 * non-temporal stores into the per-CPU MDS buffer; loop control and
 * register restore are on missing lines */
913 ENTRY(mds_handler_ivb)
922 1: movq PCPU(MDS_BUF), %rdx
923 movdqa %xmm0, PCPU(MDS_TMP)
932 2: movntdq %xmm0, (%rdx)
938 movdqa PCPU(MDS_TMP),%xmm0
/* MDS clearing sequence for Broadwell-class CPUs: same shape as the
 * IVB variant but uses %rbx as the buffer pointer (its save/restore
 * is on missing lines) */
948 ENTRY(mds_handler_bdw)
959 1: movq PCPU(MDS_BUF), %rbx
960 movdqa %xmm0, PCPU(MDS_TMP)
966 2: movntdq %xmm0, (%rbx)
975 movdqa PCPU(MDS_TMP),%xmm0
/* MDS clearing for Skylake-class CPUs, SSE flavor: stashes %xmm0 in
 * per-CPU scratch, works through MDS_BUF/MDS_BUF64, and flushes cache
 * lines with clflushopt at a fixed 5376-byte offset (constant from
 * the published mitigation sequence — the loop bounds are on missing
 * lines) */
987 ENTRY(mds_handler_skl_sse)
997 1: movq PCPU(MDS_BUF), %rdi
998 movq PCPU(MDS_BUF64), %rdx
999 movdqa %xmm0, PCPU(MDS_TMP)
1006 2: clflushopt 5376(%rdi, %rax, 8)
1016 movdqa PCPU(MDS_TMP), %xmm0
1025 END(mds_handler_skl_sse)
/* MDS clearing for Skylake-class CPUs, AVX flavor: same structure as
 * the SSE variant but uses %ymm0; the doubled vorpd load from the
 * 64-byte buffer is part of the prescribed fill sequence */
1027 ENTRY(mds_handler_skl_avx)
1037 1: movq PCPU(MDS_BUF), %rdi
1038 movq PCPU(MDS_BUF64), %rdx
1039 vmovdqa %ymm0, PCPU(MDS_TMP)
1040 vpxor %ymm0, %ymm0, %ymm0
1043 vorpd (%rdx), %ymm0, %ymm0
1044 vorpd (%rdx), %ymm0, %ymm0
1046 2: clflushopt 5376(%rdi, %rax, 8)
1056 vmovdqa PCPU(MDS_TMP), %ymm0
1065 END(mds_handler_skl_avx)
/* MDS clearing for Skylake-class CPUs, AVX-512 flavor: identical
 * structure to the AVX variant but on %zmm0. The EVEX instructions
 * are hand-encoded as .byte sequences (each preceded by its mnemonic
 * in a comment), presumably because the toolchain of the day lacked
 * AVX-512 support — do not edit the byte strings without re-encoding */
1067 ENTRY(mds_handler_skl_avx512)
1077 1: movq PCPU(MDS_BUF), %rdi
1078 movq PCPU(MDS_BUF64), %rdx
1079 /* vmovdqa64 %zmm0, PCPU(MDS_TMP) */
1080 .byte 0x65, 0x62, 0xf1, 0xfd, 0x48, 0x7f, 0x04, 0x25
1082 /* vpxor %zmm0, %zmm0, %zmm0 */
1083 .byte 0x62, 0xf1, 0xfd, 0x48, 0xef, 0xc0
1086 /* vorpd (%rdx), %zmm0, %zmm0 */
1087 .byte 0x62, 0xf1, 0xfd, 0x48, 0x56, 0x02
1088 /* vorpd (%rdx), %zmm0, %zmm0 */
1089 .byte 0x62, 0xf1, 0xfd, 0x48, 0x56, 0x02
1091 2: clflushopt 5376(%rdi, %rax, 8)
1101 /* vmovdqa64 PCPU(MDS_TMP), %zmm0 */
1102 .byte 0x65, 0x62, 0xf1, 0xfd, 0x48, 0x6f, 0x04, 0x25
1112 END(mds_handler_skl_avx512)
/* MDS clearing for Silvermont-class Atom CPUs: smaller buffer
 * variant of the IVB-style sequence (save %xmm0, fill via movntdq,
 * restore); loop control is on missing lines */
1114 ENTRY(mds_handler_silvermont)
1123 1: movq PCPU(MDS_BUF), %rdx
1124 movdqa %xmm0, PCPU(MDS_TMP)
1128 2: movntdq %xmm0, (%rdx)
1134 movdqa PCPU(MDS_TMP),%xmm0
1142 END(mds_handler_silvermont)