From 42c08e6c82ed0de77ce7adc7de841a36358f27a4 Mon Sep 17 00:00:00 2001
From: ian
Date: Fri, 13 Feb 2015 00:15:13 +0000
Subject: [PATCH] MFC r276518: Rework vfp code so it will compile on clang 3.4 and 3.5.

git-svn-id: svn://svn.freebsd.org/base/stable/10@278646 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f
---
 sys/arm/arm/vfp.c | 112 +++++++++++++++++++++++++---------------------
 1 file changed, 62 insertions(+), 50 deletions(-)

diff --git a/sys/arm/arm/vfp.c b/sys/arm/arm/vfp.c
index f1289c36a..38aff98ad 100644
--- a/sys/arm/arm/vfp.c
+++ b/sys/arm/arm/vfp.c
@@ -51,30 +51,35 @@ static struct undefined_handler vfp10_uh, vfp11_uh;
 /* If true the VFP unit has 32 double registers, otherwise it has 16 */
 static int is_d32;
 
-/* The VFMXR command using coprocessor commands */
+/*
+ * About .fpu directives in this file...
+ *
+ * We should need simply .fpu vfpv3, but clang 3.5 has a quirk where setting
+ * vfpv3 doesn't imply that vfp2 features are also available -- both have to be
+ * explicitly set to get all the features of both.  This is probably a bug in
+ * clang, so it may get fixed and require changes here some day.  Other changes
+ * are probably coming in clang too, because there is email and open PRs
+ * indicating they want to completely disable the ability to use .fpu and
+ * similar directives in inline asm.  That would be catastrophic for us,
+ * hopefully they come to their senses.  There was also some discussion of a new
+ * syntax such as .push fpu=vfpv3; ...; .pop fpu; and that would be ideal for
+ * us, better than what we have now really.
+ *
+ * For gcc, each .fpu directive completely overrides the prior directive, unlike
+ * with clang, but luckily on gcc saying v3 implies all the v2 features as well.
+ */
+
 #define fmxr(reg, val) \
-        __asm __volatile("mcr p10, 7, %0, " __STRING(reg) " , c0, 0" :: "r"(val));
+        __asm __volatile(" .fpu vfpv2\n .fpu vfpv3\n"                  \
+            " vmsr " __STRING(reg) ", %0" :: "r"(val));
 
-/* The VFMRX command using coprocessor commands */
 #define fmrx(reg) \
 ({ u_int val = 0;\
-        __asm __volatile("mrc p10, 7, %0, " __STRING(reg) " , c0, 0" : "=r"(val));\
+        __asm __volatile(" .fpu vfpv2\n .fpu vfpv3\n"                  \
+            " vmrs %0, " __STRING(reg) : "=r"(val));                   \
         val; \
 })
 
-/*
- * Work around an issue with GCC where the asm it generates is not unified
- * syntax and fails to assemble because it expects the ldcleq instruction in the
- * form ldc<c>l, not in the UAL form ldcl<c>, and similar for stcleq.
- */
-#ifdef __clang__
-#define LDCLNE  "ldclne "
-#define STCLNE  "stclne "
-#else
-#define LDCLNE  "ldcnel "
-#define STCLNE  "stcnel "
-#endif
-
 static u_int
 get_coprocessorACR(void)
 {
@@ -103,25 +108,25 @@ vfp_init(void)
         coproc |= COPROC10 | COPROC11;
         set_coprocessorACR(coproc);
 
-        fpsid = fmrx(VFPSID);           /* read the vfp system id */
-        fpexc = fmrx(VFPEXC);           /* read the vfp exception reg */
+        fpsid = fmrx(fpsid);            /* read the vfp system id */
+        fpexc = fmrx(fpexc);            /* read the vfp exception reg */
 
         if (!(fpsid & VFPSID_HARDSOFT_IMP)) {
                 vfp_exists = 1;
                 is_d32 = 0;
-                PCPU_SET(vfpsid, fpsid);        /* save the VFPSID */
+                PCPU_SET(vfpsid, fpsid);        /* save the fpsid */
 
                 vfp_arch =
                     (fpsid & VFPSID_SUBVERSION2_MASK) >> VFPSID_SUBVERSION_OFF;
 
                 if (vfp_arch >= VFP_ARCH3) {
-                        tmp = fmrx(VMVFR0);
+                        tmp = fmrx(mvfr0);
                         PCPU_SET(vfpmvfr0, tmp);
 
                         if ((tmp & VMVFR0_RB_MASK) == 2)
                                 is_d32 = 1;
 
-                        tmp = fmrx(VMVFR1);
+                        tmp = fmrx(mvfr1);
                         PCPU_SET(vfpmvfr1, tmp);
                 }
@@ -161,10 +166,10 @@ vfp_bounce(u_int addr, u_int insn, struct trapframe *frame, int code)
          * something tried to execute a truly invalid instruction that maps to
          * the VFP.
          */
-        fpexc = fmrx(VFPEXC);
+        fpexc = fmrx(fpexc);
         if (fpexc & VFPEXC_EN) {
                 /* Clear any exceptions */
-                fmxr(VFPEXC, fpexc & ~(VFPEXC_EX | VFPEXC_FP2V));
+                fmxr(fpexc, fpexc & ~(VFPEXC_EX | VFPEXC_FP2V));
 
                 /* kill the process - we do not handle emulation */
                 critical_exit();
@@ -192,7 +197,7 @@ vfp_bounce(u_int addr, u_int insn, struct trapframe *frame, int code)
          * the last thread to use the VFP on this core was this thread, then the
          * VFP state is valid, otherwise restore this thread's state to the VFP.
          */
-        fmxr(VFPEXC, fpexc | VFPEXC_EN);
+        fmxr(fpexc, fpexc | VFPEXC_EN);
         curpcb = curthread->td_pcb;
         cpu = PCPU_GET(cpu);
         if (curpcb->pcb_vfpcpu != cpu || curthread != PCPU_GET(fpcurthread)) {
@@ -213,22 +218,26 @@ vfp_restore(struct vfp_state *vfpsave)
 {
         uint32_t fpexc;
 
-        /* On VFPv2 we may need to restore FPINST and FPINST2 */
+        /* On vfpv3 we may need to restore FPINST and FPINST2 */
         fpexc = vfpsave->fpexec;
         if (fpexc & VFPEXC_EX) {
-                fmxr(VFPINST, vfpsave->fpinst);
+                fmxr(fpinst, vfpsave->fpinst);
                 if (fpexc & VFPEXC_FP2V)
-                        fmxr(VFPINST2, vfpsave->fpinst2);
+                        fmxr(fpinst2, vfpsave->fpinst2);
         }
-        fmxr(VFPSCR, vfpsave->fpscr);
-
-        __asm __volatile("ldc p10, c0, [%0], #128\n"    /* d0-d15 */
-                "cmp %1, #0\n"                          /* -D16 or -D32? */
-                LDCLNE "p11, c0, [%0], #128\n"          /* d16-d31 */
-                "addeq %0, %0, #128\n"                  /* skip missing regs */
-                : : "r" (vfpsave), "r" (is_d32) : "cc");
-
-        fmxr(VFPEXC, fpexc);
+        fmxr(fpscr, vfpsave->fpscr);
+
+        __asm __volatile(
+            " .fpu vfpv2\n"
+            " .fpu vfpv3\n"
+            " vldmia %0!, {d0-d15}\n"           /* d0-d15 */
+            " cmp %1, #0\n"                     /* -D16 or -D32? */
+            " vldmiane %0!, {d16-d31}\n"        /* d16-d31 */
+            " addeq %0, %0, #128\n"             /* skip missing regs */
+            : "+&r" (vfpsave) : "r" (is_d32) : "cc"
+            );
+
+        fmxr(fpexc, fpexc);
 }
 
 /*
@@ -241,28 +250,31 @@ vfp_store(struct vfp_state *vfpsave, boolean_t disable_vfp)
 {
         uint32_t fpexc;
 
-        fpexc = fmrx(VFPEXC);           /* Is the vfp enabled? */
+        fpexc = fmrx(fpexc);            /* Is the vfp enabled? */
         if (fpexc & VFPEXC_EN) {
                 vfpsave->fpexec = fpexc;
-                vfpsave->fpscr = fmrx(VFPSCR);
+                vfpsave->fpscr = fmrx(fpscr);
 
-                /* On VFPv2 we may need to save FPINST and FPINST2 */
+                /* On vfpv3 we may need to save FPINST and FPINST2 */
                 if (fpexc & VFPEXC_EX) {
-                        vfpsave->fpinst = fmrx(VFPINST);
+                        vfpsave->fpinst = fmrx(fpinst);
                         if (fpexc & VFPEXC_FP2V)
-                                vfpsave->fpinst2 = fmrx(VFPINST2);
+                                vfpsave->fpinst2 = fmrx(fpinst2);
                         fpexc &= ~VFPEXC_EX;
                 }
 
                 __asm __volatile(
-                        "stc p11, c0, [%0], #128\n"     /* d0-d15 */
-                        "cmp %1, #0\n"                  /* -D16 or -D32? */
-                        STCLNE "p11, c0, [%0], #128\n"  /* d16-d31 */
-                        "addeq %0, %0, #128\n"          /* skip missing regs */
-                        : : "r" (vfpsave), "r" (is_d32) : "cc");
+                    " .fpu vfpv2\n"
+                    " .fpu vfpv3\n"
+                    " vstmia %0!, {d0-d15}\n"           /* d0-d15 */
+                    " cmp %1, #0\n"                     /* -D16 or -D32? */
+                    " vstmiane %0!, {d16-d31}\n"        /* d16-d31 */
+                    " addeq %0, %0, #128\n"             /* skip missing regs */
+                    : "+&r" (vfpsave) : "r" (is_d32) : "cc"
+                    );
 
                 if (disable_vfp)
-                        fmxr(VFPEXC , fpexc & ~VFPEXC_EN);
+                        fmxr(fpexc , fpexc & ~VFPEXC_EN);
         }
 }
 
@@ -281,9 +293,9 @@ vfp_discard(struct thread *td)
         if (PCPU_GET(fpcurthread) == td)
                 PCPU_SET(fpcurthread, NULL);
 
-        tmp = fmrx(VFPEXC);
+        tmp = fmrx(fpexc);
         if (tmp & VFPEXC_EN)
-                fmxr(VFPEXC, tmp & ~VFPEXC_EN);
+                fmxr(fpexc, tmp & ~VFPEXC_EN);
 }
 
 #endif
-- 
2.45.0
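
Note: the .fpu-directive technique that the new fmxr()/fmrx() macros rely on can be exercised outside the kernel with a small standalone program such as the sketch below. It is illustrative only and not part of the patch; it assumes an ARMv7 target with a VFP unit and a clang or gcc toolchain whose assembler honors .fpu inside inline asm, and it reads only fpscr, since fpexc, fpsid, and the other registers touched above are accessible only from privileged (kernel) mode. The file name vfpscr.c is hypothetical.

/* vfpscr.c - standalone sketch of the .fpu vfpv2/.fpu vfpv3 inline-asm trick */
#include <stdio.h>

static inline unsigned int
read_fpscr(void)
{
        unsigned int val;

        /*
         * Both directives are emitted because of the clang 3.5 quirk the
         * patch comment describes: .fpu vfpv3 alone does not also enable
         * the vfpv2 features, so vmrs/vmsr may fail to assemble.
         */
        __asm __volatile(
            " .fpu vfpv2\n"
            " .fpu vfpv3\n"
            " vmrs %0, fpscr\n"
            : "=r" (val));
        return (val);
}

int
main(void)
{
        printf("fpscr = 0x%08x\n", read_fpscr());
        return (0);
}

Building it with something like "cc -O2 -o vfpscr vfpscr.c" on an armv6/armv7 FreeBSD or Linux system assembles the same directives the kernel macros emit, even when the compiler itself was not invoked with VFP enabled.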