From d52cc80dafc708f3148df294c03058c97c3f5418 Mon Sep 17 00:00:00 2001
From: kib
Date: Fri, 20 Jan 2017 19:08:44 +0000
Subject: [PATCH] Use SFENCE for ordering CLFLUSHOPT.

The SDM states that CLFLUSHOPT instructions can be ordered with respect
to other writes by SFENCE; the heavier MFENCE is not required.

Reviewed by:	alc
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
---
 sys/amd64/amd64/pmap.c      | 14 +++++++++-----
 sys/amd64/include/cpufunc.h |  7 +++++++
 sys/i386/i386/pmap.c        | 16 ++++++++++------
 sys/i386/include/cpufunc.h  |  7 +++++++
 4 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 817cdbe97c9..5d09929bd84 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1862,16 +1862,16 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
 			return;
 
 		/*
-		 * Otherwise, do per-cache line flush.  Use the mfence
+		 * Otherwise, do per-cache line flush.  Use the sfence
 		 * instruction to insure that previous stores are
 		 * included in the write-back.  The processor
 		 * propagates flush to other processors in the cache
 		 * coherence domain.
 		 */
-		mfence();
+		sfence();
 		for (; sva < eva; sva += cpu_clflush_line_size)
 			clflushopt(sva);
-		mfence();
+		sfence();
 	} else if ((cpu_feature & CPUID_CLFSH) != 0 &&
 	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {
 		if (pmap_kextract(sva) == lapic_paddr)
@@ -1915,7 +1915,9 @@ pmap_invalidate_cache_pages(vm_page_t *pages, int count)
 	    ((cpu_feature & CPUID_CLFSH) == 0 && !useclflushopt))
 		pmap_invalidate_cache();
 	else {
-		if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL)
+		if (useclflushopt)
+			sfence();
+		else if (cpu_vendor_id != CPU_VENDOR_INTEL)
 			mfence();
 		for (i = 0; i < count; i++) {
 			daddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages[i]));
@@ -1927,7 +1929,9 @@ pmap_invalidate_cache_pages(vm_page_t *pages, int count)
 				clflush(daddr);
 			}
 		}
-		if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL)
+		if (useclflushopt)
+			sfence();
+		else if (cpu_vendor_id != CPU_VENDOR_INTEL)
 			mfence();
 	}
 }
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 4b7df46ce9c..5fa0d77f3d1 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -326,6 +326,13 @@ mfence(void)
 	__asm __volatile("mfence" : : : "memory");
 }
 
+static __inline void
+sfence(void)
+{
+
+	__asm __volatile("sfence" : : : "memory");
+}
+
 static __inline void
 ia32_pause(void)
 {
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 1dc61ad4002..763c711ad8b 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -1283,16 +1283,16 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
 			return;
 #endif
 		/*
-		 * Otherwise, do per-cache line flush.  Use the mfence
+		 * Otherwise, do per-cache line flush.  Use the sfence
 		 * instruction to insure that previous stores are
 		 * included in the write-back.  The processor
 		 * propagates flush to other processors in the cache
 		 * coherence domain.
 		 */
-		mfence();
+		sfence();
 		for (; sva < eva; sva += cpu_clflush_line_size)
 			clflushopt(sva);
-		mfence();
+		sfence();
 	} else if ((cpu_feature & CPUID_CLFSH) != 0 &&
 	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {
 #ifdef DEV_APIC
@@ -5300,12 +5300,14 @@ pmap_flush_page(vm_page_t m)
 		eva = sva + PAGE_SIZE;
 
 		/*
-		 * Use mfence despite the ordering implied by
+		 * Use mfence or sfence despite the ordering implied by
 		 * mtx_{un,}lock() because clflush on non-Intel CPUs
 		 * and clflushopt are not guaranteed to be ordered by
 		 * any other instruction.
 		 */
-		if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL)
+		if (useclflushopt)
+			sfence();
+		else if (cpu_vendor_id != CPU_VENDOR_INTEL)
 			mfence();
 		for (; sva < eva; sva += cpu_clflush_line_size) {
 			if (useclflushopt)
@@ -5313,7 +5315,9 @@ pmap_flush_page(vm_page_t m)
 			else
 				clflush(sva);
 		}
-		if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL)
+		if (useclflushopt)
+			sfence();
+		else if (cpu_vendor_id != CPU_VENDOR_INTEL)
 			mfence();
 		*cmap_pte2 = 0;
 		sched_unpin();
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h
index f43319476d3..23be5a3cdd0 100644
--- a/sys/i386/include/cpufunc.h
+++ b/sys/i386/include/cpufunc.h
@@ -158,6 +158,13 @@ mfence(void)
 	__asm __volatile("mfence" : : : "memory");
}
 
+static __inline void
+sfence(void)
+{
+
+	__asm __volatile("sfence" : : : "memory");
+}
+
 #ifdef _KERNEL
 
 #define	HAVE_INLINE_FFS
-- 
2.45.0
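
For readers applying the same discipline outside the kernel, the sketch
below is illustrative only and not part of the patch. It shows the
bracketing pattern the change establishes, written as a stand-alone C
helper using the GCC/Clang x86 intrinsics (requires -mclflushopt). The
name flush_range and the fixed 64-byte CACHE_LINE are assumptions made
for the example; the kernel instead derives the line size from CPUID
via cpu_clflush_line_size.

	#include <stddef.h>
	#include <stdint.h>
	#include <immintrin.h>	/* _mm_sfence(), _mm_clflushopt() */

	#define	CACHE_LINE	64	/* assumed; real code reads CPUID */

	/*
	 * Write back every cache line overlapping [buf, buf + len).
	 * CLFLUSHOPT is only ordered by SFENCE (or stronger), so the
	 * loop is bracketed: the first fence orders earlier stores into
	 * the lines before the flushes, and the second orders the
	 * flushes before any subsequent stores.
	 */
	static void
	flush_range(const void *buf, size_t len)
	{
		uintptr_t p = (uintptr_t)buf & ~(uintptr_t)(CACHE_LINE - 1);
		uintptr_t end = (uintptr_t)buf + len;

		_mm_sfence();
		for (; p < end; p += CACHE_LINE)
			_mm_clflushopt((void *)p);
		_mm_sfence();
	}

This mirrors the patch's reasoning: SFENCE is sufficient to order
CLFLUSHOPT against stores, so the full serializing cost of MFENCE is
only paid on CPUs where plain CLFLUSH needs it.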