From 10f83cdf0a459ed959605be219efd36ff9183a1e Mon Sep 17 00:00:00 2001 From: alc Date: Thu, 8 Jul 2010 03:35:00 +0000 Subject: [PATCH] Correctly maintain the per-cpu field "curpmap" on amd64 just like we do on i386. The consequences of not doing so on amd64 became apparent with the introduction of the COUNT_IPIS and COUNT_XINVLTLB_HITS options. Specifically, single-threaded applications were generating unnecessary IPIs to shoot-down the TLB on other processors. However, this is clearly nonsensical because a single-threaded application is only running on the current processor. The reason that this happens is that pmap_activate() is unable to properly update the old pmap's field "pm_active" without the correct "curpmap". So, in effect, stale bits in "pm_active" were leading pmap_protect(), pmap_remove(), pmap_remove_pages(), etc. to flush the TLB contents on some arbitrary processor that wasn't even running the same application. Reviewed by: kib MFC after: 3 weeks --- sys/amd64/amd64/cpu_switch.S | 22 ++++++++++------------ sys/amd64/amd64/pmap.c | 2 ++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index c71bcd020cc..cfb4204f956 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -69,16 +69,13 @@ * %rsi = newtd */ ENTRY(cpu_throw) + movl PCPU(CPUID),%eax testq %rdi,%rdi - jnz 1f - movq PCPU(IDLETHREAD),%rdi -1: - movq TD_PCB(%rdi),%r8 /* Old pcb */ - movl PCPU(CPUID), %eax + jz 1f /* release bit from old pm_active */ - movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */ - movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */ - LK btrl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ + movq PCPU(CURPMAP),%rdx + LK btrl %eax,PM_ACTIVE(%rdx) /* clear old */ +1: movq TD_PCB(%rsi),%r8 /* newtd->td_proc */ movq PCB_CR3(%r8),%rdx movq %rdx,%cr3 /* new address space */ @@ -140,15 +137,16 @@ swinact: movq %rcx,%cr3 /* new address space */ movl PCPU(CPUID), %eax /* Release bit from old pmap->pm_active */ - movq TD_PROC(%rdi), %rcx /* oldproc */ - movq P_VMSPACE(%rcx), %rcx - LK btrl %eax, VM_PMAP+PM_ACTIVE(%rcx) /* clear old */ + movq PCPU(CURPMAP),%rcx + LK btrl %eax,PM_ACTIVE(%rcx) /* clear old */ SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */ swact: /* Set bit in new pmap->pm_active */ movq TD_PROC(%rsi),%rdx /* newproc */ movq P_VMSPACE(%rdx), %rdx - LK btsl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ + addq $VM_PMAP,%rdx + LK btsl %eax,PM_ACTIVE(%rdx) /* set new */ + movq %rdx,PCPU(CURPMAP) sw1: #if defined(SCHED_ULE) && defined(SMP) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index a8a6b01a99c..8836157f622 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1574,6 +1574,7 @@ pmap_pinit0(pmap_t pmap) pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); pmap->pm_root = NULL; pmap->pm_active = 0; + PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } @@ -5008,6 +5009,7 @@ if (oldpmap) /* XXX FIXME */ cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4); td->td_pcb->pcb_cr3 = cr3; load_cr3(cr3); + PCPU_SET(curpmap, pmap); critical_exit(); } -- 2.45.2