/*-
 * Copyright (c) 2021 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/membarrier.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>

#include <vm/vm_param.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#define MEMBARRIER_SUPPORTED_CMDS       (                       \
    MEMBARRIER_CMD_GLOBAL |                                     \
    MEMBARRIER_CMD_GLOBAL_EXPEDITED |                           \
    MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED |                  \
    MEMBARRIER_CMD_PRIVATE_EXPEDITED |                          \
    MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED |                 \
    MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE |                \
    MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
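
/*
 * Note: MEMBARRIER_CMD_QUERY is not part of this mask.  It has the
 * value zero (as in the Linux ABI this interface mirrors), so it passes
 * the "cmd & ~MEMBARRIER_SUPPORTED_CMDS" validation in kern_membarrier()
 * and is handled there as a special case that simply returns the mask.
 */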

/*
 * IPI actions: a full memory fence, optionally followed by a core
 * serialization for the SYNC_CORE flavors of the commands.
 */
static void
membarrier_action_seqcst(void *arg __unused)
{
        atomic_thread_fence_seq_cst();
}

static void
membarrier_action_seqcst_sync_core(void *arg __unused)
{
        atomic_thread_fence_seq_cst();
        cpu_sync_core();
}

/*
 * Fence, run the requested action on each CPU in *csp via an IPI
 * rendezvous, then fence again so that the issuing thread's accesses
 * are ordered with respect to the remote fences.
 */
static void
do_membarrier_ipi(cpuset_t *csp, void (*func)(void *))
{
        atomic_thread_fence_seq_cst();
        smp_rendezvous_cpus(*csp, smp_no_rendezvous_barrier, func,
            smp_no_rendezvous_barrier, NULL);
        atomic_thread_fence_seq_cst();
}

static void
check_cpu_switched(int c, cpuset_t *csp, uint64_t *swt, bool init)
{
        struct pcpu *pc;
        uint64_t sw;

        if (CPU_ISSET(c, csp))
                return;

        pc = cpuid_to_pcpu[c];
        if (pc->pc_curthread == pc->pc_idlethread) {
                CPU_SET(c, csp);
                return;
        }

        /*
         * Sync with the context switch to ensure that the override of
         * pc_curthread with a non-idle thread pointer is visible before
         * pc_switchtime is read.
         */
        atomic_thread_fence_acq();

        sw = pc->pc_switchtime;
        if (init)
                swt[c] = sw;
        else if (sw != swt[c])
                CPU_SET(c, csp);
}
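
/*
 * Schematic of the ordering check_cpu_switched() relies on.  This is a
 * simplified sketch for illustration, not the actual scheduler code
 * (the real updates happen in mi_switch()/cpu_switch()):
 *
 *      switching CPU                           observer (above)
 *      -------------                           ----------------
 *      pc_switchtime = new value;              read pc_curthread;
 *      (context switch, full barrier)          atomic_thread_fence_acq();
 *      pc_curthread = new thread;              read pc_switchtime;
 *
 * If the observer sees a new, non-idle pc_curthread, the acquire fence
 * ensures it also observes a pc_switchtime at least as recent as the
 * one stored by that switch, so a changed switch time proves the CPU
 * went through a context switch (and hence a full barrier) since the
 * first pass.
 */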

/*
 * XXXKIB: We execute the requested action (seq_cst and possibly
 * sync_core) on the current CPU as well.  There is no guarantee that
 * the current thread executes anything with the full fence semantics
 * during syscall execution.  Similarly, cpu_sync_core() semantics
 * might not be provided by the syscall return path; e.g. on amd64 we
 * typically return without IRET.
 */
int
kern_membarrier(struct thread *td, int cmd, unsigned flags, int cpu_id)
{
        struct proc *p, *p1;
        struct thread *td1;
        cpuset_t cs;
        uint64_t *swt;
        int c, error;
        bool first;

        if (flags != 0 || (cmd & ~MEMBARRIER_SUPPORTED_CMDS) != 0)
                return (EINVAL);

        if (cmd == MEMBARRIER_CMD_QUERY) {
                td->td_retval[0] = MEMBARRIER_SUPPORTED_CMDS;
                return (0);
        }

        p = td->td_proc;
        error = 0;

        switch (cmd) {
        case MEMBARRIER_CMD_GLOBAL:
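                /*
                 * The non-expedited command: do not IPI other CPUs.
                 * Instead, wait until every CPU is either idle or has
                 * gone through at least one context switch (observed
                 * as a change of pc_switchtime) since the first pass
                 * below; a context switch implies a full barrier on
                 * that CPU.
                 */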
                swt = malloc((mp_maxid + 1) * sizeof(*swt), M_TEMP, M_WAITOK);
                CPU_ZERO(&cs);
                sched_pin();
                CPU_SET(PCPU_GET(cpuid), &cs);
                for (first = true; error == 0; first = false) {
                        CPU_FOREACH(c)
                                check_cpu_switched(c, &cs, swt, first);
                        if (CPU_CMP(&cs, &all_cpus) == 0)
                                break;
                        error = pause_sig("mmbr", 1);
                        if (error == EWOULDBLOCK)
                                error = 0;
                }
                sched_unpin();
                free(swt, M_TEMP);
                atomic_thread_fence_seq_cst();
                break;

        case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
                if ((td->td_proc->p_flag2 & P2_MEMBAR_GLOBE) == 0) {
                        error = EPERM;
                } else {
                        CPU_ZERO(&cs);
                        CPU_FOREACH(c) {
                                td1 = cpuid_to_pcpu[c]->pc_curthread;
                                p1 = td1->td_proc;
                                if (p1 != NULL &&
                                    (p1->p_flag2 & P2_MEMBAR_GLOBE) != 0)
                                        CPU_SET(c, &cs);
                        }
                        do_membarrier_ipi(&cs, membarrier_action_seqcst);
                }
                break;

        case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
                if ((p->p_flag2 & P2_MEMBAR_GLOBE) == 0) {
                        PROC_LOCK(p);
                        p->p_flag2 |= P2_MEMBAR_GLOBE;
                        PROC_UNLOCK(p);
                }
                break;

        case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
                if ((td->td_proc->p_flag2 & P2_MEMBAR_PRIVE) == 0) {
                        error = EPERM;
                } else {
                        pmap_active_cpus(vmspace_pmap(p->p_vmspace), &cs);
                        do_membarrier_ipi(&cs, membarrier_action_seqcst);
                }
                break;

        case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
                if ((p->p_flag2 & P2_MEMBAR_PRIVE) == 0) {
                        PROC_LOCK(p);
                        p->p_flag2 |= P2_MEMBAR_PRIVE;
                        PROC_UNLOCK(p);
                }
                break;

        case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
                if ((td->td_proc->p_flag2 & P2_MEMBAR_PRIVE_SYNCORE) == 0) {
                        error = EPERM;
                } else {
                        /*
                         * Calculating the IPI multicast mask from the
                         * pmap active mask means that we do not call
                         * cpu_sync_core() on CPUs that are missing
                         * from the pmap active mask but could be
                         * switched from or to in the meantime.  This
                         * is fine, at least on amd64, because threads
                         * always use the slow (IRETQ) path to return
                         * from a syscall after a context switch.
                         */
                        pmap_active_cpus(vmspace_pmap(p->p_vmspace), &cs);

                        do_membarrier_ipi(&cs,
                            membarrier_action_seqcst_sync_core);
                }
                break;

        case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
                if ((p->p_flag2 & P2_MEMBAR_PRIVE_SYNCORE) == 0) {
                        PROC_LOCK(p);
                        p->p_flag2 |= P2_MEMBAR_PRIVE_SYNCORE;
                        PROC_UNLOCK(p);
                }
                break;

        default:
                error = EINVAL;
                break;
        }

        return (error);
}

int
sys_membarrier(struct thread *td, struct membarrier_args *uap)
{
        return (kern_membarrier(td, uap->cmd, uap->flags, uap->cpu_id));
}
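
/*
 * Illustrative userland usage (a sketch, not part of this file's build).
 * It assumes a membarrier(2) wrapper with the same (cmd, flags, cpu_id)
 * argument list as kern_membarrier() above; if no libc wrapper is
 * available, syscall(SYS_membarrier, ...) can be substituted.  A process
 * first queries support and registers, then issues barriers as needed:
 *
 *      #include <sys/membarrier.h>
 *      #include <err.h>
 *
 *      static void
 *      membar_setup(void)
 *      {
 *              int cmds;
 *
 *              cmds = membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
 *              if (cmds < 0 ||
 *                  (cmds & MEMBARRIER_CMD_PRIVATE_EXPEDITED) == 0)
 *                      err(1, "membarrier not supported");
 *              if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED,
 *                  0, 0) != 0)
 *                      err(1, "membarrier register");
 *      }
 *
 *      static void
 *      membar_heavy(void)
 *      {
 *              if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0) != 0)
 *                      err(1, "membarrier");
 *      }
 *
 * membar_heavy() fences every CPU currently running a thread of the
 * calling process, which lets lightweight readers drop their own
 * explicit barriers on the fast path.
 */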