]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/powerpc/powernv/platform_powernv.c
zfs: merge openzfs/zfs@688514e47
[FreeBSD/FreeBSD.git] / sys / powerpc / powernv / platform_powernv.c
1 /*-
2  * Copyright (c) 2015 Nathan Whitehorn
3  * Copyright (c) 2017-2018 Semihalf
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/bus.h>
32 #include <sys/pcpu.h>
33 #include <sys/proc.h>
34 #include <sys/smp.h>
35 #include <vm/vm.h>
36 #include <vm/pmap.h>
37
38 #include <machine/bus.h>
39 #include <machine/cpu.h>
40 #include <machine/hid.h>
41 #include <machine/platformvar.h>
42 #include <machine/pmap.h>
43 #include <machine/rtas.h>
44 #include <machine/smp.h>
45 #include <machine/spr.h>
46 #include <machine/trap.h>
47
48 #include <dev/ofw/openfirm.h>
49 #include <dev/ofw/ofw_bus.h>
50 #include <dev/ofw/ofw_bus_subr.h>
51 #include <machine/ofw_machdep.h>
52 #include <powerpc/aim/mmu_oea64.h>
53
54 #include "platform_if.h"
55 #include "opal.h"
56
57 #ifdef SMP
58 extern void *ap_pcpu;
59 #endif
60
61 void (*powernv_smp_ap_extra_init)(void);
62
63 static int powernv_probe(platform_t);
64 static int powernv_attach(platform_t);
65 void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz,
66     struct mem_region *avail, int *availsz);
67 static void powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz);
68 static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref);
69 static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref);
70 static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref);
71 static int powernv_smp_get_bsp(platform_t, struct cpuref *cpuref);
72 static void powernv_smp_ap_init(platform_t);
73 #ifdef SMP
74 static int powernv_smp_start_cpu(platform_t, struct pcpu *cpu);
75 static void powernv_smp_probe_threads(platform_t);
76 static struct cpu_group *powernv_smp_topo(platform_t plat);
77 #endif
78 static void powernv_reset(platform_t);
79 static void powernv_cpu_idle(sbintime_t sbt);
80 static int powernv_cpuref_init(void);
81 static int powernv_node_numa_domain(platform_t platform, phandle_t node);
82
83 static platform_method_t powernv_methods[] = {
84         PLATFORMMETHOD(platform_probe,          powernv_probe),
85         PLATFORMMETHOD(platform_attach,         powernv_attach),
86         PLATFORMMETHOD(platform_mem_regions,    powernv_mem_regions),
87         PLATFORMMETHOD(platform_numa_mem_regions,       powernv_numa_mem_regions),
88         PLATFORMMETHOD(platform_timebase_freq,  powernv_timebase_freq),
89
90         PLATFORMMETHOD(platform_smp_ap_init,    powernv_smp_ap_init),
91         PLATFORMMETHOD(platform_smp_first_cpu,  powernv_smp_first_cpu),
92         PLATFORMMETHOD(platform_smp_next_cpu,   powernv_smp_next_cpu),
93         PLATFORMMETHOD(platform_smp_get_bsp,    powernv_smp_get_bsp),
94 #ifdef SMP
95         PLATFORMMETHOD(platform_smp_start_cpu,  powernv_smp_start_cpu),
96         PLATFORMMETHOD(platform_smp_probe_threads,      powernv_smp_probe_threads),
97         PLATFORMMETHOD(platform_smp_topo,       powernv_smp_topo),
98 #endif
99         PLATFORMMETHOD(platform_node_numa_domain,       powernv_node_numa_domain),
100
101         PLATFORMMETHOD(platform_reset,          powernv_reset),
102         { 0, 0 }
103 };
104
105 static platform_def_t powernv_platform = {
106         "powernv",
107         powernv_methods,
108         0
109 };
110
111 static struct cpuref platform_cpuref[MAXCPU];
112 static int platform_cpuref_cnt;
113 static int platform_cpuref_valid;
114 static int platform_associativity;
115
116 PLATFORM_DEF(powernv_platform);
117
118 static uint64_t powernv_boot_pir;
119
120 static int
121 powernv_probe(platform_t plat)
122 {
123         if (opal_check() == 0)
124                 return (BUS_PROBE_SPECIFIC);
125
126         return (ENXIO);
127 }
128
129 static int
130 powernv_attach(platform_t plat)
131 {
132         uint32_t nptlp, shift = 0, slb_encoding = 0;
133         int32_t lp_size, lp_encoding;
134         char buf[255];
135         pcell_t refpoints[3];
136         pcell_t prop;
137         phandle_t cpu;
138         phandle_t opal;
139         int res, len, idx;
140         register_t msr;
141         bool has_lp;
142
143         /* Ping OPAL again just to make sure */
144         opal_check();
145
146 #if BYTE_ORDER == LITTLE_ENDIAN
147         opal_call(OPAL_REINIT_CPUS, 2 /* Little endian */);
148 #else
149         opal_call(OPAL_REINIT_CPUS, 1 /* Big endian */);
150 #endif
151         opal = OF_finddevice("/ibm,opal");
152
153         platform_associativity = 4; /* Skiboot default. */
154         if (OF_getencprop(opal, "ibm,associativity-reference-points", refpoints,
155             sizeof(refpoints)) > 0) {
156                 platform_associativity = refpoints[0];
157         }
158
159        if (cpu_idle_hook == NULL)
160                 cpu_idle_hook = powernv_cpu_idle;
161
162         powernv_boot_pir = mfspr(SPR_PIR);
163
164         /* LPID must not be altered when PSL_DR or PSL_IR is set */
165         msr = mfmsr();
166         mtmsr(msr & ~(PSL_DR | PSL_IR));
167
168         /* Direct interrupts to SRR instead of HSRR and reset LPCR otherwise */
169         mtspr(SPR_LPID, 0);
170         isync();
171
172         if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)
173                 lpcr |= LPCR_HVICE;
174
175 #if BYTE_ORDER == LITTLE_ENDIAN
176         lpcr |= LPCR_ILE;
177 #endif
178
179         mtspr(SPR_LPCR, lpcr);
180         isync();
181
182         mtmsr(msr);
183
184         powernv_cpuref_init();
185
186         /* Set SLB count from device tree */
187         cpu = OF_peer(0);
188         cpu = OF_child(cpu);
189         while (cpu != 0) {
190                 res = OF_getprop(cpu, "name", buf, sizeof(buf));
191                 if (res > 0 && strcmp(buf, "cpus") == 0)
192                         break;
193                 cpu = OF_peer(cpu);
194         }
195         if (cpu == 0)
196                 goto out;
197
198         cpu = OF_child(cpu);
199         while (cpu != 0) {
200                 res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
201                 if (res > 0 && strcmp(buf, "cpu") == 0)
202                         break;
203                 cpu = OF_peer(cpu);
204         }
205         if (cpu == 0)
206                 goto out;
207
208         res = OF_getencprop(cpu, "ibm,slb-size", &prop, sizeof(prop));
209         if (res > 0)
210                 n_slbs = prop;
211
212         /*
213          * Scan the large page size property for PAPR compatible machines.
214          * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'
215          * for the encoding of the property.
216          */
217
218         len = OF_getproplen(cpu, "ibm,segment-page-sizes");
219         if (len > 0) {
220                 /*
221                  * We have to use a variable length array on the stack
222                  * since we have very limited stack space.
223                  */
224                 pcell_t arr[len/sizeof(cell_t)];
225                 res = OF_getencprop(cpu, "ibm,segment-page-sizes", arr,
226                     sizeof(arr));
227                 len /= 4;
228                 idx = 0;
229                 has_lp = false;
230                 while (len > 0) {
231                         shift = arr[idx];
232                         slb_encoding = arr[idx + 1];
233                         nptlp = arr[idx + 2];
234                         idx += 3;
235                         len -= 3;
236                         while (len > 0 && nptlp) {
237                                 lp_size = arr[idx];
238                                 lp_encoding = arr[idx+1];
239                                 if (slb_encoding == SLBV_L && lp_encoding == 0)
240                                         has_lp = true;
241
242                                 if (slb_encoding == SLB_PGSZ_4K_4K &&
243                                     lp_encoding == LP_4K_16M)
244                                         moea64_has_lp_4k_16m = true;
245
246                                 idx += 2;
247                                 len -= 2;
248                                 nptlp--;
249                         }
250                         if (has_lp && moea64_has_lp_4k_16m)
251                                 break;
252                 }
253
254                 if (!has_lp)
255                         panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "
256                             "not supported by this system.");
257
258                 moea64_large_page_shift = shift;
259                 moea64_large_page_size = 1ULL << lp_size;
260         }
261
262 out:
263         return (0);
264 }
265
266 void
267 powernv_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
268     struct mem_region *avail, int *availsz)
269 {
270
271         ofw_mem_regions(phys, physsz, avail, availsz);
272 }
273
274 static void
275 powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz)
276 {
277
278         ofw_numa_mem_regions(phys, physsz);
279 }
280
281 static u_long
282 powernv_timebase_freq(platform_t plat, struct cpuref *cpuref)
283 {
284         char buf[8];
285         phandle_t cpu, dev, root;
286         int res;
287         int32_t ticks = -1;
288
289         root = OF_peer(0);
290         dev = OF_child(root);
291         while (dev != 0) {
292                 res = OF_getprop(dev, "name", buf, sizeof(buf));
293                 if (res > 0 && strcmp(buf, "cpus") == 0)
294                         break;
295                 dev = OF_peer(dev);
296         }
297
298         for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
299                 res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
300                 if (res > 0 && strcmp(buf, "cpu") == 0)
301                         break;
302         }
303         if (cpu == 0)
304                 return (512000000);
305
306         OF_getencprop(cpu, "timebase-frequency", &ticks, sizeof(ticks));
307
308         if (ticks <= 0)
309                 panic("Unable to determine timebase frequency!");
310
311         return (ticks);
312
313 }
314
315 static int
316 powernv_cpuref_init(void)
317 {
318         phandle_t cpu, dev;
319         char buf[32];
320         int a, res, tmp_cpuref_cnt;
321         static struct cpuref tmp_cpuref[MAXCPU];
322         cell_t interrupt_servers[32];
323         uint64_t bsp;
324
325         if (platform_cpuref_valid)
326                 return (0);
327
328         dev = OF_peer(0);
329         dev = OF_child(dev);
330         while (dev != 0) {
331                 res = OF_getprop(dev, "name", buf, sizeof(buf));
332                 if (res > 0 && strcmp(buf, "cpus") == 0)
333                         break;
334                 dev = OF_peer(dev);
335         }
336
337         bsp = 0;
338         tmp_cpuref_cnt = 0;
339         for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
340                 res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
341                 if (res > 0 && strcmp(buf, "cpu") == 0) {
342                         if (!ofw_bus_node_status_okay(cpu))
343                                 continue;
344                         res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
345                         if (res > 0) {
346                                 OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
347                                     interrupt_servers, res);
348
349                                 for (a = 0; a < res/sizeof(cell_t); a++) {
350                                         tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
351                                         tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
352                                         tmp_cpuref[tmp_cpuref_cnt].cr_domain =
353                                             powernv_node_numa_domain(NULL, cpu);
354                                         if (interrupt_servers[a] == (uint32_t)powernv_boot_pir)
355                                                 bsp = tmp_cpuref_cnt;
356
357                                         tmp_cpuref_cnt++;
358                                 }
359                         }
360                 }
361         }
362
363         /* Map IDs, so BSP has CPUID 0 regardless of hwref */
364         for (a = bsp; a < tmp_cpuref_cnt; a++) {
365                 platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
366                 platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
367                 platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
368                 platform_cpuref_cnt++;
369         }
370         for (a = 0; a < bsp; a++) {
371                 platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
372                 platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
373                 platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
374                 platform_cpuref_cnt++;
375         }
376
377         platform_cpuref_valid = 1;
378
379         return (0);
380 }
381
382 static int
383 powernv_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
384 {
385         if (platform_cpuref_valid == 0)
386                 return (EINVAL);
387
388         cpuref->cr_cpuid = 0;
389         cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
390         cpuref->cr_domain = platform_cpuref[0].cr_domain;
391
392         return (0);
393 }
394
395 static int
396 powernv_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
397 {
398         int id;
399
400         if (platform_cpuref_valid == 0)
401                 return (EINVAL);
402
403         id = cpuref->cr_cpuid + 1;
404         if (id >= platform_cpuref_cnt)
405                 return (ENOENT);
406
407         cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
408         cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
409         cpuref->cr_domain = platform_cpuref[id].cr_domain;
410
411         return (0);
412 }
413
414 static int
415 powernv_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
416 {
417
418         cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
419         cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
420         cpuref->cr_domain = platform_cpuref[0].cr_domain;
421         return (0);
422 }
423
424 #ifdef SMP
425 static int
426 powernv_smp_start_cpu(platform_t plat, struct pcpu *pc)
427 {
428         int result;
429
430         ap_pcpu = pc;
431         powerpc_sync();
432
433         result = opal_call(OPAL_START_CPU, pc->pc_hwref, EXC_RST);
434         if (result != OPAL_SUCCESS) {
435                 printf("OPAL error (%d): unable to start AP %d\n",
436                     result, (int)pc->pc_hwref);
437                 return (ENXIO);
438         }
439
440         return (0);
441 }
442
443 static void
444 powernv_smp_probe_threads(platform_t plat)
445 {
446         char buf[8];
447         phandle_t cpu, dev, root;
448         int res, nthreads;
449
450         root = OF_peer(0);
451
452         dev = OF_child(root);
453         while (dev != 0) {
454                 res = OF_getprop(dev, "name", buf, sizeof(buf));
455                 if (res > 0 && strcmp(buf, "cpus") == 0)
456                         break;
457                 dev = OF_peer(dev);
458         }
459
460         nthreads = 1;
461         for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
462                 res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
463                 if (res <= 0 || strcmp(buf, "cpu") != 0)
464                         continue;
465
466                 res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
467
468                 if (res >= 0)
469                         nthreads = res / sizeof(cell_t);
470                 else
471                         nthreads = 1;
472                 break;
473         }
474
475         smp_threads_per_core = nthreads;
476         if (mp_ncpus % nthreads == 0)
477                 mp_ncores = mp_ncpus / nthreads;
478 }
479
480 static struct cpu_group *
481 cpu_group_init(struct cpu_group *group, struct cpu_group *parent,
482     const cpuset_t *cpus, int children, int level, int flags)
483 {
484         struct cpu_group *child;
485
486         child = children != 0 ? smp_topo_alloc(children) : NULL;
487
488         group->cg_parent = parent;
489         group->cg_child = child;
490         CPU_COPY(cpus, &group->cg_mask);
491         group->cg_count = CPU_COUNT(cpus);
492         group->cg_children = children;
493         group->cg_level = level;
494         group->cg_flags = flags;
495
496         return (child);
497 }
498
499 static struct cpu_group *
500 powernv_smp_topo(platform_t plat)
501 {
502         struct cpu_group *core, *dom, *root;
503         cpuset_t corecpus, domcpus;
504         int cpuid, i, j, k, ncores;
505
506         if (mp_ncpus % smp_threads_per_core != 0) {
507                 printf("%s: irregular SMP topology (%d threads, %d per core)\n",
508                     __func__, mp_ncpus, smp_threads_per_core);
509                 return (smp_topo_none());
510         }
511
512         root = smp_topo_alloc(1);
513         dom = cpu_group_init(root, NULL, &all_cpus, vm_ndomains, CG_SHARE_NONE,
514             0);
515
516         /*
517          * Redundant layers will be collapsed by the caller so we don't need a
518          * special case for a single domain.
519          */
520         for (i = 0; i < vm_ndomains; i++, dom++) {
521                 CPU_COPY(&cpuset_domain[i], &domcpus);
522                 ncores = CPU_COUNT(&domcpus) / smp_threads_per_core;
523                 KASSERT(CPU_COUNT(&domcpus) % smp_threads_per_core == 0,
524                     ("%s: domain %d core count not divisible by thread count",
525                     __func__, i));
526
527                 core = cpu_group_init(dom, root, &domcpus, ncores, CG_SHARE_L3,
528                     0);
529                 for (j = 0; j < ncores; j++, core++) {
530                         /*
531                          * Assume that consecutive CPU IDs correspond to sibling
532                          * threads.
533                          */
534                         CPU_ZERO(&corecpus);
535                         for (k = 0; k < smp_threads_per_core; k++) {
536                                 cpuid = CPU_FFS(&domcpus) - 1;
537                                 CPU_CLR(cpuid, &domcpus);
538                                 CPU_SET(cpuid, &corecpus);
539                         }
540                         (void)cpu_group_init(core, dom, &corecpus, 0,
541                             CG_SHARE_L1, CG_FLAG_SMT);
542                 }
543         }
544
545         return (root);
546 }
547
548 #endif
549
550 static void
551 powernv_reset(platform_t platform)
552 {
553
554         opal_call(OPAL_CEC_REBOOT);
555 }
556
557 static void
558 powernv_smp_ap_init(platform_t platform)
559 {
560
561         if (powernv_smp_ap_extra_init != NULL)
562                 powernv_smp_ap_extra_init();
563 }
564
565 static void
566 powernv_cpu_idle(sbintime_t sbt)
567 {
568 }
569
570 static int
571 powernv_node_numa_domain(platform_t platform, phandle_t node)
572 {
573         /* XXX: Is locking necessary in here? */
574         static int numa_domains[MAXMEMDOM];
575         static int numa_max_domain;
576         cell_t associativity[5];
577         int i, res;
578
579 #ifndef NUMA
580         return (0);
581 #endif
582         i = 0;
583         TUNABLE_INT_FETCH("vm.numa.disabled", &i);
584         if (i)
585                 return (0);
586
587         res = OF_getencprop(node, "ibm,associativity",
588                 associativity, sizeof(associativity));
589
590         /*
591          * If this node doesn't have associativity, or if there are not
592          * enough elements in it, check its parent.
593          */
594         if (res < (int)(sizeof(cell_t) * (platform_associativity + 1))) {
595                 node = OF_parent(node);
596                 /* If already at the root, use default domain. */
597                 if (node == 0)
598                         return (0);
599                 return (powernv_node_numa_domain(platform, node));
600         }
601
602         for (i = 0; i < numa_max_domain; i++) {
603                 if (numa_domains[i] == associativity[platform_associativity])
604                         return (i);
605         }
606         if (i < MAXMEMDOM)
607                 numa_domains[numa_max_domain++] =
608                     associativity[platform_associativity];
609         else
610                 i = 0;
611
612         return (i);
613 }
614
615 /* Set up the Nest MMU on POWER9 relatively early, but after pmap is setup. */
616 static void
617 powernv_setup_nmmu(void *unused)
618 {
619         if (opal_check() != 0)
620                 return;
621         opal_call(OPAL_NMMU_SET_PTCR, -1, mfspr(SPR_PTCR));
622 }
623
624 SYSINIT(powernv_setup_nmmu, SI_SUB_CPU, SI_ORDER_ANY, powernv_setup_nmmu, NULL);