]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/powerpc/powernv/platform_powernv.c
libedit: update to snapshot 2023-01-06
[FreeBSD/FreeBSD.git] / sys / powerpc / powernv / platform_powernv.c
1 /*-
2  * Copyright (c) 2015 Nathan Whitehorn
3  * Copyright (c) 2017-2018 Semihalf
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/pcpu.h>
36 #include <sys/proc.h>
37 #include <sys/smp.h>
38 #include <vm/vm.h>
39 #include <vm/pmap.h>
40
41 #include <machine/bus.h>
42 #include <machine/cpu.h>
43 #include <machine/hid.h>
44 #include <machine/platformvar.h>
45 #include <machine/pmap.h>
46 #include <machine/rtas.h>
47 #include <machine/smp.h>
48 #include <machine/spr.h>
49 #include <machine/trap.h>
50
51 #include <dev/ofw/openfirm.h>
52 #include <dev/ofw/ofw_bus.h>
53 #include <dev/ofw/ofw_bus_subr.h>
54 #include <machine/ofw_machdep.h>
55 #include <powerpc/aim/mmu_oea64.h>
56
57 #include "platform_if.h"
58 #include "opal.h"
59
60 #ifdef SMP
61 extern void *ap_pcpu;
62 #endif
63
64 void (*powernv_smp_ap_extra_init)(void);
65
66 static int powernv_probe(platform_t);
67 static int powernv_attach(platform_t);
68 void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz,
69     struct mem_region *avail, int *availsz);
70 static void powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz);
71 static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref);
72 static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref);
73 static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref);
74 static int powernv_smp_get_bsp(platform_t, struct cpuref *cpuref);
75 static void powernv_smp_ap_init(platform_t);
76 #ifdef SMP
77 static int powernv_smp_start_cpu(platform_t, struct pcpu *cpu);
78 static void powernv_smp_probe_threads(platform_t);
79 static struct cpu_group *powernv_smp_topo(platform_t plat);
80 #endif
81 static void powernv_reset(platform_t);
82 static void powernv_cpu_idle(sbintime_t sbt);
83 static int powernv_cpuref_init(void);
84 static int powernv_node_numa_domain(platform_t platform, phandle_t node);
85
86 static platform_method_t powernv_methods[] = {
87         PLATFORMMETHOD(platform_probe,          powernv_probe),
88         PLATFORMMETHOD(platform_attach,         powernv_attach),
89         PLATFORMMETHOD(platform_mem_regions,    powernv_mem_regions),
90         PLATFORMMETHOD(platform_numa_mem_regions,       powernv_numa_mem_regions),
91         PLATFORMMETHOD(platform_timebase_freq,  powernv_timebase_freq),
92
93         PLATFORMMETHOD(platform_smp_ap_init,    powernv_smp_ap_init),
94         PLATFORMMETHOD(platform_smp_first_cpu,  powernv_smp_first_cpu),
95         PLATFORMMETHOD(platform_smp_next_cpu,   powernv_smp_next_cpu),
96         PLATFORMMETHOD(platform_smp_get_bsp,    powernv_smp_get_bsp),
97 #ifdef SMP
98         PLATFORMMETHOD(platform_smp_start_cpu,  powernv_smp_start_cpu),
99         PLATFORMMETHOD(platform_smp_probe_threads,      powernv_smp_probe_threads),
100         PLATFORMMETHOD(platform_smp_topo,       powernv_smp_topo),
101 #endif
102         PLATFORMMETHOD(platform_node_numa_domain,       powernv_node_numa_domain),
103
104         PLATFORMMETHOD(platform_reset,          powernv_reset),
105         { 0, 0 }
106 };
107
108 static platform_def_t powernv_platform = {
109         "powernv",
110         powernv_methods,
111         0
112 };
113
114 static struct cpuref platform_cpuref[MAXCPU];
115 static int platform_cpuref_cnt;
116 static int platform_cpuref_valid;
117 static int platform_associativity;
118
119 PLATFORM_DEF(powernv_platform);
120
121 static uint64_t powernv_boot_pir;
122
123 static int
124 powernv_probe(platform_t plat)
125 {
126         if (opal_check() == 0)
127                 return (BUS_PROBE_SPECIFIC);
128
129         return (ENXIO);
130 }
131
132 static int
133 powernv_attach(platform_t plat)
134 {
135         uint32_t nptlp, shift = 0, slb_encoding = 0;
136         int32_t lp_size, lp_encoding;
137         char buf[255];
138         pcell_t refpoints[3];
139         pcell_t prop;
140         phandle_t cpu;
141         phandle_t opal;
142         int res, len, idx;
143         register_t msr;
144         bool has_lp;
145
146         /* Ping OPAL again just to make sure */
147         opal_check();
148
149 #if BYTE_ORDER == LITTLE_ENDIAN
150         opal_call(OPAL_REINIT_CPUS, 2 /* Little endian */);
151 #else
152         opal_call(OPAL_REINIT_CPUS, 1 /* Big endian */);
153 #endif
154         opal = OF_finddevice("/ibm,opal");
155
156         platform_associativity = 4; /* Skiboot default. */
157         if (OF_getencprop(opal, "ibm,associativity-reference-points", refpoints,
158             sizeof(refpoints)) > 0) {
159                 platform_associativity = refpoints[0];
160         }
161
162        if (cpu_idle_hook == NULL)
163                 cpu_idle_hook = powernv_cpu_idle;
164
165         powernv_boot_pir = mfspr(SPR_PIR);
166
167         /* LPID must not be altered when PSL_DR or PSL_IR is set */
168         msr = mfmsr();
169         mtmsr(msr & ~(PSL_DR | PSL_IR));
170
171         /* Direct interrupts to SRR instead of HSRR and reset LPCR otherwise */
172         mtspr(SPR_LPID, 0);
173         isync();
174
175         if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)
176                 lpcr |= LPCR_HVICE;
177
178 #if BYTE_ORDER == LITTLE_ENDIAN
179         lpcr |= LPCR_ILE;
180 #endif
181
182         mtspr(SPR_LPCR, lpcr);
183         isync();
184
185         mtmsr(msr);
186
187         powernv_cpuref_init();
188
189         /* Set SLB count from device tree */
190         cpu = OF_peer(0);
191         cpu = OF_child(cpu);
192         while (cpu != 0) {
193                 res = OF_getprop(cpu, "name", buf, sizeof(buf));
194                 if (res > 0 && strcmp(buf, "cpus") == 0)
195                         break;
196                 cpu = OF_peer(cpu);
197         }
198         if (cpu == 0)
199                 goto out;
200
201         cpu = OF_child(cpu);
202         while (cpu != 0) {
203                 res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
204                 if (res > 0 && strcmp(buf, "cpu") == 0)
205                         break;
206                 cpu = OF_peer(cpu);
207         }
208         if (cpu == 0)
209                 goto out;
210
211         res = OF_getencprop(cpu, "ibm,slb-size", &prop, sizeof(prop));
212         if (res > 0)
213                 n_slbs = prop;
214
215         /*
216          * Scan the large page size property for PAPR compatible machines.
217          * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'
218          * for the encoding of the property.
219          */
220
221         len = OF_getproplen(cpu, "ibm,segment-page-sizes");
222         if (len > 0) {
223                 /*
224                  * We have to use a variable length array on the stack
225                  * since we have very limited stack space.
226                  */
227                 pcell_t arr[len/sizeof(cell_t)];
228                 res = OF_getencprop(cpu, "ibm,segment-page-sizes", arr,
229                     sizeof(arr));
230                 len /= 4;
231                 idx = 0;
232                 has_lp = false;
233                 while (len > 0) {
234                         shift = arr[idx];
235                         slb_encoding = arr[idx + 1];
236                         nptlp = arr[idx + 2];
237                         idx += 3;
238                         len -= 3;
239                         while (len > 0 && nptlp) {
240                                 lp_size = arr[idx];
241                                 lp_encoding = arr[idx+1];
242                                 if (slb_encoding == SLBV_L && lp_encoding == 0)
243                                         has_lp = true;
244
245                                 if (slb_encoding == SLB_PGSZ_4K_4K &&
246                                     lp_encoding == LP_4K_16M)
247                                         moea64_has_lp_4k_16m = true;
248
249                                 idx += 2;
250                                 len -= 2;
251                                 nptlp--;
252                         }
253                         if (has_lp && moea64_has_lp_4k_16m)
254                                 break;
255                 }
256
257                 if (!has_lp)
258                         panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "
259                             "not supported by this system.");
260
261                 moea64_large_page_shift = shift;
262                 moea64_large_page_size = 1ULL << lp_size;
263         }
264
265 out:
266         return (0);
267 }
268
269 void
270 powernv_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
271     struct mem_region *avail, int *availsz)
272 {
273
274         ofw_mem_regions(phys, physsz, avail, availsz);
275 }
276
277 static void
278 powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz)
279 {
280
281         ofw_numa_mem_regions(phys, physsz);
282 }
283
284 static u_long
285 powernv_timebase_freq(platform_t plat, struct cpuref *cpuref)
286 {
287         char buf[8];
288         phandle_t cpu, dev, root;
289         int res;
290         int32_t ticks = -1;
291
292         root = OF_peer(0);
293         dev = OF_child(root);
294         while (dev != 0) {
295                 res = OF_getprop(dev, "name", buf, sizeof(buf));
296                 if (res > 0 && strcmp(buf, "cpus") == 0)
297                         break;
298                 dev = OF_peer(dev);
299         }
300
301         for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
302                 res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
303                 if (res > 0 && strcmp(buf, "cpu") == 0)
304                         break;
305         }
306         if (cpu == 0)
307                 return (512000000);
308
309         OF_getencprop(cpu, "timebase-frequency", &ticks, sizeof(ticks));
310
311         if (ticks <= 0)
312                 panic("Unable to determine timebase frequency!");
313
314         return (ticks);
315
316 }
317
318 static int
319 powernv_cpuref_init(void)
320 {
321         phandle_t cpu, dev;
322         char buf[32];
323         int a, res, tmp_cpuref_cnt;
324         static struct cpuref tmp_cpuref[MAXCPU];
325         cell_t interrupt_servers[32];
326         uint64_t bsp;
327
328         if (platform_cpuref_valid)
329                 return (0);
330
331         dev = OF_peer(0);
332         dev = OF_child(dev);
333         while (dev != 0) {
334                 res = OF_getprop(dev, "name", buf, sizeof(buf));
335                 if (res > 0 && strcmp(buf, "cpus") == 0)
336                         break;
337                 dev = OF_peer(dev);
338         }
339
340         bsp = 0;
341         tmp_cpuref_cnt = 0;
342         for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
343                 res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
344                 if (res > 0 && strcmp(buf, "cpu") == 0) {
345                         if (!ofw_bus_node_status_okay(cpu))
346                                 continue;
347                         res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
348                         if (res > 0) {
349                                 OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
350                                     interrupt_servers, res);
351
352                                 for (a = 0; a < res/sizeof(cell_t); a++) {
353                                         tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
354                                         tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
355                                         tmp_cpuref[tmp_cpuref_cnt].cr_domain =
356                                             powernv_node_numa_domain(NULL, cpu);
357                                         if (interrupt_servers[a] == (uint32_t)powernv_boot_pir)
358                                                 bsp = tmp_cpuref_cnt;
359
360                                         tmp_cpuref_cnt++;
361                                 }
362                         }
363                 }
364         }
365
366         /* Map IDs, so BSP has CPUID 0 regardless of hwref */
367         for (a = bsp; a < tmp_cpuref_cnt; a++) {
368                 platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
369                 platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
370                 platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
371                 platform_cpuref_cnt++;
372         }
373         for (a = 0; a < bsp; a++) {
374                 platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
375                 platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
376                 platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
377                 platform_cpuref_cnt++;
378         }
379
380         platform_cpuref_valid = 1;
381
382         return (0);
383 }
384
385 static int
386 powernv_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
387 {
388         if (platform_cpuref_valid == 0)
389                 return (EINVAL);
390
391         cpuref->cr_cpuid = 0;
392         cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
393         cpuref->cr_domain = platform_cpuref[0].cr_domain;
394
395         return (0);
396 }
397
398 static int
399 powernv_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
400 {
401         int id;
402
403         if (platform_cpuref_valid == 0)
404                 return (EINVAL);
405
406         id = cpuref->cr_cpuid + 1;
407         if (id >= platform_cpuref_cnt)
408                 return (ENOENT);
409
410         cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
411         cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
412         cpuref->cr_domain = platform_cpuref[id].cr_domain;
413
414         return (0);
415 }
416
417 static int
418 powernv_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
419 {
420
421         cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
422         cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
423         cpuref->cr_domain = platform_cpuref[0].cr_domain;
424         return (0);
425 }
426
427 #ifdef SMP
428 static int
429 powernv_smp_start_cpu(platform_t plat, struct pcpu *pc)
430 {
431         int result;
432
433         ap_pcpu = pc;
434         powerpc_sync();
435
436         result = opal_call(OPAL_START_CPU, pc->pc_hwref, EXC_RST);
437         if (result != OPAL_SUCCESS) {
438                 printf("OPAL error (%d): unable to start AP %d\n",
439                     result, (int)pc->pc_hwref);
440                 return (ENXIO);
441         }
442
443         return (0);
444 }
445
446 static void
447 powernv_smp_probe_threads(platform_t plat)
448 {
449         char buf[8];
450         phandle_t cpu, dev, root;
451         int res, nthreads;
452
453         root = OF_peer(0);
454
455         dev = OF_child(root);
456         while (dev != 0) {
457                 res = OF_getprop(dev, "name", buf, sizeof(buf));
458                 if (res > 0 && strcmp(buf, "cpus") == 0)
459                         break;
460                 dev = OF_peer(dev);
461         }
462
463         nthreads = 1;
464         for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
465                 res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
466                 if (res <= 0 || strcmp(buf, "cpu") != 0)
467                         continue;
468
469                 res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
470
471                 if (res >= 0)
472                         nthreads = res / sizeof(cell_t);
473                 else
474                         nthreads = 1;
475                 break;
476         }
477
478         smp_threads_per_core = nthreads;
479         if (mp_ncpus % nthreads == 0)
480                 mp_ncores = mp_ncpus / nthreads;
481 }
482
483 static struct cpu_group *
484 cpu_group_init(struct cpu_group *group, struct cpu_group *parent,
485     const cpuset_t *cpus, int children, int level, int flags)
486 {
487         struct cpu_group *child;
488
489         child = children != 0 ? smp_topo_alloc(children) : NULL;
490
491         group->cg_parent = parent;
492         group->cg_child = child;
493         CPU_COPY(cpus, &group->cg_mask);
494         group->cg_count = CPU_COUNT(cpus);
495         group->cg_children = children;
496         group->cg_level = level;
497         group->cg_flags = flags;
498
499         return (child);
500 }
501
502 static struct cpu_group *
503 powernv_smp_topo(platform_t plat)
504 {
505         struct cpu_group *core, *dom, *root;
506         cpuset_t corecpus, domcpus;
507         int cpuid, i, j, k, ncores;
508
509         if (mp_ncpus % smp_threads_per_core != 0) {
510                 printf("%s: irregular SMP topology (%d threads, %d per core)\n",
511                     __func__, mp_ncpus, smp_threads_per_core);
512                 return (smp_topo_none());
513         }
514
515         root = smp_topo_alloc(1);
516         dom = cpu_group_init(root, NULL, &all_cpus, vm_ndomains, CG_SHARE_NONE,
517             0);
518
519         /*
520          * Redundant layers will be collapsed by the caller so we don't need a
521          * special case for a single domain.
522          */
523         for (i = 0; i < vm_ndomains; i++, dom++) {
524                 CPU_COPY(&cpuset_domain[i], &domcpus);
525                 ncores = CPU_COUNT(&domcpus) / smp_threads_per_core;
526                 KASSERT(CPU_COUNT(&domcpus) % smp_threads_per_core == 0,
527                     ("%s: domain %d core count not divisible by thread count",
528                     __func__, i));
529
530                 core = cpu_group_init(dom, root, &domcpus, ncores, CG_SHARE_L3,
531                     0);
532                 for (j = 0; j < ncores; j++, core++) {
533                         /*
534                          * Assume that consecutive CPU IDs correspond to sibling
535                          * threads.
536                          */
537                         CPU_ZERO(&corecpus);
538                         for (k = 0; k < smp_threads_per_core; k++) {
539                                 cpuid = CPU_FFS(&domcpus) - 1;
540                                 CPU_CLR(cpuid, &domcpus);
541                                 CPU_SET(cpuid, &corecpus);
542                         }
543                         (void)cpu_group_init(core, dom, &corecpus, 0,
544                             CG_SHARE_L1, CG_FLAG_SMT);
545                 }
546         }
547
548         return (root);
549 }
550
551 #endif
552
553 static void
554 powernv_reset(platform_t platform)
555 {
556
557         opal_call(OPAL_CEC_REBOOT);
558 }
559
560 static void
561 powernv_smp_ap_init(platform_t platform)
562 {
563
564         if (powernv_smp_ap_extra_init != NULL)
565                 powernv_smp_ap_extra_init();
566 }
567
568 static void
569 powernv_cpu_idle(sbintime_t sbt)
570 {
571 }
572
573 static int
574 powernv_node_numa_domain(platform_t platform, phandle_t node)
575 {
576         /* XXX: Is locking necessary in here? */
577         static int numa_domains[MAXMEMDOM];
578         static int numa_max_domain;
579         cell_t associativity[5];
580         int i, res;
581
582 #ifndef NUMA
583         return (0);
584 #endif
585         i = 0;
586         TUNABLE_INT_FETCH("vm.numa.disabled", &i);
587         if (i)
588                 return (0);
589
590         res = OF_getencprop(node, "ibm,associativity",
591                 associativity, sizeof(associativity));
592
593         /*
594          * If this node doesn't have associativity, or if there are not
595          * enough elements in it, check its parent.
596          */
597         if (res < (int)(sizeof(cell_t) * (platform_associativity + 1))) {
598                 node = OF_parent(node);
599                 /* If already at the root, use default domain. */
600                 if (node == 0)
601                         return (0);
602                 return (powernv_node_numa_domain(platform, node));
603         }
604
605         for (i = 0; i < numa_max_domain; i++) {
606                 if (numa_domains[i] == associativity[platform_associativity])
607                         return (i);
608         }
609         if (i < MAXMEMDOM)
610                 numa_domains[numa_max_domain++] =
611                     associativity[platform_associativity];
612         else
613                 i = 0;
614
615         return (i);
616 }
617
618 /* Set up the Nest MMU on POWER9 relatively early, but after pmap is setup. */
619 static void
620 powernv_setup_nmmu(void *unused)
621 {
622         if (opal_check() != 0)
623                 return;
624         opal_call(OPAL_NMMU_SET_PTCR, -1, mfspr(SPR_PTCR));
625 }
626
627 SYSINIT(powernv_setup_nmmu, SI_SUB_CPU, SI_ORDER_ANY, powernv_setup_nmmu, NULL);