]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - sys/cddl/contrib/opensolaris/uts/common/dtrace/profile.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / sys / cddl / contrib / opensolaris / uts / common / dtrace / profile.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25
26 #pragma ident   "%Z%%M% %I%     %E% SMI"
27
28 #include <sys/errno.h>
29 #include <sys/stat.h>
30 #include <sys/modctl.h>
31 #include <sys/conf.h>
32 #include <sys/systm.h>
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/cpuvar.h>
36 #include <sys/kmem.h>
37 #include <sys/strsubr.h>
38 #include <sys/dtrace.h>
39 #include <sys/cyclic.h>
40 #include <sys/atomic.h>
41
42 static dev_info_t *profile_devi;
43 static dtrace_provider_id_t profile_id;
44
45 /*
46  * Regardless of platform, the stack frames look like this in the case of the
47  * profile provider:
48  *
49  *      profile_fire
50  *      cyclic_expire
51  *      cyclic_fire
52  *      [ cbe ]
53  *      [ interrupt code ]
54  *
55  * On x86, there are five frames from the generic interrupt code; further, the
56  * interrupted instruction appears as its own stack frame, giving us a total of
57  * 10.
58  *
59  * On SPARC, the picture is further complicated because the compiler
60  * optimizes away tail-calls -- so the following frames are optimized away:
61  *
62  *      profile_fire
63  *      cyclic_expire
64  *
65  * This gives three frames.  However, on DEBUG kernels, the cyclic_expire
66  * frame cannot be tail-call eliminated, yielding four frames in this case.
67  *
68  * All of the above constraints lead to the mess below.  Yes, the profile
69  * provider should ideally figure this out on-the-fly by hitting one of its own
70  * probes and then walking its own stack trace.  This is complicated, however,
71  * and the static definition doesn't seem to be overly brittle.  Still, we
72  * allow for a manual override in case we get it completely wrong.
73  */
/*
 * Number of artificial stack frames between the probe and the interrupted
 * code: 10 on x86, 4 on DEBUG SPARC, 3 on non-DEBUG SPARC.  The macro is
 * left undefined on any other platform.
 */
#if defined(__x86)
#define	PROF_ARTIFICIAL_FRAMES	10
#elif defined(__sparc) && defined(DEBUG)
#define	PROF_ARTIFICIAL_FRAMES	4
#elif defined(__sparc)
#define	PROF_ARTIFICIAL_FRAMES	3
#endif
85
/* Size, in bytes, of the name buffers used for profile probes. */
#define	PROF_NAMELEN		15

/* The "profile-" family: kind tag and probe-name prefix. */
#define	PROF_PROFILE		0
#define	PROF_PREFIX_PROFILE	"profile-"

/* The "tick-" family: kind tag and probe-name prefix. */
#define	PROF_TICK		1
#define	PROF_PREFIX_TICK	"tick-"
92
93 typedef struct profile_probe {
94         char            prof_name[PROF_NAMELEN];
95         dtrace_id_t     prof_id;
96         int             prof_kind;
97         hrtime_t        prof_interval;
98         cyclic_id_t     prof_cyclic;
99 } profile_probe_t;
100
101 typedef struct profile_probe_percpu {
102         hrtime_t        profc_expected;
103         hrtime_t        profc_interval;
104         profile_probe_t *profc_probe;
105 } profile_probe_percpu_t;
106
107 hrtime_t        profile_interval_min = NANOSEC / 5000;          /* 5000 hz */
108 int             profile_aframes = 0;                            /* override */
109
/*
 * Rates of the profile-<rate> probes created when profile_provide() is
 * called without a description.  The table has 20 slots; slots holding 0
 * (the 13 trailing ones by default) are skipped.
 */
static int profile_rates[20] = {
	97, 199, 499, 997, 1999, 4001, 4999
};
116
/*
 * Rates of the tick-<rate> probes created when profile_provide() is called
 * without a description.  The table has 15 slots; slots holding 0 (the 9
 * trailing ones by default) are skipped.
 */
static int profile_ticks[15] = {
	1, 10, 100, 500, 1000, 5000
};
122
123 /*
124  * profile_max defines the upper bound on the number of profile probes that
125  * can exist (this is to prevent malicious or clumsy users from exhausting
126  * system resources by creating a slew of profile probes). At mod load time,
127  * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
128  * present in the profile.conf file.
129  */
/* Value used for profile_max when no "profile-max-probes" property is set. */
#define	PROFILE_MAX_DEFAULT	1000

/* Number of profile probes currently in existence. */
static uint32_t profile_total;

/* Upper bound on profile_total, established in profile_attach(). */
static uint32_t profile_max;
133
134 static void
135 profile_fire(void *arg)
136 {
137         profile_probe_percpu_t *pcpu = arg;
138         profile_probe_t *prof = pcpu->profc_probe;
139         hrtime_t late;
140
141         late = dtrace_gethrtime() - pcpu->profc_expected;
142         pcpu->profc_expected += pcpu->profc_interval;
143
144         dtrace_probe(prof->prof_id, CPU->cpu_profile_pc,
145             CPU->cpu_profile_upc, late, 0, 0);
146 }
147
148 static void
149 profile_tick(void *arg)
150 {
151         profile_probe_t *prof = arg;
152
153         dtrace_probe(prof->prof_id, CPU->cpu_profile_pc,
154             CPU->cpu_profile_upc, 0, 0, 0);
155 }
156
157 static void
158 profile_create(hrtime_t interval, const char *name, int kind)
159 {
160         profile_probe_t *prof;
161         int nr_frames = PROF_ARTIFICIAL_FRAMES + dtrace_mach_aframes();
162
163         if (profile_aframes)
164                 nr_frames = profile_aframes;
165
166         if (interval < profile_interval_min)
167                 return;
168
169         if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
170                 return;
171
172         atomic_add_32(&profile_total, 1);
173         if (profile_total > profile_max) {
174                 atomic_add_32(&profile_total, -1);
175                 return;
176         }
177
178         prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
179         (void) strcpy(prof->prof_name, name);
180         prof->prof_interval = interval;
181         prof->prof_cyclic = CYCLIC_NONE;
182         prof->prof_kind = kind;
183         prof->prof_id = dtrace_probe_create(profile_id,
184             NULL, NULL, name, nr_frames, prof);
185 }
186
187 /*ARGSUSED*/
188 static void
189 profile_provide(void *arg, const dtrace_probedesc_t *desc)
190 {
191         int i, j, rate, kind;
192         hrtime_t val = 0, mult = 1, len;
193         const char *name, *suffix = NULL;
194
195         const struct {
196                 char *prefix;
197                 int kind;
198         } types[] = {
199                 { PROF_PREFIX_PROFILE, PROF_PROFILE },
200                 { PROF_PREFIX_TICK, PROF_TICK },
201                 { NULL, NULL }
202         };
203
204         const struct {
205                 char *name;
206                 hrtime_t mult;
207         } suffixes[] = {
208                 { "ns",         NANOSEC / NANOSEC },
209                 { "nsec",       NANOSEC / NANOSEC },
210                 { "us",         NANOSEC / MICROSEC },
211                 { "usec",       NANOSEC / MICROSEC },
212                 { "ms",         NANOSEC / MILLISEC },
213                 { "msec",       NANOSEC / MILLISEC },
214                 { "s",          NANOSEC / SEC },
215                 { "sec",        NANOSEC / SEC },
216                 { "m",          NANOSEC * (hrtime_t)60 },
217                 { "min",        NANOSEC * (hrtime_t)60 },
218                 { "h",          NANOSEC * (hrtime_t)(60 * 60) },
219                 { "hour",       NANOSEC * (hrtime_t)(60 * 60) },
220                 { "d",          NANOSEC * (hrtime_t)(24 * 60 * 60) },
221                 { "day",        NANOSEC * (hrtime_t)(24 * 60 * 60) },
222                 { "hz",         0 },
223                 { NULL }
224         };
225
226         if (desc == NULL) {
227                 char n[PROF_NAMELEN];
228
229                 /*
230                  * If no description was provided, provide all of our probes.
231                  */
232                 for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) {
233                         if ((rate = profile_rates[i]) == 0)
234                                 continue;
235
236                         (void) snprintf(n, PROF_NAMELEN, "%s%d",
237                             PROF_PREFIX_PROFILE, rate);
238                         profile_create(NANOSEC / rate, n, PROF_PROFILE);
239                 }
240
241                 for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) {
242                         if ((rate = profile_ticks[i]) == 0)
243                                 continue;
244
245                         (void) snprintf(n, PROF_NAMELEN, "%s%d",
246                             PROF_PREFIX_TICK, rate);
247                         profile_create(NANOSEC / rate, n, PROF_TICK);
248                 }
249
250                 return;
251         }
252
253         name = desc->dtpd_name;
254
255         for (i = 0; types[i].prefix != NULL; i++) {
256                 len = strlen(types[i].prefix);
257
258                 if (strncmp(name, types[i].prefix, len) != 0)
259                         continue;
260                 break;
261         }
262
263         if (types[i].prefix == NULL)
264                 return;
265
266         kind = types[i].kind;
267         j = strlen(name) - len;
268
269         /*
270          * We need to start before any time suffix.
271          */
272         for (j = strlen(name); j >= len; j--) {
273                 if (name[j] >= '0' && name[j] <= '9')
274                         break;
275                 suffix = &name[j];
276         }
277
278         ASSERT(suffix != NULL);
279
280         /*
281          * Now determine the numerical value present in the probe name.
282          */
283         for (; j >= len; j--) {
284                 if (name[j] < '0' || name[j] > '9')
285                         return;
286
287                 val += (name[j] - '0') * mult;
288                 mult *= (hrtime_t)10;
289         }
290
291         if (val == 0)
292                 return;
293
294         /*
295          * Look-up the suffix to determine the multiplier.
296          */
297         for (i = 0, mult = 0; suffixes[i].name != NULL; i++) {
298                 if (strcasecmp(suffixes[i].name, suffix) == 0) {
299                         mult = suffixes[i].mult;
300                         break;
301                 }
302         }
303
304         if (suffixes[i].name == NULL && *suffix != '\0')
305                 return;
306
307         if (mult == 0) {
308                 /*
309                  * The default is frequency-per-second.
310                  */
311                 val = NANOSEC / val;
312         } else {
313                 val *= mult;
314         }
315
316         profile_create(val, name, kind);
317 }
318
319 /*ARGSUSED*/
320 static void
321 profile_destroy(void *arg, dtrace_id_t id, void *parg)
322 {
323         profile_probe_t *prof = parg;
324
325         ASSERT(prof->prof_cyclic == CYCLIC_NONE);
326         kmem_free(prof, sizeof (profile_probe_t));
327
328         ASSERT(profile_total >= 1);
329         atomic_add_32(&profile_total, -1);
330 }
331
332 /*ARGSUSED*/
333 static void
334 profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
335 {
336         profile_probe_t *prof = arg;
337         profile_probe_percpu_t *pcpu;
338
339         pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP);
340         pcpu->profc_probe = prof;
341
342         hdlr->cyh_func = profile_fire;
343         hdlr->cyh_arg = pcpu;
344         hdlr->cyh_level = CY_HIGH_LEVEL;
345
346         when->cyt_interval = prof->prof_interval;
347         when->cyt_when = dtrace_gethrtime() + when->cyt_interval;
348
349         pcpu->profc_expected = when->cyt_when;
350         pcpu->profc_interval = when->cyt_interval;
351 }
352
353 /*ARGSUSED*/
354 static void
355 profile_offline(void *arg, cpu_t *cpu, void *oarg)
356 {
357         profile_probe_percpu_t *pcpu = oarg;
358
359         ASSERT(pcpu->profc_probe == arg);
360         kmem_free(pcpu, sizeof (profile_probe_percpu_t));
361 }
362
363 /*ARGSUSED*/
364 static void
365 profile_enable(void *arg, dtrace_id_t id, void *parg)
366 {
367         profile_probe_t *prof = parg;
368         cyc_omni_handler_t omni;
369         cyc_handler_t hdlr;
370         cyc_time_t when;
371
372         ASSERT(prof->prof_interval != 0);
373         ASSERT(MUTEX_HELD(&cpu_lock));
374
375         if (prof->prof_kind == PROF_TICK) {
376                 hdlr.cyh_func = profile_tick;
377                 hdlr.cyh_arg = prof;
378                 hdlr.cyh_level = CY_HIGH_LEVEL;
379
380                 when.cyt_interval = prof->prof_interval;
381                 when.cyt_when = dtrace_gethrtime() + when.cyt_interval;
382         } else {
383                 ASSERT(prof->prof_kind == PROF_PROFILE);
384                 omni.cyo_online = profile_online;
385                 omni.cyo_offline = profile_offline;
386                 omni.cyo_arg = prof;
387         }
388
389         if (prof->prof_kind == PROF_TICK) {
390                 prof->prof_cyclic = cyclic_add(&hdlr, &when);
391         } else {
392                 prof->prof_cyclic = cyclic_add_omni(&omni);
393         }
394 }
395
396 /*ARGSUSED*/
397 static void
398 profile_disable(void *arg, dtrace_id_t id, void *parg)
399 {
400         profile_probe_t *prof = parg;
401
402         ASSERT(prof->prof_cyclic != CYCLIC_NONE);
403         ASSERT(MUTEX_HELD(&cpu_lock));
404
405         cyclic_remove(prof->prof_cyclic);
406         prof->prof_cyclic = CYCLIC_NONE;
407 }
408
409 /*ARGSUSED*/
410 static int
411 profile_usermode(void *arg, dtrace_id_t id, void *parg)
412 {
413         return (CPU->cpu_profile_pc == 0);
414 }
415
416 static dtrace_pattr_t profile_attr = {
417 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
418 { DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_UNKNOWN },
419 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
420 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
421 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
422 };
423
424 static dtrace_pops_t profile_pops = {
425         profile_provide,
426         NULL,
427         profile_enable,
428         profile_disable,
429         NULL,
430         NULL,
431         NULL,
432         NULL,
433         profile_usermode,
434         profile_destroy
435 };
436
437 static int
438 profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
439 {
440         switch (cmd) {
441         case DDI_ATTACH:
442                 break;
443         case DDI_RESUME:
444                 return (DDI_SUCCESS);
445         default:
446                 return (DDI_FAILURE);
447         }
448
449         if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0,
450             DDI_PSEUDO, NULL) == DDI_FAILURE ||
451             dtrace_register("profile", &profile_attr,
452             DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER, NULL,
453             &profile_pops, NULL, &profile_id) != 0) {
454                 ddi_remove_minor_node(devi, NULL);
455                 return (DDI_FAILURE);
456         }
457
458         profile_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
459             "profile-max-probes", PROFILE_MAX_DEFAULT);
460
461         ddi_report_dev(devi);
462         profile_devi = devi;
463         return (DDI_SUCCESS);
464 }
465
466 static int
467 profile_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
468 {
469         switch (cmd) {
470         case DDI_DETACH:
471                 break;
472         case DDI_SUSPEND:
473                 return (DDI_SUCCESS);
474         default:
475                 return (DDI_FAILURE);
476         }
477
478         if (dtrace_unregister(profile_id) != 0)
479                 return (DDI_FAILURE);
480
481         ddi_remove_minor_node(devi, NULL);
482         return (DDI_SUCCESS);
483 }
484
485 /*ARGSUSED*/
486 static int
487 profile_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
488 {
489         int error;
490
491         switch (infocmd) {
492         case DDI_INFO_DEVT2DEVINFO:
493                 *result = (void *)profile_devi;
494                 error = DDI_SUCCESS;
495                 break;
496         case DDI_INFO_DEVT2INSTANCE:
497                 *result = (void *)0;
498                 error = DDI_SUCCESS;
499                 break;
500         default:
501                 error = DDI_FAILURE;
502         }
503         return (error);
504 }
505
506 /*ARGSUSED*/
507 static int
508 profile_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
509 {
510         return (0);
511 }
512
513 static struct cb_ops profile_cb_ops = {
514         profile_open,           /* open */
515         nodev,                  /* close */
516         nulldev,                /* strategy */
517         nulldev,                /* print */
518         nodev,                  /* dump */
519         nodev,                  /* read */
520         nodev,                  /* write */
521         nodev,                  /* ioctl */
522         nodev,                  /* devmap */
523         nodev,                  /* mmap */
524         nodev,                  /* segmap */
525         nochpoll,               /* poll */
526         ddi_prop_op,            /* cb_prop_op */
527         0,                      /* streamtab  */
528         D_NEW | D_MP            /* Driver compatibility flag */
529 };
530
531 static struct dev_ops profile_ops = {
532         DEVO_REV,               /* devo_rev, */
533         0,                      /* refcnt  */
534         profile_info,           /* get_dev_info */
535         nulldev,                /* identify */
536         nulldev,                /* probe */
537         profile_attach,         /* attach */
538         profile_detach,         /* detach */
539         nodev,                  /* reset */
540         &profile_cb_ops,        /* driver operations */
541         NULL,                   /* bus operations */
542         nodev                   /* dev power */
543 };
544
545 /*
546  * Module linkage information for the kernel.
547  */
548 static struct modldrv modldrv = {
549         &mod_driverops,         /* module type (this is a pseudo driver) */
550         "Profile Interrupt Tracing",    /* name of module */
551         &profile_ops,           /* driver ops */
552 };
553
554 static struct modlinkage modlinkage = {
555         MODREV_1,
556         (void *)&modldrv,
557         NULL
558 };
559
560 int
561 _init(void)
562 {
563         return (mod_install(&modlinkage));
564 }
565
566 int
567 _info(struct modinfo *modinfop)
568 {
569         return (mod_info(&modlinkage, modinfop));
570 }
571
572 int
573 _fini(void)
574 {
575         return (mod_remove(&modlinkage));
576 }