 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
25 static int dtrace_verbose_ioctl;
26 SYSCTL_INT(_debug_dtrace, OID_AUTO, verbose_ioctl, CTLFLAG_RW, &dtrace_verbose_ioctl, 0, "");
28 #define DTRACE_IOCTL_PRINTF(fmt, ...) if (dtrace_verbose_ioctl) printf(fmt, ## __VA_ARGS__ )
32 dtrace_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
33 int flags __unused, struct thread *td)
35 dtrace_state_t *state;
36 devfs_get_cdevpriv((void **) &state);
41 if (state->dts_anon) {
42 ASSERT(dtrace_anon.dta_state == NULL);
43 state = state->dts_anon;
47 case DTRACEIOC_AGGDESC: {
48 dtrace_aggdesc_t **paggdesc = (dtrace_aggdesc_t **) addr;
49 dtrace_aggdesc_t aggdesc;
51 dtrace_aggregation_t *agg;
54 dtrace_recdesc_t *lrec;
59 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_AGGDESC\n",__func__,__LINE__);
61 if (copyin((void *) *paggdesc, &aggdesc, sizeof (aggdesc)) != 0)
64 mutex_enter(&dtrace_lock);
66 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
67 mutex_exit(&dtrace_lock);
71 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;
73 nrecs = aggdesc.dtagd_nrecs;
74 aggdesc.dtagd_nrecs = 0;
76 offs = agg->dtag_base;
77 lrec = &agg->dtag_action.dta_rec;
78 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;
80 for (act = agg->dtag_first; ; act = act->dta_next) {
81 ASSERT(act->dta_intuple ||
82 DTRACEACT_ISAGG(act->dta_kind));
85 * If this action has a record size of zero, it
86 * denotes an argument to the aggregating action.
87 * Because the presence of this record doesn't (or
88 * shouldn't) affect the way the data is interpreted,
89 * we don't copy it out to save user-level the
90 * confusion of dealing with a zero-length record.
92 if (act->dta_rec.dtrd_size == 0) {
93 ASSERT(agg->dtag_hasarg);
97 aggdesc.dtagd_nrecs++;
99 if (act == &agg->dtag_action)
104 * Now that we have the size, we need to allocate a temporary
105 * buffer in which to store the complete description. We need
106 * the temporary buffer to be able to drop dtrace_lock()
107 * across the copyout(), below.
109 size = sizeof (dtrace_aggdesc_t) +
110 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));
112 buf = kmem_alloc(size, KM_SLEEP);
113 dest = (uintptr_t)buf;
115 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
116 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);
118 for (act = agg->dtag_first; ; act = act->dta_next) {
119 dtrace_recdesc_t rec = act->dta_rec;
122 * See the comment in the above loop for why we pass
123 * over zero-length records.
125 if (rec.dtrd_size == 0) {
126 ASSERT(agg->dtag_hasarg);
133 rec.dtrd_offset -= offs;
134 bcopy(&rec, (void *)dest, sizeof (rec));
135 dest += sizeof (dtrace_recdesc_t);
137 if (act == &agg->dtag_action)
141 mutex_exit(&dtrace_lock);
143 if (copyout(buf, (void *) *paggdesc, dest - (uintptr_t)buf) != 0) {
144 kmem_free(buf, size);
148 kmem_free(buf, size);
151 case DTRACEIOC_AGGSNAP:
152 case DTRACEIOC_BUFSNAP: {
153 dtrace_bufdesc_t **pdesc = (dtrace_bufdesc_t **) addr;
154 dtrace_bufdesc_t desc;
156 dtrace_buffer_t *buf;
158 dtrace_debug_output();
160 if (copyin((void *) *pdesc, &desc, sizeof (desc)) != 0)
163 DTRACE_IOCTL_PRINTF("%s(%d): %s curcpu %d cpu %d\n",
165 cmd == DTRACEIOC_AGGSNAP ?
166 "DTRACEIOC_AGGSNAP":"DTRACEIOC_BUFSNAP",
167 curcpu, desc.dtbd_cpu);
169 if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
171 if (pcpu_find(desc.dtbd_cpu) == NULL)
174 mutex_enter(&dtrace_lock);
176 if (cmd == DTRACEIOC_BUFSNAP) {
177 buf = &state->dts_buffer[desc.dtbd_cpu];
179 buf = &state->dts_aggbuffer[desc.dtbd_cpu];
182 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
183 size_t sz = buf->dtb_offset;
185 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
186 mutex_exit(&dtrace_lock);
191 * If this buffer has already been consumed, we're
192 * going to indicate that there's nothing left here
195 if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
196 mutex_exit(&dtrace_lock);
200 desc.dtbd_errors = 0;
201 desc.dtbd_oldest = 0;
204 if (copyout(&desc, (void *) *pdesc, sz) != 0)
211 * If this is a ring buffer that has wrapped, we want
212 * to copy the whole thing out.
214 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
215 dtrace_buffer_polish(buf);
219 if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
220 mutex_exit(&dtrace_lock);
225 desc.dtbd_drops = buf->dtb_drops;
226 desc.dtbd_errors = buf->dtb_errors;
227 desc.dtbd_oldest = buf->dtb_xamot_offset;
229 mutex_exit(&dtrace_lock);
231 if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0)
234 buf->dtb_flags |= DTRACEBUF_CONSUMED;
239 if (buf->dtb_tomax == NULL) {
240 ASSERT(buf->dtb_xamot == NULL);
241 mutex_exit(&dtrace_lock);
245 cached = buf->dtb_tomax;
246 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
248 dtrace_xcall(desc.dtbd_cpu,
249 (dtrace_xcall_t)dtrace_buffer_switch, buf);
251 state->dts_errors += buf->dtb_xamot_errors;
254 * If the buffers did not actually switch, then the cross call
255 * did not take place -- presumably because the given CPU is
256 * not in the ready set. If this is the case, we'll return
259 if (buf->dtb_tomax == cached) {
260 ASSERT(buf->dtb_xamot != cached);
261 mutex_exit(&dtrace_lock);
265 ASSERT(cached == buf->dtb_xamot);
267 DTRACE_IOCTL_PRINTF("%s(%d): copyout the buffer snapshot\n",__func__,__LINE__);
270 * We have our snapshot; now copy it out.
272 if (copyout(buf->dtb_xamot, desc.dtbd_data,
273 buf->dtb_xamot_offset) != 0) {
274 mutex_exit(&dtrace_lock);
278 desc.dtbd_size = buf->dtb_xamot_offset;
279 desc.dtbd_drops = buf->dtb_xamot_drops;
280 desc.dtbd_errors = buf->dtb_xamot_errors;
281 desc.dtbd_oldest = 0;
283 mutex_exit(&dtrace_lock);
285 DTRACE_IOCTL_PRINTF("%s(%d): copyout buffer desc: size %zd drops %lu errors %lu\n",__func__,__LINE__,(size_t) desc.dtbd_size,(u_long) desc.dtbd_drops,(u_long) desc.dtbd_errors);
288 * Finally, copy out the buffer description.
290 if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0)
295 case DTRACEIOC_CONF: {
298 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_CONF\n",__func__,__LINE__);
300 bzero(&conf, sizeof (conf));
301 conf.dtc_difversion = DIF_VERSION;
302 conf.dtc_difintregs = DIF_DIR_NREGS;
303 conf.dtc_diftupregs = DIF_DTR_NREGS;
304 conf.dtc_ctfmodel = CTF_MODEL_NATIVE;
306 *((dtrace_conf_t *) addr) = conf;
310 case DTRACEIOC_DOFGET: {
311 dof_hdr_t **pdof = (dof_hdr_t **) addr;
312 dof_hdr_t hdr, *dof = *pdof;
316 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_DOFGET\n",__func__,__LINE__);
318 if (copyin((void *)dof, &hdr, sizeof (hdr)) != 0)
321 mutex_enter(&dtrace_lock);
322 dof = dtrace_dof_create(state);
323 mutex_exit(&dtrace_lock);
325 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
326 rval = copyout(dof, (void *) *pdof, len);
327 dtrace_dof_destroy(dof);
329 return (rval == 0 ? 0 : EFAULT);
331 case DTRACEIOC_ENABLE: {
332 dof_hdr_t *dof = NULL;
333 dtrace_enabling_t *enab = NULL;
334 dtrace_vstate_t *vstate;
337 dtrace_enable_io_t *p = (dtrace_enable_io_t *) addr;
339 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_ENABLE\n",__func__,__LINE__);
342 * If a NULL argument has been passed, we take this as our
343 * cue to reevaluate our enablings.
345 if (p->dof == NULL) {
346 dtrace_enabling_matchall();
351 if ((dof = dtrace_dof_copyin((uintptr_t) p->dof, &rval)) == NULL)
354 mutex_enter(&cpu_lock);
355 mutex_enter(&dtrace_lock);
356 vstate = &state->dts_vstate;
358 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
359 mutex_exit(&dtrace_lock);
360 mutex_exit(&cpu_lock);
361 dtrace_dof_destroy(dof);
365 if (dtrace_dof_slurp(dof, vstate, td->td_ucred, &enab, 0, B_TRUE) != 0) {
366 mutex_exit(&dtrace_lock);
367 mutex_exit(&cpu_lock);
368 dtrace_dof_destroy(dof);
372 if ((rval = dtrace_dof_options(dof, state)) != 0) {
373 dtrace_enabling_destroy(enab);
374 mutex_exit(&dtrace_lock);
375 mutex_exit(&cpu_lock);
376 dtrace_dof_destroy(dof);
380 if ((err = dtrace_enabling_match(enab, &p->n_matched)) == 0) {
381 err = dtrace_enabling_retain(enab);
383 dtrace_enabling_destroy(enab);
386 mutex_exit(&cpu_lock);
387 mutex_exit(&dtrace_lock);
388 dtrace_dof_destroy(dof);
392 case DTRACEIOC_EPROBE: {
393 dtrace_eprobedesc_t **pepdesc = (dtrace_eprobedesc_t **) addr;
394 dtrace_eprobedesc_t epdesc;
396 dtrace_action_t *act;
402 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_EPROBE\n",__func__,__LINE__);
404 if (copyin((void *)*pepdesc, &epdesc, sizeof (epdesc)) != 0)
407 mutex_enter(&dtrace_lock);
409 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
410 mutex_exit(&dtrace_lock);
414 if (ecb->dte_probe == NULL) {
415 mutex_exit(&dtrace_lock);
419 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
420 epdesc.dtepd_uarg = ecb->dte_uarg;
421 epdesc.dtepd_size = ecb->dte_size;
423 nrecs = epdesc.dtepd_nrecs;
424 epdesc.dtepd_nrecs = 0;
425 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
426 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
429 epdesc.dtepd_nrecs++;
433 * Now that we have the size, we need to allocate a temporary
434 * buffer in which to store the complete description. We need
435 * the temporary buffer to be able to drop dtrace_lock()
436 * across the copyout(), below.
438 size = sizeof (dtrace_eprobedesc_t) +
439 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));
441 buf = kmem_alloc(size, KM_SLEEP);
442 dest = (uintptr_t)buf;
444 bcopy(&epdesc, (void *)dest, sizeof (epdesc));
445 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);
447 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
448 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
454 bcopy(&act->dta_rec, (void *)dest,
455 sizeof (dtrace_recdesc_t));
456 dest += sizeof (dtrace_recdesc_t);
459 mutex_exit(&dtrace_lock);
461 if (copyout(buf, (void *) *pepdesc, dest - (uintptr_t)buf) != 0) {
462 kmem_free(buf, size);
466 kmem_free(buf, size);
469 case DTRACEIOC_FORMAT: {
470 dtrace_fmtdesc_t *fmt = (dtrace_fmtdesc_t *) addr;
474 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_FORMAT\n",__func__,__LINE__);
476 mutex_enter(&dtrace_lock);
478 if (fmt->dtfd_format == 0 ||
479 fmt->dtfd_format > state->dts_nformats) {
480 mutex_exit(&dtrace_lock);
485 * Format strings are allocated contiguously and they are
486 * never freed; if a format index is less than the number
487 * of formats, we can assert that the format map is non-NULL
488 * and that the format for the specified index is non-NULL.
490 ASSERT(state->dts_formats != NULL);
491 str = state->dts_formats[fmt->dtfd_format - 1];
494 len = strlen(str) + 1;
496 if (len > fmt->dtfd_length) {
497 fmt->dtfd_length = len;
499 if (copyout(str, fmt->dtfd_string, len) != 0) {
500 mutex_exit(&dtrace_lock);
505 mutex_exit(&dtrace_lock);
510 processorid_t *cpuid = (processorid_t *) addr;
512 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_GO\n",__func__,__LINE__);
514 rval = dtrace_state_go(state, cpuid);
518 case DTRACEIOC_PROBEARG: {
519 dtrace_argdesc_t *desc = (dtrace_argdesc_t *) addr;
520 dtrace_probe_t *probe;
521 dtrace_provider_t *prov;
523 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROBEARG\n",__func__,__LINE__);
525 if (desc->dtargd_id == DTRACE_IDNONE)
528 if (desc->dtargd_ndx == DTRACE_ARGNONE)
531 mutex_enter(&dtrace_provider_lock);
532 mutex_enter(&mod_lock);
533 mutex_enter(&dtrace_lock);
535 if (desc->dtargd_id > dtrace_nprobes) {
536 mutex_exit(&dtrace_lock);
537 mutex_exit(&mod_lock);
538 mutex_exit(&dtrace_provider_lock);
542 if ((probe = dtrace_probes[desc->dtargd_id - 1]) == NULL) {
543 mutex_exit(&dtrace_lock);
544 mutex_exit(&mod_lock);
545 mutex_exit(&dtrace_provider_lock);
549 mutex_exit(&dtrace_lock);
551 prov = probe->dtpr_provider;
553 if (prov->dtpv_pops.dtps_getargdesc == NULL) {
555 * There isn't any typed information for this probe.
556 * Set the argument number to DTRACE_ARGNONE.
558 desc->dtargd_ndx = DTRACE_ARGNONE;
560 desc->dtargd_native[0] = '\0';
561 desc->dtargd_xlate[0] = '\0';
562 desc->dtargd_mapping = desc->dtargd_ndx;
564 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
565 probe->dtpr_id, probe->dtpr_arg, desc);
568 mutex_exit(&mod_lock);
569 mutex_exit(&dtrace_provider_lock);
573 case DTRACEIOC_PROBEMATCH:
574 case DTRACEIOC_PROBES: {
575 dtrace_probedesc_t *p_desc = (dtrace_probedesc_t *) addr;
576 dtrace_probe_t *probe = NULL;
577 dtrace_probekey_t pkey;
584 DTRACE_IOCTL_PRINTF("%s(%d): %s\n",__func__,__LINE__,
585 cmd == DTRACEIOC_PROBEMATCH ?
586 "DTRACEIOC_PROBEMATCH":"DTRACEIOC_PROBES");
588 p_desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
589 p_desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
590 p_desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
591 p_desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
594 * Before we attempt to match this probe, we want to give
595 * all providers the opportunity to provide it.
597 if (p_desc->dtpd_id == DTRACE_IDNONE) {
598 mutex_enter(&dtrace_provider_lock);
599 dtrace_probe_provide(p_desc, NULL);
600 mutex_exit(&dtrace_provider_lock);
604 if (cmd == DTRACEIOC_PROBEMATCH) {
605 dtrace_probekey(p_desc, &pkey);
606 pkey.dtpk_id = DTRACE_IDNONE;
609 dtrace_cred2priv(td->td_ucred, &priv, &uid, &zoneid);
611 mutex_enter(&dtrace_lock);
613 if (cmd == DTRACEIOC_PROBEMATCH) {
614 for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) {
615 if ((probe = dtrace_probes[i - 1]) != NULL &&
616 (m = dtrace_match_probe(probe, &pkey,
617 priv, uid, zoneid)) != 0)
622 mutex_exit(&dtrace_lock);
627 for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) {
628 if ((probe = dtrace_probes[i - 1]) != NULL &&
629 dtrace_match_priv(probe, priv, uid, zoneid))
635 mutex_exit(&dtrace_lock);
639 dtrace_probe_description(probe, p_desc);
640 mutex_exit(&dtrace_lock);
644 case DTRACEIOC_PROVIDER: {
645 dtrace_providerdesc_t *pvd = (dtrace_providerdesc_t *) addr;
646 dtrace_provider_t *pvp;
648 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROVIDER\n",__func__,__LINE__);
650 pvd->dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
651 mutex_enter(&dtrace_provider_lock);
653 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
654 if (strcmp(pvp->dtpv_name, pvd->dtvd_name) == 0)
658 mutex_exit(&dtrace_provider_lock);
663 bcopy(&pvp->dtpv_priv, &pvd->dtvd_priv, sizeof (dtrace_ppriv_t));
664 bcopy(&pvp->dtpv_attr, &pvd->dtvd_attr, sizeof (dtrace_pattr_t));
668 case DTRACEIOC_REPLICATE: {
669 dtrace_repldesc_t *desc = (dtrace_repldesc_t *) addr;
670 dtrace_probedesc_t *match = &desc->dtrpd_match;
671 dtrace_probedesc_t *create = &desc->dtrpd_create;
674 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_REPLICATE\n",__func__,__LINE__);
676 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
677 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
678 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
679 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
681 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
682 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
683 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
684 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
686 mutex_enter(&dtrace_lock);
687 err = dtrace_enabling_replicate(state, match, create);
688 mutex_exit(&dtrace_lock);
692 case DTRACEIOC_STATUS: {
693 dtrace_status_t *stat = (dtrace_status_t *) addr;
694 dtrace_dstate_t *dstate;
698 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STATUS\n",__func__,__LINE__);
701 * See the comment in dtrace_state_deadman() for the reason
702 * for setting dts_laststatus to INT64_MAX before setting
703 * it to the correct value.
705 state->dts_laststatus = INT64_MAX;
706 dtrace_membar_producer();
707 state->dts_laststatus = dtrace_gethrtime();
709 bzero(stat, sizeof (*stat));
711 mutex_enter(&dtrace_lock);
713 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
714 mutex_exit(&dtrace_lock);
718 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
719 stat->dtst_exiting = 1;
721 nerrs = state->dts_errors;
722 dstate = &state->dts_vstate.dtvs_dynvars;
724 for (i = 0; i < NCPU; i++) {
726 if (pcpu_find(i) == NULL)
729 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];
731 stat->dtst_dyndrops += dcpu->dtdsc_drops;
732 stat->dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
733 stat->dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;
735 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
738 nerrs += state->dts_buffer[i].dtb_errors;
740 for (j = 0; j < state->dts_nspeculations; j++) {
741 dtrace_speculation_t *spec;
742 dtrace_buffer_t *buf;
744 spec = &state->dts_speculations[j];
745 buf = &spec->dtsp_buffer[i];
746 stat->dtst_specdrops += buf->dtb_xamot_drops;
750 stat->dtst_specdrops_busy = state->dts_speculations_busy;
751 stat->dtst_specdrops_unavail = state->dts_speculations_unavail;
752 stat->dtst_stkstroverflows = state->dts_stkstroverflows;
753 stat->dtst_dblerrors = state->dts_dblerrors;
755 (state->dts_activity == DTRACE_ACTIVITY_KILLED);
756 stat->dtst_errors = nerrs;
758 mutex_exit(&dtrace_lock);
762 case DTRACEIOC_STOP: {
764 processorid_t *cpuid = (processorid_t *) addr;
766 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STOP\n",__func__,__LINE__);
768 mutex_enter(&dtrace_lock);
769 rval = dtrace_state_stop(state, cpuid);
770 mutex_exit(&dtrace_lock);