4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
25 static int dtrace_verbose_ioctl;
26 SYSCTL_INT(_debug_dtrace, OID_AUTO, verbose_ioctl, CTLFLAG_RW, &dtrace_verbose_ioctl, 0, "");
/*
 * Print an ioctl trace message with printf() when dtrace_verbose_ioctl is
 * non-zero; expand to a no-op otherwise.
 *
 *	fmt	printf-style format string
 *	...	optional arguments consumed by fmt
 *
 * The expansion is wrapped in do { } while (0) so that the macro always
 * behaves as exactly one statement.  A bare "if (flag) printf(...)" would
 * capture the "else" of any enclosing if/else that uses the macro as its
 * unbraced body.  Callers must terminate the invocation with a semicolon,
 * as all existing uses already do.
 */
#define DTRACE_IOCTL_PRINTF(fmt, ...)					\
	do {								\
		if (dtrace_verbose_ioctl)				\
			printf(fmt, ## __VA_ARGS__);			\
	} while (0)
32 dtrace_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
33 int flags __unused, struct thread *td)
35 dtrace_state_t *state = dev->si_drv1;
40 if (state->dts_anon) {
41 ASSERT(dtrace_anon.dta_state == NULL);
42 state = state->dts_anon;
46 case DTRACEIOC_AGGDESC: {
47 dtrace_aggdesc_t **paggdesc = (dtrace_aggdesc_t **) addr;
48 dtrace_aggdesc_t aggdesc;
50 dtrace_aggregation_t *agg;
53 dtrace_recdesc_t *lrec;
58 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_AGGDESC\n",__func__,__LINE__);
60 if (copyin((void *) *paggdesc, &aggdesc, sizeof (aggdesc)) != 0)
63 mutex_enter(&dtrace_lock);
65 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
66 mutex_exit(&dtrace_lock);
70 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;
72 nrecs = aggdesc.dtagd_nrecs;
73 aggdesc.dtagd_nrecs = 0;
75 offs = agg->dtag_base;
76 lrec = &agg->dtag_action.dta_rec;
77 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;
79 for (act = agg->dtag_first; ; act = act->dta_next) {
80 ASSERT(act->dta_intuple ||
81 DTRACEACT_ISAGG(act->dta_kind));
84 * If this action has a record size of zero, it
85 * denotes an argument to the aggregating action.
86 * Because the presence of this record doesn't (or
87 * shouldn't) affect the way the data is interpreted,
88 * we don't copy it out to save user-level the
89 * confusion of dealing with a zero-length record.
91 if (act->dta_rec.dtrd_size == 0) {
92 ASSERT(agg->dtag_hasarg);
96 aggdesc.dtagd_nrecs++;
98 if (act == &agg->dtag_action)
103 * Now that we have the size, we need to allocate a temporary
104 * buffer in which to store the complete description. We need
105 * the temporary buffer to be able to drop dtrace_lock()
106 * across the copyout(), below.
108 size = sizeof (dtrace_aggdesc_t) +
109 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));
111 buf = kmem_alloc(size, KM_SLEEP);
112 dest = (uintptr_t)buf;
114 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
115 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);
117 for (act = agg->dtag_first; ; act = act->dta_next) {
118 dtrace_recdesc_t rec = act->dta_rec;
121 * See the comment in the above loop for why we pass
122 * over zero-length records.
124 if (rec.dtrd_size == 0) {
125 ASSERT(agg->dtag_hasarg);
132 rec.dtrd_offset -= offs;
133 bcopy(&rec, (void *)dest, sizeof (rec));
134 dest += sizeof (dtrace_recdesc_t);
136 if (act == &agg->dtag_action)
140 mutex_exit(&dtrace_lock);
142 if (copyout(buf, (void *) *paggdesc, dest - (uintptr_t)buf) != 0) {
143 kmem_free(buf, size);
147 kmem_free(buf, size);
150 case DTRACEIOC_AGGSNAP:
151 case DTRACEIOC_BUFSNAP: {
152 dtrace_bufdesc_t **pdesc = (dtrace_bufdesc_t **) addr;
153 dtrace_bufdesc_t desc;
155 dtrace_buffer_t *buf;
157 dtrace_debug_output();
159 if (copyin((void *) *pdesc, &desc, sizeof (desc)) != 0)
162 DTRACE_IOCTL_PRINTF("%s(%d): %s curcpu %d cpu %d\n",
164 cmd == DTRACEIOC_AGGSNAP ?
165 "DTRACEIOC_AGGSNAP":"DTRACEIOC_BUFSNAP",
166 curcpu, desc.dtbd_cpu);
168 if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
170 if (pcpu_find(desc.dtbd_cpu) == NULL)
173 mutex_enter(&dtrace_lock);
175 if (cmd == DTRACEIOC_BUFSNAP) {
176 buf = &state->dts_buffer[desc.dtbd_cpu];
178 buf = &state->dts_aggbuffer[desc.dtbd_cpu];
181 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
182 size_t sz = buf->dtb_offset;
184 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
185 mutex_exit(&dtrace_lock);
190 * If this buffer has already been consumed, we're
191 * going to indicate that there's nothing left here
194 if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
195 mutex_exit(&dtrace_lock);
199 desc.dtbd_errors = 0;
200 desc.dtbd_oldest = 0;
203 if (copyout(&desc, (void *) *pdesc, sz) != 0)
210 * If this is a ring buffer that has wrapped, we want
211 * to copy the whole thing out.
213 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
214 dtrace_buffer_polish(buf);
218 if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
219 mutex_exit(&dtrace_lock);
224 desc.dtbd_drops = buf->dtb_drops;
225 desc.dtbd_errors = buf->dtb_errors;
226 desc.dtbd_oldest = buf->dtb_xamot_offset;
228 mutex_exit(&dtrace_lock);
230 if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0)
233 buf->dtb_flags |= DTRACEBUF_CONSUMED;
238 if (buf->dtb_tomax == NULL) {
239 ASSERT(buf->dtb_xamot == NULL);
240 mutex_exit(&dtrace_lock);
244 cached = buf->dtb_tomax;
245 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
247 dtrace_xcall(desc.dtbd_cpu,
248 (dtrace_xcall_t)dtrace_buffer_switch, buf);
250 state->dts_errors += buf->dtb_xamot_errors;
253 * If the buffers did not actually switch, then the cross call
254 * did not take place -- presumably because the given CPU is
255 * not in the ready set. If this is the case, we'll return
258 if (buf->dtb_tomax == cached) {
259 ASSERT(buf->dtb_xamot != cached);
260 mutex_exit(&dtrace_lock);
264 ASSERT(cached == buf->dtb_xamot);
266 DTRACE_IOCTL_PRINTF("%s(%d): copyout the buffer snapshot\n",__func__,__LINE__);
269 * We have our snapshot; now copy it out.
271 if (copyout(buf->dtb_xamot, desc.dtbd_data,
272 buf->dtb_xamot_offset) != 0) {
273 mutex_exit(&dtrace_lock);
277 desc.dtbd_size = buf->dtb_xamot_offset;
278 desc.dtbd_drops = buf->dtb_xamot_drops;
279 desc.dtbd_errors = buf->dtb_xamot_errors;
280 desc.dtbd_oldest = 0;
282 mutex_exit(&dtrace_lock);
284 DTRACE_IOCTL_PRINTF("%s(%d): copyout buffer desc: size %zd drops %lu errors %lu\n",__func__,__LINE__,(size_t) desc.dtbd_size,(u_long) desc.dtbd_drops,(u_long) desc.dtbd_errors);
287 * Finally, copy out the buffer description.
289 if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0)
294 case DTRACEIOC_CONF: {
297 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_CONF\n",__func__,__LINE__);
299 bzero(&conf, sizeof (conf));
300 conf.dtc_difversion = DIF_VERSION;
301 conf.dtc_difintregs = DIF_DIR_NREGS;
302 conf.dtc_diftupregs = DIF_DTR_NREGS;
303 conf.dtc_ctfmodel = CTF_MODEL_NATIVE;
305 *((dtrace_conf_t *) addr) = conf;
309 case DTRACEIOC_DOFGET: {
310 dof_hdr_t **pdof = (dof_hdr_t **) addr;
311 dof_hdr_t hdr, *dof = *pdof;
315 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_DOFGET\n",__func__,__LINE__);
317 if (copyin((void *)dof, &hdr, sizeof (hdr)) != 0)
320 mutex_enter(&dtrace_lock);
321 dof = dtrace_dof_create(state);
322 mutex_exit(&dtrace_lock);
324 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
325 rval = copyout(dof, (void *) *pdof, len);
326 dtrace_dof_destroy(dof);
328 return (rval == 0 ? 0 : EFAULT);
330 case DTRACEIOC_ENABLE: {
331 dof_hdr_t *dof = NULL;
332 dtrace_enabling_t *enab = NULL;
333 dtrace_vstate_t *vstate;
336 dtrace_enable_io_t *p = (dtrace_enable_io_t *) addr;
338 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_ENABLE\n",__func__,__LINE__);
341 * If a NULL argument has been passed, we take this as our
342 * cue to reevaluate our enablings.
344 if (p->dof == NULL) {
345 dtrace_enabling_matchall();
350 if ((dof = dtrace_dof_copyin((uintptr_t) p->dof, &rval)) == NULL)
353 mutex_enter(&cpu_lock);
354 mutex_enter(&dtrace_lock);
355 vstate = &state->dts_vstate;
357 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
358 mutex_exit(&dtrace_lock);
359 mutex_exit(&cpu_lock);
360 dtrace_dof_destroy(dof);
364 if (dtrace_dof_slurp(dof, vstate, td->td_ucred, &enab, 0, B_TRUE) != 0) {
365 mutex_exit(&dtrace_lock);
366 mutex_exit(&cpu_lock);
367 dtrace_dof_destroy(dof);
371 if ((rval = dtrace_dof_options(dof, state)) != 0) {
372 dtrace_enabling_destroy(enab);
373 mutex_exit(&dtrace_lock);
374 mutex_exit(&cpu_lock);
375 dtrace_dof_destroy(dof);
379 if ((err = dtrace_enabling_match(enab, &p->n_matched)) == 0) {
380 err = dtrace_enabling_retain(enab);
382 dtrace_enabling_destroy(enab);
385 mutex_exit(&cpu_lock);
386 mutex_exit(&dtrace_lock);
387 dtrace_dof_destroy(dof);
391 case DTRACEIOC_EPROBE: {
392 dtrace_eprobedesc_t **pepdesc = (dtrace_eprobedesc_t **) addr;
393 dtrace_eprobedesc_t epdesc;
395 dtrace_action_t *act;
401 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_EPROBE\n",__func__,__LINE__);
403 if (copyin((void *)*pepdesc, &epdesc, sizeof (epdesc)) != 0)
406 mutex_enter(&dtrace_lock);
408 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
409 mutex_exit(&dtrace_lock);
413 if (ecb->dte_probe == NULL) {
414 mutex_exit(&dtrace_lock);
418 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
419 epdesc.dtepd_uarg = ecb->dte_uarg;
420 epdesc.dtepd_size = ecb->dte_size;
422 nrecs = epdesc.dtepd_nrecs;
423 epdesc.dtepd_nrecs = 0;
424 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
425 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
428 epdesc.dtepd_nrecs++;
432 * Now that we have the size, we need to allocate a temporary
433 * buffer in which to store the complete description. We need
434 * the temporary buffer to be able to drop dtrace_lock()
435 * across the copyout(), below.
437 size = sizeof (dtrace_eprobedesc_t) +
438 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));
440 buf = kmem_alloc(size, KM_SLEEP);
441 dest = (uintptr_t)buf;
443 bcopy(&epdesc, (void *)dest, sizeof (epdesc));
444 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);
446 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
447 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
453 bcopy(&act->dta_rec, (void *)dest,
454 sizeof (dtrace_recdesc_t));
455 dest += sizeof (dtrace_recdesc_t);
458 mutex_exit(&dtrace_lock);
460 if (copyout(buf, (void *) *pepdesc, dest - (uintptr_t)buf) != 0) {
461 kmem_free(buf, size);
465 kmem_free(buf, size);
468 case DTRACEIOC_FORMAT: {
469 dtrace_fmtdesc_t *fmt = (dtrace_fmtdesc_t *) addr;
473 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_FORMAT\n",__func__,__LINE__);
475 mutex_enter(&dtrace_lock);
477 if (fmt->dtfd_format == 0 ||
478 fmt->dtfd_format > state->dts_nformats) {
479 mutex_exit(&dtrace_lock);
484 * Format strings are allocated contiguously and they are
485 * never freed; if a format index is less than the number
486 * of formats, we can assert that the format map is non-NULL
487 * and that the format for the specified index is non-NULL.
489 ASSERT(state->dts_formats != NULL);
490 str = state->dts_formats[fmt->dtfd_format - 1];
493 len = strlen(str) + 1;
495 if (len > fmt->dtfd_length) {
496 fmt->dtfd_length = len;
498 if (copyout(str, fmt->dtfd_string, len) != 0) {
499 mutex_exit(&dtrace_lock);
504 mutex_exit(&dtrace_lock);
509 processorid_t *cpuid = (processorid_t *) addr;
511 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_GO\n",__func__,__LINE__);
513 rval = dtrace_state_go(state, cpuid);
517 case DTRACEIOC_PROBEARG: {
518 dtrace_argdesc_t *desc = (dtrace_argdesc_t *) addr;
519 dtrace_probe_t *probe;
520 dtrace_provider_t *prov;
522 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROBEARG\n",__func__,__LINE__);
524 if (desc->dtargd_id == DTRACE_IDNONE)
527 if (desc->dtargd_ndx == DTRACE_ARGNONE)
530 mutex_enter(&dtrace_provider_lock);
531 mutex_enter(&mod_lock);
532 mutex_enter(&dtrace_lock);
534 if (desc->dtargd_id > dtrace_nprobes) {
535 mutex_exit(&dtrace_lock);
536 mutex_exit(&mod_lock);
537 mutex_exit(&dtrace_provider_lock);
541 if ((probe = dtrace_probes[desc->dtargd_id - 1]) == NULL) {
542 mutex_exit(&dtrace_lock);
543 mutex_exit(&mod_lock);
544 mutex_exit(&dtrace_provider_lock);
548 mutex_exit(&dtrace_lock);
550 prov = probe->dtpr_provider;
552 if (prov->dtpv_pops.dtps_getargdesc == NULL) {
554 * There isn't any typed information for this probe.
555 * Set the argument number to DTRACE_ARGNONE.
557 desc->dtargd_ndx = DTRACE_ARGNONE;
559 desc->dtargd_native[0] = '\0';
560 desc->dtargd_xlate[0] = '\0';
561 desc->dtargd_mapping = desc->dtargd_ndx;
563 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
564 probe->dtpr_id, probe->dtpr_arg, desc);
567 mutex_exit(&mod_lock);
568 mutex_exit(&dtrace_provider_lock);
572 case DTRACEIOC_PROBEMATCH:
573 case DTRACEIOC_PROBES: {
574 dtrace_probedesc_t *p_desc = (dtrace_probedesc_t *) addr;
575 dtrace_probe_t *probe = NULL;
576 dtrace_probekey_t pkey;
583 DTRACE_IOCTL_PRINTF("%s(%d): %s\n",__func__,__LINE__,
584 cmd == DTRACEIOC_PROBEMATCH ?
585 "DTRACEIOC_PROBEMATCH":"DTRACEIOC_PROBES");
587 p_desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
588 p_desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
589 p_desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
590 p_desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
593 * Before we attempt to match this probe, we want to give
594 * all providers the opportunity to provide it.
596 if (p_desc->dtpd_id == DTRACE_IDNONE) {
597 mutex_enter(&dtrace_provider_lock);
598 dtrace_probe_provide(p_desc, NULL);
599 mutex_exit(&dtrace_provider_lock);
603 if (cmd == DTRACEIOC_PROBEMATCH) {
604 dtrace_probekey(p_desc, &pkey);
605 pkey.dtpk_id = DTRACE_IDNONE;
608 dtrace_cred2priv(td->td_ucred, &priv, &uid, &zoneid);
610 mutex_enter(&dtrace_lock);
612 if (cmd == DTRACEIOC_PROBEMATCH) {
613 for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) {
614 if ((probe = dtrace_probes[i - 1]) != NULL &&
615 (m = dtrace_match_probe(probe, &pkey,
616 priv, uid, zoneid)) != 0)
621 mutex_exit(&dtrace_lock);
626 for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) {
627 if ((probe = dtrace_probes[i - 1]) != NULL &&
628 dtrace_match_priv(probe, priv, uid, zoneid))
634 mutex_exit(&dtrace_lock);
638 dtrace_probe_description(probe, p_desc);
639 mutex_exit(&dtrace_lock);
643 case DTRACEIOC_PROVIDER: {
644 dtrace_providerdesc_t *pvd = (dtrace_providerdesc_t *) addr;
645 dtrace_provider_t *pvp;
647 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROVIDER\n",__func__,__LINE__);
649 pvd->dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
650 mutex_enter(&dtrace_provider_lock);
652 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
653 if (strcmp(pvp->dtpv_name, pvd->dtvd_name) == 0)
657 mutex_exit(&dtrace_provider_lock);
662 bcopy(&pvp->dtpv_priv, &pvd->dtvd_priv, sizeof (dtrace_ppriv_t));
663 bcopy(&pvp->dtpv_attr, &pvd->dtvd_attr, sizeof (dtrace_pattr_t));
667 case DTRACEIOC_REPLICATE: {
668 dtrace_repldesc_t *desc = (dtrace_repldesc_t *) addr;
669 dtrace_probedesc_t *match = &desc->dtrpd_match;
670 dtrace_probedesc_t *create = &desc->dtrpd_create;
673 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_REPLICATE\n",__func__,__LINE__);
675 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
676 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
677 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
678 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
680 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
681 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
682 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
683 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
685 mutex_enter(&dtrace_lock);
686 err = dtrace_enabling_replicate(state, match, create);
687 mutex_exit(&dtrace_lock);
691 case DTRACEIOC_STATUS: {
692 dtrace_status_t *stat = (dtrace_status_t *) addr;
693 dtrace_dstate_t *dstate;
697 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STATUS\n",__func__,__LINE__);
700 * See the comment in dtrace_state_deadman() for the reason
701 * for setting dts_laststatus to INT64_MAX before setting
702 * it to the correct value.
704 state->dts_laststatus = INT64_MAX;
705 dtrace_membar_producer();
706 state->dts_laststatus = dtrace_gethrtime();
708 bzero(stat, sizeof (*stat));
710 mutex_enter(&dtrace_lock);
712 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
713 mutex_exit(&dtrace_lock);
717 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
718 stat->dtst_exiting = 1;
720 nerrs = state->dts_errors;
721 dstate = &state->dts_vstate.dtvs_dynvars;
723 for (i = 0; i < NCPU; i++) {
725 if (pcpu_find(i) == NULL)
728 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];
730 stat->dtst_dyndrops += dcpu->dtdsc_drops;
731 stat->dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
732 stat->dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;
734 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
737 nerrs += state->dts_buffer[i].dtb_errors;
739 for (j = 0; j < state->dts_nspeculations; j++) {
740 dtrace_speculation_t *spec;
741 dtrace_buffer_t *buf;
743 spec = &state->dts_speculations[j];
744 buf = &spec->dtsp_buffer[i];
745 stat->dtst_specdrops += buf->dtb_xamot_drops;
749 stat->dtst_specdrops_busy = state->dts_speculations_busy;
750 stat->dtst_specdrops_unavail = state->dts_speculations_unavail;
751 stat->dtst_stkstroverflows = state->dts_stkstroverflows;
752 stat->dtst_dblerrors = state->dts_dblerrors;
754 (state->dts_activity == DTRACE_ACTIVITY_KILLED);
755 stat->dtst_errors = nerrs;
757 mutex_exit(&dtrace_lock);
761 case DTRACEIOC_STOP: {
763 processorid_t *cpuid = (processorid_t *) addr;
765 DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STOP\n",__func__,__LINE__);
767 mutex_enter(&dtrace_lock);
768 rval = dtrace_state_stop(state, cpuid);
769 mutex_exit(&dtrace_lock);