2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
34 #include <sys/mutex.h>
35 #include <sys/sysctl.h>
37 #include <dev/hyperv/vmbus/vmbus_reg.h>
38 #include <dev/hyperv/vmbus/vmbus_brvar.h>
/*
 * Amount of space available for write, given the read index (r),
 * write index (w), and ring data size (z).  One slot is implicitly
 * reserved: w == r means "empty", so writers must never fill the
 * ring completely.
 */
#define VMBUS_BR_WAVAIL(r, w, z)	\
	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))

/* Advance a bufring index by `inc`, wrapping at the ring size `sz`. */
#define VMBUS_BR_IDXINC(idx, inc, sz)	(((idx) + (inc)) % (sz))
/* Forward declarations: sysctl handlers and the common bufring setup. */
static int	vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS);
static int	vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS);
static void	vmbus_br_setup(struct vmbus_br *, void *, int);
52 vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS)
54 const struct vmbus_br *br = arg1;
55 uint32_t rindex, windex, imask, psndsz, fvalue, ravail, wavail;
59 intrcnt = br->vbr_intrcnt;
60 rindex = br->vbr_rindex;
61 windex = br->vbr_windex;
62 imask = br->vbr_imask;
63 psndsz = br->vbr_psndsz;
64 fvalue = br->vbr_fvalue;
65 wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize);
66 ravail = br->vbr_dsize - wavail;
68 snprintf(state, sizeof(state),
69 "intrcnt:%ju rindex:%u windex:%u imask:%u psndsz:%u fvalue:%u "
70 "ravail:%u wavail:%u",
71 (uintmax_t)intrcnt, rindex, windex, imask, psndsz, fvalue,
73 return sysctl_handle_string(oidp, state, sizeof(state), req);
77 * Binary bufring states.
80 vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS)
82 #define BR_STATE_RIDX 0
83 #define BR_STATE_WIDX 1
84 #define BR_STATE_IMSK 2
85 #define BR_STATE_PSSZ 3
86 #define BR_STATE_FVAL 4
87 #define BR_STATE_RSPC 5
88 #define BR_STATE_WSPC 6
89 #define BR_STATE_MAX 7
91 const struct vmbus_br *br = arg1;
92 uint32_t rindex, windex, wavail, state[BR_STATE_MAX];
94 rindex = br->vbr_rindex;
95 windex = br->vbr_windex;
96 wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize);
98 state[BR_STATE_RIDX] = rindex;
99 state[BR_STATE_WIDX] = windex;
100 state[BR_STATE_IMSK] = br->vbr_imask;
101 state[BR_STATE_PSSZ] = br->vbr_psndsz;
102 state[BR_STATE_FVAL] = br->vbr_fvalue;
103 state[BR_STATE_WSPC] = wavail;
104 state[BR_STATE_RSPC] = br->vbr_dsize - wavail;
106 return sysctl_handle_opaque(oidp, state, sizeof(state), req);
110 vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx, struct sysctl_oid *br_tree,
111 struct vmbus_br *br, const char *name)
113 struct sysctl_oid *tree;
116 tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(br_tree), OID_AUTO,
117 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
121 snprintf(desc, sizeof(desc), "%s state", name);
122 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state",
123 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
124 br, 0, vmbus_br_sysctl_state, "A", desc);
126 snprintf(desc, sizeof(desc), "%s binary state", name);
127 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state_bin",
128 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
129 br, 0, vmbus_br_sysctl_state_bin, "IU", desc);
133 vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr)
139 static __inline uint32_t
140 vmbus_rxbr_avail(const struct vmbus_rxbr *rbr)
142 uint32_t rindex, windex;
145 rindex = rbr->rxbr_rindex;
146 windex = rbr->rxbr_windex;
148 return (rbr->rxbr_dsize -
149 VMBUS_BR_WAVAIL(rindex, windex, rbr->rxbr_dsize));
153 vmbus_rxbr_available(const struct vmbus_rxbr *rbr)
155 return (vmbus_rxbr_avail(rbr));
159 vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr)
165 * Now check to see if the ring buffer is still empty.
166 * If it is not, we raced and we need to process new
167 * incoming channel packets.
169 return vmbus_rxbr_avail(rbr);
173 vmbus_br_setup(struct vmbus_br *br, void *buf, int blen)
176 br->vbr_dsize = blen - sizeof(struct vmbus_bufring);
180 vmbus_rxbr_init(struct vmbus_rxbr *rbr)
182 mtx_init(&rbr->rxbr_lock, "vmbus_rxbr", NULL, MTX_SPIN);
186 vmbus_rxbr_deinit(struct vmbus_rxbr *rbr)
188 mtx_destroy(&rbr->rxbr_lock);
192 vmbus_rxbr_setup(struct vmbus_rxbr *rbr, void *buf, int blen)
194 vmbus_br_setup(&rbr->rxbr, buf, blen);
197 static __inline boolean_t
198 vmbus_rxbr_need_signal(const struct vmbus_rxbr *rbr, uint32_t bytes_read)
200 uint32_t pending_snd_sz, canwrite_size;
202 /* No need to signal if host doesn't want us to */
203 if (!rbr->rxbr_fpsndsz)
208 pending_snd_sz = rbr->rxbr_psndsz;
209 /* No need to signal if host sets pending_snd_sz to 0 */
215 canwrite_size = rbr->rxbr_dsize - vmbus_rxbr_avail(rbr);
217 /* No need to signal if br already has enough space before read */
218 if (canwrite_size - bytes_read > pending_snd_sz)
222 * No need to signal if still doesn't have enough space
225 if (canwrite_size <= pending_snd_sz)
232 vmbus_txbr_init(struct vmbus_txbr *tbr)
234 mtx_init(&tbr->txbr_lock, "vmbus_txbr", NULL, MTX_SPIN);
238 vmbus_txbr_deinit(struct vmbus_txbr *tbr)
240 mtx_destroy(&tbr->txbr_lock);
244 vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen)
246 vmbus_br_setup(&tbr->txbr, buf, blen);
248 /* Set feature bit enabling flow control */
249 tbr->txbr_fpsndsz = 1;
253 vmbus_txbr_get_imask(const struct vmbus_txbr *tbr)
257 return(tbr->txbr_imask);
261 vmbus_txbr_set_pending_snd_sz(struct vmbus_txbr *tbr, uint32_t size)
263 tbr->txbr_psndsz = size;
267 * When we write to the ring buffer, check if the host needs to be
271 * - The host guarantees that while it is draining the TX bufring,
272 * it will set the br_imask to indicate it does not need to be
273 * interrupted when new data are added.
274 * - The host guarantees that it will completely drain the TX bufring
275 * before exiting the read loop. Further, once the TX bufring is
276 * empty, it will clear the br_imask and re-check to see if new
279 static __inline boolean_t
280 vmbus_txbr_need_signal(const struct vmbus_txbr *tbr, uint32_t old_windex)
289 * This is the only case we need to signal when the
290 * ring transitions from being empty to non-empty.
292 if (old_windex == tbr->txbr_rindex)
298 static __inline uint32_t
299 vmbus_txbr_avail(const struct vmbus_txbr *tbr)
301 uint32_t rindex, windex;
304 rindex = tbr->txbr_rindex;
305 windex = tbr->txbr_windex;
307 return VMBUS_BR_WAVAIL(rindex, windex, tbr->txbr_dsize);
310 static __inline uint32_t
311 vmbus_txbr_copyto(const struct vmbus_txbr *tbr, uint32_t windex,
312 const void *src0, uint32_t cplen)
314 const uint8_t *src = src0;
315 uint8_t *br_data = tbr->txbr_data;
316 uint32_t br_dsize = tbr->txbr_dsize;
318 if (cplen > br_dsize - windex) {
319 uint32_t fraglen = br_dsize - windex;
321 /* Wrap-around detected */
322 memcpy(br_data + windex, src, fraglen);
323 memcpy(br_data, src + fraglen, cplen - fraglen);
325 memcpy(br_data + windex, src, cplen);
327 return VMBUS_BR_IDXINC(windex, cplen, br_dsize);
330 static __inline uint32_t
331 vmbus_txbr_copyto_call(const struct vmbus_txbr *tbr, uint32_t windex,
332 uint32_t cplen, vmbus_br_copy_callback_t cb, void *cbarg, int *ret)
334 uint8_t *br_data = tbr->txbr_data;
335 uint32_t br_dsize = tbr->txbr_dsize;
338 if (cplen > br_dsize - windex) {
339 uint32_t fraglen = br_dsize - windex;
341 /* Wrap-around detected */
342 err = cb((void *)(br_data + windex), fraglen, cbarg);
344 err = cb((void *)br_data, cplen - fraglen, cbarg);
346 err = cb((void *)(br_data + windex), cplen, cbarg);
351 return VMBUS_BR_IDXINC(windex, cplen, br_dsize);
355 vmbus_txbr_available(const struct vmbus_txbr *tbr)
357 return (vmbus_txbr_avail(tbr));
362 * Not holding lock when calling user provided callback routine.
363 * Caller should hold lock to serialize ring buffer accesses.
366 vmbus_txbr_write_call(struct vmbus_txbr *tbr,
367 const struct iovec iov[], int iovlen,
368 vmbus_br_copy_callback_t cb, void *cbarg,
371 uint32_t old_windex, windex, total;
372 uint64_t save_windex;
377 for (i = 0; i < iovlen; i++)
378 total += iov[i].iov_len;
379 total += sizeof(save_windex);
384 * If this write is going to make br_windex same as br_rindex,
385 * i.e. the available space for write is same as the write size,
386 * we can't do it then, since br_windex == br_rindex means that
387 * the bufring is empty.
389 if (vmbus_txbr_avail(tbr) <= total) {
393 /* Save br_windex for later use */
394 old_windex = tbr->txbr_windex;
397 * Copy the scattered channel packet to the TX bufring.
400 for (i = 0; i < iovlen; i++) {
401 if (iov[i].iov_base != NULL) {
402 windex = vmbus_txbr_copyto(tbr, windex,
403 iov[i].iov_base, iov[i].iov_len);
404 } else if (cb != NULL) {
405 windex = vmbus_txbr_copyto_call(tbr, windex,
406 iov[i].iov_len, cb, cbarg, &cb_ret);
408 * If callback fails, return without updating
416 mtx_lock_spin(&tbr->txbr_lock);
419 * Set the offset of the current channel packet.
421 save_windex = ((uint64_t)old_windex) << 32;
422 windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
423 sizeof(save_windex));
426 * Update the write index _after_ the channel packet
430 tbr->txbr_windex = windex;
432 mtx_unlock_spin(&tbr->txbr_lock);
435 *need_sig = vmbus_txbr_need_signal(tbr, old_windex);
441 * Write scattered channel packet to TX bufring.
443 * The offset of this channel packet is written as a 64bits value
444 * immediately after this channel packet.
447 vmbus_txbr_write(struct vmbus_txbr *tbr, const struct iovec iov[], int iovlen,
450 uint32_t old_windex, windex, total;
451 uint64_t save_windex;
455 for (i = 0; i < iovlen; i++)
456 total += iov[i].iov_len;
457 total += sizeof(save_windex);
459 mtx_lock_spin(&tbr->txbr_lock);
463 * If this write is going to make br_windex same as br_rindex,
464 * i.e. the available space for write is same as the write size,
465 * we can't do it then, since br_windex == br_rindex means that
466 * the bufring is empty.
468 if (vmbus_txbr_avail(tbr) <= total) {
469 mtx_unlock_spin(&tbr->txbr_lock);
473 /* Save br_windex for later use */
474 old_windex = tbr->txbr_windex;
477 * Copy the scattered channel packet to the TX bufring.
480 for (i = 0; i < iovlen; i++) {
481 windex = vmbus_txbr_copyto(tbr, windex,
482 iov[i].iov_base, iov[i].iov_len);
486 * Set the offset of the current channel packet.
488 save_windex = ((uint64_t)old_windex) << 32;
489 windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
490 sizeof(save_windex));
493 * Update the write index _after_ the channel packet
497 tbr->txbr_windex = windex;
499 mtx_unlock_spin(&tbr->txbr_lock);
501 *need_sig = vmbus_txbr_need_signal(tbr, old_windex);
506 static __inline uint32_t
507 vmbus_rxbr_copyfrom(const struct vmbus_rxbr *rbr, uint32_t rindex,
508 void *dst0, int cplen)
511 const uint8_t *br_data = rbr->rxbr_data;
512 uint32_t br_dsize = rbr->rxbr_dsize;
514 if (cplen > br_dsize - rindex) {
515 uint32_t fraglen = br_dsize - rindex;
517 /* Wrap-around detected. */
518 memcpy(dst, br_data + rindex, fraglen);
519 memcpy(dst + fraglen, br_data, cplen - fraglen);
521 memcpy(dst, br_data + rindex, cplen);
523 return VMBUS_BR_IDXINC(rindex, cplen, br_dsize);
526 static __inline uint32_t
527 vmbus_rxbr_copyfrom_call(const struct vmbus_rxbr *rbr, uint32_t rindex,
528 int cplen, vmbus_br_copy_callback_t cb, void *cbarg)
530 uint8_t *br_data = rbr->rxbr_data;
531 uint32_t br_dsize = rbr->rxbr_dsize;
534 if (cplen > br_dsize - rindex) {
535 uint32_t fraglen = br_dsize - rindex;
537 /* Wrap-around detected. */
538 error = cb((void *)(br_data + rindex), fraglen, cbarg);
540 error = cb((void *)br_data, cplen - fraglen, cbarg);
542 error = cb((void *)(br_data + rindex), cplen, cbarg);
548 vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen)
550 mtx_lock_spin(&rbr->rxbr_lock);
553 * The requested data and the 64bits channel packet
554 * offset should be there at least.
556 if (vmbus_rxbr_avail(rbr) < dlen + sizeof(uint64_t)) {
557 mtx_unlock_spin(&rbr->rxbr_lock);
560 vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);
562 mtx_unlock_spin(&rbr->rxbr_lock);
569 * We only hold spin lock to check the ring buffer space. It is
570 * released before calling user provided callback routine.
571 * Caller should hold lock to serialize ring buffer accesses.
574 vmbus_rxbr_peek_call(struct vmbus_rxbr *rbr, int dlen, uint32_t skip,
575 vmbus_br_copy_callback_t cb, void *cbarg)
577 uint32_t rindex, br_dsize0 = rbr->rxbr_dsize;
580 mtx_lock_spin(&rbr->rxbr_lock);
582 * The requested data + skip and the 64bits channel packet
583 * offset should be there at least.
585 if (vmbus_rxbr_avail(rbr) < skip + dlen + sizeof(uint64_t)) {
586 mtx_unlock_spin(&rbr->rxbr_lock);
590 rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, br_dsize0);
591 mtx_unlock_spin(&rbr->rxbr_lock);
593 ret = vmbus_rxbr_copyfrom_call(rbr, rindex, dlen, cb, cbarg);
600 * We assume idx_adv == sizeof(channel packet).
603 vmbus_rxbr_idxadv_peek(struct vmbus_rxbr *rbr, void *data, int dlen,
604 uint32_t idx_adv, boolean_t *need_sig)
606 uint32_t rindex, br_dsize = rbr->rxbr_dsize;
608 mtx_lock_spin(&rbr->rxbr_lock);
610 * Make sure it has enough data to read.
612 if (vmbus_rxbr_avail(rbr) < idx_adv + sizeof(uint64_t) + dlen) {
613 mtx_unlock_spin(&rbr->rxbr_lock);
619 * Advance the read index first, including the channel's 64bit
620 * previous write offset.
622 rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex,
623 idx_adv + sizeof(uint64_t), br_dsize);
625 rbr->rxbr_rindex = rindex;
628 vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);
630 mtx_unlock_spin(&rbr->rxbr_lock);
635 vmbus_rxbr_need_signal(rbr, idx_adv +
646 * Just update the RX rb index.
649 vmbus_rxbr_idxadv(struct vmbus_rxbr *rbr, uint32_t idx_adv,
652 uint32_t rindex, br_dsize = rbr->rxbr_dsize;
654 mtx_lock_spin(&rbr->rxbr_lock);
656 * Make sure it has enough space to advance.
658 if (vmbus_rxbr_avail(rbr) < idx_adv + sizeof(uint64_t)) {
659 mtx_unlock_spin(&rbr->rxbr_lock);
664 * Advance the read index, including the channel's 64bit
665 * previous write offset.
667 rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex,
668 idx_adv + sizeof(uint64_t), br_dsize);
670 rbr->rxbr_rindex = rindex;
672 mtx_unlock_spin(&rbr->rxbr_lock);
676 vmbus_rxbr_need_signal(rbr, idx_adv + sizeof(uint64_t));
684 * We assume (dlen + skip) == sizeof(channel packet).
687 vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen, uint32_t skip)
689 uint32_t rindex, br_dsize = rbr->rxbr_dsize;
691 KASSERT(dlen + skip > 0, ("invalid dlen %d, offset %u", dlen, skip));
693 mtx_lock_spin(&rbr->rxbr_lock);
695 if (vmbus_rxbr_avail(rbr) < dlen + skip + sizeof(uint64_t)) {
696 mtx_unlock_spin(&rbr->rxbr_lock);
701 * Copy channel packet from RX bufring.
703 rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, br_dsize);
704 rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);
707 * Discard this channel packet's 64bits offset, which is useless to us.
709 rindex = VMBUS_BR_IDXINC(rindex, sizeof(uint64_t), br_dsize);
712 * Update the read index _after_ the channel packet is fetched.
715 rbr->rxbr_rindex = rindex;
717 mtx_unlock_spin(&rbr->rxbr_lock);