2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
34 #include <sys/mutex.h>
35 #include <sys/sysctl.h>
37 #include "hv_vmbus_priv.h"
/*
 * Number of bytes free for writing in a ring of data size 'z' given the
 * current read index 'r' and write index 'w'.  The whole expansion is
 * parenthesized so the macro is safe inside larger expressions; the
 * original left the ternary bare, so e.g. "MACRO(...) - 1" bound the
 * "- 1" to the else-arm only.
 */
#define HV_BYTES_AVAIL_TO_WRITE(r, w, z)	\
	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
/*
 * sysctl(9) handler: snapshot the ring buffer's indices, interrupt mask
 * and fill levels, format them into a string, and return it via
 * sysctl_handle_string().  arg1 is the hv_vmbus_ring_buffer_info that
 * was registered with SYSCTL_ADD_PROC below.
 * NOTE(review): the return-type line, braces, the rbi_stats buffer
 * declaration and the snprintf format string are not visible in this
 * chunk — confirm against the full file.
 */
44 hv_rbi_sysctl_stats(SYSCTL_HANDLER_ARGS)
46 hv_vmbus_ring_buffer_info* rbi;
47 uint32_t read_index, write_index, interrupt_mask, sz;
48 uint32_t read_avail, write_avail;
/* Take a point-in-time snapshot; the producer/consumer may be running. */
51 rbi = (hv_vmbus_ring_buffer_info*)arg1;
52 read_index = rbi->ring_buffer->read_index;
53 write_index = rbi->ring_buffer->write_index;
54 interrupt_mask = rbi->ring_buffer->interrupt_mask;
55 sz = rbi->ring_data_size;
/* Free space from the macro; bytes pending read are the complement. */
56 write_avail = HV_BYTES_AVAIL_TO_WRITE(read_index,
58 read_avail = sz - write_avail;
59 snprintf(rbi_stats, sizeof(rbi_stats),
65 read_index, write_index, interrupt_mask,
66 read_avail, write_avail);
/* Hand the formatted stats string back to the sysctl framework. */
68 return (sysctl_handle_string(oidp, rbi_stats,
69 sizeof(rbi_stats), req));
/*
 * Register a read-only string sysctl node under 'tree_node' whose value
 * is produced on demand by hv_rbi_sysctl_stats() with 'rbi' as arg1.
 * NOTE(review): the function's name/return-type line and the name/desc
 * parameters are not visible in this chunk — confirm in the full file.
 */
74 struct sysctl_ctx_list *ctx,
75 struct sysctl_oid_list *tree_node,
76 hv_vmbus_ring_buffer_info *rbi,
79 SYSCTL_ADD_PROC(ctx, tree_node, OID_AUTO,
81 CTLTYPE_STRING|CTLFLAG_RD, rbi, 0,
82 hv_rbi_sysctl_stats, "A", desc);
85 * @brief Get number of bytes available to read and to write to
86 * for the specified ring buffer
89 get_ring_buffer_avail_bytes(
90 hv_vmbus_ring_buffer_info* rbi,
94 uint32_t read_loc, write_loc;
97 * Capture the read/write indices before they changed
/* Local copies so both computations use one consistent snapshot. */
99 read_loc = rbi->ring_buffer->read_index;
100 write_loc = rbi->ring_buffer->write_index;
/* Free space via the macro; readable bytes are data_size minus free. */
102 *write = HV_BYTES_AVAIL_TO_WRITE(
103 read_loc, write_loc, rbi->ring_data_size);
104 *read = rbi->ring_data_size - *write;
108 * @brief Get the next write location for the specified ring buffer
110 static inline uint32_t
111 get_next_write_location(hv_vmbus_ring_buffer_info* ring_info)
/* Current write_index is the byte offset where the next write begins.
 * NOTE(review): the return statement is not visible in this chunk. */
113 uint32_t next = ring_info->ring_buffer->write_index;
118 * @brief Set the next write location for the specified ring buffer
121 set_next_write_location(
122 hv_vmbus_ring_buffer_info* ring_info,
123 uint32_t next_write_location)
/* Publishes the new write index to the shared (host-visible) header. */
125 ring_info->ring_buffer->write_index = next_write_location;
129 * @brief Get the next read location for the specified ring buffer
131 static inline uint32_t
132 get_next_read_location(hv_vmbus_ring_buffer_info* ring_info)
/* Current read_index is the byte offset of the next unread byte.
 * NOTE(review): the return statement is not visible in this chunk. */
134 uint32_t next = ring_info->ring_buffer->read_index;
139 * @brief Get the next read location + offset for the specified ring buffer.
140 * This allows the caller to skip.
142 static inline uint32_t
143 get_next_read_location_with_offset(
144 hv_vmbus_ring_buffer_info* ring_info,
/* NOTE(review): the offset parameter and the line adding it to 'next'
 * are not visible in this chunk — confirm against the full file. */
147 uint32_t next = ring_info->ring_buffer->read_index;
/* Wrap the advanced offset back into the data region. */
149 next %= ring_info->ring_data_size;
154 * @brief Set the next read location for the specified ring buffer
157 set_next_read_location(
158 hv_vmbus_ring_buffer_info* ring_info,
159 uint32_t next_read_location)
/* Publishes the new read index, freeing the consumed region for writes. */
161 ring_info->ring_buffer->read_index = next_read_location;
165 * @brief Get the start of the ring buffer
168 get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info)
/* Returns the start of the data area (past the shared header fields). */
170 return (void *) ring_info->ring_buffer->buffer;
174 * @brief Get the size of the ring buffer.
176 static inline uint32_t
177 get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info)
/* Size of the data area only (total ring size minus the header). */
179 return ring_info->ring_data_size;
183 * Get the read and write indices as uint64_t of the specified ring buffer.
185 static inline uint64_t
186 get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info)
/* Packs the write index into the high 32 bits; as visible here only the
 * write index is folded in, leaving the low word zero. */
188 return (uint64_t) ring_info->ring_buffer->write_index << 32;
/*
 * Start of a read batch: mask interrupts so the opposite endpoint does
 * not signal us for every new packet while we drain the ring.
 */
192 hv_ring_buffer_read_begin(
193 hv_vmbus_ring_buffer_info* ring_info)
195 ring_info->ring_buffer->interrupt_mask = 1;
/*
 * End of a read batch: unmask interrupts, then re-check the ring to
 * close the race where data arrived after the last read but before the
 * mask was cleared.
 * NOTE(review): the barrier and the return handling of the re-check are
 * not visible in this chunk.
 */
200 hv_ring_buffer_read_end(
201 hv_vmbus_ring_buffer_info* ring_info)
203 uint32_t read, write;
205 ring_info->ring_buffer->interrupt_mask = 0;
209 * Now check to see if the ring buffer is still empty.
210 * If it is not, we raced and we need to process new
213 get_ring_buffer_avail_bytes(ring_info, &read, &write);
219 * When we write to the ring buffer, check if the host needs to
220 * be signaled. Here is the details of this protocol:
222 * 1. The host guarantees that while it is draining the
223 * ring buffer, it will set the interrupt_mask to
224 * indicate it does not need to be interrupted when
225 * new data is placed.
227 * 2. The host guarantees that it will completely drain
228 * the ring buffer before exiting the read loop. Further,
229 * once the ring buffer is empty, it will clear the
230 * interrupt_mask and re-check to see if new data has
/*
 * Decide whether the host must be signaled after a write.
 * NOTE(review): the return statements (and the barrier body) are not
 * visible in this chunk; the visible tests imply: no signal while the
 * host has interrupts masked, signal only on empty -> non-empty.
 */
234 hv_ring_buffer_needsig_on_write(
235 uint32_t old_write_location,
236 hv_vmbus_ring_buffer_info* rbi)
/* Host is actively draining and asked not to be interrupted. */
239 if (rbi->ring_buffer->interrupt_mask)
242 /* Read memory barrier */
245 * This is the only case we need to signal when the
246 * ring transitions from being empty to non-empty.
248 if (old_write_location == rbi->ring_buffer->read_index)
/*
 * Forward declarations for the wrap-around-aware copy helpers defined
 * at the bottom of this file.  NOTE(review): some parameter lines are
 * not visible in this chunk.
 */
254 static uint32_t copy_to_ring_buffer(
255 hv_vmbus_ring_buffer_info* ring_info,
256 uint32_t start_write_offset,
260 static uint32_t copy_from_ring_buffer(
261 hv_vmbus_ring_buffer_info* ring_info,
264 uint32_t start_read_offset);
268 * @brief Get the interrupt mask for the specified ring buffer.
271 hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi)
/* Plain read of the shared interrupt_mask field. */
273 return rbi->ring_buffer->interrupt_mask;
277 * @brief Initialize the ring buffer.
280 hv_vmbus_ring_buffer_init(
281 hv_vmbus_ring_buffer_info* ring_info,
/* Clear bookkeeping, then overlay the shared header on 'buffer'. */
285 memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info));
287 ring_info->ring_buffer = (hv_vmbus_ring_buffer*) buffer;
288 ring_info->ring_buffer->read_index =
289 ring_info->ring_buffer->write_index = 0;
/* Data area is the buffer minus the header that precedes it. */
291 ring_info->ring_size = buffer_len;
292 ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer);
/* Spin mutex: ring operations also run in interrupt context. */
294 mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN);
300 * @brief Cleanup the ring buffer.
/* Releases the spin lock created in hv_vmbus_ring_buffer_init(). */
302 void hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info)
304 mtx_destroy(&ring_info->ring_lock);
308 * @brief Write to the ring buffer.
/*
 * Copies the scatter/gather buffers plus an 8-byte trailer (the packed
 * previous-write indices) into the ring under the spin lock, then
 * publishes the new write index and reports via *need_sig whether the
 * host must be signaled.
 * NOTE(review): the return statements, the failure return on a full
 * ring, and the pre-publish memory barrier are not visible in this
 * chunk — confirm against the full file.
 */
311 hv_ring_buffer_write(
312 hv_vmbus_ring_buffer_info* out_ring_info,
313 hv_vmbus_sg_buffer_list sg_buffers[],
314 uint32_t sg_buffer_count,
318 uint32_t byte_avail_to_write;
319 uint32_t byte_avail_to_read;
320 uint32_t old_write_location;
321 uint32_t total_bytes_to_write = 0;
323 volatile uint32_t next_write_location;
324 uint64_t prev_indices = 0;
/* Total payload size across all SG fragments ... */
326 for (i = 0; i < sg_buffer_count; i++) {
327 total_bytes_to_write += sg_buffers[i].length;
/* ... plus the 8-byte prev_indices trailer written after the payload. */
330 total_bytes_to_write += sizeof(uint64_t);
332 mtx_lock_spin(&out_ring_info->ring_lock);
334 get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read,
335 &byte_avail_to_write);
338 * If there is only room for the packet, assume it is full.
339 * Otherwise, the next time around, we think the ring buffer
340 * is empty since the read index == write index
/* '<=' (not '<') so the ring never becomes exactly full; see above. */
343 if (byte_avail_to_write <= total_bytes_to_write) {
345 mtx_unlock_spin(&out_ring_info->ring_lock);
350 * Write to the ring buffer
352 next_write_location = get_next_write_location(out_ring_info);
/* Remembered so the empty->non-empty signal test can use it later. */
354 old_write_location = next_write_location;
356 for (i = 0; i < sg_buffer_count; i++) {
357 next_write_location = copy_to_ring_buffer(out_ring_info,
358 next_write_location, (char *) sg_buffers[i].data,
359 sg_buffers[i].length);
363 * Set previous packet start
365 prev_indices = get_ring_buffer_indices(out_ring_info);
367 next_write_location = copy_to_ring_buffer(
368 out_ring_info, next_write_location,
369 (char *) &prev_indices, sizeof(uint64_t));
372 * Full memory barrier before upding the write index.
377 * Now, update the write location
379 set_next_write_location(out_ring_info, next_write_location);
381 mtx_unlock_spin(&out_ring_info->ring_lock);
/* Signal decision is made outside the lock, from the pre-write index. */
383 *need_sig = hv_ring_buffer_needsig_on_write(old_write_location,
390 * @brief Read without advancing the read index.
/*
 * Copies buffer_len bytes from the current read position into 'buffer'
 * without consuming them (read_index is left unchanged).
 * NOTE(review): the function's name/return-type line and the return
 * statements are not visible in this chunk.
 */
394 hv_vmbus_ring_buffer_info* in_ring_info,
398 uint32_t bytesAvailToWrite;
399 uint32_t bytesAvailToRead;
400 uint32_t nextReadLocation = 0;
402 mtx_lock_spin(&in_ring_info->ring_lock);
404 get_ring_buffer_avail_bytes(in_ring_info, &bytesAvailToRead,
408 * Make sure there is something to read
/* Not enough data for the requested length: bail out under the lock. */
410 if (bytesAvailToRead < buffer_len) {
411 mtx_unlock_spin(&in_ring_info->ring_lock);
416 * Convert to byte offset
418 nextReadLocation = get_next_read_location(in_ring_info);
/* Copy out; the advanced offset is discarded, so this is a pure peek. */
420 nextReadLocation = copy_from_ring_buffer(
421 in_ring_info, (char *)buffer, buffer_len, nextReadLocation);
423 mtx_unlock_spin(&in_ring_info->ring_lock);
429 * @brief Read and advance the read index.
/*
 * Consuming read: copies buffer_len bytes (skipping an initial offset),
 * reads the 8-byte prev_indices trailer, then publishes the advanced
 * read index.
 * NOTE(review): the name/return-type line, several argument lines of
 * the copy calls, the barrier body, and the return statements are not
 * visible in this chunk.
 */
433 hv_vmbus_ring_buffer_info* in_ring_info,
438 uint32_t bytes_avail_to_write;
439 uint32_t bytes_avail_to_read;
440 uint32_t next_read_location = 0;
441 uint64_t prev_indices = 0;
446 mtx_lock_spin(&in_ring_info->ring_lock);
448 get_ring_buffer_avail_bytes(
449 in_ring_info, &bytes_avail_to_read,
450 &bytes_avail_to_write);
453 * Make sure there is something to read
/* Not enough data buffered for the request: release lock and bail. */
455 if (bytes_avail_to_read < buffer_len) {
456 mtx_unlock_spin(&in_ring_info->ring_lock);
/* Start past the caller-specified offset (e.g. the packet header). */
460 next_read_location = get_next_read_location_with_offset(
464 next_read_location = copy_from_ring_buffer(
/* Consume the prev_indices trailer written by the producer. */
470 next_read_location = copy_from_ring_buffer(
472 (char *) &prev_indices,
477 * Make sure all reads are done before we update the read index since
478 * the writer may start writing to the read area once the read index
484 * Update the read index
486 set_next_read_location(in_ring_info, next_read_location);
488 mtx_unlock_spin(&in_ring_info->ring_lock);
494 * @brief Helper routine to copy from source to ring buffer.
496 * Assume there is enough room. Handles wrap-around in dest case only!
/*
 * Returns the write offset after the copy, wrapped into the data area.
 * Caller must hold the ring lock and have verified free space.
 * NOTE(review): the signature line, the src/src_len parameter lines,
 * the fragLen declaration, and the else line are not visible here.
 */
500 hv_vmbus_ring_buffer_info* ring_info,
501 uint32_t start_write_offset,
505 char *ring_buffer = get_ring_buffer(ring_info);
506 uint32_t ring_buffer_size = get_ring_buffer_size(ring_info);
/* Copy would run past the end: split into tail + head fragments. */
509 if (src_len > ring_buffer_size - start_write_offset) {
510 /* wrap-around detected! */
511 fragLen = ring_buffer_size - start_write_offset;
512 memcpy(ring_buffer + start_write_offset, src, fragLen);
513 memcpy(ring_buffer, src + fragLen, src_len - fragLen);
515 memcpy(ring_buffer + start_write_offset, src, src_len);
/* Advance and wrap the offset for the caller's next copy. */
518 start_write_offset += src_len;
519 start_write_offset %= ring_buffer_size;
521 return (start_write_offset);
525 * @brief Helper routine to copy to source from ring buffer.
527 * Assume there is enough room. Handles wrap-around in src case only!
/*
 * Returns the read offset after the copy, wrapped into the data area.
 * Caller must hold the ring lock and have verified available data.
 * NOTE(review): the dest/dest_len parameter lines, the fragLen
 * declaration, and the else line are not visible in this chunk.
 */
530 copy_from_ring_buffer(
531 hv_vmbus_ring_buffer_info* ring_info,
534 uint32_t start_read_offset)
537 char *ring_buffer = get_ring_buffer(ring_info);
538 uint32_t ring_buffer_size = get_ring_buffer_size(ring_info);
/* Source region wraps past the end: copy tail then head fragments. */
540 if (dest_len > ring_buffer_size - start_read_offset) {
541 /* wrap-around detected at the src */
542 fragLen = ring_buffer_size - start_read_offset;
543 memcpy(dest, ring_buffer + start_read_offset, fragLen);
544 memcpy(dest + fragLen, ring_buffer, dest_len - fragLen);
546 memcpy(dest, ring_buffer + start_read_offset, dest_len);
/* Advance and wrap the offset for the caller's next copy. */
549 start_read_offset += dest_len;
550 start_read_offset %= ring_buffer_size;
552 return (start_read_offset);