/*-
 * Copyright (c) 2009-2012 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>

#include "hv_vmbus_priv.h"
/* Number of bytes the writer may place into the ring buffer */
#define HV_BYTES_AVAIL_TO_WRITE(r, w, z)        (((w) >= (r)) ? \
                                ((z) - ((w) - (r))) : ((r) - (w)))
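
/*
 * Worked example (illustrative values only): with a data area of
 * z = 4096 bytes, read index r = 100 and write index w = 300, the
 * writer may place z - (w - r) = 3896 bytes; the remaining 200 bytes
 * are pending reads.  When w < r, the free space is simply r - w.
 * The write path below never consumes the final free byte, so a full
 * ring (read index == write index) is never confused with an empty one.
 */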

static int
hv_rbi_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
        hv_vmbus_ring_buffer_info* rbi;
        uint32_t read_index, write_index, interrupt_mask, sz;
        uint32_t read_avail, write_avail;
        char rbi_stats[256];

        rbi = (hv_vmbus_ring_buffer_info*)arg1;
        read_index = rbi->ring_buffer->read_index;
        write_index = rbi->ring_buffer->write_index;
        interrupt_mask = rbi->ring_buffer->interrupt_mask;
        sz = rbi->ring_data_size;
        write_avail = HV_BYTES_AVAIL_TO_WRITE(read_index,
                        write_index, sz);
        read_avail = sz - write_avail;
        /* All fields are uint32_t, so format them as unsigned. */
        snprintf(rbi_stats, sizeof(rbi_stats),
                "r_idx:%u "
                "w_idx:%u "
                "int_mask:%u "
                "r_avail:%u "
                "w_avail:%u",
                read_index, write_index, interrupt_mask,
                read_avail, write_avail);

        return (sysctl_handle_string(oidp, rbi_stats,
                        sizeof(rbi_stats), req));
}

void
hv_ring_buffer_stat(
        struct sysctl_ctx_list          *ctx,
        struct sysctl_oid_list          *tree_node,
        hv_vmbus_ring_buffer_info       *rbi,
        const char                      *desc)
{
        SYSCTL_ADD_PROC(ctx, tree_node, OID_AUTO,
            "ring_buffer_stats",
            CTLTYPE_STRING|CTLFLAG_RD, rbi, 0,
            hv_rbi_sysctl_stats, "A", desc);
}

/**
 * @brief Get the number of bytes available to read from and to write
 * to the specified ring buffer.
 */
static inline void
get_ring_buffer_avail_bytes(
            hv_vmbus_ring_buffer_info*  rbi,
            uint32_t*                   read,
            uint32_t*                   write)
{
        uint32_t read_loc, write_loc;

        /*
         * Capture the read/write indices before they change
         */
        read_loc = rbi->ring_buffer->read_index;
        write_loc = rbi->ring_buffer->write_index;

        *write = HV_BYTES_AVAIL_TO_WRITE(
                read_loc, write_loc, rbi->ring_data_size);
        *read = rbi->ring_data_size - *write;
}

/**
 * @brief Get the next write location for the specified ring buffer
 */
static inline uint32_t
get_next_write_location(hv_vmbus_ring_buffer_info* ring_info)
{
        uint32_t next = ring_info->ring_buffer->write_index;
        return (next);
}

/**
 * @brief Set the next write location for the specified ring buffer
 */
static inline void
set_next_write_location(
        hv_vmbus_ring_buffer_info*      ring_info,
        uint32_t                        next_write_location)
{
        ring_info->ring_buffer->write_index = next_write_location;
}

/**
 * @brief Get the next read location for the specified ring buffer
 */
static inline uint32_t
get_next_read_location(hv_vmbus_ring_buffer_info* ring_info)
{
        uint32_t next = ring_info->ring_buffer->read_index;
        return (next);
}

/**
 * @brief Get the next read location + offset for the specified ring buffer.
 * This allows the caller to skip ahead over data it has already consumed,
 * e.g. a packet descriptor it has previously peeked at.
 */
static inline uint32_t
get_next_read_location_with_offset(
        hv_vmbus_ring_buffer_info*      ring_info,
        uint32_t                        offset)
{
        uint32_t next = ring_info->ring_buffer->read_index;
        next += offset;
        next %= ring_info->ring_data_size;
        return (next);
}
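
/*
 * Example (illustrative values only): with ring_data_size = 4096,
 * read_index = 4000 and offset = 200, the returned location is
 * (4000 + 200) % 4096 = 104, i.e. the offset wraps around the end
 * of the data area.
 */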

/**
 * @brief Set the next read location for the specified ring buffer
 */
static inline void
set_next_read_location(
        hv_vmbus_ring_buffer_info*      ring_info,
        uint32_t                        next_read_location)
{
        ring_info->ring_buffer->read_index = next_read_location;
}

/**
 * @brief Get the start of the ring buffer's data area
 */
static inline void *
get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info)
{
        return ((void *)ring_info->ring_buffer->buffer);
}

/**
 * @brief Get the size of the ring buffer's data area.
 */
static inline uint32_t
get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info)
{
        return (ring_info->ring_data_size);
}

/**
 * Get the read and write indices of the specified ring buffer packed
 * into a single uint64_t.  Only the write index is actually recorded
 * (in the upper 32 bits); the read-index half is left as zero.  This
 * value is appended to each packet as the "previous indices" trailer.
 */
static inline uint64_t
get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info)
{
        return ((uint64_t)ring_info->ring_buffer->write_index << 32);
}
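
/*
 * Example: write_index = 0x00001000 yields 0x0000100000000000; a
 * reader recovering the trailer can shift right by 32 to get back
 * the write index at the time the packet was committed.
 */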

void
hv_ring_buffer_read_begin(
        hv_vmbus_ring_buffer_info*      ring_info)
{
        ring_info->ring_buffer->interrupt_mask = 1;
        mb();
}

uint32_t
hv_ring_buffer_read_end(
        hv_vmbus_ring_buffer_info*      ring_info)
{
        uint32_t read, write;

        ring_info->ring_buffer->interrupt_mask = 0;
        mb();

        /*
         * Now check to see if the ring buffer is still empty.
         * If it is not, we raced and we need to process new
         * incoming messages.
         */
        get_ring_buffer_avail_bytes(ring_info, &read, &write);

        return (read);
}
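
/*
 * Usage sketch (hypothetical consumer loop, for illustration only):
 * mask host interrupts while draining the ring, then unmask and
 * re-check so that a message arriving in the race window is not lost.
 * The helpers named below are assumptions, not this driver's API:
 *
 *      hv_ring_buffer_read_begin(&rbi);
 *      do {
 *              while (packet_available(&rbi))    // hypothetical helper
 *                      consume_one_packet(&rbi); // hypothetical helper
 *      } while (hv_ring_buffer_read_end(&rbi) != 0);
 */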

/*
 * When we write to the ring buffer, check if the host needs to
 * be signaled.  Here are the details of this protocol:
 *
 *      1. The host guarantees that while it is draining the
 *         ring buffer, it will set the interrupt_mask to
 *         indicate it does not need to be interrupted when
 *         new data is placed.
 *
 *      2. The host guarantees that it will completely drain
 *         the ring buffer before exiting the read loop.  Further,
 *         once the ring buffer is empty, it will clear the
 *         interrupt_mask and re-check to see if new data has
 *         arrived.
 */
static boolean_t
hv_ring_buffer_needsig_on_write(
        uint32_t                        old_write_location,
        hv_vmbus_ring_buffer_info*      rbi)
{
        mb();
        if (rbi->ring_buffer->interrupt_mask)
                return (FALSE);

        /* Read memory barrier */
        rmb();
        /*
         * We need to signal only when the ring transitions from
         * being empty to non-empty, i.e. when the old write location
         * equals the current read index.
         */
        if (old_write_location == rbi->ring_buffer->read_index)
                return (TRUE);

        return (FALSE);
}

static uint32_t copy_to_ring_buffer(
                        hv_vmbus_ring_buffer_info*      ring_info,
                        uint32_t                        start_write_offset,
                        char*                           src,
                        uint32_t                        src_len);

static uint32_t copy_from_ring_buffer(
                        hv_vmbus_ring_buffer_info*      ring_info,
                        char*                           dest,
                        uint32_t                        dest_len,
                        uint32_t                        start_read_offset);

/**
 * @brief Get the interrupt mask for the specified ring buffer.
 */
uint32_t
hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi)
{
        return (rbi->ring_buffer->interrupt_mask);
}

/**
 * @brief Initialize the ring buffer.
 *
 * The supplied buffer holds the shared hv_vmbus_ring_buffer control
 * structure at its start; the remainder is the data area, so
 * buffer_len must be larger than sizeof(hv_vmbus_ring_buffer).
 */
int
hv_vmbus_ring_buffer_init(
        hv_vmbus_ring_buffer_info*      ring_info,
        void*                           buffer,
        uint32_t                        buffer_len)
{
        memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info));

        ring_info->ring_buffer = (hv_vmbus_ring_buffer*) buffer;
        ring_info->ring_buffer->read_index =
            ring_info->ring_buffer->write_index = 0;

        ring_info->ring_size = buffer_len;
        ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer);

        mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN);

        return (0);
}
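
/*
 * Usage sketch (hypothetical caller, for illustration only): lay a
 * ring over a freshly allocated, physically contiguous buffer.  The
 * allocation call and sizes are assumptions, not taken from this
 * driver:
 *
 *      void *buf;
 *      hv_vmbus_ring_buffer_info rbi;
 *
 *      buf = contigmalloc(4 * PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO,
 *          0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
 *      hv_vmbus_ring_buffer_init(&rbi, buf, 4 * PAGE_SIZE);
 *      ...
 *      hv_ring_buffer_cleanup(&rbi);
 *      contigfree(buf, 4 * PAGE_SIZE, M_DEVBUF);
 */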

/**
 * @brief Cleanup the ring buffer.
 */
void
hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info)
{
        mtx_destroy(&ring_info->ring_lock);
}

/**
 * @brief Write to the ring buffer.
 */
int
hv_ring_buffer_write(
        hv_vmbus_ring_buffer_info*      out_ring_info,
        hv_vmbus_sg_buffer_list         sg_buffers[],
        uint32_t                        sg_buffer_count,
        boolean_t                       *need_sig)
{
        uint32_t i;
        uint32_t byte_avail_to_write;
        uint32_t byte_avail_to_read;
        uint32_t old_write_location;
        uint32_t total_bytes_to_write = 0;

        volatile uint32_t next_write_location;
        uint64_t prev_indices = 0;

        for (i = 0; i < sg_buffer_count; i++) {
            total_bytes_to_write += sg_buffers[i].length;
        }

        /* Account for the "previous indices" trailer appended below. */
        total_bytes_to_write += sizeof(uint64_t);

        mtx_lock_spin(&out_ring_info->ring_lock);

        get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read,
            &byte_avail_to_write);

        /*
         * Refuse to fill the ring completely: if the packet consumed
         * every free byte, read index would equal write index and a
         * full ring would be indistinguishable from an empty one.
         */
        if (byte_avail_to_write <= total_bytes_to_write) {
            mtx_unlock_spin(&out_ring_info->ring_lock);
            return (EAGAIN);
        }

        /*
         * Write to the ring buffer
         */
        next_write_location = get_next_write_location(out_ring_info);

        old_write_location = next_write_location;

        for (i = 0; i < sg_buffer_count; i++) {
            next_write_location = copy_to_ring_buffer(out_ring_info,
                next_write_location, (char *) sg_buffers[i].data,
                sg_buffers[i].length);
        }

        /*
         * Set previous packet start
         */
        prev_indices = get_ring_buffer_indices(out_ring_info);

        next_write_location = copy_to_ring_buffer(
                out_ring_info, next_write_location,
                (char *) &prev_indices, sizeof(uint64_t));

        /*
         * Full memory barrier before updating the write index.
         */
        mb();

        /*
         * Now, update the write location
         */
        set_next_write_location(out_ring_info, next_write_location);

        mtx_unlock_spin(&out_ring_info->ring_lock);

        *need_sig = hv_ring_buffer_needsig_on_write(old_write_location,
            out_ring_info);

        return (0);
}
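
/*
 * Usage sketch (hypothetical caller, for illustration only): a sender
 * typically passes the packet descriptor and its payload as separate
 * scatter/gather elements.  The names below are assumptions, not this
 * driver's API:
 *
 *      hv_vmbus_sg_buffer_list sg[2];
 *      boolean_t need_sig;
 *      int ret;
 *
 *      sg[0].data = &desc;    sg[0].length = sizeof(desc);
 *      sg[1].data = payload;  sg[1].length = payload_len;
 *      ret = hv_ring_buffer_write(&outbound, sg, 2, &need_sig);
 *      if (ret == 0 && need_sig)
 *              signal_host(channel); // hypothetical notification call
 */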

/**
 * @brief Read without advancing the read index.
 */
int
hv_ring_buffer_peek(
        hv_vmbus_ring_buffer_info*      in_ring_info,
        void*                           buffer,
        uint32_t                        buffer_len)
{
        uint32_t bytes_avail_to_write;
        uint32_t bytes_avail_to_read;
        uint32_t next_read_location = 0;

        mtx_lock_spin(&in_ring_info->ring_lock);

        get_ring_buffer_avail_bytes(in_ring_info, &bytes_avail_to_read,
                &bytes_avail_to_write);

        /*
         * Make sure there is something to read
         */
        if (bytes_avail_to_read < buffer_len) {
            mtx_unlock_spin(&in_ring_info->ring_lock);
            return (EAGAIN);
        }

        /*
         * Copy from the current read location without moving it
         */
        next_read_location = get_next_read_location(in_ring_info);

        next_read_location = copy_from_ring_buffer(
                in_ring_info, (char *)buffer, buffer_len, next_read_location);

        mtx_unlock_spin(&in_ring_info->ring_lock);

        return (0);
}

/**
 * @brief Read and advance the read index.
 */
int
hv_ring_buffer_read(
        hv_vmbus_ring_buffer_info*      in_ring_info,
        void*                           buffer,
        uint32_t                        buffer_len,
        uint32_t                        offset)
{
        uint32_t bytes_avail_to_write;
        uint32_t bytes_avail_to_read;
        uint32_t next_read_location = 0;
        uint64_t prev_indices = 0;

        if (buffer_len == 0)
            return (EINVAL);

        mtx_lock_spin(&in_ring_info->ring_lock);

        get_ring_buffer_avail_bytes(
            in_ring_info, &bytes_avail_to_read,
            &bytes_avail_to_write);

        /*
         * Make sure there is something to read
         */
        if (bytes_avail_to_read < buffer_len) {
            mtx_unlock_spin(&in_ring_info->ring_lock);
            return (EAGAIN);
        }

        /* Skip over `offset' bytes, e.g. a descriptor already peeked at. */
        next_read_location = get_next_read_location_with_offset(
            in_ring_info,
            offset);

        next_read_location = copy_from_ring_buffer(
            in_ring_info,
            (char *) buffer,
            buffer_len,
            next_read_location);

        /* Consume the "previous indices" trailer appended by the writer. */
        next_read_location = copy_from_ring_buffer(
            in_ring_info,
            (char *) &prev_indices,
            sizeof(uint64_t),
            next_read_location);

        /*
         * Make sure all reads are done before we update the read index,
         * since the writer may start writing to the read area once the
         * read index is updated.  A write barrier alone would not order
         * the preceding loads against the index store, so a full barrier
         * is used here.
         */
        mb();

        /*
         * Update the read index
         */
        set_next_read_location(in_ring_info, next_read_location);

        mtx_unlock_spin(&in_ring_info->ring_lock);

        return (0);
}
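
/*
 * Usage sketch (hypothetical receiver, for illustration only): peek at
 * the fixed-size descriptor first, then read the payload, skipping the
 * bytes already examined via `offset'.  The descriptor type and its
 * length field are assumptions, not taken from this file:
 *
 *      struct pkt_desc desc;               // hypothetical descriptor
 *
 *      if (hv_ring_buffer_peek(&inbound, &desc, sizeof(desc)) == 0) {
 *              uint32_t len = desc.length; // hypothetical field
 *              hv_ring_buffer_read(&inbound, payload_buf,
 *                  len - sizeof(desc), sizeof(desc));
 *      }
 */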

/**
 * @brief Helper routine to copy from source to ring buffer.
 *
 * Assumes there is enough room; handles wrap-around of the destination
 * (ring) only.
 */
static uint32_t
copy_to_ring_buffer(
        hv_vmbus_ring_buffer_info*      ring_info,
        uint32_t                        start_write_offset,
        char*                           src,
        uint32_t                        src_len)
{
        char *ring_buffer = get_ring_buffer(ring_info);
        uint32_t ring_buffer_size = get_ring_buffer_size(ring_info);
        uint32_t frag_len;

        if (src_len > ring_buffer_size - start_write_offset) {
            /* wrap-around detected! */
            frag_len = ring_buffer_size - start_write_offset;
            memcpy(ring_buffer + start_write_offset, src, frag_len);
            memcpy(ring_buffer, src + frag_len, src_len - frag_len);
        } else {
            memcpy(ring_buffer + start_write_offset, src, src_len);
        }

        start_write_offset += src_len;
        start_write_offset %= ring_buffer_size;

        return (start_write_offset);
}
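
/*
 * Worked example (illustrative values only): with ring_buffer_size =
 * 4096, start_write_offset = 4000 and src_len = 200, the copy wraps:
 * the first 96 bytes land at offsets 4000..4095 and the remaining 104
 * bytes at offsets 0..103; the returned offset is (4000 + 200) % 4096
 * = 104.
 */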

/**
 * @brief Helper routine to copy from the ring buffer to a destination
 * buffer.
 *
 * Assumes there is enough data; handles wrap-around of the source
 * (ring) only.
 */
static uint32_t
copy_from_ring_buffer(
        hv_vmbus_ring_buffer_info*      ring_info,
        char*                           dest,
        uint32_t                        dest_len,
        uint32_t                        start_read_offset)
{
        uint32_t frag_len;
        char *ring_buffer = get_ring_buffer(ring_info);
        uint32_t ring_buffer_size = get_ring_buffer_size(ring_info);

        if (dest_len > ring_buffer_size - start_read_offset) {
            /* wrap-around detected at the src */
            frag_len = ring_buffer_size - start_read_offset;
            memcpy(dest, ring_buffer + start_read_offset, frag_len);
            memcpy(dest + frag_len, ring_buffer, dest_len - frag_len);
        } else {
            memcpy(dest, ring_buffer + start_read_offset, dest_len);
        }

        start_read_offset += dest_len;
        start_read_offset %= ring_buffer_size;

        return (start_read_offset);
}