/*-
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>

#include "hv_vmbus_priv.h"

/* Amount of space available to write to */
#define HV_BYTES_AVAIL_TO_WRITE(r, w, z)                        \
        (((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
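
/*
 * Worked example (illustrative numbers, not from the driver): with a data
 * region of z = 4096 bytes, r = 1000 and w = 3000 means the writer has not
 * wrapped, so z - (w - r) = 2096 bytes may still be written.  With r = 3000
 * and w = 1000 the writer has wrapped past the end of the region, and
 * r - w = 2000 bytes remain.  In both cases the bytes available to read
 * are z minus the bytes available to write.
 */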

static int
hv_rbi_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
        hv_vmbus_ring_buffer_info* rbi;
        uint32_t read_index, write_index, interrupt_mask, sz;
        uint32_t read_avail, write_avail;
        char rbi_stats[256];

        rbi = (hv_vmbus_ring_buffer_info*)arg1;
        read_index = rbi->ring_buffer->read_index;
        write_index = rbi->ring_buffer->write_index;
        interrupt_mask = rbi->ring_buffer->interrupt_mask;
        sz = rbi->ring_data_size;
        write_avail = HV_BYTES_AVAIL_TO_WRITE(read_index,
                        write_index, sz);
        read_avail = sz - write_avail;
        snprintf(rbi_stats, sizeof(rbi_stats),
                "r_idx:%u "
                "w_idx:%u "
                "int_mask:%u "
                "r_avail:%u "
                "w_avail:%u",
                read_index, write_index, interrupt_mask,
                read_avail, write_avail);

        return (sysctl_handle_string(oidp, rbi_stats,
                        sizeof(rbi_stats), req));
}

void
hv_ring_buffer_stat(
        struct sysctl_ctx_list          *ctx,
        struct sysctl_oid_list          *tree_node,
        hv_vmbus_ring_buffer_info       *rbi,
        const char                      *desc)
{
        SYSCTL_ADD_PROC(ctx, tree_node, OID_AUTO,
            "ring_buffer_stats",
            CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, rbi, 0,
            hv_rbi_sysctl_stats, "A", desc);
}
/**
 * @brief Get the number of bytes available to read from and to write to
 * the specified ring buffer.
 */
static inline void
get_ring_buffer_avail_bytes(
            hv_vmbus_ring_buffer_info*  rbi,
            uint32_t*                   read,
            uint32_t*                   write)
{
        uint32_t read_loc, write_loc;

        /*
         * Capture the read/write indices before they change
         */
        read_loc = rbi->ring_buffer->read_index;
        write_loc = rbi->ring_buffer->write_index;

        *write = HV_BYTES_AVAIL_TO_WRITE(
                read_loc, write_loc, rbi->ring_data_size);
        *read = rbi->ring_data_size - *write;
}

/**
 * @brief Get the next write location for the specified ring buffer
 */
static inline uint32_t
get_next_write_location(hv_vmbus_ring_buffer_info* ring_info)
{
        uint32_t next = ring_info->ring_buffer->write_index;
        return (next);
}

/**
 * @brief Set the next write location for the specified ring buffer
 */
static inline void
set_next_write_location(
        hv_vmbus_ring_buffer_info*      ring_info,
        uint32_t                        next_write_location)
{
        ring_info->ring_buffer->write_index = next_write_location;
}

/**
 * @brief Get the next read location for the specified ring buffer
 */
static inline uint32_t
get_next_read_location(hv_vmbus_ring_buffer_info* ring_info)
{
        uint32_t next = ring_info->ring_buffer->read_index;
        return (next);
}

/**
 * @brief Get the next read location + offset for the specified ring buffer.
 * This allows the caller to skip over data it has already consumed,
 * such as a packet descriptor obtained via hv_ring_buffer_peek().
 */
static inline uint32_t
get_next_read_location_with_offset(
        hv_vmbus_ring_buffer_info*      ring_info,
        uint32_t                        offset)
{
        uint32_t next = ring_info->ring_buffer->read_index;
        next += offset;
        next %= ring_info->ring_data_size;
        return (next);
}

/**
 * @brief Set the next read location for the specified ring buffer
 */
static inline void
set_next_read_location(
        hv_vmbus_ring_buffer_info*      ring_info,
        uint32_t                        next_read_location)
{
        ring_info->ring_buffer->read_index = next_read_location;
}

/**
 * @brief Get the start of the ring buffer
 */
static inline void *
get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info)
{
        return (void *) ring_info->ring_buffer->buffer;
}

/**
 * @brief Get the size of the ring buffer.
 */
static inline uint32_t
get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info)
{
        return ring_info->ring_data_size;
}

/**
 * Get the write index of the specified ring buffer, packed into the
 * upper 32 bits of a uint64_t.  Only the write index is recorded; the
 * lower 32 bits, where the read index would go, are left as zero.
 */
static inline uint64_t
get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info)
{
        return (uint64_t) ring_info->ring_buffer->write_index << 32;
}
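
/*
 * Illustrative sketch of how a consumer unpacks the value produced above
 * (`prev_indices' names the uint64_t trailer that hv_ring_buffer_write()
 * below appends to each packet):
 *
 *	uint32_t prev_write_index = (uint32_t)(prev_indices >> 32);
 */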

/**
 * @brief Tell the host it need not signal the guest while the guest
 * drains the ring buffer.
 */
void
hv_ring_buffer_read_begin(
        hv_vmbus_ring_buffer_info*      ring_info)
{
        ring_info->ring_buffer->interrupt_mask = 1;
        mb();
}

/**
 * @brief Re-enable host signalling and return the number of bytes still
 * unread, so the caller can detect data that raced in.
 */
uint32_t
hv_ring_buffer_read_end(
        hv_vmbus_ring_buffer_info*      ring_info)
{
        uint32_t read, write;

        ring_info->ring_buffer->interrupt_mask = 0;
        mb();

        /*
         * Now check to see if the ring buffer is still empty.
         * If it is not, we raced and we need to process new
         * incoming messages.
         */
        get_ring_buffer_avail_bytes(ring_info, &read, &write);

        return (read);
}
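
/*
 * Sketch of the drain loop the begin/end pair above is meant for; `rbi'
 * and process_packets() are hypothetical caller-side names:
 *
 *	do {
 *		hv_ring_buffer_read_begin(rbi);
 *		process_packets(rbi);
 *	} while (hv_ring_buffer_read_end(rbi) != 0);
 *
 * A non-zero return from hv_ring_buffer_read_end() means data arrived
 * after the mask was cleared, so the loop drains again rather than risk
 * missing a wakeup.
 */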

/*
 * When we write to the ring buffer, check if the host needs to
 * be signaled.  Here are the details of this protocol:
 *
 *      1. The host guarantees that while it is draining the
 *         ring buffer, it will set the interrupt_mask to
 *         indicate it does not need to be interrupted when
 *         new data is placed.
 *
 *      2. The host guarantees that it will completely drain
 *         the ring buffer before exiting the read loop.  Further,
 *         once the ring buffer is empty, it will clear the
 *         interrupt_mask and re-check to see if new data has
 *         arrived.
 */
static boolean_t
hv_ring_buffer_needsig_on_write(
        uint32_t                        old_write_location,
        hv_vmbus_ring_buffer_info*      rbi)
{
        mb();
        if (rbi->ring_buffer->interrupt_mask)
                return (FALSE);

        /* Read memory barrier */
        rmb();
        /*
         * This is the only case we need to signal: when the
         * ring transitions from being empty to non-empty.
         */
        if (old_write_location == rbi->ring_buffer->read_index)
                return (TRUE);

        return (FALSE);
}
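
/*
 * Sketch of how a writer uses the check above; signal_host() stands in
 * for whatever channel-specific signalling primitive the caller has:
 *
 *	boolean_t need_sig;
 *
 *	if (hv_ring_buffer_write(outring, sg, nsg, &need_sig) == 0 &&
 *	    need_sig)
 *		signal_host(channel);
 *
 * Only the write that takes the ring from empty to non-empty must
 * interrupt the host; every other write is picked up by the host's
 * ongoing drain loop.
 */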

static uint32_t copy_to_ring_buffer(
                        hv_vmbus_ring_buffer_info*      ring_info,
                        uint32_t                        start_write_offset,
                        char*                           src,
                        uint32_t                        src_len);

static uint32_t copy_from_ring_buffer(
                        hv_vmbus_ring_buffer_info*      ring_info,
                        char*                           dest,
                        uint32_t                        dest_len,
                        uint32_t                        start_read_offset);


/**
 * @brief Get the interrupt mask for the specified ring buffer.
 */
uint32_t
hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi)
{
        return rbi->ring_buffer->interrupt_mask;
}

/**
 * @brief Initialize the ring buffer.
 */
int
hv_vmbus_ring_buffer_init(
        hv_vmbus_ring_buffer_info*      ring_info,
        void*                           buffer,
        uint32_t                        buffer_len)
{
        memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info));

        ring_info->ring_buffer = (hv_vmbus_ring_buffer*) buffer;
        ring_info->ring_buffer->read_index =
            ring_info->ring_buffer->write_index = 0;

        ring_info->ring_size = buffer_len;
        ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer);

        mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN);

        return (0);
}
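
/*
 * Layout note with an illustrative size: one 8192-byte buffer yields a
 * hv_vmbus_ring_buffer header followed by a data region of
 * 8192 - sizeof(hv_vmbus_ring_buffer) bytes.  A minimal call sequence
 * (sketch; `pages' is a hypothetical page-aligned allocation):
 *
 *	hv_vmbus_ring_buffer_info rbi;
 *	int error = hv_vmbus_ring_buffer_init(&rbi, pages, 8192);
 */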

/**
 * @brief Cleanup the ring buffer.
 */
void
hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info)
{
        mtx_destroy(&ring_info->ring_lock);
}

/**
 * @brief Write to the ring buffer.
 */
int
hv_ring_buffer_write(
        hv_vmbus_ring_buffer_info*      out_ring_info,
        hv_vmbus_sg_buffer_list         sg_buffers[],
        uint32_t                        sg_buffer_count,
        boolean_t                       *need_sig)
{
        int i = 0;
        uint32_t byte_avail_to_write;
        uint32_t byte_avail_to_read;
        uint32_t old_write_location;
        uint32_t total_bytes_to_write = 0;

        volatile uint32_t next_write_location;
        uint64_t prev_indices = 0;

        for (i = 0; i < sg_buffer_count; i++) {
            total_bytes_to_write += sg_buffers[i].length;
        }

        total_bytes_to_write += sizeof(uint64_t);

        mtx_lock_spin(&out_ring_info->ring_lock);

        get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read,
            &byte_avail_to_write);

        /*
         * If there is only just enough room for the packet, treat the
         * ring as full.  Letting the write index catch up with the read
         * index would make the ring buffer look empty on the next pass,
         * since empty is defined as read index == write index.
         */
        if (byte_avail_to_write <= total_bytes_to_write) {
            mtx_unlock_spin(&out_ring_info->ring_lock);
            return (EAGAIN);
        }

        /*
         * Write to the ring buffer
         */
        next_write_location = get_next_write_location(out_ring_info);

        old_write_location = next_write_location;

        for (i = 0; i < sg_buffer_count; i++) {
            next_write_location = copy_to_ring_buffer(out_ring_info,
                next_write_location, (char *) sg_buffers[i].data,
                sg_buffers[i].length);
        }

        /*
         * Set previous packet start
         */
        prev_indices = get_ring_buffer_indices(out_ring_info);

        next_write_location = copy_to_ring_buffer(
                out_ring_info, next_write_location,
                (char *) &prev_indices, sizeof(uint64_t));

        /*
         * Full memory barrier before updating the write index.
         */
        mb();

        /*
         * Now, update the write location
         */
        set_next_write_location(out_ring_info, next_write_location);

        mtx_unlock_spin(&out_ring_info->ring_lock);

        *need_sig = hv_ring_buffer_needsig_on_write(old_write_location,
            out_ring_info);

        return (0);
}
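
/*
 * Sketch of assembling a two-fragment write; `desc', `payload' and
 * `payload_len' are hypothetical caller-side names:
 *
 *	hv_vmbus_sg_buffer_list sg[2];
 *	boolean_t need_sig;
 *
 *	sg[0].data = &desc;
 *	sg[0].length = sizeof(desc);
 *	sg[1].data = payload;
 *	sg[1].length = payload_len;
 *	error = hv_ring_buffer_write(outring, sg, 2, &need_sig);
 *
 * The 8-byte prev_indices trailer is appended internally, so callers
 * supply only the packet descriptor and payload fragments.
 */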

/**
 * @brief Read without advancing the read index.
 */
int
hv_ring_buffer_peek(
        hv_vmbus_ring_buffer_info*      in_ring_info,
        void*                           buffer,
        uint32_t                        buffer_len)
{
        uint32_t bytesAvailToWrite;
        uint32_t bytesAvailToRead;
        uint32_t nextReadLocation = 0;

        mtx_lock_spin(&in_ring_info->ring_lock);

        get_ring_buffer_avail_bytes(in_ring_info, &bytesAvailToRead,
                &bytesAvailToWrite);

        /*
         * Make sure there is something to read
         */
        if (bytesAvailToRead < buffer_len) {
            mtx_unlock_spin(&in_ring_info->ring_lock);
            return (EAGAIN);
        }

        /*
         * Capture the current read index; it is not advanced here.
         */
        nextReadLocation = get_next_read_location(in_ring_info);

        nextReadLocation = copy_from_ring_buffer(
                in_ring_info, (char *)buffer, buffer_len, nextReadLocation);

        mtx_unlock_spin(&in_ring_info->ring_lock);

        return (0);
}
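
/*
 * Peek pairs with hv_ring_buffer_read() below: a caller first peeks a
 * fixed-size packet descriptor to learn how large the packet is, then
 * reads the remainder, passing the size of the already-examined bytes
 * as `offset' so they are skipped.  Sketch, with a hypothetical
 * descriptor type `struct pkt_desc' carrying a `length' field:
 *
 *	struct pkt_desc d;
 *
 *	if (hv_ring_buffer_peek(inring, &d, sizeof(d)) == 0)
 *		error = hv_ring_buffer_read(inring, buf, d.length,
 *		    sizeof(d));
 */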

/**
 * @brief Read and advance the read index.
 */
int
hv_ring_buffer_read(
        hv_vmbus_ring_buffer_info*      in_ring_info,
        void*                           buffer,
        uint32_t                        buffer_len,
        uint32_t                        offset)
{
        uint32_t bytes_avail_to_write;
        uint32_t bytes_avail_to_read;
        uint32_t next_read_location = 0;
        uint64_t prev_indices = 0;

        if (buffer_len == 0)
            return (EINVAL);

        mtx_lock_spin(&in_ring_info->ring_lock);

        get_ring_buffer_avail_bytes(
            in_ring_info, &bytes_avail_to_read,
            &bytes_avail_to_write);

        /*
         * Make sure there is something to read
         */
        if (bytes_avail_to_read < buffer_len) {
            mtx_unlock_spin(&in_ring_info->ring_lock);
            return (EAGAIN);
        }

        next_read_location = get_next_read_location_with_offset(
            in_ring_info,
            offset);

        next_read_location = copy_from_ring_buffer(
            in_ring_info,
            (char *) buffer,
            buffer_len,
            next_read_location);

        next_read_location = copy_from_ring_buffer(
            in_ring_info,
            (char *) &prev_indices,
            sizeof(uint64_t),
            next_read_location);

        /*
         * Make sure all reads are done before we update the read index,
         * since the writer may start writing to the read area once the
         * read index is updated.  A full barrier is required: the loads
         * above must be ordered before the store to read_index.
         */
        mb();

        /*
         * Update the read index
         */
        set_next_read_location(in_ring_info, next_read_location);

        mtx_unlock_spin(&in_ring_info->ring_lock);

        return (0);
}

/**
 * @brief Helper routine to copy from source to ring buffer.
 *
 * Assumes there is enough room.  Handles wrap-around in the dest case only!
 */
static uint32_t
copy_to_ring_buffer(
        hv_vmbus_ring_buffer_info*      ring_info,
        uint32_t                        start_write_offset,
        char*                           src,
        uint32_t                        src_len)
{
        char *ring_buffer = get_ring_buffer(ring_info);
        uint32_t ring_buffer_size = get_ring_buffer_size(ring_info);
        uint32_t fragLen;

        if (src_len > ring_buffer_size - start_write_offset) {
            /* wrap-around detected! */
            fragLen = ring_buffer_size - start_write_offset;
            memcpy(ring_buffer + start_write_offset, src, fragLen);
            memcpy(ring_buffer, src + fragLen, src_len - fragLen);
        } else {
            memcpy(ring_buffer + start_write_offset, src, src_len);
        }

        start_write_offset += src_len;
        start_write_offset %= ring_buffer_size;

        return (start_write_offset);
}
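
/*
 * Wrap-around example with illustrative numbers: in a 4096-byte data
 * region, writing 100 bytes starting at offset 4050 copies 46 bytes to
 * offsets 4050..4095 and the remaining 54 bytes to offsets 0..53, then
 * returns the new write offset (4050 + 100) % 4096 = 54.
 */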

/**
 * @brief Helper routine to copy from the ring buffer to a destination.
 *
 * Assumes there is enough data.  Handles wrap-around in the src case only!
 */
static uint32_t
copy_from_ring_buffer(
        hv_vmbus_ring_buffer_info*      ring_info,
        char*                           dest,
        uint32_t                        dest_len,
        uint32_t                        start_read_offset)
{
        uint32_t fragLen;
        char *ring_buffer = get_ring_buffer(ring_info);
        uint32_t ring_buffer_size = get_ring_buffer_size(ring_info);

        if (dest_len > ring_buffer_size - start_read_offset) {
            /* wrap-around detected at the src */
            fragLen = ring_buffer_size - start_read_offset;
            memcpy(dest, ring_buffer + start_read_offset, fragLen);
            memcpy(dest + fragLen, ring_buffer, dest_len - fragLen);
        } else {
            memcpy(dest, ring_buffer + start_read_offset, dest_len);
        }

        start_read_offset += dest_len;
        start_read_offset %= ring_buffer_size;

        return (start_read_offset);
}