]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/hyperv/vmbus/hv_connection.c
Merge bmake 20151020
[FreeBSD/FreeBSD.git] / sys / dev / hyperv / vmbus / hv_connection.c
1 /*-
2  * Copyright (c) 2009-2012 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <machine/bus.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>

#include "hv_vmbus_priv.h"
40
41 /*
42  * Globals
43  */
44 hv_vmbus_connection hv_vmbus_g_connection =
45         { .connect_state = HV_DISCONNECTED,
46           .next_gpadl_handle = 0xE1E10, };
47
48 uint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008;
49
50 static uint32_t
51 hv_vmbus_get_next_version(uint32_t current_ver)
52 {
53         switch (current_ver) {
54         case (HV_VMBUS_VERSION_WIN7):
55                 return(HV_VMBUS_VERSION_WS2008);
56
57         case (HV_VMBUS_VERSION_WIN8):
58                 return(HV_VMBUS_VERSION_WIN7);
59
60         case (HV_VMBUS_VERSION_WIN8_1):
61                 return(HV_VMBUS_VERSION_WIN8);
62
63         case (HV_VMBUS_VERSION_WS2008):
64         default:
65                 return(HV_VMBUS_VERSION_INVALID);
66         }
67 }
68
69 /**
70  * Negotiate the highest supported hypervisor version.
71  */
72 static int
73 hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
74         uint32_t version)
75 {
76         int                                     ret = 0;
77         hv_vmbus_channel_initiate_contact       *msg;
78
79         sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
80         msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
81
82         msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
83         msg->vmbus_version_requested = version;
84
85         msg->interrupt_page = hv_get_phys_addr(
86                 hv_vmbus_g_connection.interrupt_page);
87
88         msg->monitor_page_1 = hv_get_phys_addr(
89                 hv_vmbus_g_connection.monitor_pages);
90
91         msg->monitor_page_2 =
92                 hv_get_phys_addr(
93                         ((uint8_t *) hv_vmbus_g_connection.monitor_pages
94                         + PAGE_SIZE));
95
96         /**
97          * Add to list before we send the request since we may receive the
98          * response before returning from this routine
99          */
100         mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
101
102         TAILQ_INSERT_TAIL(
103                 &hv_vmbus_g_connection.channel_msg_anchor,
104                 msg_info,
105                 msg_list_entry);
106
107         mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
108
109         ret = hv_vmbus_post_message(
110                 msg,
111                 sizeof(hv_vmbus_channel_initiate_contact));
112
113         if (ret != 0) {
114                 mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
115                 TAILQ_REMOVE(
116                         &hv_vmbus_g_connection.channel_msg_anchor,
117                         msg_info,
118                         msg_list_entry);
119                 mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
120                 return (ret);
121         }
122
123         /**
124          * Wait for the connection response
125          */
126         ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */
127
128         mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
129         TAILQ_REMOVE(
130                 &hv_vmbus_g_connection.channel_msg_anchor,
131                 msg_info,
132                 msg_list_entry);
133         mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
134
135         /**
136          * Check if successful
137          */
138         if (msg_info->response.version_response.version_supported) {
139                 hv_vmbus_g_connection.connect_state = HV_CONNECTED;
140         } else {
141                 ret = ECONNREFUSED;
142         }
143
144         return (ret);
145 }
146
/**
 * Send a connect request on the partition service connection.
 *
 * Initialises the global connection object (work queue, control semaphore,
 * the message and channel lists with their locks), allocates the interrupt
 * page and the two monitor pages, and then negotiates a protocol version
 * with the host: it starts at HV_VMBUS_VERSION_CURRENT and walks down via
 * hv_vmbus_get_next_version() until a version is accepted.
 *
 * @return 0 once a version has been accepted (hv_vmbus_protocal_version
 *         then holds it); -1 if the connection is not in the
 *         HV_DISCONNECTED state on entry; ENOMEM if an allocation fails;
 *         otherwise the error of the last negotiation attempt.  On every
 *         failure after the state check, everything set up here is torn
 *         down again and the state goes back to HV_DISCONNECTED.
 */
int
hv_vmbus_connect(void)
{
	int					ret = 0;
	uint32_t				version;
	hv_vmbus_channel_msg_info*		msg_info = NULL;

	/**
	 * Make sure we are not connecting or connected
	 */
	if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
		return (-1);
	}

	/**
	 * Initialize the vmbus connection
	 */
	hv_vmbus_g_connection.connect_state = HV_CONNECTING;
	hv_vmbus_g_connection.work_queue = hv_work_queue_create("vmbusQ");
	sema_init(&hv_vmbus_g_connection.control_sema, 1, "control_sema");

	TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
	mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
		NULL, MTX_SPIN);

	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
		NULL, MTX_DEF);

	/**
	 * Setup the vmbus event connection for channel interrupt abstraction
	 * stuff
	 */
	hv_vmbus_g_connection.interrupt_page = contigmalloc(
					PAGE_SIZE, M_DEVBUF,
					M_NOWAIT | M_ZERO, 0UL,
					BUS_SPACE_MAXADDR,
					PAGE_SIZE, 0);
	KASSERT(hv_vmbus_g_connection.interrupt_page != NULL,
	    ("Error VMBUS: malloc failed to allocate Channel"
		" Request Event message!"));
	if (hv_vmbus_g_connection.interrupt_page == NULL) {
		ret = ENOMEM;
		goto cleanup;
	}

	/* The receive half is the first half page, the send half the second. */
	hv_vmbus_g_connection.recv_interrupt_page =
		hv_vmbus_g_connection.interrupt_page;

	hv_vmbus_g_connection.send_interrupt_page =
		((uint8_t *) hv_vmbus_g_connection.interrupt_page +
		    (PAGE_SIZE >> 1));

	/**
	 * Set up the monitor notification facility. The 1st page for
	 * parent->child and the 2nd page for child->parent
	 */
	hv_vmbus_g_connection.monitor_pages = contigmalloc(
		2 * PAGE_SIZE,
		M_DEVBUF,
		M_NOWAIT | M_ZERO,
		0UL,
		BUS_SPACE_MAXADDR,
		PAGE_SIZE,
		0);
	KASSERT(hv_vmbus_g_connection.monitor_pages != NULL,
	    ("Error VMBUS: malloc failed to allocate Monitor Pages!"));
	if (hv_vmbus_g_connection.monitor_pages == NULL) {
		ret = ENOMEM;
		goto cleanup;
	}

	/* Tracking structure with room for the initiate-contact message. */
	msg_info = (hv_vmbus_channel_msg_info*)
		malloc(sizeof(hv_vmbus_channel_msg_info) +
			sizeof(hv_vmbus_channel_initiate_contact),
			M_DEVBUF, M_NOWAIT | M_ZERO);
	KASSERT(msg_info != NULL,
	    ("Error VMBUS: malloc failed for Initiate Contact message!"));
	if (msg_info == NULL) {
		ret = ENOMEM;
		goto cleanup;
	}

	/*
	 * Find the highest vmbus version number we can support.
	 */
	version = HV_VMBUS_VERSION_CURRENT;

	do {
		ret = hv_vmbus_negotiate_version(msg_info, version);
		if (ret == EWOULDBLOCK) {
			/*
			 * We timed out.
			 */
			goto cleanup;
		}

		if (hv_vmbus_g_connection.connect_state == HV_CONNECTED)
			break;

		version = hv_vmbus_get_next_version(version);
	} while (version != HV_VMBUS_VERSION_INVALID);

	/*
	 * Running out of versions to try means the host accepted none of
	 * them.  That is a failure: do not record an invalid protocol
	 * version and report success while still in HV_CONNECTING.
	 */
	if (hv_vmbus_g_connection.connect_state != HV_CONNECTED) {
		if (ret == 0)
			ret = ECONNREFUSED;
		goto cleanup;
	}

	hv_vmbus_protocal_version = version;
	if (bootverbose)
		printf("VMBUS: Portocal Version: %d.%d\n",
		    version >> 16, version & 0xFFFF);

	sema_destroy(&msg_info->wait_sema);
	free(msg_info, M_DEVBUF);

	return (0);

	/*
	 * Cleanup after failure!
	 */
	cleanup:

	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;

	hv_work_queue_close(hv_vmbus_g_connection.work_queue);
	sema_destroy(&hv_vmbus_g_connection.control_sema);
	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);

	if (hv_vmbus_g_connection.interrupt_page != NULL) {
		contigfree(
			hv_vmbus_g_connection.interrupt_page,
			PAGE_SIZE,
			M_DEVBUF);
		hv_vmbus_g_connection.interrupt_page = NULL;
	}

	if (hv_vmbus_g_connection.monitor_pages != NULL) {
		contigfree(
			hv_vmbus_g_connection.monitor_pages,
			2 * PAGE_SIZE,
			M_DEVBUF);
		hv_vmbus_g_connection.monitor_pages = NULL;
	}

	/*
	 * msg_info is only non-NULL once the allocation succeeded, and the
	 * negotiation that follows it always initialises wait_sema first.
	 */
	if (msg_info) {
		sema_destroy(&msg_info->wait_sema);
		free(msg_info, M_DEVBUF);
	}

	return (ret);
}
297
298 /**
299  * Send a disconnect request on the partition service connection
300  */
301 int
302 hv_vmbus_disconnect(void) {
303         int                      ret = 0;
304         hv_vmbus_channel_unload* msg;
305
306         msg = malloc(sizeof(hv_vmbus_channel_unload),
307             M_DEVBUF, M_NOWAIT | M_ZERO);
308         KASSERT(msg != NULL,
309             ("Error VMBUS: malloc failed to allocate Channel Unload Msg!"));
310         if (msg == NULL)
311             return (ENOMEM);
312
313         msg->message_type = HV_CHANNEL_MESSAGE_UNLOAD;
314
315         ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_unload));
316
317
318         contigfree(hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF);
319
320         mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
321
322         hv_work_queue_close(hv_vmbus_g_connection.work_queue);
323         sema_destroy(&hv_vmbus_g_connection.control_sema);
324
325         hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
326
327         free(msg, M_DEVBUF);
328
329         return (ret);
330 }
331
332 /**
333  * Get the channel object given its child relative id (ie channel id)
334  */
335 hv_vmbus_channel*
336 hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) {
337
338         hv_vmbus_channel* channel;
339         hv_vmbus_channel* foundChannel = NULL;
340
341         /*
342          * TODO:
343          * Consider optimization where relids are stored in a fixed size array
344          *  and channels are accessed without the need to take this lock or search
345          *  the list.
346          */
347         mtx_lock(&hv_vmbus_g_connection.channel_lock);
348         TAILQ_FOREACH(channel,
349                 &hv_vmbus_g_connection.channel_anchor, list_entry) {
350
351             if (channel->offer_msg.child_rel_id == rel_id) {
352                 foundChannel = channel;
353                 break;
354             }
355         }
356         mtx_unlock(&hv_vmbus_g_connection.channel_lock);
357
358         return (foundChannel);
359 }
360
361 /**
362  * Process a channel event notification
363  */
364 static void
365 VmbusProcessChannelEvent(uint32_t relid) 
366 {
367         void* arg;
368         uint32_t bytes_to_read;
369         hv_vmbus_channel* channel;
370         boolean_t is_batched_reading;
371
372         /**
373          * Find the channel based on this relid and invokes
374          * the channel callback to process the event
375          */
376
377         channel = hv_vmbus_get_channel_from_rel_id(relid);
378
379         if (channel == NULL) {
380                 return;
381         }
382         /**
383          * To deal with the race condition where we might
384          * receive a packet while the relevant driver is 
385          * being unloaded, dispatch the callback while 
386          * holding the channel lock. The unloading driver
387          * will acquire the same channel lock to set the
388          * callback to NULL. This closes the window.
389          */
390
391         /*
392          * Disable the lock due to newly added WITNESS check in r277723.
393          * Will seek other way to avoid race condition.
394          * -- whu
395          */
396         // mtx_lock(&channel->inbound_lock);
397         if (channel->on_channel_callback != NULL) {
398                 arg = channel->channel_callback_context;
399                 is_batched_reading = channel->batched_reading;
400                 /*
401                  * Optimize host to guest signaling by ensuring:
402                  * 1. While reading the channel, we disable interrupts from
403                  *    host.
404                  * 2. Ensure that we process all posted messages from the host
405                  *    before returning from this callback.
406                  * 3. Once we return, enable signaling from the host. Once this
407                  *    state is set we check to see if additional packets are
408                  *    available to read. In this case we repeat the process.
409                  */
410                 do {
411                         if (is_batched_reading)
412                                 hv_ring_buffer_read_begin(&channel->inbound);
413
414                         channel->on_channel_callback(arg);
415
416                         if (is_batched_reading)
417                                 bytes_to_read =
418                                     hv_ring_buffer_read_end(&channel->inbound);
419                         else
420                                 bytes_to_read = 0;
421                 } while (is_batched_reading && (bytes_to_read != 0));
422         }
423         // mtx_unlock(&channel->inbound_lock);
424 }
425
426 #ifdef HV_DEBUG_INTR
427 extern uint32_t hv_intr_count;
428 extern uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
429 extern uint32_t hv_vmbus_intr_cpu[MAXCPU];
430 #endif
431
432 /**
433  * Handler for events
434  */
435 void
436 hv_vmbus_on_events(void *arg) 
437 {
438         int bit;
439         int cpu;
440         int dword;
441         void *page_addr;
442         uint32_t* recv_interrupt_page = NULL;
443         int rel_id;
444         int maxdword;
445         hv_vmbus_synic_event_flags *event;
446         /* int maxdword = PAGE_SIZE >> 3; */
447
448         cpu = (int)(long)arg;
449         KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: "
450             "cpu out of range!"));
451
452 #ifdef HV_DEBUG_INTR
453         int i;
454         hv_vmbus_swintr_event_cpu[cpu]++;
455         if (hv_intr_count % 10000 == 0) {
456                 printf("VMBUS: Total interrupt %d\n", hv_intr_count);
457                 for (i = 0; i < mp_ncpus; i++)
458                         printf("VMBUS: hw cpu[%d]: %d, event sw intr cpu[%d]: %d\n",
459                             i, hv_vmbus_intr_cpu[i], i, hv_vmbus_swintr_event_cpu[i]);
460         }
461 #endif
462
463         if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
464             (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
465                 maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;
466                 /*
467                  * receive size is 1/2 page and divide that by 4 bytes
468                  */
469                 recv_interrupt_page =
470                     hv_vmbus_g_connection.recv_interrupt_page;
471         } else {
472                 /*
473                  * On Host with Win8 or above, the event page can be
474                  * checked directly to get the id of the channel
475                  * that has the pending interrupt.
476                  */
477                 maxdword = HV_EVENT_FLAGS_DWORD_COUNT;
478                 page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
479                 event = (hv_vmbus_synic_event_flags *)
480                     page_addr + HV_VMBUS_MESSAGE_SINT;
481                 recv_interrupt_page = event->flags32;
482         }
483
484         /*
485          * Check events
486          */
487         if (recv_interrupt_page != NULL) {
488             for (dword = 0; dword < maxdword; dword++) {
489                 if (recv_interrupt_page[dword]) {
490                     for (bit = 0; bit < 32; bit++) {
491                         if (synch_test_and_clear_bit(bit,
492                             (uint32_t *) &recv_interrupt_page[dword])) {
493                             rel_id = (dword << 5) + bit;
494                             if (rel_id == 0) {
495                                 /*
496                                  * Special case -
497                                  * vmbus channel protocol msg.
498                                  */
499                                 continue;
500                             } else {
501                                 VmbusProcessChannelEvent(rel_id);
502
503                             }
504                         }
505                     }
506                 }
507             }
508         }
509
510         return;
511 }
512
513 /**
514  * Send a msg on the vmbus's message connection
515  */
516 int hv_vmbus_post_message(void *buffer, size_t bufferLen) {
517         int ret = 0;
518         hv_vmbus_connection_id connId;
519         unsigned retries = 0;
520
521         /* NetScaler delays from previous code were consolidated here */
522         static int delayAmount[] = {100, 100, 100, 500, 500, 5000, 5000, 5000};
523
524         /* for(each entry in delayAmount) try to post message,
525          *  delay a little bit before retrying
526          */
527         for (retries = 0;
528             retries < sizeof(delayAmount)/sizeof(delayAmount[0]); retries++) {
529             connId.as_uint32_t = 0;
530             connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
531             ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer, bufferLen);
532             if (ret != HV_STATUS_INSUFFICIENT_BUFFERS)
533                 break;
534             /* TODO: KYS We should use a blocking wait call */
535             DELAY(delayAmount[retries]);
536         }
537
538         KASSERT(ret == 0, ("Error VMBUS: Message Post Failed\n"));
539
540         return (ret);
541 }
542
543 /**
544  * Send an event notification to the parent
545  */
546 int
547 hv_vmbus_set_event(hv_vmbus_channel *channel) {
548         int ret = 0;
549         uint32_t child_rel_id = channel->offer_msg.child_rel_id;
550
551         /* Each uint32_t represents 32 channels */
552
553         synch_set_bit(child_rel_id & 31,
554                 (((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
555                         + (child_rel_id >> 5))));
556         ret = hv_vmbus_signal_event(channel->signal_event_param);
557
558         return (ret);
559 }