]> CyberLeo.Net >> Repos - FreeBSD/releng/10.2.git/blob - sys/dev/hyperv/vmbus/hv_connection.c
- Copy stable/10@285827 to releng/10.2 in preparation for 10.2-RC1
[FreeBSD/releng/10.2.git] / sys / dev / hyperv / vmbus / hv_connection.c
1 /*-
2  * Copyright (c) 2009-2012 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <machine/bus.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>

#include "hv_vmbus_priv.h"
43
44 /*
45  * Globals
46  */
47 hv_vmbus_connection hv_vmbus_g_connection =
48         { .connect_state = HV_DISCONNECTED,
49           .next_gpadl_handle = 0xE1E10, };
50
51 uint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008;
52
53 static uint32_t
54 hv_vmbus_get_next_version(uint32_t current_ver)
55 {
56         switch (current_ver) {
57         case (HV_VMBUS_VERSION_WIN7):
58                 return(HV_VMBUS_VERSION_WS2008);
59
60         case (HV_VMBUS_VERSION_WIN8):
61                 return(HV_VMBUS_VERSION_WIN7);
62
63         case (HV_VMBUS_VERSION_WIN8_1):
64                 return(HV_VMBUS_VERSION_WIN8);
65
66         case (HV_VMBUS_VERSION_WS2008):
67         default:
68                 return(HV_VMBUS_VERSION_INVALID);
69         }
70 }
71
72 /**
73  * Negotiate the highest supported hypervisor version.
74  */
75 static int
76 hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
77         uint32_t version)
78 {
79         int                                     ret = 0;
80         hv_vmbus_channel_initiate_contact       *msg;
81
82         sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
83         msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
84
85         msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
86         msg->vmbus_version_requested = version;
87
88         msg->interrupt_page = hv_get_phys_addr(
89                 hv_vmbus_g_connection.interrupt_page);
90
91         msg->monitor_page_1 = hv_get_phys_addr(
92                 hv_vmbus_g_connection.monitor_pages);
93
94         msg->monitor_page_2 =
95                 hv_get_phys_addr(
96                         ((uint8_t *) hv_vmbus_g_connection.monitor_pages
97                         + PAGE_SIZE));
98
99         /**
100          * Add to list before we send the request since we may receive the
101          * response before returning from this routine
102          */
103         mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
104
105         TAILQ_INSERT_TAIL(
106                 &hv_vmbus_g_connection.channel_msg_anchor,
107                 msg_info,
108                 msg_list_entry);
109
110         mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
111
112         ret = hv_vmbus_post_message(
113                 msg,
114                 sizeof(hv_vmbus_channel_initiate_contact));
115
116         if (ret != 0) {
117                 mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
118                 TAILQ_REMOVE(
119                         &hv_vmbus_g_connection.channel_msg_anchor,
120                         msg_info,
121                         msg_list_entry);
122                 mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
123                 return (ret);
124         }
125
126         /**
127          * Wait for the connection response
128          */
129         ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */
130
131         mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
132         TAILQ_REMOVE(
133                 &hv_vmbus_g_connection.channel_msg_anchor,
134                 msg_info,
135                 msg_list_entry);
136         mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
137
138         /**
139          * Check if successful
140          */
141         if (msg_info->response.version_response.version_supported) {
142                 hv_vmbus_g_connection.connect_state = HV_CONNECTED;
143         } else {
144                 ret = ECONNREFUSED;
145         }
146
147         return (ret);
148 }
149
/**
 * Send a connect request on the partition service connection
 *
 * Initialises the global connection state (work queue, control
 * semaphore, message and channel lists with their locks), allocates the
 * interrupt page and the two monitor pages, and then negotiates the
 * protocol version with the host, starting at HV_VMBUS_VERSION_CURRENT
 * and stepping down until one is accepted.
 *
 * On success the negotiated version is stored in
 * hv_vmbus_protocal_version.  On any failure everything set up here is
 * torn down again and the state returns to HV_DISCONNECTED.
 *
 * @return 0 on success; -1 if the connection is not currently
 *         HV_DISCONNECTED; ENOMEM if an allocation failed; EWOULDBLOCK if
 *         a negotiation attempt timed out; otherwise the error of the
 *         last negotiation attempt when no version was accepted.
 */
int
hv_vmbus_connect(void) {
        int                                     ret = 0;
        uint32_t                                version;
        hv_vmbus_channel_msg_info*              msg_info = NULL;

        /**
         * Make sure we are not connecting or connected
         */
        if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
                return (-1);
        }

        /**
         * Initialize the vmbus connection
         */
        hv_vmbus_g_connection.connect_state = HV_CONNECTING;
        hv_vmbus_g_connection.work_queue = hv_work_queue_create("vmbusQ");
        sema_init(&hv_vmbus_g_connection.control_sema, 1, "control_sema");

        TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
        mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
                NULL, MTX_SPIN);

        TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
        mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
                NULL, MTX_DEF);

        /**
         * Setup the vmbus event connection for channel interrupt abstraction
         * stuff
         */
        hv_vmbus_g_connection.interrupt_page = contigmalloc(
                                        PAGE_SIZE, M_DEVBUF,
                                        M_NOWAIT | M_ZERO, 0UL,
                                        BUS_SPACE_MAXADDR,
                                        PAGE_SIZE, 0);
        KASSERT(hv_vmbus_g_connection.interrupt_page != NULL,
            ("Error VMBUS: malloc failed to allocate Channel"
                " Request Event message!"));
        if (hv_vmbus_g_connection.interrupt_page == NULL) {
            ret = ENOMEM;
            goto cleanup;
        }

        /*
         * The interrupt page is split in two: the first half is the
         * receive area, the second half the send area.
         */
        hv_vmbus_g_connection.recv_interrupt_page =
                hv_vmbus_g_connection.interrupt_page;

        hv_vmbus_g_connection.send_interrupt_page =
                ((uint8_t *) hv_vmbus_g_connection.interrupt_page +
                    (PAGE_SIZE >> 1));

        /**
         * Set up the monitor notification facility. The 1st page for
         * parent->child and the 2nd page for child->parent
         */
        hv_vmbus_g_connection.monitor_pages = contigmalloc(
                2 * PAGE_SIZE,
                M_DEVBUF,
                M_NOWAIT | M_ZERO,
                0UL,
                BUS_SPACE_MAXADDR,
                PAGE_SIZE,
                0);
        KASSERT(hv_vmbus_g_connection.monitor_pages != NULL,
            ("Error VMBUS: malloc failed to allocate Monitor Pages!"));
        if (hv_vmbus_g_connection.monitor_pages == NULL) {
            ret = ENOMEM;
            goto cleanup;
        }

        /* Message info header followed by room for the contact message. */
        msg_info = (hv_vmbus_channel_msg_info*)
                malloc(sizeof(hv_vmbus_channel_msg_info) +
                        sizeof(hv_vmbus_channel_initiate_contact),
                        M_DEVBUF, M_NOWAIT | M_ZERO);
        KASSERT(msg_info != NULL,
            ("Error VMBUS: malloc failed for Initiate Contact message!"));
        if (msg_info == NULL) {
            ret = ENOMEM;
            goto cleanup;
        }

        /*
         * Find the highest vmbus version number we can support.
         */
        version = HV_VMBUS_VERSION_CURRENT;

        do {
                ret = hv_vmbus_negotiate_version(msg_info, version);
                if (ret == EWOULDBLOCK) {
                        /*
                         * We timed out.
                         */
                        goto cleanup;
                }

                if (hv_vmbus_g_connection.connect_state == HV_CONNECTED)
                        break;

                version = hv_vmbus_get_next_version(version);
        } while (version != HV_VMBUS_VERSION_INVALID);

        /*
         * If the loop ran out of versions without the host accepting one,
         * there is no connection: fail instead of reporting success with
         * an invalid protocol version and a half-open state.
         */
        if (hv_vmbus_g_connection.connect_state != HV_CONNECTED) {
                if (ret == 0)
                        ret = ECONNREFUSED;
                goto cleanup;
        }

        hv_vmbus_protocal_version = version;
        if (bootverbose)
                printf("VMBUS: Portocal Version: %d.%d\n",
                    version >> 16, version & 0xFFFF);

        sema_destroy(&msg_info->wait_sema);
        free(msg_info, M_DEVBUF);

        return (0);

        /*
         * Cleanup after failure!
         */
        cleanup:

        hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;

        hv_work_queue_close(hv_vmbus_g_connection.work_queue);
        sema_destroy(&hv_vmbus_g_connection.control_sema);
        mtx_destroy(&hv_vmbus_g_connection.channel_lock);
        mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);

        if (hv_vmbus_g_connection.interrupt_page != NULL) {
                contigfree(
                        hv_vmbus_g_connection.interrupt_page,
                        PAGE_SIZE,
                        M_DEVBUF);
                hv_vmbus_g_connection.interrupt_page = NULL;
        }

        if (hv_vmbus_g_connection.monitor_pages != NULL) {
                contigfree(
                        hv_vmbus_g_connection.monitor_pages,
                        2 * PAGE_SIZE,
                        M_DEVBUF);
                hv_vmbus_g_connection.monitor_pages = NULL;
        }

        /*
         * msg_info is only non-NULL here after at least one negotiation
         * attempt, which is what initialised its semaphore.
         */
        if (msg_info) {
                sema_destroy(&msg_info->wait_sema);
                free(msg_info, M_DEVBUF);
        }

        return (ret);
}
300
301 /**
302  * Send a disconnect request on the partition service connection
303  */
304 int
305 hv_vmbus_disconnect(void) {
306         int                      ret = 0;
307         hv_vmbus_channel_unload* msg;
308
309         msg = malloc(sizeof(hv_vmbus_channel_unload),
310             M_DEVBUF, M_NOWAIT | M_ZERO);
311         KASSERT(msg != NULL,
312             ("Error VMBUS: malloc failed to allocate Channel Unload Msg!"));
313         if (msg == NULL)
314             return (ENOMEM);
315
316         msg->message_type = HV_CHANNEL_MESSAGE_UNLOAD;
317
318         ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_unload));
319
320
321         contigfree(hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF);
322
323         mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
324
325         hv_work_queue_close(hv_vmbus_g_connection.work_queue);
326         sema_destroy(&hv_vmbus_g_connection.control_sema);
327
328         hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
329
330         free(msg, M_DEVBUF);
331
332         return (ret);
333 }
334
335 /**
336  * Get the channel object given its child relative id (ie channel id)
337  */
338 hv_vmbus_channel*
339 hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) {
340
341         hv_vmbus_channel* channel;
342         hv_vmbus_channel* foundChannel = NULL;
343
344         /*
345          * TODO:
346          * Consider optimization where relids are stored in a fixed size array
347          *  and channels are accessed without the need to take this lock or search
348          *  the list.
349          */
350         mtx_lock(&hv_vmbus_g_connection.channel_lock);
351         TAILQ_FOREACH(channel,
352                 &hv_vmbus_g_connection.channel_anchor, list_entry) {
353
354             if (channel->offer_msg.child_rel_id == rel_id) {
355                 foundChannel = channel;
356                 break;
357             }
358         }
359         mtx_unlock(&hv_vmbus_g_connection.channel_lock);
360
361         return (foundChannel);
362 }
363
364 /**
365  * Process a channel event notification
366  */
367 static void
368 VmbusProcessChannelEvent(uint32_t relid) 
369 {
370         void* arg;
371         uint32_t bytes_to_read;
372         hv_vmbus_channel* channel;
373         boolean_t is_batched_reading;
374
375         /**
376          * Find the channel based on this relid and invokes
377          * the channel callback to process the event
378          */
379
380         channel = hv_vmbus_get_channel_from_rel_id(relid);
381
382         if (channel == NULL) {
383                 return;
384         }
385         /**
386          * To deal with the race condition where we might
387          * receive a packet while the relevant driver is 
388          * being unloaded, dispatch the callback while 
389          * holding the channel lock. The unloading driver
390          * will acquire the same channel lock to set the
391          * callback to NULL. This closes the window.
392          */
393
394         /*
395          * Disable the lock due to newly added WITNESS check in r277723.
396          * Will seek other way to avoid race condition.
397          * -- whu
398          */
399         // mtx_lock(&channel->inbound_lock);
400         if (channel->on_channel_callback != NULL) {
401                 arg = channel->channel_callback_context;
402                 is_batched_reading = channel->batched_reading;
403                 /*
404                  * Optimize host to guest signaling by ensuring:
405                  * 1. While reading the channel, we disable interrupts from
406                  *    host.
407                  * 2. Ensure that we process all posted messages from the host
408                  *    before returning from this callback.
409                  * 3. Once we return, enable signaling from the host. Once this
410                  *    state is set we check to see if additional packets are
411                  *    available to read. In this case we repeat the process.
412                  */
413                 do {
414                         if (is_batched_reading)
415                                 hv_ring_buffer_read_begin(&channel->inbound);
416
417                         channel->on_channel_callback(arg);
418
419                         if (is_batched_reading)
420                                 bytes_to_read =
421                                     hv_ring_buffer_read_end(&channel->inbound);
422                         else
423                                 bytes_to_read = 0;
424                 } while (is_batched_reading && (bytes_to_read != 0));
425         }
426         // mtx_unlock(&channel->inbound_lock);
427 }
428
/*
 * Interrupt statistics, only compiled in when HV_DEBUG_INTR is defined.
 * The counters are defined outside this file; hv_vmbus_on_events() bumps
 * hv_vmbus_swintr_event_cpu[] for the CPU it runs for and prints all of
 * them whenever hv_intr_count is a multiple of 10000.
 */
#ifdef HV_DEBUG_INTR
extern uint32_t hv_intr_count;
extern uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
extern uint32_t hv_vmbus_intr_cpu[MAXCPU];
#endif
434
435 /**
436  * Handler for events
437  */
438 void
439 hv_vmbus_on_events(void *arg) 
440 {
441         int bit;
442         int cpu;
443         int dword;
444         void *page_addr;
445         uint32_t* recv_interrupt_page = NULL;
446         int rel_id;
447         int maxdword;
448         hv_vmbus_synic_event_flags *event;
449         /* int maxdword = PAGE_SIZE >> 3; */
450
451         cpu = (int)(long)arg;
452         KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: "
453             "cpu out of range!"));
454
455 #ifdef HV_DEBUG_INTR
456         int i;
457         hv_vmbus_swintr_event_cpu[cpu]++;
458         if (hv_intr_count % 10000 == 0) {
459                 printf("VMBUS: Total interrupt %d\n", hv_intr_count);
460                 for (i = 0; i < mp_ncpus; i++)
461                         printf("VMBUS: hw cpu[%d]: %d, event sw intr cpu[%d]: %d\n",
462                             i, hv_vmbus_intr_cpu[i], i, hv_vmbus_swintr_event_cpu[i]);
463         }
464 #endif
465
466         if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
467             (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
468                 maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;
469                 /*
470                  * receive size is 1/2 page and divide that by 4 bytes
471                  */
472                 recv_interrupt_page =
473                     hv_vmbus_g_connection.recv_interrupt_page;
474         } else {
475                 /*
476                  * On Host with Win8 or above, the event page can be
477                  * checked directly to get the id of the channel
478                  * that has the pending interrupt.
479                  */
480                 maxdword = HV_EVENT_FLAGS_DWORD_COUNT;
481                 page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
482                 event = (hv_vmbus_synic_event_flags *)
483                     page_addr + HV_VMBUS_MESSAGE_SINT;
484                 recv_interrupt_page = event->flags32;
485         }
486
487         /*
488          * Check events
489          */
490         if (recv_interrupt_page != NULL) {
491             for (dword = 0; dword < maxdword; dword++) {
492                 if (recv_interrupt_page[dword]) {
493                     for (bit = 0; bit < 32; bit++) {
494                         if (synch_test_and_clear_bit(bit,
495                             (uint32_t *) &recv_interrupt_page[dword])) {
496                             rel_id = (dword << 5) + bit;
497                             if (rel_id == 0) {
498                                 /*
499                                  * Special case -
500                                  * vmbus channel protocol msg.
501                                  */
502                                 continue;
503                             } else {
504                                 VmbusProcessChannelEvent(rel_id);
505
506                             }
507                         }
508                     }
509                 }
510             }
511         }
512
513         return;
514 }
515
516 /**
517  * Send a msg on the vmbus's message connection
518  */
519 int hv_vmbus_post_message(void *buffer, size_t bufferLen) {
520         int ret = 0;
521         hv_vmbus_connection_id connId;
522         unsigned retries = 0;
523
524         /* NetScaler delays from previous code were consolidated here */
525         static int delayAmount[] = {100, 100, 100, 500, 500, 5000, 5000, 5000};
526
527         /* for(each entry in delayAmount) try to post message,
528          *  delay a little bit before retrying
529          */
530         for (retries = 0;
531             retries < sizeof(delayAmount)/sizeof(delayAmount[0]); retries++) {
532             connId.as_uint32_t = 0;
533             connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
534             ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer, bufferLen);
535             if (ret != HV_STATUS_INSUFFICIENT_BUFFERS)
536                 break;
537             /* TODO: KYS We should use a blocking wait call */
538             DELAY(delayAmount[retries]);
539         }
540
541         KASSERT(ret == 0, ("Error VMBUS: Message Post Failed\n"));
542
543         return (ret);
544 }
545
546 /**
547  * Send an event notification to the parent
548  */
549 int
550 hv_vmbus_set_event(hv_vmbus_channel *channel) {
551         int ret = 0;
552         uint32_t child_rel_id = channel->offer_msg.child_rel_id;
553
554         /* Each uint32_t represents 32 channels */
555
556         synch_set_bit(child_rel_id & 31,
557                 (((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
558                         + (child_rel_id >> 5))));
559         ret = hv_vmbus_signal_event(channel->signal_event_param);
560
561         return (ret);
562 }