]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - sys/dev/hyperv/vmbus/hv_channel.c
MFC 302864
[FreeBSD/stable/10.git] / sys / dev / hyperv / vmbus / hv_channel.c
1 /*-
2  * Copyright (c) 2009-2012,2016 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/malloc.h>
35 #include <sys/systm.h>
36 #include <sys/mbuf.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/sysctl.h>
40
41 #include <machine/atomic.h>
42 #include <machine/bus.h>
43
44 #include <vm/vm.h>
45 #include <vm/vm_param.h>
46 #include <vm/pmap.h>
47
48 #include <dev/hyperv/vmbus/hv_vmbus_priv.h>
49 #include <dev/hyperv/vmbus/hyperv_var.h>
50 #include <dev/hyperv/vmbus/vmbus_reg.h>
51 #include <dev/hyperv/vmbus/vmbus_var.h>
52
53 static void     vmbus_chan_send_event(hv_vmbus_channel* channel);
54 static void     vmbus_chan_update_evtflagcnt(struct vmbus_softc *,
55                     const struct hv_vmbus_channel *);
56
57 static void     vmbus_chan_task(void *, int);
58 static void     vmbus_chan_task_nobatch(void *, int);
59 static void     vmbus_chan_detach_task(void *, int);
60
61 static void     vmbus_chan_msgproc_choffer(struct vmbus_softc *,
62                     const struct vmbus_message *);
63 static void     vmbus_chan_msgproc_chrescind(struct vmbus_softc *,
64                     const struct vmbus_message *);
65
66 /*
67  * Vmbus channel message processing.
68  */
69 static const vmbus_chanmsg_proc_t
70 vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
71         VMBUS_CHANMSG_PROC(CHOFFER,     vmbus_chan_msgproc_choffer),
72         VMBUS_CHANMSG_PROC(CHRESCIND,   vmbus_chan_msgproc_chrescind),
73
74         VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
75         VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP),
76         VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP)
77 };
78
79 /**
80  *  @brief Trigger an event notification on the specified channel
81  */
82 static void
83 vmbus_chan_send_event(hv_vmbus_channel *channel)
84 {
85         struct vmbus_softc *sc = channel->vmbus_sc;
86         uint32_t chanid = channel->ch_id;
87
88         atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
89             1UL << (chanid & VMBUS_EVTFLAG_MASK));
90
91         if (channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) {
92                 atomic_set_int(
93                 &sc->vmbus_mnf2->mnf_trigs[channel->ch_montrig_idx].mt_pending,
94                 channel->ch_montrig_mask);
95         } else {
96                 hypercall_signal_event(channel->ch_monprm_dma.hv_paddr);
97         }
98 }
99
100 static int
101 vmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS)
102 {
103         struct hv_vmbus_channel *chan = arg1;
104         int alloc = 0;
105
106         if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF)
107                 alloc = 1;
108         return sysctl_handle_int(oidp, &alloc, 0, req);
109 }
110
111 static void
112 vmbus_channel_sysctl_create(hv_vmbus_channel* channel)
113 {
114         device_t dev;
115         struct sysctl_oid *devch_sysctl;
116         struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl;
117         struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl;
118         struct sysctl_ctx_list *ctx;
119         uint32_t ch_id;
120         uint16_t sub_ch_id;
121         char name[16];
122         
123         hv_vmbus_channel* primary_ch = channel->ch_prichan;
124
125         if (primary_ch == NULL) {
126                 dev = channel->ch_dev;
127                 ch_id = channel->ch_id;
128         } else {
129                 dev = primary_ch->ch_dev;
130                 ch_id = primary_ch->ch_id;
131                 sub_ch_id = channel->ch_subidx;
132         }
133         ctx = &channel->ch_sysctl_ctx;
134         sysctl_ctx_init(ctx);
135         /* This creates dev.DEVNAME.DEVUNIT.channel tree */
136         devch_sysctl = SYSCTL_ADD_NODE(ctx,
137                     SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
138                     OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
139         /* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */
140         snprintf(name, sizeof(name), "%d", ch_id);
141         devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
142                     SYSCTL_CHILDREN(devch_sysctl),
143                     OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
144
145         if (primary_ch != NULL) {
146                 devch_sub_sysctl = SYSCTL_ADD_NODE(ctx,
147                         SYSCTL_CHILDREN(devch_id_sysctl),
148                         OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
149                 snprintf(name, sizeof(name), "%d", sub_ch_id);
150                 devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
151                         SYSCTL_CHILDREN(devch_sub_sysctl),
152                         OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
153
154                 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl),
155                     OID_AUTO, "chanid", CTLFLAG_RD,
156                     &channel->ch_id, 0, "channel id");
157         }
158         SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
159             "cpu", CTLFLAG_RD, &channel->target_cpu, 0, "owner CPU id");
160         SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
161             "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
162             channel, 0, vmbus_channel_sysctl_monalloc, "I",
163             "is monitor allocated to this channel");
164
165         devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx,
166                     SYSCTL_CHILDREN(devch_id_sysctl),
167                     OID_AUTO,
168                     "in",
169                     CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
170         devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx,
171                     SYSCTL_CHILDREN(devch_id_sysctl),
172                     OID_AUTO,
173                     "out",
174                     CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
175         hv_ring_buffer_stat(ctx,
176                 SYSCTL_CHILDREN(devch_id_in_sysctl),
177                 &(channel->inbound),
178                 "inbound ring buffer stats");
179         hv_ring_buffer_stat(ctx,
180                 SYSCTL_CHILDREN(devch_id_out_sysctl),
181                 &(channel->outbound),
182                 "outbound ring buffer stats");
183 }
184
185 /**
186  * @brief Open the specified channel
187  */
188 int
189 hv_vmbus_channel_open(
190         hv_vmbus_channel*               new_channel,
191         uint32_t                        send_ring_buffer_size,
192         uint32_t                        recv_ring_buffer_size,
193         void*                           user_data,
194         uint32_t                        user_data_len,
195         hv_vmbus_pfn_channel_callback   pfn_on_channel_callback,
196         void*                           context)
197 {
198         struct vmbus_softc *sc = new_channel->vmbus_sc;
199         const struct vmbus_chanmsg_chopen_resp *resp;
200         const struct vmbus_message *msg;
201         struct vmbus_chanmsg_chopen *req;
202         struct vmbus_msghc *mh;
203         uint32_t status;
204         int ret = 0;
205         void *in, *out;
206
207         if (user_data_len > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
208                 device_printf(sc->vmbus_dev,
209                     "invalid udata len %u for chan%u\n",
210                     user_data_len, new_channel->ch_id);
211                 return EINVAL;
212         }
213
214         if (atomic_testandset_int(&new_channel->ch_stflags,
215             VMBUS_CHAN_ST_OPENED_SHIFT))
216                 panic("double-open chan%u", new_channel->ch_id);
217
218         new_channel->on_channel_callback = pfn_on_channel_callback;
219         new_channel->channel_callback_context = context;
220
221         vmbus_chan_update_evtflagcnt(sc, new_channel);
222
223         new_channel->rxq = VMBUS_PCPU_GET(new_channel->vmbus_sc, event_tq,
224             new_channel->target_cpu);
225         if (new_channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) {
226                 TASK_INIT(&new_channel->channel_task, 0,
227                     vmbus_chan_task, new_channel);
228         } else {
229                 TASK_INIT(&new_channel->channel_task, 0,
230                     vmbus_chan_task_nobatch, new_channel);
231         }
232
233         /* Allocate the ring buffer */
234         out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size),
235             M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
236         KASSERT(out != NULL,
237             ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!"));
238         if (out == NULL) {
239                 ret = ENOMEM;
240                 goto failed;
241         }
242
243         in = ((uint8_t *) out + send_ring_buffer_size);
244
245         new_channel->ring_buffer_pages = out;
246         new_channel->ring_buffer_page_count = (send_ring_buffer_size +
247             recv_ring_buffer_size) >> PAGE_SHIFT;
248         new_channel->ring_buffer_size = send_ring_buffer_size +
249             recv_ring_buffer_size;
250
251         hv_vmbus_ring_buffer_init(
252                 &new_channel->outbound,
253                 out,
254                 send_ring_buffer_size);
255
256         hv_vmbus_ring_buffer_init(
257                 &new_channel->inbound,
258                 in,
259                 recv_ring_buffer_size);
260
261         /* Create sysctl tree for this channel */
262         vmbus_channel_sysctl_create(new_channel);
263
264         /**
265          * Establish the gpadl for the ring buffer
266          */
267         new_channel->ring_buffer_gpadl_handle = 0;
268
269         ret = hv_vmbus_channel_establish_gpadl(new_channel,
270                 new_channel->outbound.ring_buffer,
271                 send_ring_buffer_size + recv_ring_buffer_size,
272                 &new_channel->ring_buffer_gpadl_handle);
273
274         /*
275          * Open channel w/ the bufring GPADL on the target CPU.
276          */
277         mh = vmbus_msghc_get(sc, sizeof(*req));
278         if (mh == NULL) {
279                 device_printf(sc->vmbus_dev,
280                     "can not get msg hypercall for chopen(chan%u)\n",
281                     new_channel->ch_id);
282                 ret = ENXIO;
283                 goto failed;
284         }
285
286         req = vmbus_msghc_dataptr(mh);
287         req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
288         req->chm_chanid = new_channel->ch_id;
289         req->chm_openid = new_channel->ch_id;
290         req->chm_gpadl = new_channel->ring_buffer_gpadl_handle;
291         req->chm_vcpuid = new_channel->target_vcpu;
292         req->chm_rxbr_pgofs = send_ring_buffer_size >> PAGE_SHIFT;
293         if (user_data_len)
294                 memcpy(req->chm_udata, user_data, user_data_len);
295
296         ret = vmbus_msghc_exec(sc, mh);
297         if (ret != 0) {
298                 device_printf(sc->vmbus_dev,
299                     "chopen(chan%u) msg hypercall exec failed: %d\n",
300                     new_channel->ch_id, ret);
301                 vmbus_msghc_put(sc, mh);
302                 goto failed;
303         }
304
305         msg = vmbus_msghc_wait_result(sc, mh);
306         resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data;
307         status = resp->chm_status;
308
309         vmbus_msghc_put(sc, mh);
310
311         if (status == 0) {
312                 if (bootverbose) {
313                         device_printf(sc->vmbus_dev, "chan%u opened\n",
314                             new_channel->ch_id);
315                 }
316                 return 0;
317         }
318
319         device_printf(sc->vmbus_dev, "failed to open chan%u\n",
320             new_channel->ch_id);
321         ret = ENXIO;
322
323 failed:
324         atomic_clear_int(&new_channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
325         return ret;
326 }
327
328 /**
329  * @brief Establish a GPADL for the specified buffer
330  */
331 int
332 hv_vmbus_channel_establish_gpadl(struct hv_vmbus_channel *channel,
333     void *contig_buffer, uint32_t size, uint32_t *gpadl0)
334 {
335         struct vmbus_softc *sc = channel->vmbus_sc;
336         struct vmbus_msghc *mh;
337         struct vmbus_chanmsg_gpadl_conn *req;
338         const struct vmbus_message *msg;
339         size_t reqsz;
340         uint32_t gpadl, status;
341         int page_count, range_len, i, cnt, error;
342         uint64_t page_id, paddr;
343
344         /*
345          * Preliminary checks.
346          */
347
348         KASSERT((size & PAGE_MASK) == 0,
349             ("invalid GPA size %u, not multiple page size", size));
350         page_count = size >> PAGE_SHIFT;
351
352         paddr = hv_get_phys_addr(contig_buffer);
353         KASSERT((paddr & PAGE_MASK) == 0,
354             ("GPA is not page aligned %jx", (uintmax_t)paddr));
355         page_id = paddr >> PAGE_SHIFT;
356
357         range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
358         /*
359          * We don't support multiple GPA ranges.
360          */
361         if (range_len > UINT16_MAX) {
362                 device_printf(sc->vmbus_dev, "GPA too large, %d pages\n",
363                     page_count);
364                 return EOPNOTSUPP;
365         }
366
367         /*
368          * Allocate GPADL id.
369          */
370         gpadl = vmbus_gpadl_alloc(sc);
371         *gpadl0 = gpadl;
372
373         /*
374          * Connect this GPADL to the target channel.
375          *
376          * NOTE:
377          * Since each message can only hold small set of page
378          * addresses, several messages may be required to
379          * complete the connection.
380          */
381         if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
382                 cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
383         else
384                 cnt = page_count;
385         page_count -= cnt;
386
387         reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
388             chm_range.gpa_page[cnt]);
389         mh = vmbus_msghc_get(sc, reqsz);
390         if (mh == NULL) {
391                 device_printf(sc->vmbus_dev,
392                     "can not get msg hypercall for gpadl->chan%u\n",
393                     channel->ch_id);
394                 return EIO;
395         }
396
397         req = vmbus_msghc_dataptr(mh);
398         req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
399         req->chm_chanid = channel->ch_id;
400         req->chm_gpadl = gpadl;
401         req->chm_range_len = range_len;
402         req->chm_range_cnt = 1;
403         req->chm_range.gpa_len = size;
404         req->chm_range.gpa_ofs = 0;
405         for (i = 0; i < cnt; ++i)
406                 req->chm_range.gpa_page[i] = page_id++;
407
408         error = vmbus_msghc_exec(sc, mh);
409         if (error) {
410                 device_printf(sc->vmbus_dev,
411                     "gpadl->chan%u msg hypercall exec failed: %d\n",
412                     channel->ch_id, error);
413                 vmbus_msghc_put(sc, mh);
414                 return error;
415         }
416
417         while (page_count > 0) {
418                 struct vmbus_chanmsg_gpadl_subconn *subreq;
419
420                 if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
421                         cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
422                 else
423                         cnt = page_count;
424                 page_count -= cnt;
425
426                 reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
427                     chm_gpa_page[cnt]);
428                 vmbus_msghc_reset(mh, reqsz);
429
430                 subreq = vmbus_msghc_dataptr(mh);
431                 subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
432                 subreq->chm_gpadl = gpadl;
433                 for (i = 0; i < cnt; ++i)
434                         subreq->chm_gpa_page[i] = page_id++;
435
436                 vmbus_msghc_exec_noresult(mh);
437         }
438         KASSERT(page_count == 0, ("invalid page count %d", page_count));
439
440         msg = vmbus_msghc_wait_result(sc, mh);
441         status = ((const struct vmbus_chanmsg_gpadl_connresp *)
442             msg->msg_data)->chm_status;
443
444         vmbus_msghc_put(sc, mh);
445
446         if (status != 0) {
447                 device_printf(sc->vmbus_dev, "gpadl->chan%u failed: "
448                     "status %u\n", channel->ch_id, status);
449                 return EIO;
450         } else {
451                 if (bootverbose) {
452                         device_printf(sc->vmbus_dev, "gpadl->chan%u "
453                             "succeeded\n", channel->ch_id);
454                 }
455         }
456         return 0;
457 }
458
459 /*
460  * Disconnect the GPA from the target channel
461  */
462 int
463 hv_vmbus_channel_teardown_gpdal(struct hv_vmbus_channel *chan, uint32_t gpadl)
464 {
465         struct vmbus_softc *sc = chan->vmbus_sc;
466         struct vmbus_msghc *mh;
467         struct vmbus_chanmsg_gpadl_disconn *req;
468         int error;
469
470         mh = vmbus_msghc_get(sc, sizeof(*req));
471         if (mh == NULL) {
472                 device_printf(sc->vmbus_dev,
473                     "can not get msg hypercall for gpa x->chan%u\n",
474                     chan->ch_id);
475                 return EBUSY;
476         }
477
478         req = vmbus_msghc_dataptr(mh);
479         req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
480         req->chm_chanid = chan->ch_id;
481         req->chm_gpadl = gpadl;
482
483         error = vmbus_msghc_exec(sc, mh);
484         if (error) {
485                 device_printf(sc->vmbus_dev,
486                     "gpa x->chan%u msg hypercall exec failed: %d\n",
487                     chan->ch_id, error);
488                 vmbus_msghc_put(sc, mh);
489                 return error;
490         }
491
492         vmbus_msghc_wait_result(sc, mh);
493         /* Discard result; no useful information */
494         vmbus_msghc_put(sc, mh);
495
496         return 0;
497 }
498
499 static void
500 hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
501 {
502         struct vmbus_softc *sc = channel->vmbus_sc;
503         struct vmbus_msghc *mh;
504         struct vmbus_chanmsg_chclose *req;
505         struct taskqueue *rxq = channel->rxq;
506         int error;
507
508         /* TODO: stringent check */
509         atomic_clear_int(&channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
510
511         sysctl_ctx_free(&channel->ch_sysctl_ctx);
512
513         /*
514          * set rxq to NULL to avoid more requests be scheduled
515          */
516         channel->rxq = NULL;
517         taskqueue_drain(rxq, &channel->channel_task);
518         channel->on_channel_callback = NULL;
519
520         /**
521          * Send a closing message
522          */
523
524         mh = vmbus_msghc_get(sc, sizeof(*req));
525         if (mh == NULL) {
526                 device_printf(sc->vmbus_dev,
527                     "can not get msg hypercall for chclose(chan%u)\n",
528                     channel->ch_id);
529                 return;
530         }
531
532         req = vmbus_msghc_dataptr(mh);
533         req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
534         req->chm_chanid = channel->ch_id;
535
536         error = vmbus_msghc_exec_noresult(mh);
537         vmbus_msghc_put(sc, mh);
538
539         if (error) {
540                 device_printf(sc->vmbus_dev,
541                     "chclose(chan%u) msg hypercall exec failed: %d\n",
542                     channel->ch_id, error);
543                 return;
544         } else if (bootverbose) {
545                 device_printf(sc->vmbus_dev, "close chan%u\n",
546                     channel->ch_id);
547         }
548
549         /* Tear down the gpadl for the channel's ring buffer */
550         if (channel->ring_buffer_gpadl_handle) {
551                 hv_vmbus_channel_teardown_gpdal(channel,
552                         channel->ring_buffer_gpadl_handle);
553         }
554
555         /* TODO: Send a msg to release the childRelId */
556
557         /* cleanup the ring buffers for this channel */
558         hv_ring_buffer_cleanup(&channel->outbound);
559         hv_ring_buffer_cleanup(&channel->inbound);
560
561         contigfree(channel->ring_buffer_pages, channel->ring_buffer_size,
562             M_DEVBUF);
563 }
564
565 /*
566  * Caller should make sure that all sub-channels have
567  * been added to 'chan' and all to-be-closed channels
568  * are not being opened.
569  */
570 void
571 hv_vmbus_channel_close(struct hv_vmbus_channel *chan)
572 {
573         int subchan_cnt;
574
575         if (!VMBUS_CHAN_ISPRIMARY(chan)) {
576                 /*
577                  * Sub-channel is closed when its primary channel
578                  * is closed; done.
579                  */
580                 return;
581         }
582
583         /*
584          * Close all sub-channels, if any.
585          */
586         subchan_cnt = chan->ch_subchan_cnt;
587         if (subchan_cnt > 0) {
588                 struct hv_vmbus_channel **subchan;
589                 int i;
590
591                 subchan = vmbus_get_subchan(chan, subchan_cnt);
592                 for (i = 0; i < subchan_cnt; ++i)
593                         hv_vmbus_channel_close_internal(subchan[i]);
594                 vmbus_rel_subchan(subchan, subchan_cnt);
595         }
596
597         /* Then close the primary channel. */
598         hv_vmbus_channel_close_internal(chan);
599 }
600
601 /**
602  * @brief Send the specified buffer on the given channel
603  */
604 int
605 hv_vmbus_channel_send_packet(
606         hv_vmbus_channel*       channel,
607         void*                   buffer,
608         uint32_t                buffer_len,
609         uint64_t                request_id,
610         hv_vmbus_packet_type    type,
611         uint32_t                flags)
612 {
613         int                     ret = 0;
614         hv_vm_packet_descriptor desc;
615         uint32_t                packet_len;
616         uint64_t                aligned_data;
617         uint32_t                packet_len_aligned;
618         boolean_t               need_sig;
619         hv_vmbus_sg_buffer_list buffer_list[3];
620
621         packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
622         packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
623         aligned_data = 0;
624
625         /* Setup the descriptor */
626         desc.type = type;   /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND;             */
627         desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
628                             /* in 8-bytes granularity */
629         desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3;
630         desc.length8 = (uint16_t) (packet_len_aligned >> 3);
631         desc.transaction_id = request_id;
632
633         buffer_list[0].data = &desc;
634         buffer_list[0].length = sizeof(hv_vm_packet_descriptor);
635
636         buffer_list[1].data = buffer;
637         buffer_list[1].length = buffer_len;
638
639         buffer_list[2].data = &aligned_data;
640         buffer_list[2].length = packet_len_aligned - packet_len;
641
642         ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
643             &need_sig);
644
645         /* TODO: We should determine if this is optional */
646         if (ret == 0 && need_sig)
647                 vmbus_chan_send_event(channel);
648
649         return (ret);
650 }
651
652 /**
653  * @brief Send a range of single-page buffer packets using
654  * a GPADL Direct packet type
655  */
656 int
657 hv_vmbus_channel_send_packet_pagebuffer(
658         hv_vmbus_channel*       channel,
659         hv_vmbus_page_buffer    page_buffers[],
660         uint32_t                page_count,
661         void*                   buffer,
662         uint32_t                buffer_len,
663         uint64_t                request_id)
664 {
665
666         int                                     ret = 0;
667         boolean_t                               need_sig;
668         uint32_t                                packet_len;
669         uint32_t                                page_buflen;
670         uint32_t                                packetLen_aligned;
671         hv_vmbus_sg_buffer_list                 buffer_list[4];
672         hv_vmbus_channel_packet_page_buffer     desc;
673         uint32_t                                descSize;
674         uint64_t                                alignedData = 0;
675
676         if (page_count > HV_MAX_PAGE_BUFFER_COUNT)
677                 return (EINVAL);
678
679         /*
680          * Adjust the size down since hv_vmbus_channel_packet_page_buffer
681          *  is the largest size we support
682          */
683         descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range);
684         page_buflen = sizeof(hv_vmbus_page_buffer) * page_count;
685         packet_len = descSize + page_buflen + buffer_len;
686         packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
687
688         /* Setup the descriptor */
689         desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
690         desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
691         /* in 8-bytes granularity */
692         desc.data_offset8 = (descSize + page_buflen) >> 3;
693         desc.length8 = (uint16_t) (packetLen_aligned >> 3);
694         desc.transaction_id = request_id;
695         desc.range_count = page_count;
696
697         buffer_list[0].data = &desc;
698         buffer_list[0].length = descSize;
699
700         buffer_list[1].data = page_buffers;
701         buffer_list[1].length = page_buflen;
702
703         buffer_list[2].data = buffer;
704         buffer_list[2].length = buffer_len;
705
706         buffer_list[3].data = &alignedData;
707         buffer_list[3].length = packetLen_aligned - packet_len;
708
709         ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 4,
710             &need_sig);
711
712         /* TODO: We should determine if this is optional */
713         if (ret == 0 && need_sig)
714                 vmbus_chan_send_event(channel);
715
716         return (ret);
717 }
718
719 /**
720  * @brief Send a multi-page buffer packet using a GPADL Direct packet type
721  */
722 int
723 hv_vmbus_channel_send_packet_multipagebuffer(
724         hv_vmbus_channel*               channel,
725         hv_vmbus_multipage_buffer*      multi_page_buffer,
726         void*                           buffer,
727         uint32_t                        buffer_len,
728         uint64_t                        request_id)
729 {
730
731         int                     ret = 0;
732         uint32_t                desc_size;
733         boolean_t               need_sig;
734         uint32_t                packet_len;
735         uint32_t                packet_len_aligned;
736         uint32_t                pfn_count;
737         uint64_t                aligned_data = 0;
738         hv_vmbus_sg_buffer_list buffer_list[3];
739         hv_vmbus_channel_packet_multipage_buffer desc;
740
741         pfn_count =
742             HV_NUM_PAGES_SPANNED(
743                     multi_page_buffer->offset,
744                     multi_page_buffer->length);
745
746         if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT))
747             return (EINVAL);
748         /*
749          * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer
750          * is the largest size we support
751          */
752         desc_size =
753             sizeof(hv_vmbus_channel_packet_multipage_buffer) -
754                     ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) *
755                         sizeof(uint64_t));
756         packet_len = desc_size + buffer_len;
757         packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
758
759         /*
760          * Setup the descriptor
761          */
762         desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
763         desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
764         desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */
765         desc.length8 = (uint16_t) (packet_len_aligned >> 3);
766         desc.transaction_id = request_id;
767         desc.range_count = 1;
768
769         desc.range.length = multi_page_buffer->length;
770         desc.range.offset = multi_page_buffer->offset;
771
772         memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array,
773                 pfn_count * sizeof(uint64_t));
774
775         buffer_list[0].data = &desc;
776         buffer_list[0].length = desc_size;
777
778         buffer_list[1].data = buffer;
779         buffer_list[1].length = buffer_len;
780
781         buffer_list[2].data = &aligned_data;
782         buffer_list[2].length = packet_len_aligned - packet_len;
783
784         ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
785             &need_sig);
786
787         /* TODO: We should determine if this is optional */
788         if (ret == 0 && need_sig)
789                 vmbus_chan_send_event(channel);
790
791         return (ret);
792 }
793
794 /**
795  * @brief Retrieve the user packet on the specified channel
796  */
797 int
798 hv_vmbus_channel_recv_packet(
799         hv_vmbus_channel*       channel,
800         void*                   Buffer,
801         uint32_t                buffer_len,
802         uint32_t*               buffer_actual_len,
803         uint64_t*               request_id)
804 {
805         int                     ret;
806         uint32_t                user_len;
807         uint32_t                packet_len;
808         hv_vm_packet_descriptor desc;
809
810         *buffer_actual_len = 0;
811         *request_id = 0;
812
813         ret = hv_ring_buffer_peek(&channel->inbound, &desc,
814                 sizeof(hv_vm_packet_descriptor));
815         if (ret != 0)
816                 return (0);
817
818         packet_len = desc.length8 << 3;
819         user_len = packet_len - (desc.data_offset8 << 3);
820
821         *buffer_actual_len = user_len;
822
823         if (user_len > buffer_len)
824                 return (EINVAL);
825
826         *request_id = desc.transaction_id;
827
828         /* Copy over the packet to the user buffer */
829         ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len,
830                 (desc.data_offset8 << 3));
831
832         return (0);
833 }
834
835 /**
836  * @brief Retrieve the raw packet on the specified channel
837  */
838 int
839 hv_vmbus_channel_recv_packet_raw(
840         hv_vmbus_channel*       channel,
841         void*                   buffer,
842         uint32_t                buffer_len,
843         uint32_t*               buffer_actual_len,
844         uint64_t*               request_id)
845 {
846         int             ret;
847         uint32_t        packetLen;
848         hv_vm_packet_descriptor desc;
849
850         *buffer_actual_len = 0;
851         *request_id = 0;
852
853         ret = hv_ring_buffer_peek(
854                 &channel->inbound, &desc,
855                 sizeof(hv_vm_packet_descriptor));
856
857         if (ret != 0)
858             return (0);
859
860         packetLen = desc.length8 << 3;
861         *buffer_actual_len = packetLen;
862
863         if (packetLen > buffer_len)
864             return (ENOBUFS);
865
866         *request_id = desc.transaction_id;
867
868         /* Copy over the entire packet to the user buffer */
869         ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0);
870
871         return (0);
872 }
873
874 static void
875 vmbus_chan_task(void *xchan, int pending __unused)
876 {
877         struct hv_vmbus_channel *chan = xchan;
878         void (*callback)(void *);
879         void *arg;
880
881         arg = chan->channel_callback_context;
882         callback = chan->on_channel_callback;
883
884         /*
885          * Optimize host to guest signaling by ensuring:
886          * 1. While reading the channel, we disable interrupts from
887          *    host.
888          * 2. Ensure that we process all posted messages from the host
889          *    before returning from this callback.
890          * 3. Once we return, enable signaling from the host. Once this
891          *    state is set we check to see if additional packets are
892          *    available to read. In this case we repeat the process.
893          *
894          * NOTE: Interrupt has been disabled in the ISR.
895          */
896         for (;;) {
897                 uint32_t left;
898
899                 callback(arg);
900
901                 left = hv_ring_buffer_read_end(&chan->inbound);
902                 if (left == 0) {
903                         /* No more data in RX bufring; done */
904                         break;
905                 }
906                 hv_ring_buffer_read_begin(&chan->inbound);
907         }
908 }
909
910 static void
911 vmbus_chan_task_nobatch(void *xchan, int pending __unused)
912 {
913         struct hv_vmbus_channel *chan = xchan;
914
915         chan->on_channel_callback(chan->channel_callback_context);
916 }
917
918 static __inline void
919 vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
920     int flag_cnt)
921 {
922         int f;
923
924         for (f = 0; f < flag_cnt; ++f) {
925                 uint32_t chid_base;
926                 u_long flags;
927                 int chid_ofs;
928
929                 if (event_flags[f] == 0)
930                         continue;
931
932                 flags = atomic_swap_long(&event_flags[f], 0);
933                 chid_base = f << VMBUS_EVTFLAG_SHIFT;
934
935                 while ((chid_ofs = ffsl(flags)) != 0) {
936                         struct hv_vmbus_channel *channel;
937
938                         --chid_ofs; /* NOTE: ffsl is 1-based */
939                         flags &= ~(1UL << chid_ofs);
940
941                         channel = sc->vmbus_chmap[chid_base + chid_ofs];
942
943                         /* if channel is closed or closing */
944                         if (channel == NULL || channel->rxq == NULL)
945                                 continue;
946
947                         if (channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
948                                 hv_ring_buffer_read_begin(&channel->inbound);
949                         taskqueue_enqueue(channel->rxq, &channel->channel_task);
950                 }
951         }
952 }
953
954 void
955 vmbus_event_proc(struct vmbus_softc *sc, int cpu)
956 {
957         struct vmbus_evtflags *eventf;
958
959         /*
960          * On Host with Win8 or above, the event page can be checked directly
961          * to get the id of the channel that has the pending interrupt.
962          */
963         eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
964         vmbus_event_flags_proc(sc, eventf->evt_flags,
965             VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
966 }
967
968 void
969 vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
970 {
971         struct vmbus_evtflags *eventf;
972
973         eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
974         if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
975                 vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
976                     VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
977         }
978 }
979
980 static void
981 vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
982     const struct hv_vmbus_channel *chan)
983 {
984         volatile int *flag_cnt_ptr;
985         int flag_cnt;
986
987         flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
988         flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->target_cpu);
989
990         for (;;) {
991                 int old_flag_cnt;
992
993                 old_flag_cnt = *flag_cnt_ptr;
994                 if (old_flag_cnt >= flag_cnt)
995                         break;
996                 if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
997                         if (bootverbose) {
998                                 device_printf(sc->vmbus_dev,
999                                     "channel%u update cpu%d flag_cnt to %d\n",
1000                                     chan->ch_id,
1001                                     chan->target_cpu, flag_cnt);
1002                         }
1003                         break;
1004                 }
1005         }
1006 }
1007
1008 static struct hv_vmbus_channel *
1009 vmbus_chan_alloc(struct vmbus_softc *sc)
1010 {
1011         struct hv_vmbus_channel *chan;
1012
1013         chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO);
1014
1015         chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
1016             HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param),
1017             &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
1018         if (chan->ch_monprm == NULL) {
1019                 device_printf(sc->vmbus_dev, "monprm alloc failed\n");
1020                 free(chan, M_DEVBUF);
1021                 return NULL;
1022         }
1023
1024         chan->vmbus_sc = sc;
1025         mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF);
1026         TAILQ_INIT(&chan->ch_subchans);
1027         TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan);
1028
1029         return chan;
1030 }
1031
1032 static void
1033 vmbus_chan_free(struct hv_vmbus_channel *chan)
1034 {
1035         /* TODO: assert sub-channel list is empty */
1036         /* TODO: asset no longer on the primary channel's sub-channel list */
1037         /* TODO: asset no longer on the vmbus channel list */
1038         hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm);
1039         mtx_destroy(&chan->ch_subchan_lock);
1040         free(chan, M_DEVBUF);
1041 }
1042
1043 static int
1044 vmbus_chan_add(struct hv_vmbus_channel *newchan)
1045 {
1046         struct vmbus_softc *sc = newchan->vmbus_sc;
1047         struct hv_vmbus_channel *prichan;
1048
1049         if (newchan->ch_id == 0) {
1050                 /*
1051                  * XXX
1052                  * Chan0 will neither be processed nor should be offered;
1053                  * skip it.
1054                  */
1055                 device_printf(sc->vmbus_dev, "got chan0 offer, discard\n");
1056                 return EINVAL;
1057         } else if (newchan->ch_id >= VMBUS_CHAN_MAX) {
1058                 device_printf(sc->vmbus_dev, "invalid chan%u offer\n",
1059                     newchan->ch_id);
1060                 return EINVAL;
1061         }
1062         sc->vmbus_chmap[newchan->ch_id] = newchan;
1063
1064         if (bootverbose) {
1065                 device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n",
1066                     newchan->ch_id, newchan->ch_subidx);
1067         }
1068
1069         mtx_lock(&sc->vmbus_prichan_lock);
1070         TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) {
1071                 /*
1072                  * Sub-channel will have the same type GUID and instance
1073                  * GUID as its primary channel.
1074                  */
1075                 if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type,
1076                     sizeof(struct hyperv_guid)) == 0 &&
1077                     memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst,
1078                     sizeof(struct hyperv_guid)) == 0)
1079                         break;
1080         }
1081         if (VMBUS_CHAN_ISPRIMARY(newchan)) {
1082                 if (prichan == NULL) {
1083                         /* Install the new primary channel */
1084                         TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan,
1085                             ch_prilink);
1086                         mtx_unlock(&sc->vmbus_prichan_lock);
1087                         return 0;
1088                 } else {
1089                         mtx_unlock(&sc->vmbus_prichan_lock);
1090                         device_printf(sc->vmbus_dev, "duplicated primary "
1091                             "chan%u\n", newchan->ch_id);
1092                         return EINVAL;
1093                 }
1094         } else { /* Sub-channel */
1095                 if (prichan == NULL) {
1096                         mtx_unlock(&sc->vmbus_prichan_lock);
1097                         device_printf(sc->vmbus_dev, "no primary chan for "
1098                             "chan%u\n", newchan->ch_id);
1099                         return EINVAL;
1100                 }
1101                 /*
1102                  * Found the primary channel for this sub-channel and
1103                  * move on.
1104                  *
1105                  * XXX refcnt prichan
1106                  */
1107         }
1108         mtx_unlock(&sc->vmbus_prichan_lock);
1109
1110         /*
1111          * This is a sub-channel; link it with the primary channel.
1112          */
1113         KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan),
1114             ("new channel is not sub-channel"));
1115         KASSERT(prichan != NULL, ("no primary channel"));
1116
1117         newchan->ch_prichan = prichan;
1118         newchan->ch_dev = prichan->ch_dev;
1119
1120         mtx_lock(&prichan->ch_subchan_lock);
1121         TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink);
1122         /*
1123          * Bump up sub-channel count and notify anyone that is
1124          * interested in this sub-channel, after this sub-channel
1125          * is setup.
1126          */
1127         prichan->ch_subchan_cnt++;
1128         mtx_unlock(&prichan->ch_subchan_lock);
1129         wakeup(prichan);
1130
1131         return 0;
1132 }
1133
1134 void
1135 vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
1136 {
1137         KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
1138
1139         if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1140             chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) {
1141                 /* Only cpu0 is supported */
1142                 cpu = 0;
1143         }
1144
1145         chan->target_cpu = cpu;
1146         chan->target_vcpu = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu);
1147
1148         if (bootverbose) {
1149                 printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
1150                     chan->ch_id,
1151                     chan->target_cpu, chan->target_vcpu);
1152         }
1153 }
1154
1155 void
1156 vmbus_channel_cpu_rr(struct hv_vmbus_channel *chan)
1157 {
1158         static uint32_t vmbus_chan_nextcpu;
1159         int cpu;
1160
1161         cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
1162         vmbus_channel_cpu_set(chan, cpu);
1163 }
1164
/*
 * Apply the default channel-cpu binding, which is cpu0.
 */
static void
vmbus_chan_cpu_default(struct hv_vmbus_channel *chan)
{
        /*
         * Devices that need a different channel-cpu mapping are
         * expected to call vmbus_channel_cpu_{set,rr}() themselves.
         */
        vmbus_channel_cpu_set(chan, 0);
}
1175
1176 static void
1177 vmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
1178     const struct vmbus_message *msg)
1179 {
1180         const struct vmbus_chanmsg_choffer *offer;
1181         struct hv_vmbus_channel *chan;
1182         int error;
1183
1184         offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data;
1185
1186         chan = vmbus_chan_alloc(sc);
1187         if (chan == NULL) {
1188                 device_printf(sc->vmbus_dev, "allocate chan%u failed\n",
1189                     offer->chm_chanid);
1190                 return;
1191         }
1192
1193         chan->ch_id = offer->chm_chanid;
1194         chan->ch_subidx = offer->chm_subidx;
1195         chan->ch_guid_type = offer->chm_chtype;
1196         chan->ch_guid_inst = offer->chm_chinst;
1197
1198         /* Batch reading is on by default */
1199         chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;
1200
1201         chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
1202         if (sc->vmbus_version != VMBUS_VERSION_WS2008)
1203                 chan->ch_monprm->mp_connid = offer->chm_connid;
1204
1205         if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
1206                 /*
1207                  * Setup MNF stuffs.
1208                  */
1209                 chan->ch_flags |= VMBUS_CHAN_FLAG_HASMNF;
1210                 chan->ch_montrig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN;
1211                 if (chan->ch_montrig_idx >= VMBUS_MONTRIGS_MAX)
1212                         panic("invalid monitor trigger %u", offer->chm_montrig);
1213                 chan->ch_montrig_mask =
1214                     1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
1215         }
1216
1217         /* Select default cpu for this channel. */
1218         vmbus_chan_cpu_default(chan);
1219
1220         error = vmbus_chan_add(chan);
1221         if (error) {
1222                 device_printf(sc->vmbus_dev, "add chan%u failed: %d\n",
1223                     chan->ch_id, error);
1224                 vmbus_chan_free(chan);
1225                 return;
1226         }
1227
1228         if (VMBUS_CHAN_ISPRIMARY(chan)) {
1229                 /*
1230                  * Add device for this primary channel.
1231                  *
1232                  * NOTE:
1233                  * Error is ignored here; don't have much to do if error
1234                  * really happens.
1235                  */
1236                 hv_vmbus_child_device_register(chan);
1237         }
1238 }
1239
1240 /*
1241  * XXX pretty broken; need rework.
1242  */
1243 static void
1244 vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc,
1245     const struct vmbus_message *msg)
1246 {
1247         const struct vmbus_chanmsg_chrescind *note;
1248         struct hv_vmbus_channel *chan;
1249
1250         note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data;
1251         if (note->chm_chanid > VMBUS_CHAN_MAX) {
1252                 device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n",
1253                     note->chm_chanid);
1254                 return;
1255         }
1256
1257         if (bootverbose) {
1258                 device_printf(sc->vmbus_dev, "chan%u rescinded\n",
1259                     note->chm_chanid);
1260         }
1261
1262         chan = sc->vmbus_chmap[note->chm_chanid];
1263         if (chan == NULL)
1264                 return;
1265         sc->vmbus_chmap[note->chm_chanid] = NULL;
1266
1267         taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task);
1268 }
1269
1270 static void
1271 vmbus_chan_detach_task(void *xchan, int pending __unused)
1272 {
1273         struct hv_vmbus_channel *chan = xchan;
1274
1275         if (VMBUS_CHAN_ISPRIMARY(chan)) {
1276                 /* Only primary channel owns the device */
1277                 hv_vmbus_child_device_unregister(chan);
1278                 /* NOTE: DO NOT free primary channel for now */
1279         } else {
1280                 struct vmbus_softc *sc = chan->vmbus_sc;
1281                 struct hv_vmbus_channel *pri_chan = chan->ch_prichan;
1282                 struct vmbus_chanmsg_chfree *req;
1283                 struct vmbus_msghc *mh;
1284                 int error;
1285
1286                 mh = vmbus_msghc_get(sc, sizeof(*req));
1287                 if (mh == NULL) {
1288                         device_printf(sc->vmbus_dev,
1289                             "can not get msg hypercall for chfree(chan%u)\n",
1290                             chan->ch_id);
1291                         goto remove;
1292                 }
1293
1294                 req = vmbus_msghc_dataptr(mh);
1295                 req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE;
1296                 req->chm_chanid = chan->ch_id;
1297
1298                 error = vmbus_msghc_exec_noresult(mh);
1299                 vmbus_msghc_put(sc, mh);
1300
1301                 if (error) {
1302                         device_printf(sc->vmbus_dev,
1303                             "chfree(chan%u) failed: %d",
1304                             chan->ch_id, error);
1305                         /* NOTE: Move on! */
1306                 } else {
1307                         if (bootverbose) {
1308                                 device_printf(sc->vmbus_dev, "chan%u freed\n",
1309                                     chan->ch_id);
1310                         }
1311                 }
1312 remove:
1313                 mtx_lock(&pri_chan->ch_subchan_lock);
1314                 TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink);
1315                 KASSERT(pri_chan->ch_subchan_cnt > 0,
1316                     ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt));
1317                 pri_chan->ch_subchan_cnt--;
1318                 mtx_unlock(&pri_chan->ch_subchan_lock);
1319                 wakeup(pri_chan);
1320
1321                 vmbus_chan_free(chan);
1322         }
1323 }
1324
1325 /*
1326  * Detach all devices and destroy the corresponding primary channels.
1327  */
1328 void
1329 vmbus_chan_destroy_all(struct vmbus_softc *sc)
1330 {
1331         struct hv_vmbus_channel *chan;
1332
1333         mtx_lock(&sc->vmbus_prichan_lock);
1334         while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) {
1335                 KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
1336                 TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink);
1337                 mtx_unlock(&sc->vmbus_prichan_lock);
1338
1339                 hv_vmbus_child_device_unregister(chan);
1340                 vmbus_chan_free(chan);
1341
1342                 mtx_lock(&sc->vmbus_prichan_lock);
1343         }
1344         bzero(sc->vmbus_chmap,
1345             sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX);
1346         mtx_unlock(&sc->vmbus_prichan_lock);
1347 }
1348
1349 /**
1350  * @brief Select the best outgoing channel
1351  * 
1352  * The channel whose vcpu binding is closest to the currect vcpu will
1353  * be selected.
1354  * If no multi-channel, always select primary channel
1355  * 
1356  * @param primary - primary channel
1357  */
1358 struct hv_vmbus_channel *
1359 vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
1360 {
1361         hv_vmbus_channel *new_channel = NULL;
1362         hv_vmbus_channel *outgoing_channel = primary;
1363         int old_cpu_distance = 0;
1364         int new_cpu_distance = 0;
1365         int cur_vcpu = 0;
1366         int smp_pro_id = PCPU_GET(cpuid);
1367
1368         if (TAILQ_EMPTY(&primary->ch_subchans)) {
1369                 return outgoing_channel;
1370         }
1371
1372         if (smp_pro_id >= MAXCPU) {
1373                 return outgoing_channel;
1374         }
1375
1376         cur_vcpu = VMBUS_PCPU_GET(primary->vmbus_sc, vcpuid, smp_pro_id);
1377         
1378         /* XXX need lock */
1379         TAILQ_FOREACH(new_channel, &primary->ch_subchans, ch_sublink) {
1380                 if ((new_channel->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0) {
1381                         continue;
1382                 }
1383
1384                 if (new_channel->target_vcpu == cur_vcpu){
1385                         return new_channel;
1386                 }
1387
1388                 old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
1389                     (outgoing_channel->target_vcpu - cur_vcpu) :
1390                     (cur_vcpu - outgoing_channel->target_vcpu));
1391
1392                 new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
1393                     (new_channel->target_vcpu - cur_vcpu) :
1394                     (cur_vcpu - new_channel->target_vcpu));
1395
1396                 if (old_cpu_distance < new_cpu_distance) {
1397                         continue;
1398                 }
1399
1400                 outgoing_channel = new_channel;
1401         }
1402
1403         return(outgoing_channel);
1404 }
1405
1406 struct hv_vmbus_channel **
1407 vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
1408 {
1409         struct hv_vmbus_channel **ret, *chan;
1410         int i;
1411
1412         ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
1413             M_WAITOK);
1414
1415         mtx_lock(&pri_chan->ch_subchan_lock);
1416
1417         while (pri_chan->ch_subchan_cnt < subchan_cnt)
1418                 mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0);
1419
1420         i = 0;
1421         TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) {
1422                 /* TODO: refcnt chan */
1423                 ret[i] = chan;
1424
1425                 ++i;
1426                 if (i == subchan_cnt)
1427                         break;
1428         }
1429         KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
1430             pri_chan->ch_subchan_cnt, subchan_cnt));
1431
1432         mtx_unlock(&pri_chan->ch_subchan_lock);
1433
1434         return ret;
1435 }
1436
1437 void
1438 vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
1439 {
1440
1441         free(subchan, M_TEMP);
1442 }
1443
1444 void
1445 vmbus_drain_subchan(struct hv_vmbus_channel *pri_chan)
1446 {
1447         mtx_lock(&pri_chan->ch_subchan_lock);
1448         while (pri_chan->ch_subchan_cnt > 0)
1449                 mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0);
1450         mtx_unlock(&pri_chan->ch_subchan_lock);
1451 }
1452
1453 void
1454 vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1455 {
1456         vmbus_chanmsg_proc_t msg_proc;
1457         uint32_t msg_type;
1458
1459         msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
1460         KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX,
1461             ("invalid message type %u", msg_type));
1462
1463         msg_proc = vmbus_chan_msgprocs[msg_type];
1464         if (msg_proc != NULL)
1465                 msg_proc(sc, msg);
1466 }