]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
MFC 304790,304791
[FreeBSD/stable/10.git] / sys / dev / hyperv / storvsc / hv_storvsc_drv_freebsd.c
1 /*-
2  * Copyright (c) 2009-2012,2016 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 /**
30  * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
 * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
32  * converted into VSCSI protocol messages which are delivered to the parent
33  * partition StorVSP driver over the Hyper-V VMBUS.
34  */
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include <sys/param.h>
39 #include <sys/proc.h>
40 #include <sys/condvar.h>
41 #include <sys/time.h>
42 #include <sys/systm.h>
43 #include <sys/sysctl.h>
44 #include <sys/sockio.h>
45 #include <sys/mbuf.h>
46 #include <sys/malloc.h>
47 #include <sys/module.h>
48 #include <sys/kernel.h>
49 #include <sys/queue.h>
50 #include <sys/lock.h>
51 #include <sys/sx.h>
52 #include <sys/taskqueue.h>
53 #include <sys/bus.h>
54 #include <sys/mutex.h>
55 #include <sys/callout.h>
56 #include <sys/smp.h>
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <vm/uma.h>
60 #include <sys/lock.h>
61 #include <sys/sema.h>
62 #include <sys/sglist.h>
63 #include <machine/bus.h>
64 #include <sys/bus_dma.h>
65
66 #include <cam/cam.h>
67 #include <cam/cam_ccb.h>
68 #include <cam/cam_periph.h>
69 #include <cam/cam_sim.h>
70 #include <cam/cam_xpt_sim.h>
71 #include <cam/cam_xpt_internal.h>
72 #include <cam/cam_debug.h>
73 #include <cam/scsi/scsi_all.h>
74 #include <cam/scsi/scsi_message.h>
75
76 #include <dev/hyperv/include/hyperv.h>
77 #include <dev/hyperv/include/vmbus.h>
78 #include "hv_vstorage.h"
79 #include "vmbus_if.h"
80
81 #define STORVSC_MAX_LUNS_PER_TARGET     (64)
82 #define STORVSC_MAX_IO_REQUESTS         (STORVSC_MAX_LUNS_PER_TARGET * 2)
83 #define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1)
84 #define BLKVSC_MAX_IO_REQUESTS          STORVSC_MAX_IO_REQUESTS
85 #define STORVSC_MAX_TARGETS             (2)
86
87 #define VSTOR_PKT_SIZE  (sizeof(struct vstor_packet) - vmscsi_size_delta)
88
89 /*
90  * 33 segments are needed to allow 128KB maxio, in case the data
91  * in the first page is _not_ PAGE_SIZE aligned, e.g.
92  *
93  *     |<----------- 128KB ----------->|
94  *     |                               |
95  *  0  2K 4K    8K   16K   124K  128K  130K
96  *  |  |  |     |     |       |     |  |
97  *  +--+--+-----+-----+.......+-----+--+--+
98  *  |  |  |     |     |       |     |  |  | DATA
99  *  |  |  |     |     |       |     |  |  |
100  *  +--+--+-----+-----+.......------+--+--+
101  *     |  |                         |  |
102  *     | 1|            31           | 1| ...... # of segments
103  */
104 #define STORVSC_DATA_SEGCNT_MAX         33
105 #define STORVSC_DATA_SEGSZ_MAX          PAGE_SIZE
106 #define STORVSC_DATA_SIZE_MAX           \
107         ((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX)
108
109 struct storvsc_softc;
110
/* Node holding one pre-allocated scatter/gather list in the global pool. */
struct hv_sgl_node {
	LIST_ENTRY(hv_sgl_node) link;		/* linkage on free/in-use list */
	struct sglist *sgl_data;		/* the sglist owned by this node */
};

/*
 * Global pool of sglist nodes; nodes migrate between the free list and
 * the in-use list.  Presumably backs the per-request bounce buffers
 * (hv_storvsc_request.bounce_sgl) — confirm in the allocation helpers.
 */
struct hv_sgl_page_pool{
	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
	LIST_HEAD(, hv_sgl_node) free_sgl_list;
	boolean_t                is_init;	/* pool has been initialized */
} g_hv_sgl_page_pool;

/* Direction of a request's data transfer, from the guest's viewpoint. */
enum storvsc_request_type {
	WRITE_TYPE,
	READ_TYPE,
	UNKNOWN_TYPE
};
127
128 SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
129         "Hyper-V storage interface");
130
131 static u_int hv_storvsc_use_win8ext_flags = 1;
132 SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW,
133         &hv_storvsc_use_win8ext_flags, 0,
134         "Use win8 extension flags or not");
135
136 static u_int hv_storvsc_use_pim_unmapped = 1;
137 SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
138         &hv_storvsc_use_pim_unmapped, 0,
139         "Optimize storvsc by using unmapped I/O");
140
141 static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE);
142 SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN,
143         &hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size");
144
145 static u_int hv_storvsc_max_io = 512;
146 SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN,
147         &hv_storvsc_max_io, 0, "Hyper-V storage max io limit");
148
149 #define STORVSC_MAX_IO                                          \
150         vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size,    \
151            STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE)
152
/*
 * Per-adapter statistics exported via sysctl.  chan_send_cnt[] is
 * bumped in hv_storvsc_io_request() on each successful send; the
 * data_* counters appear to classify requests by data-buffer type
 * (bio/vaddr/sg) — confirm against create_storvsc_request().
 */
struct hv_storvsc_sysctl {
	u_long		data_bio_cnt;
	u_long		data_vaddr_cnt;
	u_long		data_sg_cnt;
	u_long		chan_send_cnt[MAXCPU];
};

/*
 * GPA (guest physical address) range descriptor with inline storage for
 * up to STORVSC_DATA_SEGCNT_MAX page frames; passed as the PRP list of
 * a vmbus_chan_send_prplist() call.
 */
struct storvsc_gpa_range {
	struct vmbus_gpa_range	gpa_range;
	uint64_t		gpa_page[STORVSC_DATA_SEGCNT_MAX];
} __packed;

/* Per-I/O request tracking structure, paired 1:1 with a CAM CCB. */
struct hv_storvsc_request {
	LIST_ENTRY(hv_storvsc_request)	link;		/* free-list linkage */
	struct vstor_packet		vstor_packet;	/* wire-format packet */
	int				prp_cnt;	/* # pages in prp_list */
	struct storvsc_gpa_range	prp_list;	/* data-buffer page list */
	void				*sense_data;	/* autosense destination */
	uint8_t				sense_info_len;	/* capacity, then actual len */
	uint8_t				retries;
	union ccb			*ccb;		/* owning CAM CCB */
	struct storvsc_softc		*softc;		/* back-pointer to adapter */
	struct callout			callout;	/* per-I/O timeout timer */
	struct sema			synch_sema; /*Synchronize the request/response if needed */
	struct sglist			*bounce_sgl;	/* bounce buffer, if unaligned */
	unsigned int			bounce_sgl_count;
	uint64_t			not_aligned_seg_bits;	/* bitmap of unaligned segs */
	bus_dmamap_t			data_dmap;
};

/* Per-adapter (per-device instance) software context. */
struct storvsc_softc {
	struct vmbus_channel		*hs_chan;	/* primary VMBUS channel */
	LIST_HEAD(, hv_storvsc_request) hs_free_list;	/* pre-allocated requests */
	struct mtx			hs_lock;
	struct storvsc_driver_props	*hs_drv_props;	/* blkvsc vs storvsc limits */
	int				hs_unit;
	uint32_t			hs_frozen;	/* CAM queue frozen flag */
	struct cam_sim			*hs_sim;
	struct cam_path			*hs_path;
	uint32_t			hs_num_out_reqs; /* outstanding I/O count */
	boolean_t			hs_destroy;
	boolean_t			hs_drain_notify; /* waiter wants drain post */
	struct sema			hs_drain_sema;	/* posted when I/O drains */
	struct hv_storvsc_request	hs_init_req;	/* used for init handshake */
	struct hv_storvsc_request	hs_reset_req;	/* used for bus reset */
	device_t			hs_dev;
	bus_dma_tag_t			storvsc_req_dtag;
	struct hv_storvsc_sysctl	sysctl_data;
	uint32_t			hs_nchan;	/* total channels (1 + subs) */
	struct vmbus_channel		*hs_sel_chan[MAXCPU]; /* channel selection table */
};
204
205 /*
206  * The size of the vmscsi_request has changed in win8. The
207  * additional size is for the newly added elements in the
208  * structure. These elements are valid only when we are talking
209  * to a win8 host.
210  * Track the correct size we need to apply.
211  */
212 static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
213
214 /**
215  * HyperV storvsc timeout testing cases:
216  * a. IO returned after first timeout;
217  * b. IO returned after second timeout and queue freeze;
218  * c. IO returned while timer handler is running
219  * The first can be tested by "sg_senddiag -vv /dev/daX",
220  * and the second and third can be done by
221  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
222  */
223 #define HVS_TIMEOUT_TEST 0
224
225 /*
226  * Bus/adapter reset functionality on the Hyper-V host is
227  * buggy and it will be disabled until
228  * it can be further tested.
229  */
230 #define HVS_HOST_RESET 0
231
232 struct storvsc_driver_props {
233         char            *drv_name;
234         char            *drv_desc;
235         uint8_t         drv_max_luns_per_target;
236         uint32_t        drv_max_ios_per_target;
237         uint32_t        drv_ringbuffer_size;
238 };
239
240 enum hv_storage_type {
241         DRIVER_BLKVSC,
242         DRIVER_STORVSC,
243         DRIVER_UNKNOWN
244 };
245
246 #define HS_MAX_ADAPTERS 10
247
248 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
249
250 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
251 static const struct hyperv_guid gStorVscDeviceType={
252         .hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
253                  0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
254 };
255
256 /* {32412632-86cb-44a2-9b5c-50d1417354f5} */
257 static const struct hyperv_guid gBlkVscDeviceType={
258         .hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
259                  0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
260 };
261
262 static struct storvsc_driver_props g_drv_props_table[] = {
263         {"blkvsc", "Hyper-V IDE Storage Interface",
264          BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
265          20*PAGE_SIZE},
266         {"storvsc", "Hyper-V SCSI Storage Interface",
267          STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
268          20*PAGE_SIZE}
269 };
270
271 /*
272  * Sense buffer size changed in win8; have a run-time
273  * variable to track the size we should use.
274  */
275 static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
276
277 /*
278  * The storage protocol version is determined during the
279  * initial exchange with the host.  It will indicate which
280  * storage functionality is available in the host.
281 */
282 static int vmstor_proto_version;
283
/*
 * One entry per supported storage protocol version together with the
 * version-dependent wire parameters.  hv_storvsc_channel_init() offers
 * each entry to the host in order and latches the parameters of the
 * first version the host accepts.
 */
struct vmstor_proto {
	int proto_version;	/* VMSTOR_PROTOCOL_VERSION_* constant */
	int sense_buffer_size;	/* sense buffer size for this version */
	int vmscsi_size_delta;	/* bytes of vmscsi_req absent pre-win8 */
};

/* Ordered newest-first so the most capable version is negotiated. */
static const struct vmstor_proto vmstor_proto_list[] = {
	{
		VMSTOR_PROTOCOL_VERSION_WIN10,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN8_1,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN8,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN7,
		PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
		sizeof(struct vmscsi_win8_extension),
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN6,
		PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
		sizeof(struct vmscsi_win8_extension),
	}
};
317
318 /* static functions */
319 static int storvsc_probe(device_t dev);
320 static int storvsc_attach(device_t dev);
321 static int storvsc_detach(device_t dev);
322 static void storvsc_poll(struct cam_sim * sim);
323 static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
324 static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
325 static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
326 static enum hv_storage_type storvsc_get_storage_type(device_t dev);
327 static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
328 static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc);
329 static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
330                                         struct vstor_packet *vstor_packet,
331                                         struct hv_storvsc_request *request);
332 static int hv_storvsc_connect_vsp(struct storvsc_softc *);
333 static void storvsc_io_done(struct hv_storvsc_request *reqp);
334 static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
335                                 bus_dma_segment_t *orig_sgl,
336                                 unsigned int orig_sgl_count,
337                                 uint64_t seg_bits);
338 void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
339                                 unsigned int dest_sgl_count,
340                                 struct sglist* src_sgl,
341                                 uint64_t seg_bits);
342
343 static device_method_t storvsc_methods[] = {
344         /* Device interface */
345         DEVMETHOD(device_probe,         storvsc_probe),
346         DEVMETHOD(device_attach,        storvsc_attach),
347         DEVMETHOD(device_detach,        storvsc_detach),
348         DEVMETHOD(device_shutdown,      bus_generic_shutdown),
349         DEVMETHOD_END
350 };
351
352 static driver_t storvsc_driver = {
353         "storvsc", storvsc_methods, sizeof(struct storvsc_softc),
354 };
355
356 static devclass_t storvsc_devclass;
357 DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
358 MODULE_VERSION(storvsc, 1);
359 MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
360
361 static void
362 storvsc_subchan_attach(struct storvsc_softc *sc,
363     struct vmbus_channel *new_channel)
364 {
365         struct vmstor_chan_props props;
366         int ret = 0;
367
368         memset(&props, 0, sizeof(props));
369
370         vmbus_chan_cpu_rr(new_channel);
371         ret = vmbus_chan_open(new_channel,
372             sc->hs_drv_props->drv_ringbuffer_size,
373             sc->hs_drv_props->drv_ringbuffer_size,
374             (void *)&props,
375             sizeof(struct vmstor_chan_props),
376             hv_storvsc_on_channel_callback, sc);
377 }
378
379 /**
380  * @brief Send multi-channel creation request to host
381  *
382  * @param device  a Hyper-V device pointer
383  * @param max_chans  the max channels supported by vmbus
384  */
385 static void
386 storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_chans)
387 {
388         struct vmbus_channel **subchan;
389         struct hv_storvsc_request *request;
390         struct vstor_packet *vstor_packet;      
391         int request_channels_cnt = 0;
392         int ret, i;
393
394         /* get multichannels count that need to create */
395         request_channels_cnt = MIN(max_chans, mp_ncpus);
396
397         request = &sc->hs_init_req;
398
399         /* request the host to create multi-channel */
400         memset(request, 0, sizeof(struct hv_storvsc_request));
401         
402         sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
403
404         vstor_packet = &request->vstor_packet;
405         
406         vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
407         vstor_packet->flags = REQUEST_COMPLETION_FLAG;
408         vstor_packet->u.multi_channels_cnt = request_channels_cnt;
409
410         ret = vmbus_chan_send(sc->hs_chan,
411             VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
412             vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
413
414         /* wait for 5 seconds */
415         ret = sema_timedwait(&request->synch_sema, 5 * hz);
416         if (ret != 0) {         
417                 printf("Storvsc_error: create multi-channel timeout, %d\n",
418                     ret);
419                 return;
420         }
421
422         if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
423             vstor_packet->status != 0) {                
424                 printf("Storvsc_error: create multi-channel invalid operation "
425                     "(%d) or statue (%u)\n",
426                     vstor_packet->operation, vstor_packet->status);
427                 return;
428         }
429
430         /* Update channel count */
431         sc->hs_nchan = request_channels_cnt + 1;
432
433         /* Wait for sub-channels setup to complete. */
434         subchan = vmbus_subchan_get(sc->hs_chan, request_channels_cnt);
435
436         /* Attach the sub-channels. */
437         for (i = 0; i < request_channels_cnt; ++i)
438                 storvsc_subchan_attach(sc, subchan[i]);
439
440         /* Release the sub-channels. */
441         vmbus_subchan_rel(subchan, request_channels_cnt);
442
443         if (bootverbose)
444                 printf("Storvsc create multi-channel success!\n");
445 }
446
447 /**
448  * @brief initialize channel connection to parent partition
449  *
450  * @param dev  a Hyper-V device pointer
451  * @returns  0 on success, non-zero error on failure
452  */
453 static int
454 hv_storvsc_channel_init(struct storvsc_softc *sc)
455 {
456         int ret = 0, i;
457         struct hv_storvsc_request *request;
458         struct vstor_packet *vstor_packet;
459         uint16_t max_chans = 0;
460         boolean_t support_multichannel = FALSE;
461         uint32_t version;
462
463         max_chans = 0;
464         support_multichannel = FALSE;
465
466         request = &sc->hs_init_req;
467         memset(request, 0, sizeof(struct hv_storvsc_request));
468         vstor_packet = &request->vstor_packet;
469         request->softc = sc;
470
471         /**
472          * Initiate the vsc/vsp initialization protocol on the open channel
473          */
474         sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
475
476         vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
477         vstor_packet->flags = REQUEST_COMPLETION_FLAG;
478
479
480         ret = vmbus_chan_send(sc->hs_chan,
481             VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
482             vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
483
484         if (ret != 0)
485                 goto cleanup;
486
487         /* wait 5 seconds */
488         ret = sema_timedwait(&request->synch_sema, 5 * hz);
489         if (ret != 0)
490                 goto cleanup;
491
492         if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
493                 vstor_packet->status != 0) {
494                 goto cleanup;
495         }
496
497         for (i = 0; i < nitems(vmstor_proto_list); i++) {
498                 /* reuse the packet for version range supported */
499
500                 memset(vstor_packet, 0, sizeof(struct vstor_packet));
501                 vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
502                 vstor_packet->flags = REQUEST_COMPLETION_FLAG;
503
504                 vstor_packet->u.version.major_minor =
505                         vmstor_proto_list[i].proto_version;
506
507                 /* revision is only significant for Windows guests */
508                 vstor_packet->u.version.revision = 0;
509
510                 ret = vmbus_chan_send(sc->hs_chan,
511                     VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
512                     vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
513
514                 if (ret != 0)
515                         goto cleanup;
516
517                 /* wait 5 seconds */
518                 ret = sema_timedwait(&request->synch_sema, 5 * hz);
519
520                 if (ret)
521                         goto cleanup;
522
523                 if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
524                         ret = EINVAL;
525                         goto cleanup;   
526                 }
527                 if (vstor_packet->status == 0) {
528                         vmstor_proto_version =
529                                 vmstor_proto_list[i].proto_version;
530                         sense_buffer_size =
531                                 vmstor_proto_list[i].sense_buffer_size;
532                         vmscsi_size_delta =
533                                 vmstor_proto_list[i].vmscsi_size_delta;
534                         break;
535                 }
536         }
537
538         if (vstor_packet->status != 0) {
539                 ret = EINVAL;
540                 goto cleanup;
541         }
542         /**
543          * Query channel properties
544          */
545         memset(vstor_packet, 0, sizeof(struct vstor_packet));
546         vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
547         vstor_packet->flags = REQUEST_COMPLETION_FLAG;
548
549         ret = vmbus_chan_send(sc->hs_chan,
550             VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
551             vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
552
553         if ( ret != 0)
554                 goto cleanup;
555
556         /* wait 5 seconds */
557         ret = sema_timedwait(&request->synch_sema, 5 * hz);
558
559         if (ret != 0)
560                 goto cleanup;
561
562         /* TODO: Check returned version */
563         if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
564             vstor_packet->status != 0) {
565                 goto cleanup;
566         }
567
568         /* multi-channels feature is supported by WIN8 and above version */
569         max_chans = vstor_packet->u.chan_props.max_channel_cnt;
570         version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev);
571         if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 &&
572             (vstor_packet->u.chan_props.flags &
573              HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
574                 support_multichannel = TRUE;
575         }
576
577         memset(vstor_packet, 0, sizeof(struct vstor_packet));
578         vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
579         vstor_packet->flags = REQUEST_COMPLETION_FLAG;
580
581         ret = vmbus_chan_send(sc->hs_chan,
582             VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
583             vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
584
585         if (ret != 0) {
586                 goto cleanup;
587         }
588
589         /* wait 5 seconds */
590         ret = sema_timedwait(&request->synch_sema, 5 * hz);
591
592         if (ret != 0)
593                 goto cleanup;
594
595         if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
596             vstor_packet->status != 0)
597                 goto cleanup;
598
599         /*
600          * If multi-channel is supported, send multichannel create
601          * request to host.
602          */
603         if (support_multichannel)
604                 storvsc_send_multichannel_request(sc, max_chans);
605 cleanup:
606         sema_destroy(&request->synch_sema);
607         return (ret);
608 }
609
610 /**
611  * @brief Open channel connection to paraent partition StorVSP driver
612  *
613  * Open and initialize channel connection to parent partition StorVSP driver.
614  *
615  * @param pointer to a Hyper-V device
616  * @returns 0 on success, non-zero error on failure
617  */
618 static int
619 hv_storvsc_connect_vsp(struct storvsc_softc *sc)
620 {       
621         int ret = 0;
622         struct vmstor_chan_props props;
623
624         memset(&props, 0, sizeof(struct vmstor_chan_props));
625
626         /*
627          * Open the channel
628          */
629         vmbus_chan_cpu_rr(sc->hs_chan);
630         ret = vmbus_chan_open(
631                 sc->hs_chan,
632                 sc->hs_drv_props->drv_ringbuffer_size,
633                 sc->hs_drv_props->drv_ringbuffer_size,
634                 (void *)&props,
635                 sizeof(struct vmstor_chan_props),
636                 hv_storvsc_on_channel_callback, sc);
637
638         if (ret != 0) {
639                 return ret;
640         }
641
642         ret = hv_storvsc_channel_init(sc);
643         return (ret);
644 }
645
#if HVS_HOST_RESET
/*
 * Issue a RESETBUS request to the host and wait (up to 5 s) for its
 * completion.  Compiled out by default (HVS_HOST_RESET == 0) because
 * host-side bus reset is known to be buggy; see the note above.
 */
static int
hv_storvsc_host_reset(struct storvsc_softc *sc)
{
	int ret = 0;

	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	request = &sc->hs_reset_req;
	request->softc = sc;
	vstor_packet = &request->vstor_packet;

	sema_init(&request->synch_sema, 0, "stor synch sema");

	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	/*
	 * Fix: the original referenced the undeclared 'dev->channel'
	 * (this function takes only 'sc'), so the code could not even
	 * compile when HVS_HOST_RESET was enabled.  Send on the primary
	 * channel from the softc instead.
	 */
	ret = vmbus_chan_send(sc->hs_chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    vstor_packet, VSTOR_PKT_SIZE,
	    (uint64_t)(uintptr_t)&sc->hs_reset_req);

	if (ret != 0) {
		goto cleanup;
	}

	ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */

	if (ret) {
		goto cleanup;
	}


	/*
	 * At this point, all outstanding requests in the adapter
	 * should have been flushed out and return to us
	 */

cleanup:
	sema_destroy(&request->synch_sema);
	return (ret);
}
#endif /* HVS_HOST_RESET */
690
691 /**
692  * @brief Function to initiate an I/O request
693  *
694  * @param device Hyper-V device pointer
695  * @param request pointer to a request structure
696  * @returns 0 on success, non-zero error on failure
697  */
698 static int
699 hv_storvsc_io_request(struct storvsc_softc *sc,
700                                           struct hv_storvsc_request *request)
701 {
702         struct vstor_packet *vstor_packet = &request->vstor_packet;
703         struct vmbus_channel* outgoing_channel = NULL;
704         int ret = 0, ch_sel;
705
706         vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
707
708         vstor_packet->u.vm_srb.length =
709             sizeof(struct vmscsi_req) - vmscsi_size_delta;
710         
711         vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
712
713         vstor_packet->u.vm_srb.transfer_len =
714             request->prp_list.gpa_range.gpa_len;
715
716         vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
717
718         ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan;
719         outgoing_channel = sc->hs_sel_chan[ch_sel];
720
721         mtx_unlock(&request->softc->hs_lock);
722         if (request->prp_list.gpa_range.gpa_len) {
723                 ret = vmbus_chan_send_prplist(outgoing_channel,
724                     &request->prp_list.gpa_range, request->prp_cnt,
725                     vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
726         } else {
727                 ret = vmbus_chan_send(outgoing_channel,
728                     VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
729                     vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
730         }
731         /* statistic for successful request sending on each channel */
732         if (!ret) {
733                 sc->sysctl_data.chan_send_cnt[ch_sel]++;
734         }
735         mtx_lock(&request->softc->hs_lock);
736
737         if (ret != 0) {
738                 printf("Unable to send packet %p ret %d", vstor_packet, ret);
739         } else {
740                 atomic_add_int(&sc->hs_num_out_reqs, 1);
741         }
742
743         return (ret);
744 }
745
746
/**
 * Process IO_COMPLETION_OPERATION and ready
 * the result to be completed for upper layer
 * processing by the CAM layer.
 *
 * Runs in the channel callback context; 'vstor_packet' is the host's
 * response copy, while 'request' is the originating request whose
 * embedded packet is updated before completion.
 */
static void
hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
			   struct vstor_packet *vstor_packet,
			   struct hv_storvsc_request *request)
{
	struct vmscsi_req *vm_srb;

	vm_srb = &vstor_packet->u.vm_srb;

	/*
	 * Copy some fields of the host's response into the request structure,
	 * because the fields will be used later in storvsc_io_done().
	 */
	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;

	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
		/* Autosense data available */

		/* The host must not return more sense data than we can hold. */
		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
				("vm_srb->sense_info_len <= "
				 "request->sense_info_len"));

		memcpy(request->sense_data, vm_srb->u.sense_data,
			vm_srb->sense_info_len);

		/* sense_info_len now holds the actual length, not capacity. */
		request->sense_info_len = vm_srb->sense_info_len;
	}

	/* Complete request by passing to the CAM layer */
	storvsc_io_done(request);
	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
	/* Wake a waiter (e.g. detach) once all outstanding I/O has drained. */
	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
		sema_post(&sc->hs_drain_sema);
	}
}
790
791 static void
792 hv_storvsc_rescan_target(struct storvsc_softc *sc)
793 {
794         path_id_t pathid;
795         target_id_t targetid;
796         union ccb *ccb;
797
798         pathid = cam_sim_path(sc->hs_sim);
799         targetid = CAM_TARGET_WILDCARD;
800
801         /*
802          * Allocate a CCB and schedule a rescan.
803          */
804         ccb = xpt_alloc_ccb_nowait();
805         if (ccb == NULL) {
806                 printf("unable to alloc CCB for rescan\n");
807                 return;
808         }
809
810         if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
811             CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
812                 printf("unable to create path for rescan, pathid: %u,"
813                     "targetid: %u\n", pathid, targetid);
814                 xpt_free_ccb(ccb);
815                 return;
816         }
817
818         if (targetid == CAM_TARGET_WILDCARD)
819                 ccb->ccb_h.func_code = XPT_SCAN_BUS;
820         else
821                 ccb->ccb_h.func_code = XPT_SCAN_TGT;
822
823         xpt_rescan(ccb);
824 }
825
826 static void
827 hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc)
828 {
829         int ret = 0;
830         struct storvsc_softc *sc = xsc;
831         uint32_t bytes_recvd;
832         uint64_t request_id;
833         uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
834         struct hv_storvsc_request *request;
835         struct vstor_packet *vstor_packet;
836
837         bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
838         ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id);
839         KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough"));
840         /* XXX check bytes_recvd to make sure that it contains enough data */
841
842         while ((ret == 0) && (bytes_recvd > 0)) {
843                 request = (struct hv_storvsc_request *)(uintptr_t)request_id;
844
845                 if ((request == &sc->hs_init_req) ||
846                         (request == &sc->hs_reset_req)) {
847                         memcpy(&request->vstor_packet, packet,
848                                    sizeof(struct vstor_packet));
849                         sema_post(&request->synch_sema);
850                 } else {
851                         vstor_packet = (struct vstor_packet *)packet;
852                         switch(vstor_packet->operation) {
853                         case VSTOR_OPERATION_COMPLETEIO:
854                                 if (request == NULL)
855                                         panic("VMBUS: storvsc received a "
856                                             "packet with NULL request id in "
857                                             "COMPLETEIO operation.");
858
859                                 hv_storvsc_on_iocompletion(sc,
860                                                         vstor_packet, request);
861                                 break;
862                         case VSTOR_OPERATION_REMOVEDEVICE:
863                                 printf("VMBUS: storvsc operation %d not "
864                                     "implemented.\n", vstor_packet->operation);
865                                 /* TODO: implement */
866                                 break;
867                         case VSTOR_OPERATION_ENUMERATE_BUS:
868                                 hv_storvsc_rescan_target(sc);
869                                 break;
870                         default:
871                                 break;
872                         }                       
873                 }
874
875                 bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8),
876                 ret = vmbus_chan_recv(channel, packet, &bytes_recvd,
877                     &request_id);
878                 KASSERT(ret != ENOBUFS,
879                     ("storvsc recvbuf is not large enough"));
880                 /*
881                  * XXX check bytes_recvd to make sure that it contains
882                  * enough data
883                  */
884         }
885 }
886
887 /**
888  * @brief StorVSC probe function
889  *
890  * Device probe function.  Returns 0 if the input device is a StorVSC
891  * device.  Otherwise, a ENXIO is returned.  If the input device is
892  * for BlkVSC (paravirtual IDE) device and this support is disabled in
893  * favor of the emulated ATA/IDE device, return ENXIO.
894  *
895  * @param a device
896  * @returns 0 on success, ENXIO if not a matcing StorVSC device
897  */
898 static int
899 storvsc_probe(device_t dev)
900 {
901         int ata_disk_enable = 0;
902         int ret = ENXIO;
903         
904         switch (storvsc_get_storage_type(dev)) {
905         case DRIVER_BLKVSC:
906                 if(bootverbose)
907                         device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n");
908                 if (!getenv_int("hw.ata.disk_enable", &ata_disk_enable)) {
909                         if(bootverbose)
910                                 device_printf(dev,
911                                         "Enlightened ATA/IDE detected\n");
912                         device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
913                         ret = BUS_PROBE_DEFAULT;
914                 } else if(bootverbose)
915                         device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n");
916                 break;
917         case DRIVER_STORVSC:
918                 if(bootverbose)
919                         device_printf(dev, "Enlightened SCSI device detected\n");
920                 device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
921                 ret = BUS_PROBE_DEFAULT;
922                 break;
923         default:
924                 ret = ENXIO;
925         }
926         return (ret);
927 }
928
929 static void
930 storvsc_create_chan_sel(struct storvsc_softc *sc)
931 {
932         struct vmbus_channel **subch;
933         int i, nsubch;
934
935         sc->hs_sel_chan[0] = sc->hs_chan;
936         nsubch = sc->hs_nchan - 1;
937         if (nsubch == 0)
938                 return;
939
940         subch = vmbus_subchan_get(sc->hs_chan, nsubch);
941         for (i = 0; i < nsubch; i++)
942                 sc->hs_sel_chan[i + 1] = subch[i];
943         vmbus_subchan_rel(subch, nsubch);
944 }
945
946 static int
947 storvsc_init_requests(device_t dev)
948 {
949         struct storvsc_softc *sc = device_get_softc(dev);
950         struct hv_storvsc_request *reqp;
951         int error, i;
952
953         LIST_INIT(&sc->hs_free_list);
954
955         error = bus_dma_tag_create(
956                 bus_get_dma_tag(dev),           /* parent */
957                 1,                              /* alignment */
958                 PAGE_SIZE,                      /* boundary */
959                 BUS_SPACE_MAXADDR,              /* lowaddr */
960                 BUS_SPACE_MAXADDR,              /* highaddr */
961                 NULL, NULL,                     /* filter, filterarg */
962                 STORVSC_DATA_SIZE_MAX,          /* maxsize */
963                 STORVSC_DATA_SEGCNT_MAX,        /* nsegments */
964                 STORVSC_DATA_SEGSZ_MAX,         /* maxsegsize */
965                 0,                              /* flags */
966                 NULL,                           /* lockfunc */
967                 NULL,                           /* lockfuncarg */
968                 &sc->storvsc_req_dtag);
969         if (error) {
970                 device_printf(dev, "failed to create storvsc dma tag\n");
971                 return (error);
972         }
973
974         for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
975                 reqp = malloc(sizeof(struct hv_storvsc_request),
976                                  M_DEVBUF, M_WAITOK|M_ZERO);
977                 reqp->softc = sc;
978                 error = bus_dmamap_create(sc->storvsc_req_dtag, 0,
979                                 &reqp->data_dmap);
980                 if (error) {
981                         device_printf(dev, "failed to allocate storvsc "
982                             "data dmamap\n");
983                         goto cleanup;
984                 }
985                 LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
986         }
987         return (0);
988
989 cleanup:
990         while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) {
991                 LIST_REMOVE(reqp, link);
992                 bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
993                 free(reqp, M_DEVBUF);
994         }
995         return (error);
996 }
997
998 static void
999 storvsc_sysctl(device_t dev)
1000 {
1001         struct sysctl_oid_list *child;
1002         struct sysctl_ctx_list *ctx;
1003         struct sysctl_oid *ch_tree, *chid_tree;
1004         struct storvsc_softc *sc;
1005         char name[16];
1006         int i;
1007
1008         sc = device_get_softc(dev);
1009         ctx = device_get_sysctl_ctx(dev);
1010         child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
1011
1012         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt", CTLFLAG_RW,
1013                 &sc->sysctl_data.data_bio_cnt, "# of bio data block");
1014         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt", CTLFLAG_RW,
1015                 &sc->sysctl_data.data_vaddr_cnt, "# of vaddr data block");
1016         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt", CTLFLAG_RW,
1017                 &sc->sysctl_data.data_sg_cnt, "# of sg data block");
1018
1019         /* dev.storvsc.UNIT.channel */
1020         ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel",
1021                 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1022         if (ch_tree == NULL)
1023                 return;
1024
1025         for (i = 0; i < sc->hs_nchan; i++) {
1026                 uint32_t ch_id;
1027
1028                 ch_id = vmbus_chan_id(sc->hs_sel_chan[i]);
1029                 snprintf(name, sizeof(name), "%d", ch_id);
1030                 /* dev.storvsc.UNIT.channel.CHID */
1031                 chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
1032                         OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1033                 if (chid_tree == NULL)
1034                         return;
1035                 /* dev.storvsc.UNIT.channel.CHID.send_req */
1036                 SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
1037                         "send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i],
1038                         "# of request sending from this channel");
1039         }
1040 }
1041
1042 /**
1043  * @brief StorVSC attach function
1044  *
1045  * Function responsible for allocating per-device structures,
1046  * setting up CAM interfaces and scanning for available LUNs to
1047  * be used for SCSI device peripherals.
1048  *
1049  * @param a device
1050  * @returns 0 on success or an error on failure
1051  */
1052 static int
1053 storvsc_attach(device_t dev)
1054 {
1055         enum hv_storage_type stor_type;
1056         struct storvsc_softc *sc;
1057         struct cam_devq *devq;
1058         int ret, i, j;
1059         struct hv_storvsc_request *reqp;
1060         struct root_hold_token *root_mount_token = NULL;
1061         struct hv_sgl_node *sgl_node = NULL;
1062         void *tmp_buff = NULL;
1063
1064         /*
1065          * We need to serialize storvsc attach calls.
1066          */
1067         root_mount_token = root_mount_hold("storvsc");
1068
1069         sc = device_get_softc(dev);
1070         sc->hs_nchan = 1;
1071         sc->hs_chan = vmbus_get_channel(dev);
1072
1073         stor_type = storvsc_get_storage_type(dev);
1074
1075         if (stor_type == DRIVER_UNKNOWN) {
1076                 ret = ENODEV;
1077                 goto cleanup;
1078         }
1079
1080         /* fill in driver specific properties */
1081         sc->hs_drv_props = &g_drv_props_table[stor_type];
1082         sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size;
1083         sc->hs_drv_props->drv_max_ios_per_target =
1084                 MIN(STORVSC_MAX_IO, hv_storvsc_max_io);
1085         if (bootverbose) {
1086                 printf("storvsc ringbuffer size: %d, max_io: %d\n",
1087                         sc->hs_drv_props->drv_ringbuffer_size,
1088                         sc->hs_drv_props->drv_max_ios_per_target);
1089         }
1090         /* fill in device specific properties */
1091         sc->hs_unit     = device_get_unit(dev);
1092         sc->hs_dev      = dev;
1093
1094         mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
1095
1096         ret = storvsc_init_requests(dev);
1097         if (ret != 0)
1098                 goto cleanup;
1099
1100         /* create sg-list page pool */
1101         if (FALSE == g_hv_sgl_page_pool.is_init) {
1102                 g_hv_sgl_page_pool.is_init = TRUE;
1103                 LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
1104                 LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
1105
1106                 /*
1107                  * Pre-create SG list, each SG list with
1108                  * STORVSC_DATA_SEGCNT_MAX segments, each
1109                  * segment has one page buffer
1110                  */
1111                 for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) {
1112                         sgl_node = malloc(sizeof(struct hv_sgl_node),
1113                             M_DEVBUF, M_WAITOK|M_ZERO);
1114
1115                         sgl_node->sgl_data =
1116                             sglist_alloc(STORVSC_DATA_SEGCNT_MAX,
1117                             M_WAITOK|M_ZERO);
1118
1119                         for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1120                                 tmp_buff = malloc(PAGE_SIZE,
1121                                     M_DEVBUF, M_WAITOK|M_ZERO);
1122
1123                                 sgl_node->sgl_data->sg_segs[j].ss_paddr =
1124                                     (vm_paddr_t)tmp_buff;
1125                         }
1126
1127                         LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
1128                             sgl_node, link);
1129                 }
1130         }
1131
1132         sc->hs_destroy = FALSE;
1133         sc->hs_drain_notify = FALSE;
1134         sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
1135
1136         ret = hv_storvsc_connect_vsp(sc);
1137         if (ret != 0) {
1138                 goto cleanup;
1139         }
1140
1141         /* Construct cpu to channel mapping */
1142         storvsc_create_chan_sel(sc);
1143
1144         /*
1145          * Create the device queue.
1146          * Hyper-V maps each target to one SCSI HBA
1147          */
1148         devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
1149         if (devq == NULL) {
1150                 device_printf(dev, "Failed to alloc device queue\n");
1151                 ret = ENOMEM;
1152                 goto cleanup;
1153         }
1154
1155         sc->hs_sim = cam_sim_alloc(storvsc_action,
1156                                 storvsc_poll,
1157                                 sc->hs_drv_props->drv_name,
1158                                 sc,
1159                                 sc->hs_unit,
1160                                 &sc->hs_lock, 1,
1161                                 sc->hs_drv_props->drv_max_ios_per_target,
1162                                 devq);
1163
1164         if (sc->hs_sim == NULL) {
1165                 device_printf(dev, "Failed to alloc sim\n");
1166                 cam_simq_free(devq);
1167                 ret = ENOMEM;
1168                 goto cleanup;
1169         }
1170
1171         mtx_lock(&sc->hs_lock);
1172         /* bus_id is set to 0, need to get it from VMBUS channel query? */
1173         if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
1174                 cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1175                 mtx_unlock(&sc->hs_lock);
1176                 device_printf(dev, "Unable to register SCSI bus\n");
1177                 ret = ENXIO;
1178                 goto cleanup;
1179         }
1180
1181         if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
1182                  cam_sim_path(sc->hs_sim),
1183                 CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1184                 xpt_bus_deregister(cam_sim_path(sc->hs_sim));
1185                 cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1186                 mtx_unlock(&sc->hs_lock);
1187                 device_printf(dev, "Unable to create path\n");
1188                 ret = ENXIO;
1189                 goto cleanup;
1190         }
1191
1192         mtx_unlock(&sc->hs_lock);
1193
1194         storvsc_sysctl(dev);
1195
1196         root_mount_rel(root_mount_token);
1197         return (0);
1198
1199
1200 cleanup:
1201         root_mount_rel(root_mount_token);
1202         while (!LIST_EMPTY(&sc->hs_free_list)) {
1203                 reqp = LIST_FIRST(&sc->hs_free_list);
1204                 LIST_REMOVE(reqp, link);
1205                 bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1206                 free(reqp, M_DEVBUF);
1207         }
1208
1209         while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1210                 sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1211                 LIST_REMOVE(sgl_node, link);
1212                 for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1213                         if (NULL !=
1214                             (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1215                                 free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1216                         }
1217                 }
1218                 sglist_free(sgl_node->sgl_data);
1219                 free(sgl_node, M_DEVBUF);
1220         }
1221
1222         return (ret);
1223 }
1224
1225 /**
1226  * @brief StorVSC device detach function
1227  *
1228  * This function is responsible for safely detaching a
1229  * StorVSC device.  This includes waiting for inbound responses
1230  * to complete and freeing associated per-device structures.
1231  *
1232  * @param dev a device
1233  * returns 0 on success
1234  */
1235 static int
1236 storvsc_detach(device_t dev)
1237 {
1238         struct storvsc_softc *sc = device_get_softc(dev);
1239         struct hv_storvsc_request *reqp = NULL;
1240         struct hv_sgl_node *sgl_node = NULL;
1241         int j = 0;
1242
1243         sc->hs_destroy = TRUE;
1244
1245         /*
1246          * At this point, all outbound traffic should be disabled. We
1247          * only allow inbound traffic (responses) to proceed so that
1248          * outstanding requests can be completed.
1249          */
1250
1251         sc->hs_drain_notify = TRUE;
1252         sema_wait(&sc->hs_drain_sema);
1253         sc->hs_drain_notify = FALSE;
1254
1255         /*
1256          * Since we have already drained, we don't need to busy wait.
1257          * The call to close the channel will reset the callback
1258          * under the protection of the incoming channel lock.
1259          */
1260
1261         vmbus_chan_close(sc->hs_chan);
1262
1263         mtx_lock(&sc->hs_lock);
1264         while (!LIST_EMPTY(&sc->hs_free_list)) {
1265                 reqp = LIST_FIRST(&sc->hs_free_list);
1266                 LIST_REMOVE(reqp, link);
1267                 bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1268                 free(reqp, M_DEVBUF);
1269         }
1270         mtx_unlock(&sc->hs_lock);
1271
1272         while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1273                 sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1274                 LIST_REMOVE(sgl_node, link);
1275                 for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){
1276                         if (NULL !=
1277                             (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1278                                 free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1279                         }
1280                 }
1281                 sglist_free(sgl_node->sgl_data);
1282                 free(sgl_node, M_DEVBUF);
1283         }
1284         
1285         return (0);
1286 }
1287
1288 #if HVS_TIMEOUT_TEST
1289 /**
1290  * @brief unit test for timed out operations
1291  *
1292  * This function provides unit testing capability to simulate
1293  * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
1294  * is required.
1295  *
1296  * @param reqp pointer to a request structure
1297  * @param opcode SCSI operation being performed
1298  * @param wait if 1, wait for I/O to complete
1299  */
1300 static void
1301 storvsc_timeout_test(struct hv_storvsc_request *reqp,
1302                 uint8_t opcode, int wait)
1303 {
1304         int ret;
1305         union ccb *ccb = reqp->ccb;
1306         struct storvsc_softc *sc = reqp->softc;
1307
1308         if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
1309                 return;
1310         }
1311
1312         if (wait) {
1313                 mtx_lock(&reqp->event.mtx);
1314         }
1315         ret = hv_storvsc_io_request(sc, reqp);
1316         if (ret != 0) {
1317                 if (wait) {
1318                         mtx_unlock(&reqp->event.mtx);
1319                 }
1320                 printf("%s: io_request failed with %d.\n",
1321                                 __func__, ret);
1322                 ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1323                 mtx_lock(&sc->hs_lock);
1324                 storvsc_free_request(sc, reqp);
1325                 xpt_done(ccb);
1326                 mtx_unlock(&sc->hs_lock);
1327                 return;
1328         }
1329
1330         if (wait) {
1331                 xpt_print(ccb->ccb_h.path,
1332                                 "%u: %s: waiting for IO return.\n",
1333                                 ticks, __func__);
1334                 ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
1335                 mtx_unlock(&reqp->event.mtx);
1336                 xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
1337                                 ticks, __func__, (ret == 0)?
1338                                 "IO return detected" :
1339                                 "IO return not detected");
1340                 /*
1341                  * Now both the timer handler and io done are running
1342                  * simultaneously. We want to confirm the io done always
1343                  * finishes after the timer handler exits. So reqp used by
1344                  * timer handler is not freed or stale. Do busy loop for
1345                  * another 1/10 second to make sure io done does
1346                  * wait for the timer handler to complete.
1347                  */
1348                 DELAY(100*1000);
1349                 mtx_lock(&sc->hs_lock);
1350                 xpt_print(ccb->ccb_h.path,
1351                                 "%u: %s: finishing, queue frozen %d, "
1352                                 "ccb status 0x%x scsi_status 0x%x.\n",
1353                                 ticks, __func__, sc->hs_frozen,
1354                                 ccb->ccb_h.status,
1355                                 ccb->csio.scsi_status);
1356                 mtx_unlock(&sc->hs_lock);
1357         }
1358 }
1359 #endif /* HVS_TIMEOUT_TEST */
1360
1361 #ifdef notyet
1362 /**
1363  * @brief timeout handler for requests
1364  *
1365  * This function is called as a result of a callout expiring.
1366  *
1367  * @param arg pointer to a request
1368  */
1369 static void
1370 storvsc_timeout(void *arg)
1371 {
1372         struct hv_storvsc_request *reqp = arg;
1373         struct storvsc_softc *sc = reqp->softc;
1374         union ccb *ccb = reqp->ccb;
1375
1376         if (reqp->retries == 0) {
1377                 mtx_lock(&sc->hs_lock);
1378                 xpt_print(ccb->ccb_h.path,
1379                     "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
1380                     ticks, reqp, ccb->ccb_h.timeout / 1000);
1381                 cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
1382                 mtx_unlock(&sc->hs_lock);
1383
1384                 reqp->retries++;
1385                 callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
1386                     0, storvsc_timeout, reqp, 0);
1387 #if HVS_TIMEOUT_TEST
1388                 storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
1389 #endif
1390                 return;
1391         }
1392
1393         mtx_lock(&sc->hs_lock);
1394         xpt_print(ccb->ccb_h.path,
1395                 "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
1396                 ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
1397                 (sc->hs_frozen == 0)?
1398                 "freezing the queue" : "the queue is already frozen");
1399         if (sc->hs_frozen == 0) {
1400                 sc->hs_frozen = 1;
1401                 xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
1402         }
1403         mtx_unlock(&sc->hs_lock);
1404         
1405 #if HVS_TIMEOUT_TEST
1406         storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
1407 #endif
1408 }
1409 #endif
1410
1411 /**
1412  * @brief StorVSC device poll function
1413  *
1414  * This function is responsible for servicing requests when
1415  * interrupts are disabled (i.e when we are dumping core.)
1416  *
1417  * @param sim a pointer to a CAM SCSI interface module
1418  */
1419 static void
1420 storvsc_poll(struct cam_sim *sim)
1421 {
1422         struct storvsc_softc *sc = cam_sim_softc(sim);
1423
1424         mtx_assert(&sc->hs_lock, MA_OWNED);
1425         mtx_unlock(&sc->hs_lock);
1426         hv_storvsc_on_channel_callback(sc->hs_chan, sc);
1427         mtx_lock(&sc->hs_lock);
1428 }
1429
1430 /**
1431  * @brief StorVSC device action function
1432  *
1433  * This function is responsible for handling SCSI operations which
1434  * are passed from the CAM layer.  The requests are in the form of
1435  * CAM control blocks which indicate the action being performed.
1436  * Not all actions require converting the request to a VSCSI protocol
1437  * message - these actions can be responded to by this driver.
1438  * Requests which are destined for a backend storage device are converted
1439  * to a VSCSI protocol message and sent on the channel connection associated
1440  * with this device.
1441  *
1442  * @param sim pointer to a CAM SCSI interface module
1443  * @param ccb pointer to a CAM control block
1444  */
1445 static void
1446 storvsc_action(struct cam_sim *sim, union ccb *ccb)
1447 {
1448         struct storvsc_softc *sc = cam_sim_softc(sim);
1449         int res;
1450
1451         mtx_assert(&sc->hs_lock, MA_OWNED);
1452         switch (ccb->ccb_h.func_code) {
1453         case XPT_PATH_INQ: {
1454                 struct ccb_pathinq *cpi = &ccb->cpi;
1455
1456                 cpi->version_num = 1;
1457                 cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
1458                 cpi->target_sprt = 0;
1459                 cpi->hba_misc = PIM_NOBUSRESET;
1460                 if (hv_storvsc_use_pim_unmapped)
1461                         cpi->hba_misc |= PIM_UNMAPPED;
1462                 cpi->maxio = STORVSC_DATA_SIZE_MAX;
1463                 cpi->hba_eng_cnt = 0;
1464                 cpi->max_target = STORVSC_MAX_TARGETS;
1465                 cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
1466                 cpi->initiator_id = cpi->max_target;
1467                 cpi->bus_id = cam_sim_bus(sim);
1468                 cpi->base_transfer_speed = 300000;
1469                 cpi->transport = XPORT_SAS;
1470                 cpi->transport_version = 0;
1471                 cpi->protocol = PROTO_SCSI;
1472                 cpi->protocol_version = SCSI_REV_SPC2;
1473                 strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
1474                 strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
1475                 strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1476                 cpi->unit_number = cam_sim_unit(sim);
1477
1478                 ccb->ccb_h.status = CAM_REQ_CMP;
1479                 xpt_done(ccb);
1480                 return;
1481         }
1482         case XPT_GET_TRAN_SETTINGS: {
1483                 struct  ccb_trans_settings *cts = &ccb->cts;
1484
1485                 cts->transport = XPORT_SAS;
1486                 cts->transport_version = 0;
1487                 cts->protocol = PROTO_SCSI;
1488                 cts->protocol_version = SCSI_REV_SPC2;
1489
1490                 /* enable tag queuing and disconnected mode */
1491                 cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
1492                 cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1493                 cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1494                 cts->xport_specific.valid = CTS_SPI_VALID_DISC;
1495                 cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
1496                         
1497                 ccb->ccb_h.status = CAM_REQ_CMP;
1498                 xpt_done(ccb);
1499                 return;
1500         }
1501         case XPT_SET_TRAN_SETTINGS:     {
1502                 ccb->ccb_h.status = CAM_REQ_CMP;
1503                 xpt_done(ccb);
1504                 return;
1505         }
1506         case XPT_CALC_GEOMETRY:{
1507                 cam_calc_geometry(&ccb->ccg, 1);
1508                 xpt_done(ccb);
1509                 return;
1510         }
1511         case  XPT_RESET_BUS:
1512         case  XPT_RESET_DEV:{
1513 #if HVS_HOST_RESET
1514                 if ((res = hv_storvsc_host_reset(sc)) != 0) {
1515                         xpt_print(ccb->ccb_h.path,
1516                                 "hv_storvsc_host_reset failed with %d\n", res);
1517                         ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1518                         xpt_done(ccb);
1519                         return;
1520                 }
1521                 ccb->ccb_h.status = CAM_REQ_CMP;
1522                 xpt_done(ccb);
1523                 return;
1524 #else
1525                 xpt_print(ccb->ccb_h.path,
1526                                   "%s reset not supported.\n",
1527                                   (ccb->ccb_h.func_code == XPT_RESET_BUS)?
1528                                   "bus" : "dev");
1529                 ccb->ccb_h.status = CAM_REQ_INVALID;
1530                 xpt_done(ccb);
1531                 return;
1532 #endif  /* HVS_HOST_RESET */
1533         }
1534         case XPT_SCSI_IO:
1535         case XPT_IMMED_NOTIFY: {
1536                 struct hv_storvsc_request *reqp = NULL;
1537                 bus_dmamap_t dmap_saved;
1538
1539                 if (ccb->csio.cdb_len == 0) {
1540                         panic("cdl_len is 0\n");
1541                 }
1542
1543                 if (LIST_EMPTY(&sc->hs_free_list)) {
1544                         ccb->ccb_h.status = CAM_REQUEUE_REQ;
1545                         if (sc->hs_frozen == 0) {
1546                                 sc->hs_frozen = 1;
1547                                 xpt_freeze_simq(sim, /* count*/1);
1548                         }
1549                         xpt_done(ccb);
1550                         return;
1551                 }
1552
1553                 reqp = LIST_FIRST(&sc->hs_free_list);
1554                 LIST_REMOVE(reqp, link);
1555
1556                 /* Save the data_dmap before reset request */
1557                 dmap_saved = reqp->data_dmap;
1558
1559                 /* XXX this is ugly */
1560                 bzero(reqp, sizeof(struct hv_storvsc_request));
1561
1562                 /* Restore necessary bits */
1563                 reqp->data_dmap = dmap_saved;
1564                 reqp->softc = sc;
1565                 
1566                 ccb->ccb_h.status |= CAM_SIM_QUEUED;
1567                 if ((res = create_storvsc_request(ccb, reqp)) != 0) {
1568                         ccb->ccb_h.status = CAM_REQ_INVALID;
1569                         xpt_done(ccb);
1570                         return;
1571                 }
1572
1573 #ifdef notyet
1574                 if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1575                         callout_init(&reqp->callout, CALLOUT_MPSAFE);
1576                         callout_reset_sbt(&reqp->callout,
1577                             SBT_1MS * ccb->ccb_h.timeout, 0,
1578                             storvsc_timeout, reqp, 0);
1579 #if HVS_TIMEOUT_TEST
1580                         cv_init(&reqp->event.cv, "storvsc timeout cv");
1581                         mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
1582                                         NULL, MTX_DEF);
1583                         switch (reqp->vstor_packet.vm_srb.cdb[0]) {
1584                                 case MODE_SELECT_10:
1585                                 case SEND_DIAGNOSTIC:
1586                                         /* To have timer send the request. */
1587                                         return;
1588                                 default:
1589                                         break;
1590                         }
1591 #endif /* HVS_TIMEOUT_TEST */
1592                 }
1593 #endif
1594
1595                 if ((res = hv_storvsc_io_request(sc, reqp)) != 0) {
1596                         xpt_print(ccb->ccb_h.path,
1597                                 "hv_storvsc_io_request failed with %d\n", res);
1598                         ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1599                         storvsc_free_request(sc, reqp);
1600                         xpt_done(ccb);
1601                         return;
1602                 }
1603                 return;
1604         }
1605
1606         default:
1607                 ccb->ccb_h.status = CAM_REQ_INVALID;
1608                 xpt_done(ccb);
1609                 return;
1610         }
1611 }
1612
1613 /**
1614  * @brief destroy bounce buffer
1615  *
1616  * This function is responsible for destroy a Scatter/Gather list
1617  * that create by storvsc_create_bounce_buffer()
1618  *
1619  * @param sgl- the Scatter/Gather need be destroy
1620  * @param sg_count- page count of the SG list.
1621  *
1622  */
1623 static void
1624 storvsc_destroy_bounce_buffer(struct sglist *sgl)
1625 {
1626         struct hv_sgl_node *sgl_node = NULL;
1627         if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
1628                 printf("storvsc error: not enough in use sgl\n");
1629                 return;
1630         }
1631         sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1632         LIST_REMOVE(sgl_node, link);
1633         sgl_node->sgl_data = sgl;
1634         LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1635 }
1636
1637 /**
1638  * @brief create bounce buffer
1639  *
1640  * This function is responsible for create a Scatter/Gather list,
1641  * which hold several pages that can be aligned with page size.
1642  *
1643  * @param seg_count- SG-list segments count
1644  * @param write - if WRITE_TYPE, set SG list page used size to 0,
1645  * otherwise set used size to page size.
1646  *
1647  * return NULL if create failed
1648  */
1649 static struct sglist *
1650 storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1651 {
1652         int i = 0;
1653         struct sglist *bounce_sgl = NULL;
1654         unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1655         struct hv_sgl_node *sgl_node = NULL;    
1656
1657         /* get struct sglist from free_sgl_list */
1658         if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1659                 printf("storvsc error: not enough free sgl\n");
1660                 return NULL;
1661         }
1662         sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1663         LIST_REMOVE(sgl_node, link);
1664         bounce_sgl = sgl_node->sgl_data;
1665         LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1666
1667         bounce_sgl->sg_maxseg = seg_count;
1668
1669         if (write == WRITE_TYPE)
1670                 bounce_sgl->sg_nseg = 0;
1671         else
1672                 bounce_sgl->sg_nseg = seg_count;
1673
1674         for (i = 0; i < seg_count; i++)
1675                 bounce_sgl->sg_segs[i].ss_len = buf_len;
1676
1677         return bounce_sgl;
1678 }
1679
1680 /**
1681  * @brief copy data from SG list to bounce buffer
1682  *
1683  * This function is responsible for copy data from one SG list's segments
1684  * to another SG list which used as bounce buffer.
1685  *
1686  * @param bounce_sgl - the destination SG list
1687  * @param orig_sgl - the segment of the source SG list.
1688  * @param orig_sgl_count - the count of segments.
1689  * @param orig_sgl_count - indicate which segment need bounce buffer,
1690  *  set 1 means need.
1691  *
1692  */
1693 static void
1694 storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
1695                                bus_dma_segment_t *orig_sgl,
1696                                unsigned int orig_sgl_count,
1697                                uint64_t seg_bits)
1698 {
1699         int src_sgl_idx = 0;
1700
1701         for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
1702                 if (seg_bits & (1 << src_sgl_idx)) {
1703                         memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
1704                             (void*)orig_sgl[src_sgl_idx].ds_addr,
1705                             orig_sgl[src_sgl_idx].ds_len);
1706
1707                         bounce_sgl->sg_segs[src_sgl_idx].ss_len =
1708                             orig_sgl[src_sgl_idx].ds_len;
1709                 }
1710         }
1711 }
1712
1713 /**
1714  * @brief copy data from SG list which used as bounce to another SG list
1715  *
1716  * This function is responsible for copy data from one SG list with bounce
1717  * buffer to another SG list's segments.
1718  *
1719  * @param dest_sgl - the destination SG list's segments
1720  * @param dest_sgl_count - the count of destination SG list's segment.
1721  * @param src_sgl - the source SG list.
1722  * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
1723  *
1724  */
1725 void
1726 storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1727                                     unsigned int dest_sgl_count,
1728                                     struct sglist* src_sgl,
1729                                     uint64_t seg_bits)
1730 {
1731         int sgl_idx = 0;
1732         
1733         for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
1734                 if (seg_bits & (1 << sgl_idx)) {
1735                         memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1736                             (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
1737                             src_sgl->sg_segs[sgl_idx].ss_len);
1738                 }
1739         }
1740 }
1741
1742 /**
1743  * @brief check SG list with bounce buffer or not
1744  *
1745  * This function is responsible for check if need bounce buffer for SG list.
1746  *
1747  * @param sgl - the SG list's segments
1748  * @param sg_count - the count of SG list's segment.
1749  * @param bits - segmengs number that need bounce buffer
1750  *
1751  * return -1 if SG list needless bounce buffer
1752  */
1753 static int
1754 storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
1755                                 unsigned int sg_count,
1756                                 uint64_t *bits)
1757 {
1758         int i = 0;
1759         int offset = 0;
1760         uint64_t phys_addr = 0;
1761         uint64_t tmp_bits = 0;
1762         boolean_t found_hole = FALSE;
1763         boolean_t pre_aligned = TRUE;
1764
1765         if (sg_count < 2){
1766                 return -1;
1767         }
1768
1769         *bits = 0;
1770         
1771         phys_addr = vtophys(sgl[0].ds_addr);
1772         offset =  phys_addr - trunc_page(phys_addr);
1773
1774         if (offset != 0) {
1775                 pre_aligned = FALSE;
1776                 tmp_bits |= 1;
1777         }
1778
1779         for (i = 1; i < sg_count; i++) {
1780                 phys_addr = vtophys(sgl[i].ds_addr);
1781                 offset =  phys_addr - trunc_page(phys_addr);
1782
1783                 if (offset == 0) {
1784                         if (FALSE == pre_aligned){
1785                                 /*
1786                                  * This segment is aligned, if the previous
1787                                  * one is not aligned, find a hole
1788                                  */
1789                                 found_hole = TRUE;
1790                         }
1791                         pre_aligned = TRUE;
1792                 } else {
1793                         tmp_bits |= 1 << i;
1794                         if (!pre_aligned) {
1795                                 if (phys_addr != vtophys(sgl[i-1].ds_addr +
1796                                     sgl[i-1].ds_len)) {
1797                                         /*
1798                                          * Check whether connect to previous
1799                                          * segment,if not, find the hole
1800                                          */
1801                                         found_hole = TRUE;
1802                                 }
1803                         } else {
1804                                 found_hole = TRUE;
1805                         }
1806                         pre_aligned = FALSE;
1807                 }
1808         }
1809
1810         if (!found_hole) {
1811                 return (-1);
1812         } else {
1813                 *bits = tmp_bits;
1814                 return 0;
1815         }
1816 }
1817
1818 /**
1819  * Copy bus_dma segments to multiple page buffer, which requires
1820  * the pages are compact composed except for the 1st and last pages.
1821  */
1822 static void
1823 storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1824 {
1825         struct hv_storvsc_request *reqp = arg;
1826         union ccb *ccb = reqp->ccb;
1827         struct ccb_scsiio *csio = &ccb->csio;
1828         struct storvsc_gpa_range *prplist;
1829         int i;
1830
1831         prplist = &reqp->prp_list;
1832         prplist->gpa_range.gpa_len = csio->dxfer_len;
1833         prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK;
1834
1835         for (i = 0; i < nsegs; i++) {
1836 #ifdef INVARIANTS
1837                 if (nsegs > 1) {
1838                         if (i == 0) {
1839                                 KASSERT((segs[i].ds_addr & PAGE_MASK) +
1840                                     segs[i].ds_len == PAGE_SIZE,
1841                                     ("invalid 1st page, ofs 0x%jx, len %zu",
1842                                      (uintmax_t)segs[i].ds_addr,
1843                                      segs[i].ds_len));
1844                         } else if (i == nsegs - 1) {
1845                                 KASSERT((segs[i].ds_addr & PAGE_MASK) == 0,
1846                                     ("invalid last page, ofs 0x%jx",
1847                                      (uintmax_t)segs[i].ds_addr));
1848                         } else {
1849                                 KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 &&
1850                                     segs[i].ds_len == PAGE_SIZE,
1851                                     ("not a full page, ofs 0x%jx, len %zu",
1852                                      (uintmax_t)segs[i].ds_addr,
1853                                      segs[i].ds_len));
1854                         }
1855                 }
1856 #endif
1857                 prplist->gpa_page[i] = atop(segs[i].ds_addr);
1858         }
1859         reqp->prp_cnt = nsegs;
1860 }
1861
1862 /**
1863  * @brief Fill in a request structure based on a CAM control block
1864  *
1865  * Fills in a request structure based on the contents of a CAM control
1866  * block.  The request structure holds the payload information for
1867  * VSCSI protocol request.
1868  *
1869  * @param ccb pointer to a CAM contorl block
1870  * @param reqp pointer to a request structure
1871  */
1872 static int
1873 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
1874 {
1875         struct ccb_scsiio *csio = &ccb->csio;
1876         uint64_t phys_addr;
1877         uint32_t pfn;
1878         uint64_t not_aligned_seg_bits = 0;
1879         int error;
1880         
1881         /* refer to struct vmscsi_req for meanings of these two fields */
1882         reqp->vstor_packet.u.vm_srb.port =
1883                 cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
1884         reqp->vstor_packet.u.vm_srb.path_id =
1885                 cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
1886
1887         reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
1888         reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
1889
1890         reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
1891         if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
1892                 memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
1893                         csio->cdb_len);
1894         } else {
1895                 memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
1896                         csio->cdb_len);
1897         }
1898
1899         if (hv_storvsc_use_win8ext_flags) {
1900                 reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60;
1901                 reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1902                         SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
1903         }
1904         switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1905         case CAM_DIR_OUT:
1906                 reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
1907                 if (hv_storvsc_use_win8ext_flags) {
1908                         reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1909                                 SRB_FLAGS_DATA_OUT;
1910                 }
1911                 break;
1912         case CAM_DIR_IN:
1913                 reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
1914                 if (hv_storvsc_use_win8ext_flags) {
1915                         reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1916                                 SRB_FLAGS_DATA_IN;
1917                 }
1918                 break;
1919         case CAM_DIR_NONE:
1920                 reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1921                 if (hv_storvsc_use_win8ext_flags) {
1922                         reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1923                                 SRB_FLAGS_NO_DATA_TRANSFER;
1924                 }
1925                 break;
1926         default:
1927                 printf("Error: unexpected data direction: 0x%x\n",
1928                         ccb->ccb_h.flags & CAM_DIR_MASK);
1929                 return (EINVAL);
1930         }
1931
1932         reqp->sense_data     = &csio->sense_data;
1933         reqp->sense_info_len = csio->sense_len;
1934
1935         reqp->ccb = ccb;
1936
1937         if (0 == csio->dxfer_len) {
1938                 return (0);
1939         }
1940
1941         switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
1942         case CAM_DATA_BIO:
1943         case CAM_DATA_VADDR:
1944                 error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag,
1945                     reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp,
1946                     BUS_DMA_NOWAIT);
1947                 if (error) {
1948                         xpt_print(ccb->ccb_h.path,
1949                             "bus_dmamap_load_ccb failed: %d\n", error);
1950                         return (error);
1951                 }
1952                 if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
1953                         reqp->softc->sysctl_data.data_bio_cnt++;
1954                 else
1955                         reqp->softc->sysctl_data.data_vaddr_cnt++;
1956                 break;
1957
1958         case CAM_DATA_SG:
1959         {
1960                 struct storvsc_gpa_range *prplist;
1961                 int i = 0;
1962                 int offset = 0;
1963                 int ret;
1964
1965                 bus_dma_segment_t *storvsc_sglist =
1966                     (bus_dma_segment_t *)ccb->csio.data_ptr;
1967                 u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
1968
1969                 prplist = &reqp->prp_list;
1970                 prplist->gpa_range.gpa_len = csio->dxfer_len;
1971
1972                 printf("Storvsc: get SG I/O operation, %d\n",
1973                     reqp->vstor_packet.u.vm_srb.data_in);
1974
1975                 if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX){
1976                         printf("Storvsc: %d segments is too much, "
1977                             "only support %d segments\n",
1978                             storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX);
1979                         return (EINVAL);
1980                 }
1981
1982                 /*
1983                  * We create our own bounce buffer function currently. Idealy
1984                  * we should use BUS_DMA(9) framework. But with current BUS_DMA
1985                  * code there is no callback API to check the page alignment of
1986                  * middle segments before busdma can decide if a bounce buffer
1987                  * is needed for particular segment. There is callback,
1988                  * "bus_dma_filter_t *filter", but the parrameters are not
1989                  * sufficient for storvsc driver.
1990                  * TODO:
1991                  *      Add page alignment check in BUS_DMA(9) callback. Once
1992                  *      this is complete, switch the following code to use
1993                  *      BUS_DMA(9) for storvsc bounce buffer support.
1994                  */
1995                 /* check if we need to create bounce buffer */
1996                 ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
1997                     storvsc_sg_count, &not_aligned_seg_bits);
1998                 if (ret != -1) {
1999                         reqp->bounce_sgl =
2000                             storvsc_create_bounce_buffer(storvsc_sg_count,
2001                             reqp->vstor_packet.u.vm_srb.data_in);
2002                         if (NULL == reqp->bounce_sgl) {
2003                                 printf("Storvsc_error: "
2004                                     "create bounce buffer failed.\n");
2005                                 return (ENOMEM);
2006                         }
2007
2008                         reqp->bounce_sgl_count = storvsc_sg_count;
2009                         reqp->not_aligned_seg_bits = not_aligned_seg_bits;
2010
2011                         /*
2012                          * if it is write, we need copy the original data
2013                          *to bounce buffer
2014                          */
2015                         if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2016                                 storvsc_copy_sgl_to_bounce_buf(
2017                                     reqp->bounce_sgl,
2018                                     storvsc_sglist,
2019                                     storvsc_sg_count,
2020                                     reqp->not_aligned_seg_bits);
2021                         }
2022
2023                         /* transfer virtual address to physical frame number */
2024                         if (reqp->not_aligned_seg_bits & 0x1){
2025                                 phys_addr =
2026                                     vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
2027                         }else{
2028                                 phys_addr =
2029                                         vtophys(storvsc_sglist[0].ds_addr);
2030                         }
2031                         prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2032
2033                         pfn = phys_addr >> PAGE_SHIFT;
2034                         prplist->gpa_page[0] = pfn;
2035                         
2036                         for (i = 1; i < storvsc_sg_count; i++) {
2037                                 if (reqp->not_aligned_seg_bits & (1 << i)) {
2038                                         phys_addr =
2039                                             vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
2040                                 } else {
2041                                         phys_addr =
2042                                             vtophys(storvsc_sglist[i].ds_addr);
2043                                 }
2044
2045                                 pfn = phys_addr >> PAGE_SHIFT;
2046                                 prplist->gpa_page[i] = pfn;
2047                         }
2048                         reqp->prp_cnt = i;
2049                 } else {
2050                         phys_addr = vtophys(storvsc_sglist[0].ds_addr);
2051
2052                         prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2053
2054                         for (i = 0; i < storvsc_sg_count; i++) {
2055                                 phys_addr = vtophys(storvsc_sglist[i].ds_addr);
2056                                 pfn = phys_addr >> PAGE_SHIFT;
2057                                 prplist->gpa_page[i] = pfn;
2058                         }
2059                         reqp->prp_cnt = i;
2060
2061                         /* check the last segment cross boundary or not */
2062                         offset = phys_addr & PAGE_MASK;
2063                         if (offset) {
2064                                 /* Add one more PRP entry */
2065                                 phys_addr =
2066                                     vtophys(storvsc_sglist[i-1].ds_addr +
2067                                     PAGE_SIZE - offset);
2068                                 pfn = phys_addr >> PAGE_SHIFT;
2069                                 prplist->gpa_page[i] = pfn;
2070                                 reqp->prp_cnt++;
2071                         }
2072                         
2073                         reqp->bounce_sgl_count = 0;
2074                 }
2075                 reqp->softc->sysctl_data.data_sg_cnt++;
2076                 break;
2077         }
2078         default:
2079                 printf("Unknow flags: %d\n", ccb->ccb_h.flags);
2080                 return(EINVAL);
2081         }
2082
2083         return(0);
2084 }
2085
2086 /**
2087  * @brief completion function before returning to CAM
2088  *
2089  * I/O process has been completed and the result needs
2090  * to be passed to the CAM layer.
2091  * Free resources related to this request.
2092  *
2093  * @param reqp pointer to a request structure
2094  */
2095 static void
2096 storvsc_io_done(struct hv_storvsc_request *reqp)
2097 {
2098         union ccb *ccb = reqp->ccb;
2099         struct ccb_scsiio *csio = &ccb->csio;
2100         struct storvsc_softc *sc = reqp->softc;
2101         struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
2102         bus_dma_segment_t *ori_sglist = NULL;
2103         int ori_sg_count = 0;
2104         /* destroy bounce buffer if it is used */
2105         if (reqp->bounce_sgl_count) {
2106                 ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
2107                 ori_sg_count = ccb->csio.sglist_cnt;
2108
2109                 /*
2110                  * If it is READ operation, we should copy back the data
2111                  * to original SG list.
2112                  */
2113                 if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2114                         storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
2115                             ori_sg_count,
2116                             reqp->bounce_sgl,
2117                             reqp->not_aligned_seg_bits);
2118                 }
2119
2120                 storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
2121                 reqp->bounce_sgl_count = 0;
2122         }
2123                 
2124         if (reqp->retries > 0) {
2125                 mtx_lock(&sc->hs_lock);
2126 #if HVS_TIMEOUT_TEST
2127                 xpt_print(ccb->ccb_h.path,
2128                         "%u: IO returned after timeout, "
2129                         "waking up timer handler if any.\n", ticks);
2130                 mtx_lock(&reqp->event.mtx);
2131                 cv_signal(&reqp->event.cv);
2132                 mtx_unlock(&reqp->event.mtx);
2133 #endif
2134                 reqp->retries = 0;
2135                 xpt_print(ccb->ccb_h.path,
2136                         "%u: IO returned after timeout, "
2137                         "stopping timer if any.\n", ticks);
2138                 mtx_unlock(&sc->hs_lock);
2139         }
2140
2141 #ifdef notyet
2142         /*
2143          * callout_drain() will wait for the timer handler to finish
2144          * if it is running. So we don't need any lock to synchronize
2145          * between this routine and the timer handler.
2146          * Note that we need to make sure reqp is not freed when timer
2147          * handler is using or will use it.
2148          */
2149         if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
2150                 callout_drain(&reqp->callout);
2151         }
2152 #endif
2153
2154         ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
2155         ccb->ccb_h.status &= ~CAM_STATUS_MASK;
2156         if (vm_srb->scsi_status == SCSI_STATUS_OK) {
2157                 const struct scsi_generic *cmd;
2158
2159                 if (vm_srb->srb_status != SRB_STATUS_SUCCESS) {
2160                         if (vm_srb->srb_status == SRB_STATUS_INVALID_LUN) {
2161                                 xpt_print(ccb->ccb_h.path, "invalid LUN %d\n",
2162                                     vm_srb->lun);
2163                         } else {
2164                                 xpt_print(ccb->ccb_h.path, "Unknown SRB flag: %d\n",
2165                                     vm_srb->srb_status);
2166                         }
2167                         /*
2168                          * If there are errors, for example, invalid LUN,
2169                          * host will inform VM through SRB status.
2170                          */
2171                         ccb->ccb_h.status |= CAM_SEL_TIMEOUT;
2172                 } else {
2173                         ccb->ccb_h.status |= CAM_REQ_CMP;
2174                 }
2175
2176                 cmd = (const struct scsi_generic *)
2177                     ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
2178                      csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
2179                 if (cmd->opcode == INQUIRY) {
2180                         struct scsi_inquiry_data *inq_data =
2181                             (struct scsi_inquiry_data *)csio->data_ptr;
2182                         uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
2183                         int resp_xfer_len, resp_buf_len, data_len;
2184
2185                         /* Get the buffer length reported by host */
2186                         resp_xfer_len = vm_srb->transfer_len;
2187                         /* Get the available buffer length */
2188                         resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
2189                         data_len = (resp_buf_len < resp_xfer_len) ?
2190                             resp_buf_len : resp_xfer_len;
2191
2192                         if (bootverbose && data_len >= 5) {
2193                                 xpt_print(ccb->ccb_h.path, "storvsc inquiry "
2194                                     "(%d) [%x %x %x %x %x ... ]\n", data_len,
2195                                     resp_buf[0], resp_buf[1], resp_buf[2],
2196                                     resp_buf[3], resp_buf[4]);
2197                         }
2198                         if (vm_srb->srb_status == SRB_STATUS_SUCCESS &&
2199                             data_len > SHORT_INQUIRY_LENGTH) {
2200                                 char vendor[16];
2201
2202                                 cam_strvis(vendor, inq_data->vendor,
2203                                     sizeof(inq_data->vendor), sizeof(vendor));
2204
2205                                 /*
2206                                  * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
2207                                  * WIN2012 R2 in order to support UNMAP feature.
2208                                  */
2209                                 if (!strncmp(vendor, "Msft", 4) &&
2210                                     SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
2211                                     (vmstor_proto_version ==
2212                                      VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2213                                      vmstor_proto_version ==
2214                                      VMSTOR_PROTOCOL_VERSION_WIN8)) {
2215                                         inq_data->version = SCSI_REV_SPC3;
2216                                         if (bootverbose) {
2217                                                 xpt_print(ccb->ccb_h.path,
2218                                                     "storvsc upgrades "
2219                                                     "SPC2 to SPC3\n");
2220                                         }
2221                                 }
2222                         }
2223                 }
2224         } else {
2225                 mtx_lock(&sc->hs_lock);
2226                 xpt_print(ccb->ccb_h.path,
2227                         "storvsc scsi_status = %d\n",
2228                         vm_srb->scsi_status);
2229                 mtx_unlock(&sc->hs_lock);
2230                 ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
2231         }
2232
2233         ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
2234         ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
2235
2236         if (reqp->sense_info_len != 0) {
2237                 csio->sense_resid = csio->sense_len - reqp->sense_info_len;
2238                 ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
2239         }
2240
2241         mtx_lock(&sc->hs_lock);
2242         if (reqp->softc->hs_frozen == 1) {
2243                 xpt_print(ccb->ccb_h.path,
2244                         "%u: storvsc unfreezing softc 0x%p.\n",
2245                         ticks, reqp->softc);
2246                 ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
2247                 reqp->softc->hs_frozen = 0;
2248         }
2249         storvsc_free_request(sc, reqp);
2250         mtx_unlock(&sc->hs_lock);
2251
2252         xpt_done_direct(ccb);
2253 }
2254
2255 /**
2256  * @brief Free a request structure
2257  *
2258  * Free a request structure by returning it to the free list
2259  *
2260  * @param sc pointer to a softc
2261  * @param reqp pointer to a request structure
2262  */     
2263 static void
2264 storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
2265 {
2266
2267         LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
2268 }
2269
2270 /**
2271  * @brief Determine type of storage device from GUID
2272  *
2273  * Using the type GUID, determine if this is a StorVSC (paravirtual
2274  * SCSI or BlkVSC (paravirtual IDE) device.
2275  *
2276  * @param dev a device
2277  * returns an enum
2278  */
2279 static enum hv_storage_type
2280 storvsc_get_storage_type(device_t dev)
2281 {
2282         device_t parent = device_get_parent(dev);
2283
2284         if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0)
2285                 return DRIVER_BLKVSC;
2286         if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0)
2287                 return DRIVER_STORVSC;
2288         return DRIVER_UNKNOWN;
2289 }