]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
MFC 303474
[FreeBSD/stable/10.git] / sys / dev / hyperv / storvsc / hv_storvsc_drv_freebsd.c
1 /*-
2  * Copyright (c) 2009-2012,2016 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 /**
30  * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
 * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
32  * converted into VSCSI protocol messages which are delivered to the parent
33  * partition StorVSP driver over the Hyper-V VMBUS.
34  */
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include <sys/param.h>
39 #include <sys/proc.h>
40 #include <sys/condvar.h>
41 #include <sys/time.h>
42 #include <sys/systm.h>
43 #include <sys/sysctl.h>
44 #include <sys/sockio.h>
45 #include <sys/mbuf.h>
46 #include <sys/malloc.h>
47 #include <sys/module.h>
48 #include <sys/kernel.h>
49 #include <sys/queue.h>
50 #include <sys/lock.h>
51 #include <sys/sx.h>
52 #include <sys/taskqueue.h>
53 #include <sys/bus.h>
54 #include <sys/mutex.h>
55 #include <sys/callout.h>
56 #include <sys/smp.h>
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <vm/uma.h>
60 #include <sys/lock.h>
61 #include <sys/sema.h>
62 #include <sys/sglist.h>
63 #include <machine/bus.h>
64 #include <sys/bus_dma.h>
65
66 #include <cam/cam.h>
67 #include <cam/cam_ccb.h>
68 #include <cam/cam_periph.h>
69 #include <cam/cam_sim.h>
70 #include <cam/cam_xpt_sim.h>
71 #include <cam/cam_xpt_internal.h>
72 #include <cam/cam_debug.h>
73 #include <cam/scsi/scsi_all.h>
74 #include <cam/scsi/scsi_message.h>
75
76 #include <dev/hyperv/include/hyperv.h>
77 #include <dev/hyperv/include/vmbus.h>
78
79 #include "hv_vstorage.h"
80 #include "vmbus_if.h"
81
82 #define STORVSC_RINGBUFFER_SIZE         (20*PAGE_SIZE)
83 #define STORVSC_MAX_LUNS_PER_TARGET     (64)
84 #define STORVSC_MAX_IO_REQUESTS         (STORVSC_MAX_LUNS_PER_TARGET * 2)
85 #define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1)
86 #define BLKVSC_MAX_IO_REQUESTS          STORVSC_MAX_IO_REQUESTS
87 #define STORVSC_MAX_TARGETS             (2)
88
89 #define VSTOR_PKT_SIZE  (sizeof(struct vstor_packet) - vmscsi_size_delta)
90
91 #define STORVSC_DATA_SEGCNT_MAX         VMBUS_CHAN_PRPLIST_MAX
92 #define STORVSC_DATA_SEGSZ_MAX          PAGE_SIZE
93 #define STORVSC_DATA_SIZE_MAX           \
94         (STORVSC_DATA_SEGCNT_MAX * STORVSC_DATA_SEGSZ_MAX)
95
96 struct storvsc_softc;
97
/* One scatter/gather list, linkable into the global bounce-buffer pool. */
struct hv_sgl_node {
	LIST_ENTRY(hv_sgl_node) link;		/* linkage in free/in-use lists */
	struct sglist *sgl_data;		/* the scatter/gather list itself */
};
102
/*
 * Global pool of pre-allocated scatter/gather lists; presumably used as
 * bounce buffers for I/O with unaligned segments (see the bounce_sgl
 * fields in hv_storvsc_request).
 */
struct hv_sgl_page_pool{
	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;	/* SGLs currently lent out */
	LIST_HEAD(, hv_sgl_node) free_sgl_list;		/* SGLs available for use */
	boolean_t                is_init;		/* pool has been initialized */
} g_hv_sgl_page_pool;
108
/*
 * Upper bound on scatter/gather pages: every outstanding I/O using the
 * maximum segment count.  Parenthesized so the macro expands safely
 * inside larger expressions (the original expansion was unparenthesized).
 */
#define STORVSC_MAX_SG_PAGE_CNT	(STORVSC_MAX_IO_REQUESTS * STORVSC_DATA_SEGCNT_MAX)
110
/* Direction of the data transfer carried by a request. */
enum storvsc_request_type {
	WRITE_TYPE,
	READ_TYPE,
	UNKNOWN_TYPE		/* no data phase, or not yet classified */
};
116
SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V storage interface");

/* Tunable (hw.storvsc.use_pim_unmapped): prefer unmapped I/O; default on. */
static u_int hv_storvsc_use_pim_unmapped = 1;
SYSCTL_INT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
    &hv_storvsc_use_pim_unmapped, 0,
    "Optimize storvsc by using unmapped I/O");
124
/* Per-adapter counters exported via sysctl; named after CAM data-buffer
 * flavors (bio, virtual address, scatter/gather) — confirm against the
 * accounting sites elsewhere in this file. */
struct hv_storvsc_sysctl {
	u_long		data_bio_cnt;
	u_long		data_vaddr_cnt;
	u_long		data_sg_cnt;
};
130
/*
 * Guest-physical-address range header plus enough page slots for the
 * largest supported transfer; sent to the host as a VMBUS PRP list
 * (see vmbus_chan_send_prplist() in hv_storvsc_io_request()).
 */
struct storvsc_gpa_range {
	struct vmbus_gpa_range	gpa_range;
	uint64_t		gpa_page[STORVSC_DATA_SEGCNT_MAX];
} __packed;
135
/* Driver-private state for one outstanding VSCSI request. */
struct hv_storvsc_request {
	LIST_ENTRY(hv_storvsc_request)	link;		/* free-list linkage */
	struct vstor_packet		vstor_packet;	/* wire-format request/response */
	int				prp_cnt;	/* valid entries in prp_list */
	struct storvsc_gpa_range	prp_list;	/* data pages for this I/O */
	void				*sense_data;	/* destination for autosense bytes */
	uint8_t				sense_info_len;	/* capacity, then actual sense length */
	uint8_t				retries;
	union ccb			*ccb;		/* CAM CCB this request serves */
	struct storvsc_softc		*softc;		/* owning adapter */
	struct callout			callout;	/* per-request timeout timer */
	struct sema			synch_sema;	/* Synchronize the request/response if needed */
	struct sglist			*bounce_sgl;	/* bounce-buffer SGL, if used */
	unsigned int			bounce_sgl_count;
	uint64_t			not_aligned_seg_bits;	/* bitmap of unaligned segments */
	bus_dmamap_t			data_dmap;	/* busdma map for the data buffer */
};
153
/* Per-adapter (per VMBUS device) instance state. */
struct storvsc_softc {
	struct vmbus_channel		*hs_chan;	/* primary VMBUS channel */
	LIST_HEAD(, hv_storvsc_request) hs_free_list;	/* pre-allocated requests */
	struct mtx			hs_lock;
	struct storvsc_driver_props	*hs_drv_props;	/* blkvsc vs. storvsc limits */
	int				hs_unit;
	uint32_t			hs_frozen;
	struct cam_sim			*hs_sim;
	struct cam_path			*hs_path;
	uint32_t			hs_num_out_reqs; /* outstanding I/O count */
	boolean_t			hs_destroy;
	boolean_t			hs_drain_notify; /* waiter wants drain signal */
	struct sema			hs_drain_sema;	/* posted when outstanding I/O drains */
	struct hv_storvsc_request	hs_init_req;	/* request used for init handshake */
	struct hv_storvsc_request	hs_reset_req;	/* request used for bus reset */
	device_t			hs_dev;
	bus_dma_tag_t			storvsc_req_dtag;
	struct hv_storvsc_sysctl	sysctl_data;

	/* Per-CPU channel map; I/O is submitted on the current CPU's channel. */
	struct vmbus_channel		*hs_cpu2chan[MAXCPU];
};
175
176
177 /**
178  * HyperV storvsc timeout testing cases:
179  * a. IO returned after first timeout;
180  * b. IO returned after second timeout and queue freeze;
181  * c. IO returned while timer handler is running
182  * The first can be tested by "sg_senddiag -vv /dev/daX",
183  * and the second and third can be done by
184  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
185  */
186 #define HVS_TIMEOUT_TEST 0
187
188 /*
189  * Bus/adapter reset functionality on the Hyper-V host is
190  * buggy and it will be disabled until
191  * it can be further tested.
192  */
193 #define HVS_HOST_RESET 0
194
/* Per-flavor (blkvsc vs. storvsc) naming and limits; see g_drv_props_table. */
struct storvsc_driver_props {
	char		*drv_name;		/* short driver name */
	char		*drv_desc;		/* device description string */
	uint8_t		drv_max_luns_per_target;
	uint8_t		drv_max_ios_per_target;
	uint32_t	drv_ringbuffer_size;	/* VMBUS ring size per direction */
};
202
/* Kind of paravirtual storage device; also indexes g_drv_props_table. */
enum hv_storage_type {
	DRIVER_BLKVSC,		/* paravirtual IDE */
	DRIVER_STORVSC,		/* paravirtual SCSI */
	DRIVER_UNKNOWN
};
208
#define HS_MAX_ADAPTERS 10

/* Channel-properties flag: host supports sub-channels (WIN8+). */
#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1

/* VMBUS device type GUID for storvsc: {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
static const struct hyperv_guid gStorVscDeviceType={
	.hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
};

/* VMBUS device type GUID for blkvsc: {32412632-86cb-44a2-9b5c-50d1417354f5} */
static const struct hyperv_guid gBlkVscDeviceType={
	.hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
};
224
/* Driver properties, indexed by enum hv_storage_type. */
static struct storvsc_driver_props g_drv_props_table[] = {
	{"blkvsc", "Hyper-V IDE Storage Interface",
	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
	 STORVSC_RINGBUFFER_SIZE},
	{"storvsc", "Hyper-V SCSI Storage Interface",
	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
	 STORVSC_RINGBUFFER_SIZE}
};
233
/*
 * Sense buffer size changed in win8; have a run-time
 * variable to track the size we should use.
 * Set during protocol negotiation in hv_storvsc_channel_init().
 */
static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;

/*
 * The size of the vmscsi_request has changed in win8. The
 * additional size is for the newly added elements in the
 * structure. These elements are valid only when we are talking
 * to a win8 host.
 * Track the correct size we need to apply.
 */
static int vmscsi_size_delta;

/*
 * The storage protocol version is determined during the
 * initial exchange with the host.  It will indicate which
 * storage functionality is available in the host.
 */
static int vmstor_proto_version;
254
/* One negotiable protocol version and its associated size parameters. */
struct vmstor_proto {
	int proto_version;	/* VMSTOR_PROTOCOL_VERSION_* value */
	int sense_buffer_size;	/* sense buffer size for this version */
	int vmscsi_size_delta;	/* bytes to subtract from vmscsi_request */
};
260
/*
 * Protocol versions we can negotiate, newest first.  The init handshake
 * in hv_storvsc_channel_init() tries each entry in order and adopts the
 * first one the host accepts.
 */
static const struct vmstor_proto vmstor_proto_list[] = {
	{
		VMSTOR_PROTOCOL_VERSION_WIN10,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN8_1,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN8,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN7,
		PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
		sizeof(struct vmscsi_win8_extension),
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN6,
		PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
		sizeof(struct vmscsi_win8_extension),
	}
};
288
/* static functions */
static int storvsc_probe(device_t dev);
static int storvsc_attach(device_t dev);
static int storvsc_detach(device_t dev);
static void storvsc_poll(struct cam_sim * sim);
static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
static enum hv_storage_type storvsc_get_storage_type(device_t dev);
static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc);
static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
					struct vstor_packet *vstor_packet,
					struct hv_storvsc_request *request);
static int hv_storvsc_connect_vsp(struct storvsc_softc *);
static void storvsc_io_done(struct hv_storvsc_request *reqp);
static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
				bus_dma_segment_t *orig_sgl,
				unsigned int orig_sgl_count,
				uint64_t seg_bits);
/*
 * NOTE(review): unlike its siblings this prototype is not static —
 * confirm whether the symbol is intentionally exported from this file.
 */
void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
				unsigned int dest_sgl_count,
				struct sglist* src_sgl,
				uint64_t seg_bits);
313
/* Newbus device interface methods. */
static device_method_t storvsc_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		storvsc_probe),
	DEVMETHOD(device_attach,	storvsc_attach),
	DEVMETHOD(device_detach,	storvsc_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD_END
};
322
/* Driver definition and module glue: storvsc attaches under vmbus. */
static driver_t storvsc_driver = {
	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
};

static devclass_t storvsc_devclass;
DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
MODULE_VERSION(storvsc, 1);
MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
331
332 static void
333 storvsc_subchan_attach(struct storvsc_softc *sc,
334     struct vmbus_channel *new_channel)
335 {
336         struct vmstor_chan_props props;
337         int ret = 0;
338
339         memset(&props, 0, sizeof(props));
340
341         vmbus_chan_cpu_rr(new_channel);
342         ret = vmbus_chan_open(new_channel,
343             sc->hs_drv_props->drv_ringbuffer_size,
344             sc->hs_drv_props->drv_ringbuffer_size,
345             (void *)&props,
346             sizeof(struct vmstor_chan_props),
347             hv_storvsc_on_channel_callback, sc);
348 }
349
350 /**
351  * @brief Send multi-channel creation request to host
352  *
353  * @param device  a Hyper-V device pointer
354  * @param max_chans  the max channels supported by vmbus
355  */
356 static void
357 storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_chans)
358 {
359         struct vmbus_channel **subchan;
360         struct hv_storvsc_request *request;
361         struct vstor_packet *vstor_packet;      
362         int request_channels_cnt = 0;
363         int ret, i;
364
365         /* get multichannels count that need to create */
366         request_channels_cnt = MIN(max_chans, mp_ncpus);
367
368         request = &sc->hs_init_req;
369
370         /* request the host to create multi-channel */
371         memset(request, 0, sizeof(struct hv_storvsc_request));
372         
373         sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
374
375         vstor_packet = &request->vstor_packet;
376         
377         vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
378         vstor_packet->flags = REQUEST_COMPLETION_FLAG;
379         vstor_packet->u.multi_channels_cnt = request_channels_cnt;
380
381         ret = vmbus_chan_send(sc->hs_chan,
382             VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
383             vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
384
385         /* wait for 5 seconds */
386         ret = sema_timedwait(&request->synch_sema, 5 * hz);
387         if (ret != 0) {         
388                 printf("Storvsc_error: create multi-channel timeout, %d\n",
389                     ret);
390                 return;
391         }
392
393         if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
394             vstor_packet->status != 0) {                
395                 printf("Storvsc_error: create multi-channel invalid operation "
396                     "(%d) or statue (%u)\n",
397                     vstor_packet->operation, vstor_packet->status);
398                 return;
399         }
400
401         /* Wait for sub-channels setup to complete. */
402         subchan = vmbus_subchan_get(sc->hs_chan, request_channels_cnt);
403
404         /* Attach the sub-channels. */
405         for (i = 0; i < request_channels_cnt; ++i)
406                 storvsc_subchan_attach(sc, subchan[i]);
407
408         /* Release the sub-channels. */
409         vmbus_subchan_rel(subchan, request_channels_cnt);
410
411         if (bootverbose)
412                 printf("Storvsc create multi-channel success!\n");
413 }
414
415 /**
416  * @brief initialize channel connection to parent partition
417  *
418  * @param dev  a Hyper-V device pointer
419  * @returns  0 on success, non-zero error on failure
420  */
421 static int
422 hv_storvsc_channel_init(struct storvsc_softc *sc)
423 {
424         int ret = 0, i;
425         struct hv_storvsc_request *request;
426         struct vstor_packet *vstor_packet;
427         uint16_t max_chans = 0;
428         boolean_t support_multichannel = FALSE;
429         uint32_t version;
430
431         max_chans = 0;
432         support_multichannel = FALSE;
433
434         request = &sc->hs_init_req;
435         memset(request, 0, sizeof(struct hv_storvsc_request));
436         vstor_packet = &request->vstor_packet;
437         request->softc = sc;
438
439         /**
440          * Initiate the vsc/vsp initialization protocol on the open channel
441          */
442         sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
443
444         vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
445         vstor_packet->flags = REQUEST_COMPLETION_FLAG;
446
447
448         ret = vmbus_chan_send(sc->hs_chan,
449             VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
450             vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
451
452         if (ret != 0)
453                 goto cleanup;
454
455         /* wait 5 seconds */
456         ret = sema_timedwait(&request->synch_sema, 5 * hz);
457         if (ret != 0)
458                 goto cleanup;
459
460         if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
461                 vstor_packet->status != 0) {
462                 goto cleanup;
463         }
464
465         for (i = 0; i < nitems(vmstor_proto_list); i++) {
466                 /* reuse the packet for version range supported */
467
468                 memset(vstor_packet, 0, sizeof(struct vstor_packet));
469                 vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
470                 vstor_packet->flags = REQUEST_COMPLETION_FLAG;
471
472                 vstor_packet->u.version.major_minor =
473                         vmstor_proto_list[i].proto_version;
474
475                 /* revision is only significant for Windows guests */
476                 vstor_packet->u.version.revision = 0;
477
478                 ret = vmbus_chan_send(sc->hs_chan,
479                     VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
480                     vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
481
482                 if (ret != 0)
483                         goto cleanup;
484
485                 /* wait 5 seconds */
486                 ret = sema_timedwait(&request->synch_sema, 5 * hz);
487
488                 if (ret)
489                         goto cleanup;
490
491                 if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
492                         ret = EINVAL;
493                         goto cleanup;   
494                 }
495                 if (vstor_packet->status == 0) {
496                         vmstor_proto_version =
497                                 vmstor_proto_list[i].proto_version;
498                         sense_buffer_size =
499                                 vmstor_proto_list[i].sense_buffer_size;
500                         vmscsi_size_delta =
501                                 vmstor_proto_list[i].vmscsi_size_delta;
502                         break;
503                 }
504         }
505
506         if (vstor_packet->status != 0) {
507                 ret = EINVAL;
508                 goto cleanup;
509         }
510         /**
511          * Query channel properties
512          */
513         memset(vstor_packet, 0, sizeof(struct vstor_packet));
514         vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
515         vstor_packet->flags = REQUEST_COMPLETION_FLAG;
516
517         ret = vmbus_chan_send(sc->hs_chan,
518             VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
519             vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
520
521         if ( ret != 0)
522                 goto cleanup;
523
524         /* wait 5 seconds */
525         ret = sema_timedwait(&request->synch_sema, 5 * hz);
526
527         if (ret != 0)
528                 goto cleanup;
529
530         /* TODO: Check returned version */
531         if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
532             vstor_packet->status != 0) {
533                 goto cleanup;
534         }
535
536         /* multi-channels feature is supported by WIN8 and above version */
537         max_chans = vstor_packet->u.chan_props.max_channel_cnt;
538         version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev);
539         if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 &&
540             (vstor_packet->u.chan_props.flags &
541              HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
542                 support_multichannel = TRUE;
543         }
544
545         memset(vstor_packet, 0, sizeof(struct vstor_packet));
546         vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
547         vstor_packet->flags = REQUEST_COMPLETION_FLAG;
548
549         ret = vmbus_chan_send(sc->hs_chan,
550             VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
551             vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
552
553         if (ret != 0) {
554                 goto cleanup;
555         }
556
557         /* wait 5 seconds */
558         ret = sema_timedwait(&request->synch_sema, 5 * hz);
559
560         if (ret != 0)
561                 goto cleanup;
562
563         if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
564             vstor_packet->status != 0)
565                 goto cleanup;
566
567         /*
568          * If multi-channel is supported, send multichannel create
569          * request to host.
570          */
571         if (support_multichannel)
572                 storvsc_send_multichannel_request(sc, max_chans);
573
574 cleanup:
575         sema_destroy(&request->synch_sema);
576         return (ret);
577 }
578
579 /**
580  * @brief Open channel connection to paraent partition StorVSP driver
581  *
582  * Open and initialize channel connection to parent partition StorVSP driver.
583  *
584  * @param pointer to a Hyper-V device
585  * @returns 0 on success, non-zero error on failure
586  */
587 static int
588 hv_storvsc_connect_vsp(struct storvsc_softc *sc)
589 {       
590         int ret = 0;
591         struct vmstor_chan_props props;
592
593         memset(&props, 0, sizeof(struct vmstor_chan_props));
594
595         /*
596          * Open the channel
597          */
598         vmbus_chan_cpu_rr(sc->hs_chan);
599         ret = vmbus_chan_open(
600                 sc->hs_chan,
601                 sc->hs_drv_props->drv_ringbuffer_size,
602                 sc->hs_drv_props->drv_ringbuffer_size,
603                 (void *)&props,
604                 sizeof(struct vmstor_chan_props),
605                 hv_storvsc_on_channel_callback, sc);
606
607         if (ret != 0) {
608                 return ret;
609         }
610
611         ret = hv_storvsc_channel_init(sc);
612
613         return (ret);
614 }
615
616 #if HVS_HOST_RESET
617 static int
618 hv_storvsc_host_reset(struct storvsc_softc *sc)
619 {
620         int ret = 0;
621
622         struct hv_storvsc_request *request;
623         struct vstor_packet *vstor_packet;
624
625         request = &sc->hs_reset_req;
626         request->softc = sc;
627         vstor_packet = &request->vstor_packet;
628
629         sema_init(&request->synch_sema, 0, "stor synch sema");
630
631         vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
632         vstor_packet->flags = REQUEST_COMPLETION_FLAG;
633
634         ret = vmbus_chan_send(dev->channel,
635             VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
636             vstor_packet, VSTOR_PKT_SIZE,
637             (uint64_t)(uintptr_t)&sc->hs_reset_req);
638
639         if (ret != 0) {
640                 goto cleanup;
641         }
642
643         ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */
644
645         if (ret) {
646                 goto cleanup;
647         }
648
649
650         /*
651          * At this point, all outstanding requests in the adapter
652          * should have been flushed out and return to us
653          */
654
655 cleanup:
656         sema_destroy(&request->synch_sema);
657         return (ret);
658 }
659 #endif /* HVS_HOST_RESET */
660
661 /**
662  * @brief Function to initiate an I/O request
663  *
664  * @param device Hyper-V device pointer
665  * @param request pointer to a request structure
666  * @returns 0 on success, non-zero error on failure
667  */
668 static int
669 hv_storvsc_io_request(struct storvsc_softc *sc,
670                                           struct hv_storvsc_request *request)
671 {
672         struct vstor_packet *vstor_packet = &request->vstor_packet;
673         struct vmbus_channel* outgoing_channel = NULL;
674         int ret = 0;
675
676         vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
677
678         vstor_packet->u.vm_srb.length =
679             sizeof(struct vmscsi_req) - vmscsi_size_delta;
680         
681         vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
682
683         vstor_packet->u.vm_srb.transfer_len =
684             request->prp_list.gpa_range.gpa_len;
685
686         vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
687
688         outgoing_channel = sc->hs_cpu2chan[curcpu];
689
690         mtx_unlock(&request->softc->hs_lock);
691         if (request->prp_list.gpa_range.gpa_len) {
692                 ret = vmbus_chan_send_prplist(outgoing_channel,
693                     &request->prp_list.gpa_range, request->prp_cnt,
694                     vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
695         } else {
696                 ret = vmbus_chan_send(outgoing_channel,
697                     VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
698                     vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
699         }
700         mtx_lock(&request->softc->hs_lock);
701
702         if (ret != 0) {
703                 printf("Unable to send packet %p ret %d", vstor_packet, ret);
704         } else {
705                 atomic_add_int(&sc->hs_num_out_reqs, 1);
706         }
707
708         return (ret);
709 }
710
711
/**
 * Process IO_COMPLETION_OPERATION and ready
 * the result to be completed for upper layer
 * processing by the CAM layer.
 */
static void
hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
			   struct vstor_packet *vstor_packet,
			   struct hv_storvsc_request *request)
{
	struct vmscsi_req *vm_srb;

	vm_srb = &vstor_packet->u.vm_srb;

	/*
	 * Copy some fields of the host's response into the request structure,
	 * because the fields will be used later in storvsc_io_done().
	 */
	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;

	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
		/* Autosense data available */

		/* The host must not return more sense bytes than we can hold. */
		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
				("vm_srb->sense_info_len <= "
				 "request->sense_info_len"));

		memcpy(request->sense_data, vm_srb->u.sense_data,
			vm_srb->sense_info_len);

		/* Record how many sense bytes were actually returned. */
		request->sense_info_len = vm_srb->sense_info_len;
	}

	/* Complete request by passing to the CAM layer */
	storvsc_io_done(request);
	/* Wake any drain waiter once the last outstanding I/O completes. */
	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
		sema_post(&sc->hs_drain_sema);
	}
}
755
/*
 * Schedule a CAM rescan of this adapter's bus.  Invoked when the host
 * reports a bus change (VSTOR_OPERATION_ENUMERATE_BUS).
 */
static void
hv_storvsc_rescan_target(struct storvsc_softc *sc)
{
	path_id_t pathid;
	target_id_t targetid;
	union ccb *ccb;

	pathid = cam_sim_path(sc->hs_sim);
	targetid = CAM_TARGET_WILDCARD;

	/*
	 * Allocate a CCB and schedule a rescan.
	 */
	ccb = xpt_alloc_ccb_nowait();
	if (ccb == NULL) {
		printf("unable to alloc CCB for rescan\n");
		return;
	}

	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
		printf("unable to create path for rescan, pathid: %u,"
		    "targetid: %u\n", pathid, targetid);
		xpt_free_ccb(ccb);
		return;
	}

	/* targetid is always the wildcard here, so this scans the whole bus. */
	if (targetid == CAM_TARGET_WILDCARD)
		ccb->ccb_h.func_code = XPT_SCAN_BUS;
	else
		ccb->ccb_h.func_code = XPT_SCAN_TGT;

	xpt_rescan(ccb);
}
790
791 static void
792 hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc)
793 {
794         int ret = 0;
795         struct storvsc_softc *sc = xsc;
796         uint32_t bytes_recvd;
797         uint64_t request_id;
798         uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
799         struct hv_storvsc_request *request;
800         struct vstor_packet *vstor_packet;
801
802         bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
803         ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id);
804         KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough"));
805         /* XXX check bytes_recvd to make sure that it contains enough data */
806
807         while ((ret == 0) && (bytes_recvd > 0)) {
808                 request = (struct hv_storvsc_request *)(uintptr_t)request_id;
809
810                 if ((request == &sc->hs_init_req) ||
811                         (request == &sc->hs_reset_req)) {
812                         memcpy(&request->vstor_packet, packet,
813                                    sizeof(struct vstor_packet));
814                         sema_post(&request->synch_sema);
815                 } else {
816                         vstor_packet = (struct vstor_packet *)packet;
817                         switch(vstor_packet->operation) {
818                         case VSTOR_OPERATION_COMPLETEIO:
819                                 if (request == NULL)
820                                         panic("VMBUS: storvsc received a "
821                                             "packet with NULL request id in "
822                                             "COMPLETEIO operation.");
823
824                                 hv_storvsc_on_iocompletion(sc,
825                                                         vstor_packet, request);
826                                 break;
827                         case VSTOR_OPERATION_REMOVEDEVICE:
828                                 printf("VMBUS: storvsc operation %d not "
829                                     "implemented.\n", vstor_packet->operation);
830                                 /* TODO: implement */
831                                 break;
832                         case VSTOR_OPERATION_ENUMERATE_BUS:
833                                 hv_storvsc_rescan_target(sc);
834                                 break;
835                         default:
836                                 break;
837                         }                       
838                 }
839
840                 bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8),
841                 ret = vmbus_chan_recv(channel, packet, &bytes_recvd,
842                     &request_id);
843                 KASSERT(ret != ENOBUFS,
844                     ("storvsc recvbuf is not large enough"));
845                 /*
846                  * XXX check bytes_recvd to make sure that it contains
847                  * enough data
848                  */
849         }
850 }
851
852 /**
853  * @brief StorVSC probe function
854  *
855  * Device probe function.  Returns 0 if the input device is a StorVSC
 * device.  Otherwise, an ENXIO is returned.  If the input device is
857  * for BlkVSC (paravirtual IDE) device and this support is disabled in
858  * favor of the emulated ATA/IDE device, return ENXIO.
859  *
860  * @param a device
 * @returns 0 on success, ENXIO if not a matching StorVSC device
862  */
863 static int
864 storvsc_probe(device_t dev)
865 {
866         int ata_disk_enable = 0;
867         int ret = ENXIO;
868         
869         switch (storvsc_get_storage_type(dev)) {
870         case DRIVER_BLKVSC:
871                 if(bootverbose)
872                         device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n");
873                 if (!getenv_int("hw.ata.disk_enable", &ata_disk_enable)) {
874                         if(bootverbose)
875                                 device_printf(dev,
876                                         "Enlightened ATA/IDE detected\n");
877                         device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
878                         ret = BUS_PROBE_DEFAULT;
879                 } else if(bootverbose)
880                         device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n");
881                 break;
882         case DRIVER_STORVSC:
883                 if(bootverbose)
884                         device_printf(dev, "Enlightened SCSI device detected\n");
885                 device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
886                 ret = BUS_PROBE_DEFAULT;
887                 break;
888         default:
889                 ret = ENXIO;
890         }
891         return (ret);
892 }
893
894 static void
895 storvsc_create_cpu2chan(struct storvsc_softc *sc)
896 {
897         int cpu;
898
899         CPU_FOREACH(cpu) {
900                 sc->hs_cpu2chan[cpu] = vmbus_chan_cpu2chan(sc->hs_chan, cpu);
901                 if (bootverbose) {
902                         device_printf(sc->hs_dev, "cpu%d -> chan%u\n",
903                             cpu, vmbus_chan_id(sc->hs_cpu2chan[cpu]));
904                 }
905         }
906 }
907
908 static int
909 storvsc_init_requests(device_t dev)
910 {
911         struct storvsc_softc *sc = device_get_softc(dev);
912         struct hv_storvsc_request *reqp;
913         int error, i;
914
915         LIST_INIT(&sc->hs_free_list);
916
917         error = bus_dma_tag_create(
918                 bus_get_dma_tag(dev),           /* parent */
919                 1,                              /* alignment */
920                 PAGE_SIZE,                      /* boundary */
921                 BUS_SPACE_MAXADDR,              /* lowaddr */
922                 BUS_SPACE_MAXADDR,              /* highaddr */
923                 NULL, NULL,                     /* filter, filterarg */
924                 STORVSC_DATA_SIZE_MAX,          /* maxsize */
925                 STORVSC_DATA_SEGCNT_MAX,        /* nsegments */
926                 STORVSC_DATA_SEGSZ_MAX,         /* maxsegsize */
927                 0,                              /* flags */
928                 NULL,                           /* lockfunc */
929                 NULL,                           /* lockfuncarg */
930                 &sc->storvsc_req_dtag);
931         if (error) {
932                 device_printf(dev, "failed to create storvsc dma tag\n");
933                 return (error);
934         }
935
936         for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
937                 reqp = malloc(sizeof(struct hv_storvsc_request),
938                                  M_DEVBUF, M_WAITOK|M_ZERO);
939                 reqp->softc = sc;
940                 error = bus_dmamap_create(sc->storvsc_req_dtag, 0,
941                                 &reqp->data_dmap);
942                 if (error) {
943                         device_printf(dev, "failed to allocate storvsc "
944                             "data dmamap\n");
945                         goto cleanup;
946                 }
947                 LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
948         }
949         return (0);
950
951 cleanup:
952         while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) {
953                 LIST_REMOVE(reqp, link);
954                 bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
955                 free(reqp, M_DEVBUF);
956         }
957         return (error);
958 }
959
960 static void
961 storvsc_sysctl(device_t dev)
962 {
963         struct sysctl_oid_list *child;
964         struct sysctl_ctx_list *ctx;
965         struct storvsc_softc *sc;
966
967         sc = device_get_softc(dev);
968         ctx = device_get_sysctl_ctx(dev);
969         child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
970
971         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt", CTLFLAG_RW,
972                 &sc->sysctl_data.data_bio_cnt, "# of bio data block");
973         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt", CTLFLAG_RW,
974                 &sc->sysctl_data.data_vaddr_cnt, "# of vaddr data block");
975         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt", CTLFLAG_RW,
976                 &sc->sysctl_data.data_sg_cnt, "# of sg data block");
977 }
978
979 /**
980  * @brief StorVSC attach function
981  *
982  * Function responsible for allocating per-device structures,
983  * setting up CAM interfaces and scanning for available LUNs to
984  * be used for SCSI device peripherals.
985  *
986  * @param a device
987  * @returns 0 on success or an error on failure
988  */
static int
storvsc_attach(device_t dev)
{
        enum hv_storage_type stor_type;
        struct storvsc_softc *sc;
        struct cam_devq *devq;
        int ret, i, j;
        struct hv_storvsc_request *reqp;
        struct root_hold_token *root_mount_token = NULL;
        struct hv_sgl_node *sgl_node = NULL;
        void *tmp_buff = NULL;

        /*
         * We need to serialize storvsc attach calls.
         */
        root_mount_token = root_mount_hold("storvsc");

        sc = device_get_softc(dev);
        sc->hs_chan = vmbus_get_channel(dev);

        stor_type = storvsc_get_storage_type(dev);

        if (stor_type == DRIVER_UNKNOWN) {
                ret = ENODEV;
                goto cleanup;
        }

        /* fill in driver specific properties */
        sc->hs_drv_props = &g_drv_props_table[stor_type];

        /* fill in device specific properties */
        sc->hs_unit     = device_get_unit(dev);
        sc->hs_dev      = dev;

        mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);

        /* Pre-allocate the request pool and per-request DMA maps. */
        ret = storvsc_init_requests(dev);
        if (ret != 0)
                goto cleanup;

        /* create sg-list page pool */
        if (FALSE == g_hv_sgl_page_pool.is_init) {
                /*
                 * NOTE(review): the pool is global and shared by all
                 * storvsc instances, and is_init is tested without a
                 * lock -- presumably this relies on attach calls being
                 * serialized; confirm.
                 */
                g_hv_sgl_page_pool.is_init = TRUE;
                LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
                LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);

                /*
                 * Pre-create SG list, each SG list with
                 * STORVSC_DATA_SEGCNT_MAX segments, each
                 * segment has one page buffer
                 */
                for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) {
                        sgl_node = malloc(sizeof(struct hv_sgl_node),
                            M_DEVBUF, M_WAITOK|M_ZERO);

                        sgl_node->sgl_data =
                            sglist_alloc(STORVSC_DATA_SEGCNT_MAX,
                            M_WAITOK|M_ZERO);

                        for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
                                tmp_buff = malloc(PAGE_SIZE,
                                    M_DEVBUF, M_WAITOK|M_ZERO);

                                /*
                                 * ss_paddr is (ab)used to hold the page's
                                 * kernel *virtual* address; the bounce
                                 * copy routines cast it back to void *.
                                 */
                                sgl_node->sgl_data->sg_segs[j].ss_paddr =
                                    (vm_paddr_t)tmp_buff;
                        }

                        LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
                            sgl_node, link);
                }
        }

        sc->hs_destroy = FALSE;
        sc->hs_drain_notify = FALSE;
        sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");

        /* Negotiate the VSP protocol and open the VMBUS channel(s). */
        ret = hv_storvsc_connect_vsp(sc);
        if (ret != 0) {
                goto cleanup;
        }

        /* Construct cpu to channel mapping */
        storvsc_create_cpu2chan(sc);

        /*
         * Create the device queue.
         * Hyper-V maps each target to one SCSI HBA
         */
        devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
        if (devq == NULL) {
                device_printf(dev, "Failed to alloc device queue\n");
                ret = ENOMEM;
                goto cleanup;
        }

        sc->hs_sim = cam_sim_alloc(storvsc_action,
                                storvsc_poll,
                                sc->hs_drv_props->drv_name,
                                sc,
                                sc->hs_unit,
                                &sc->hs_lock, 1,
                                sc->hs_drv_props->drv_max_ios_per_target,
                                devq);

        if (sc->hs_sim == NULL) {
                device_printf(dev, "Failed to alloc sim\n");
                /* cam_sim_alloc failed, so the devq is still ours to free. */
                cam_simq_free(devq);
                ret = ENOMEM;
                goto cleanup;
        }

        mtx_lock(&sc->hs_lock);
        /* bus_id is set to 0, need to get it from VMBUS channel query? */
        if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
                /* free_devq=TRUE: cam_sim_free releases devq for us. */
                cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
                mtx_unlock(&sc->hs_lock);
                device_printf(dev, "Unable to register SCSI bus\n");
                ret = ENXIO;
                goto cleanup;
        }

        if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
                 cam_sim_path(sc->hs_sim),
                CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
                xpt_bus_deregister(cam_sim_path(sc->hs_sim));
                cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
                mtx_unlock(&sc->hs_lock);
                device_printf(dev, "Unable to create path\n");
                ret = ENXIO;
                goto cleanup;
        }

        mtx_unlock(&sc->hs_lock);

        /* Export the data_{bio,vaddr,sg}_cnt counters. */
        storvsc_sysctl(dev);

        root_mount_rel(root_mount_token);
        return (0);


cleanup:
        root_mount_rel(root_mount_token);
        /* Release any request structures built by storvsc_init_requests(). */
        while (!LIST_EMPTY(&sc->hs_free_list)) {
                reqp = LIST_FIRST(&sc->hs_free_list);
                LIST_REMOVE(reqp, link);
                bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
                free(reqp, M_DEVBUF);
        }

        /*
         * Tear down the global sg-list pool.  ss_paddr holds the kernel
         * virtual address of each page buffer allocated above, so it is
         * cast back to a pointer for free().
         */
        while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
                sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
                LIST_REMOVE(sgl_node, link);
                for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
                        if (NULL !=
                            (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
                                free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
                        }
                }
                sglist_free(sgl_node->sgl_data);
                free(sgl_node, M_DEVBUF);
        }

        return (ret);
}
1153
1154 /**
1155  * @brief StorVSC device detach function
1156  *
1157  * This function is responsible for safely detaching a
1158  * StorVSC device.  This includes waiting for inbound responses
1159  * to complete and freeing associated per-device structures.
1160  *
1161  * @param dev a device
1162  * returns 0 on success
1163  */
static int
storvsc_detach(device_t dev)
{
        struct storvsc_softc *sc = device_get_softc(dev);
        struct hv_storvsc_request *reqp = NULL;
        struct hv_sgl_node *sgl_node = NULL;
        int j = 0;

        /* Mark the device as going away so no new I/O is started. */
        sc->hs_destroy = TRUE;

        /*
         * At this point, all outbound traffic should be disabled. We
         * only allow inbound traffic (responses) to proceed so that
         * outstanding requests can be completed.
         */

        /* Block until the channel callback signals the last response drained. */
        sc->hs_drain_notify = TRUE;
        sema_wait(&sc->hs_drain_sema);
        sc->hs_drain_notify = FALSE;

        /*
         * Since we have already drained, we don't need to busy wait.
         * The call to close the channel will reset the callback
         * under the protection of the incoming channel lock.
         */

        vmbus_chan_close(sc->hs_chan);

        /* Free the request pool: destroy each DMA map, then the request. */
        mtx_lock(&sc->hs_lock);
        while (!LIST_EMPTY(&sc->hs_free_list)) {
                reqp = LIST_FIRST(&sc->hs_free_list);
                LIST_REMOVE(reqp, link);
                bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
                free(reqp, M_DEVBUF);
        }
        mtx_unlock(&sc->hs_lock);

        /*
         * Release the global sg-list pool.  ss_paddr holds the kernel
         * virtual address of each pre-allocated page buffer (see attach).
         * NOTE(review): the pool is shared by all storvsc instances and
         * is_init is left TRUE here -- detaching one instance while
         * others remain in service looks unsafe; confirm.
         */
        while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
                sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
                LIST_REMOVE(sgl_node, link);
                for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){
                        if (NULL !=
                            (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
                                free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
                        }
                }
                sglist_free(sgl_node->sgl_data);
                free(sgl_node, M_DEVBUF);
        }

        return (0);
}
1216
1217 #if HVS_TIMEOUT_TEST
1218 /**
1219  * @brief unit test for timed out operations
1220  *
1221  * This function provides unit testing capability to simulate
1222  * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
1223  * is required.
1224  *
1225  * @param reqp pointer to a request structure
1226  * @param opcode SCSI operation being performed
1227  * @param wait if 1, wait for I/O to complete
1228  */
static void
storvsc_timeout_test(struct hv_storvsc_request *reqp,
                uint8_t opcode, int wait)
{
        int ret;
        union ccb *ccb = reqp->ccb;
        struct storvsc_softc *sc = reqp->softc;

        /* Only exercise the request whose CDB opcode matches the test. */
        if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
                return;
        }

        if (wait) {
                /* Hold the event mutex so the cv wait below cannot miss a signal. */
                mtx_lock(&reqp->event.mtx);
        }
        ret = hv_storvsc_io_request(sc, reqp);
        if (ret != 0) {
                if (wait) {
                        mtx_unlock(&reqp->event.mtx);
                }
                printf("%s: io_request failed with %d.\n",
                                __func__, ret);
                ccb->ccb_h.status = CAM_PROVIDE_FAIL;
                mtx_lock(&sc->hs_lock);
                storvsc_free_request(sc, reqp);
                xpt_done(ccb);
                mtx_unlock(&sc->hs_lock);
                return;
        }

        if (wait) {
                xpt_print(ccb->ccb_h.path,
                                "%u: %s: waiting for IO return.\n",
                                ticks, __func__);
                /* Wait up to 60 seconds for the I/O completion to signal us. */
                ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
                mtx_unlock(&reqp->event.mtx);
                xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
                                ticks, __func__, (ret == 0)?
                                "IO return detected" :
                                "IO return not detected");
                /*
                 * Now both the timer handler and io done are running
                 * simultaneously. We want to confirm the io done always
                 * finishes after the timer handler exits. So reqp used by
                 * timer handler is not freed or stale. Do busy loop for
                 * another 1/10 second to make sure io done does
                 * wait for the timer handler to complete.
                 */
                DELAY(100*1000);
                mtx_lock(&sc->hs_lock);
                xpt_print(ccb->ccb_h.path,
                                "%u: %s: finishing, queue frozen %d, "
                                "ccb status 0x%x scsi_status 0x%x.\n",
                                ticks, __func__, sc->hs_frozen,
                                ccb->ccb_h.status,
                                ccb->csio.scsi_status);
                mtx_unlock(&sc->hs_lock);
        }
}
1288 #endif /* HVS_TIMEOUT_TEST */
1289
1290 #ifdef notyet
1291 /**
1292  * @brief timeout handler for requests
1293  *
1294  * This function is called as a result of a callout expiring.
1295  *
1296  * @param arg pointer to a request
1297  */
static void
storvsc_timeout(void *arg)
{
        struct hv_storvsc_request *reqp = arg;
        struct storvsc_softc *sc = reqp->softc;
        union ccb *ccb = reqp->ccb;

        if (reqp->retries == 0) {
                /*
                 * First expiry: log the stall, grant the request one more
                 * full timeout period, and re-arm the callout.
                 */
                mtx_lock(&sc->hs_lock);
                xpt_print(ccb->ccb_h.path,
                    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
                    ticks, reqp, ccb->ccb_h.timeout / 1000);
                cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
                mtx_unlock(&sc->hs_lock);

                reqp->retries++;
                callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
                    0, storvsc_timeout, reqp, 0);
#if HVS_TIMEOUT_TEST
                storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
#endif
                return;
        }

        /*
         * Second expiry: freeze the SIM queue (once per softc) so CAM
         * stops dispatching new requests until the stuck I/O completes.
         */
        mtx_lock(&sc->hs_lock);
        xpt_print(ccb->ccb_h.path,
                "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
                ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
                (sc->hs_frozen == 0)?
                "freezing the queue" : "the queue is already frozen");
        if (sc->hs_frozen == 0) {
                sc->hs_frozen = 1;
                xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
        }
        mtx_unlock(&sc->hs_lock);

#if HVS_TIMEOUT_TEST
        storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
#endif
}
1338 #endif
1339
1340 /**
1341  * @brief StorVSC device poll function
1342  *
1343  * This function is responsible for servicing requests when
1344  * interrupts are disabled (i.e when we are dumping core.)
1345  *
1346  * @param sim a pointer to a CAM SCSI interface module
1347  */
1348 static void
1349 storvsc_poll(struct cam_sim *sim)
1350 {
1351         struct storvsc_softc *sc = cam_sim_softc(sim);
1352
1353         mtx_assert(&sc->hs_lock, MA_OWNED);
1354         mtx_unlock(&sc->hs_lock);
1355         hv_storvsc_on_channel_callback(sc->hs_chan, sc);
1356         mtx_lock(&sc->hs_lock);
1357 }
1358
1359 /**
1360  * @brief StorVSC device action function
1361  *
1362  * This function is responsible for handling SCSI operations which
1363  * are passed from the CAM layer.  The requests are in the form of
1364  * CAM control blocks which indicate the action being performed.
1365  * Not all actions require converting the request to a VSCSI protocol
1366  * message - these actions can be responded to by this driver.
1367  * Requests which are destined for a backend storage device are converted
1368  * to a VSCSI protocol message and sent on the channel connection associated
1369  * with this device.
1370  *
1371  * @param sim pointer to a CAM SCSI interface module
1372  * @param ccb pointer to a CAM control block
1373  */
static void
storvsc_action(struct cam_sim *sim, union ccb *ccb)
{
        struct storvsc_softc *sc = cam_sim_softc(sim);
        int res;

        mtx_assert(&sc->hs_lock, MA_OWNED);
        switch (ccb->ccb_h.func_code) {
        case XPT_PATH_INQ: {
                /* Report HBA capabilities and addressing limits to CAM. */
                struct ccb_pathinq *cpi = &ccb->cpi;

                cpi->version_num = 1;
                cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
                cpi->target_sprt = 0;
                cpi->hba_misc = PIM_NOBUSRESET;
                if (hv_storvsc_use_pim_unmapped)
                        cpi->hba_misc |= PIM_UNMAPPED;
                cpi->hba_eng_cnt = 0;
                cpi->max_target = STORVSC_MAX_TARGETS;
                cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
                /* Claim an initiator id just past the valid target range. */
                cpi->initiator_id = cpi->max_target;
                cpi->bus_id = cam_sim_bus(sim);
                cpi->base_transfer_speed = 300000;
                cpi->transport = XPORT_SAS;
                cpi->transport_version = 0;
                cpi->protocol = PROTO_SCSI;
                cpi->protocol_version = SCSI_REV_SPC2;
                strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
                strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
                strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
                cpi->unit_number = cam_sim_unit(sim);

                ccb->ccb_h.status = CAM_REQ_CMP;
                xpt_done(ccb);
                return;
        }
        case XPT_GET_TRAN_SETTINGS: {
                struct  ccb_trans_settings *cts = &ccb->cts;

                cts->transport = XPORT_SAS;
                cts->transport_version = 0;
                cts->protocol = PROTO_SCSI;
                cts->protocol_version = SCSI_REV_SPC2;

                /* enable tag queuing and disconnected mode */
                cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
                cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
                cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
                cts->xport_specific.valid = CTS_SPI_VALID_DISC;
                cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;

                ccb->ccb_h.status = CAM_REQ_CMP;
                xpt_done(ccb);
                return;
        }
        case XPT_SET_TRAN_SETTINGS:     {
                /* Transport settings are fixed; accept without change. */
                ccb->ccb_h.status = CAM_REQ_CMP;
                xpt_done(ccb);
                return;
        }
        case XPT_CALC_GEOMETRY:{
                cam_calc_geometry(&ccb->ccg, 1);
                xpt_done(ccb);
                return;
        }
        case  XPT_RESET_BUS:
        case  XPT_RESET_DEV:{
#if HVS_HOST_RESET
                if ((res = hv_storvsc_host_reset(sc)) != 0) {
                        xpt_print(ccb->ccb_h.path,
                                "hv_storvsc_host_reset failed with %d\n", res);
                        ccb->ccb_h.status = CAM_PROVIDE_FAIL;
                        xpt_done(ccb);
                        return;
                }
                ccb->ccb_h.status = CAM_REQ_CMP;
                xpt_done(ccb);
                return;
#else
                xpt_print(ccb->ccb_h.path,
                                  "%s reset not supported.\n",
                                  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
                                  "bus" : "dev");
                ccb->ccb_h.status = CAM_REQ_INVALID;
                xpt_done(ccb);
                return;
#endif  /* HVS_HOST_RESET */
        }
        case XPT_SCSI_IO:
        case XPT_IMMED_NOTIFY: {
                struct hv_storvsc_request *reqp = NULL;
                bus_dmamap_t dmap_saved;

                if (ccb->csio.cdb_len == 0) {
                        panic("cdl_len is 0\n");
                }

                /* No free request slot: ask CAM to requeue and freeze the SIM. */
                if (LIST_EMPTY(&sc->hs_free_list)) {
                        ccb->ccb_h.status = CAM_REQUEUE_REQ;
                        if (sc->hs_frozen == 0) {
                                sc->hs_frozen = 1;
                                xpt_freeze_simq(sim, /* count*/1);
                        }
                        xpt_done(ccb);
                        return;
                }

                reqp = LIST_FIRST(&sc->hs_free_list);
                LIST_REMOVE(reqp, link);

                /* Save the data_dmap before reset request */
                dmap_saved = reqp->data_dmap;

                /* XXX this is ugly */
                bzero(reqp, sizeof(struct hv_storvsc_request));

                /* Restore necessary bits */
                reqp->data_dmap = dmap_saved;
                reqp->softc = sc;

                ccb->ccb_h.status |= CAM_SIM_QUEUED;
                if ((res = create_storvsc_request(ccb, reqp)) != 0) {
                        /*
                         * NOTE(review): reqp is not returned to
                         * hs_free_list on this failure path -- looks like
                         * a request leak; confirm create_storvsc_request
                         * does not release it itself.
                         */
                        ccb->ccb_h.status = CAM_REQ_INVALID;
                        xpt_done(ccb);
                        return;
                }

#ifdef notyet
                /* Arm a per-request timeout callout when one is requested. */
                if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
                        callout_init(&reqp->callout, CALLOUT_MPSAFE);
                        callout_reset_sbt(&reqp->callout,
                            SBT_1MS * ccb->ccb_h.timeout, 0,
                            storvsc_timeout, reqp, 0);
#if HVS_TIMEOUT_TEST
                        cv_init(&reqp->event.cv, "storvsc timeout cv");
                        mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
                                        NULL, MTX_DEF);
                        switch (reqp->vstor_packet.vm_srb.cdb[0]) {
                                case MODE_SELECT_10:
                                case SEND_DIAGNOSTIC:
                                        /* To have timer send the request. */
                                        return;
                                default:
                                        break;
                        }
#endif /* HVS_TIMEOUT_TEST */
                }
#endif

                if ((res = hv_storvsc_io_request(sc, reqp)) != 0) {
                        xpt_print(ccb->ccb_h.path,
                                "hv_storvsc_io_request failed with %d\n", res);
                        ccb->ccb_h.status = CAM_PROVIDE_FAIL;
                        storvsc_free_request(sc, reqp);
                        xpt_done(ccb);
                        return;
                }
                return;
        }

        default:
                ccb->ccb_h.status = CAM_REQ_INVALID;
                xpt_done(ccb);
                return;
        }
}
1540
1541 /**
1542  * @brief destroy bounce buffer
1543  *
 * This function is responsible for destroying a Scatter/Gather list
 * that was created by storvsc_create_bounce_buffer().
 *
 * @param sgl - the Scatter/Gather list to be destroyed
1549  *
1550  */
1551 static void
1552 storvsc_destroy_bounce_buffer(struct sglist *sgl)
1553 {
1554         struct hv_sgl_node *sgl_node = NULL;
1555         if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
1556                 printf("storvsc error: not enough in use sgl\n");
1557                 return;
1558         }
1559         sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1560         LIST_REMOVE(sgl_node, link);
1561         sgl_node->sgl_data = sgl;
1562         LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1563 }
1564
1565 /**
1566  * @brief create bounce buffer
1567  *
 * This function is responsible for creating a Scatter/Gather list
 * whose segments each reference a page-sized buffer.
 *
 * @param seg_count - SG-list segment count
 * @param write - if WRITE_TYPE, set each segment's used size to 0,
 * otherwise set the used size to the page size.
1574  *
1575  * return NULL if create failed
1576  */
1577 static struct sglist *
1578 storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1579 {
1580         int i = 0;
1581         struct sglist *bounce_sgl = NULL;
1582         unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1583         struct hv_sgl_node *sgl_node = NULL;    
1584
1585         /* get struct sglist from free_sgl_list */
1586         if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1587                 printf("storvsc error: not enough free sgl\n");
1588                 return NULL;
1589         }
1590         sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1591         LIST_REMOVE(sgl_node, link);
1592         bounce_sgl = sgl_node->sgl_data;
1593         LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1594
1595         bounce_sgl->sg_maxseg = seg_count;
1596
1597         if (write == WRITE_TYPE)
1598                 bounce_sgl->sg_nseg = 0;
1599         else
1600                 bounce_sgl->sg_nseg = seg_count;
1601
1602         for (i = 0; i < seg_count; i++)
1603                 bounce_sgl->sg_segs[i].ss_len = buf_len;
1604
1605         return bounce_sgl;
1606 }
1607
1608 /**
1609  * @brief copy data from SG list to bounce buffer
1610  *
 * This function is responsible for copying data from the segments of one
 * SG list into another SG list that is used as a bounce buffer.
 *
 * @param bounce_sgl - the destination SG list
 * @param orig_sgl - the segments of the source SG list.
 * @param orig_sgl_count - the count of segments.
 * @param seg_bits - bitmask indicating which segments need the bounce
 *  buffer; a set bit means the segment is copied.
1619  *
1620  */
1621 static void
1622 storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
1623                                bus_dma_segment_t *orig_sgl,
1624                                unsigned int orig_sgl_count,
1625                                uint64_t seg_bits)
1626 {
1627         int src_sgl_idx = 0;
1628
1629         for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
1630                 if (seg_bits & (1 << src_sgl_idx)) {
1631                         memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
1632                             (void*)orig_sgl[src_sgl_idx].ds_addr,
1633                             orig_sgl[src_sgl_idx].ds_len);
1634
1635                         bounce_sgl->sg_segs[src_sgl_idx].ss_len =
1636                             orig_sgl[src_sgl_idx].ds_len;
1637                 }
1638         }
1639 }
1640
1641 /**
1642  * @brief copy data from SG list which used as bounce to another SG list
1643  *
1644  * This function copies data from an SG list used as a bounce buffer
1645  * back into another SG list's segments.
1646  *
1647  * @param dest_sgl - the destination SG list's segments
1648  * @param dest_sgl_count - the count of destination SG list's segment.
1649  * @param src_sgl - the source SG list.
1650  * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
1651  *
1652  */
1653 void
1654 storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1655                                     unsigned int dest_sgl_count,
1656                                     struct sglist* src_sgl,
1657                                     uint64_t seg_bits)
1658 {
1659         int sgl_idx = 0;
1660         
1661         for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
1662                 if (seg_bits & (1 << sgl_idx)) {
1663                         memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1664                             (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
1665                             src_sgl->sg_segs[sgl_idx].ss_len);
1666                 }
1667         }
1668 }
1669
1670 /**
1671  * @brief check SG list with bounce buffer or not
1672  *
1673  * This function checks whether the SG list needs a bounce buffer.
1674  *
1675  * @param sgl - the SG list's segments
1676  * @param sg_count - the count of SG list's segment.
1677  * @param bits - bit mask of segments that need a bounce buffer
1678  *
1679  * return -1 if the SG list does not need a bounce buffer
1680  */
1681 static int
1682 storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
1683                                 unsigned int sg_count,
1684                                 uint64_t *bits)
1685 {
1686         int i = 0;
1687         int offset = 0;
1688         uint64_t phys_addr = 0;
1689         uint64_t tmp_bits = 0;
1690         boolean_t found_hole = FALSE;
1691         boolean_t pre_aligned = TRUE;
1692
1693         if (sg_count < 2){
1694                 return -1;
1695         }
1696
1697         *bits = 0;
1698         
1699         phys_addr = vtophys(sgl[0].ds_addr);
1700         offset =  phys_addr - trunc_page(phys_addr);
1701
1702         if (offset != 0) {
1703                 pre_aligned = FALSE;
1704                 tmp_bits |= 1;
1705         }
1706
1707         for (i = 1; i < sg_count; i++) {
1708                 phys_addr = vtophys(sgl[i].ds_addr);
1709                 offset =  phys_addr - trunc_page(phys_addr);
1710
1711                 if (offset == 0) {
1712                         if (FALSE == pre_aligned){
1713                                 /*
1714                                  * This segment is aligned, if the previous
1715                                  * one is not aligned, find a hole
1716                                  */
1717                                 found_hole = TRUE;
1718                         }
1719                         pre_aligned = TRUE;
1720                 } else {
1721                         tmp_bits |= 1 << i;
1722                         if (!pre_aligned) {
1723                                 if (phys_addr != vtophys(sgl[i-1].ds_addr +
1724                                     sgl[i-1].ds_len)) {
1725                                         /*
1726                                          * Check whether connect to previous
1727                                          * segment,if not, find the hole
1728                                          */
1729                                         found_hole = TRUE;
1730                                 }
1731                         } else {
1732                                 found_hole = TRUE;
1733                         }
1734                         pre_aligned = FALSE;
1735                 }
1736         }
1737
1738         if (!found_hole) {
1739                 return (-1);
1740         } else {
1741                 *bits = tmp_bits;
1742                 return 0;
1743         }
1744 }
1745
1746 /**
1747  * Copy bus_dma segments to multiple page buffer, which requires
1748  * the pages are compact composed except for the 1st and last pages.
1749  */
1750 static void
1751 storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1752 {
1753         struct hv_storvsc_request *reqp = arg;
1754         union ccb *ccb = reqp->ccb;
1755         struct ccb_scsiio *csio = &ccb->csio;
1756         struct storvsc_gpa_range *prplist;
1757         int i;
1758
1759         prplist = &reqp->prp_list;
1760         prplist->gpa_range.gpa_len = csio->dxfer_len;
1761         prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK;
1762
1763         for (i = 0; i < nsegs; i++) {
1764                 prplist->gpa_page[i] = atop(segs[i].ds_addr);
1765 #ifdef INVARIANTS
1766                 if (i != 0 && i != nsegs - 1) {
1767                         KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 &&
1768                             segs[i].ds_len == PAGE_SIZE, ("not a full page"));
1769                 }
1770 #endif
1771         }
1772         reqp->prp_cnt = nsegs;
1773 }
1774
1775 /**
1776  * @brief Fill in a request structure based on a CAM control block
1777  *
1778  * Fills in a request structure based on the contents of a CAM control
1779  * block.  The request structure holds the payload information for
1780  * VSCSI protocol request.
1781  *
1782  * @param ccb pointer to a CAM contorl block
1783  * @param reqp pointer to a request structure
1784  */
static int
create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
{
	struct ccb_scsiio *csio = &ccb->csio;
	uint64_t phys_addr;
	uint32_t pfn;
	uint64_t not_aligned_seg_bits = 0;
	int error;

	/* refer to struct vmscsi_req for meanings of these two fields */
	reqp->vstor_packet.u.vm_srb.port =
		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
	reqp->vstor_packet.u.vm_srb.path_id =
		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));

	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;

	/* Copy the CDB; CAM hands it over either by pointer or inline. */
	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
	if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
			csio->cdb_len);
	} else {
		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
			csio->cdb_len);
	}

	/* Map the CAM transfer direction onto the VSCSI data_in code. */
	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
	case CAM_DIR_OUT:
		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
		break;
	case CAM_DIR_IN:
		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
		break;
	case CAM_DIR_NONE:
		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
		break;
	default:
		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
		break;
	}

	reqp->sense_data     = &csio->sense_data;
	reqp->sense_info_len = csio->sense_len;

	reqp->ccb = ccb;

	/* No data phase: nothing more to map. */
	if (0 == csio->dxfer_len) {
		return (0);
	}

	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
	case CAM_DATA_BIO:
	case CAM_DATA_VADDR:
		/*
		 * Let busdma build the page list; storvsc_xferbuf_prepare()
		 * fills reqp->prp_list from the resulting segments.
		 */
		error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag,
		    reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp,
		    BUS_DMA_NOWAIT);
		if (error) {
			xpt_print(ccb->ccb_h.path,
			    "bus_dmamap_load_ccb failed: %d\n", error);
			return (error);
		}
		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
			reqp->softc->sysctl_data.data_bio_cnt++;
		else
			reqp->softc->sysctl_data.data_vaddr_cnt++;
		break;

	case CAM_DATA_SG:
	{
		struct storvsc_gpa_range *prplist;
		int i = 0;
		int offset = 0;
		int ret;

		bus_dma_segment_t *storvsc_sglist =
		    (bus_dma_segment_t *)ccb->csio.data_ptr;
		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;

		prplist = &reqp->prp_list;
		prplist->gpa_range.gpa_len = csio->dxfer_len;

		/*
		 * NOTE(review): this printf fires unconditionally on every
		 * SG I/O — consider gating it on bootverbose.
		 */
		printf("Storvsc: get SG I/O operation, %d\n",
		    reqp->vstor_packet.u.vm_srb.data_in);

		if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX){
			printf("Storvsc: %d segments is too much, "
			    "only support %d segments\n",
			    storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX);
			return (EINVAL);
		}

		/*
		 * We create our own bounce buffer function currently. Idealy
		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
		 * code there is no callback API to check the page alignment of
		 * middle segments before busdma can decide if a bounce buffer
		 * is needed for particular segment. There is callback,
		 * "bus_dma_filter_t *filter", but the parrameters are not
		 * sufficient for storvsc driver.
		 * TODO:
		 *	Add page alignment check in BUS_DMA(9) callback. Once
		 *	this is complete, switch the following code to use
		 *	BUS_DMA(9) for storvsc bounce buffer support.
		 */
		/* check if we need to create bounce buffer */
		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
		    storvsc_sg_count, &not_aligned_seg_bits);
		if (ret != -1) {
			/* Misaligned segments found: route them through a
			 * per-request bounce sglist. */
			reqp->bounce_sgl =
			    storvsc_create_bounce_buffer(storvsc_sg_count,
			    reqp->vstor_packet.u.vm_srb.data_in);
			if (NULL == reqp->bounce_sgl) {
				printf("Storvsc_error: "
				    "create bounce buffer failed.\n");
				return (ENOMEM);
			}

			reqp->bounce_sgl_count = storvsc_sg_count;
			reqp->not_aligned_seg_bits = not_aligned_seg_bits;

			/*
			 * if it is write, we need copy the original data
			 *to bounce buffer
			 */
			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
				storvsc_copy_sgl_to_bounce_buf(
				    reqp->bounce_sgl,
				    storvsc_sglist,
				    storvsc_sg_count,
				    reqp->not_aligned_seg_bits);
			}

			/* transfer virtual address to physical frame number */
			if (reqp->not_aligned_seg_bits & 0x1){
				phys_addr =
				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
			}else{
				phys_addr =
					vtophys(storvsc_sglist[0].ds_addr);
			}
			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;

			pfn = phys_addr >> PAGE_SHIFT;
			prplist->gpa_page[0] = pfn;

			/*
			 * For each remaining segment, pick the bounce page
			 * when that segment was flagged, otherwise the
			 * original segment's page.
			 */
			for (i = 1; i < storvsc_sg_count; i++) {
				if (reqp->not_aligned_seg_bits & (1 << i)) {
					phys_addr =
					    vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
				} else {
					phys_addr =
					    vtophys(storvsc_sglist[i].ds_addr);
				}

				pfn = phys_addr >> PAGE_SHIFT;
				prplist->gpa_page[i] = pfn;
			}
			reqp->prp_cnt = i;
		} else {
			/* No bounce needed: map the segments directly. */
			phys_addr = vtophys(storvsc_sglist[0].ds_addr);

			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;

			for (i = 0; i < storvsc_sg_count; i++) {
				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
				pfn = phys_addr >> PAGE_SHIFT;
				prplist->gpa_page[i] = pfn;
			}
			reqp->prp_cnt = i;

			/* check the last segment cross boundary or not */
			offset = phys_addr & PAGE_MASK;
			if (offset) {
				/* Add one more PRP entry */
				phys_addr =
				    vtophys(storvsc_sglist[i-1].ds_addr +
				    PAGE_SIZE - offset);
				pfn = phys_addr >> PAGE_SHIFT;
				prplist->gpa_page[i] = pfn;
				reqp->prp_cnt++;
			}

			reqp->bounce_sgl_count = 0;
		}
		reqp->softc->sysctl_data.data_sg_cnt++;
		break;
	}
	default:
		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
		return(EINVAL);
	}

	return(0);
}
1980
1981 /**
1982  * @brief completion function before returning to CAM
1983  *
1984  * I/O process has been completed and the result needs
1985  * to be passed to the CAM layer.
1986  * Free resources related to this request.
1987  *
1988  * @param reqp pointer to a request structure
1989  */
static void
storvsc_io_done(struct hv_storvsc_request *reqp)
{
	union ccb *ccb = reqp->ccb;
	struct ccb_scsiio *csio = &ccb->csio;
	struct storvsc_softc *sc = reqp->softc;
	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
	bus_dma_segment_t *ori_sglist = NULL;
	int ori_sg_count = 0;
	/* destroy bounce buffer if it is used */
	if (reqp->bounce_sgl_count) {
		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
		ori_sg_count = ccb->csio.sglist_cnt;

		/*
		 * If it is READ operation, we should copy back the data
		 * to original SG list.
		 */
		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
			    ori_sg_count,
			    reqp->bounce_sgl,
			    reqp->not_aligned_seg_bits);
		}

		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
		reqp->bounce_sgl_count = 0;
	}

	/*
	 * This request previously hit its timeout handler; log the late
	 * completion and clear the retry count under the softc lock.
	 */
	if (reqp->retries > 0) {
		mtx_lock(&sc->hs_lock);
#if HVS_TIMEOUT_TEST
		xpt_print(ccb->ccb_h.path,
			"%u: IO returned after timeout, "
			"waking up timer handler if any.\n", ticks);
		mtx_lock(&reqp->event.mtx);
		cv_signal(&reqp->event.cv);
		mtx_unlock(&reqp->event.mtx);
#endif
		reqp->retries = 0;
		xpt_print(ccb->ccb_h.path,
			"%u: IO returned after timeout, "
			"stopping timer if any.\n", ticks);
		mtx_unlock(&sc->hs_lock);
	}

#ifdef notyet
	/*
	 * callout_drain() will wait for the timer handler to finish
	 * if it is running. So we don't need any lock to synchronize
	 * between this routine and the timer handler.
	 * Note that we need to make sure reqp is not freed when timer
	 * handler is using or will use it.
	 */
	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
		callout_drain(&reqp->callout);
	}
#endif

	/* Translate the SRB/SCSI result into a CAM status. */
	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
		const struct scsi_generic *cmd;

		if (vm_srb->srb_status != SRB_STATUS_SUCCESS) {
			if (vm_srb->srb_status == SRB_STATUS_INVALID_LUN) {
				xpt_print(ccb->ccb_h.path, "invalid LUN %d\n",
				    vm_srb->lun);
			} else {
				xpt_print(ccb->ccb_h.path, "Unknown SRB flag: %d\n",
				    vm_srb->srb_status);
			}
			/*
			 * If there are errors, for example, invalid LUN,
			 * host will inform VM through SRB status.
			 */
			ccb->ccb_h.status |= CAM_SEL_TIMEOUT;
		} else {
			ccb->ccb_h.status |= CAM_REQ_CMP;
		}

		/* Locate the CDB to special-case INQUIRY responses. */
		cmd = (const struct scsi_generic *)
		    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
		     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
		if (cmd->opcode == INQUIRY) {
			struct scsi_inquiry_data *inq_data =
			    (struct scsi_inquiry_data *)csio->data_ptr;
			uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
			int resp_xfer_len, resp_buf_len, data_len;

			/* Get the buffer length reported by host */
			resp_xfer_len = vm_srb->transfer_len;
			/* Get the available buffer length */
			resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
			data_len = (resp_buf_len < resp_xfer_len) ?
			    resp_buf_len : resp_xfer_len;

			if (bootverbose && data_len >= 5) {
				xpt_print(ccb->ccb_h.path, "storvsc inquiry "
				    "(%d) [%x %x %x %x %x ... ]\n", data_len,
				    resp_buf[0], resp_buf[1], resp_buf[2],
				    resp_buf[3], resp_buf[4]);
			}
			if (vm_srb->srb_status == SRB_STATUS_SUCCESS &&
			    data_len > SHORT_INQUIRY_LENGTH) {
				char vendor[16];

				cam_strvis(vendor, inq_data->vendor,
				    sizeof(inq_data->vendor), sizeof(vendor));

				/*
				 * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
				 * WIN2012 R2 in order to support UNMAP feature.
				 */
				if (!strncmp(vendor, "Msft", 4) &&
				    SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
				    (vmstor_proto_version ==
				     VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
				     vmstor_proto_version ==
				     VMSTOR_PROTOCOL_VERSION_WIN8)) {
					inq_data->version = SCSI_REV_SPC3;
					if (bootverbose) {
						xpt_print(ccb->ccb_h.path,
						    "storvsc upgrades "
						    "SPC2 to SPC3\n");
					}
				}
			}
		}
	} else {
		/* Non-OK SCSI status: report it and let CAM handle sense. */
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
			"storvsc scsi_status = %d\n",
			vm_srb->scsi_status);
		mtx_unlock(&sc->hs_lock);
		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
	}

	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
	/* Residual = requested length minus what the host transferred. */
	ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;

	if (reqp->sense_info_len != 0) {
		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
	}

	/* Release the SIM queue if we froze it earlier, then recycle reqp. */
	mtx_lock(&sc->hs_lock);
	if (reqp->softc->hs_frozen == 1) {
		xpt_print(ccb->ccb_h.path,
			"%u: storvsc unfreezing softc 0x%p.\n",
			ticks, reqp->softc);
		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
		reqp->softc->hs_frozen = 0;
	}
	storvsc_free_request(sc, reqp);
	mtx_unlock(&sc->hs_lock);

	xpt_done_direct(ccb);
}
2149
2150 /**
2151  * @brief Free a request structure
2152  *
2153  * Free a request structure by returning it to the free list
2154  *
2155  * @param sc pointer to a softc
2156  * @param reqp pointer to a request structure
2157  */     
static void
storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
{

	/*
	 * Recycle the request onto the softc's free list; the caller
	 * (storvsc_io_done()) holds sc->hs_lock around this call.
	 */
	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
}
2164
2165 /**
2166  * @brief Determine type of storage device from GUID
2167  *
2168  * Using the type GUID, determine if this is a StorVSC (paravirtual
2169  * SCSI or BlkVSC (paravirtual IDE) device.
2170  *
2171  * @param dev a device
2172  * returns an enum
2173  */
2174 static enum hv_storage_type
2175 storvsc_get_storage_type(device_t dev)
2176 {
2177         device_t parent = device_get_parent(dev);
2178
2179         if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0)
2180                 return DRIVER_BLKVSC;
2181         if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0)
2182                 return DRIVER_STORVSC;
2183         return DRIVER_UNKNOWN;
2184 }