]> CyberLeo.Net >> Repos - FreeBSD/releng/10.3.git/blob - sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
Fix heimdal KDC-REP service name validation vulnerability [SA-17:05]
[FreeBSD/releng/10.3.git] / sys / dev / hyperv / storvsc / hv_storvsc_drv_freebsd.c
1 /*-
2  * Copyright (c) 2009-2012 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 /**
30  * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
31  * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
32  * converted into VSCSI protocol messages which are delivered to the parent
33  * partition StorVSP driver over the Hyper-V VMBUS.
34  */
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include <sys/param.h>
39 #include <sys/proc.h>
40 #include <sys/condvar.h>
41 #include <sys/time.h>
42 #include <sys/systm.h>
43 #include <sys/sockio.h>
44 #include <sys/mbuf.h>
45 #include <sys/malloc.h>
46 #include <sys/module.h>
47 #include <sys/kernel.h>
48 #include <sys/queue.h>
49 #include <sys/lock.h>
50 #include <sys/sx.h>
51 #include <sys/taskqueue.h>
52 #include <sys/bus.h>
53 #include <sys/mutex.h>
54 #include <sys/callout.h>
55 #include <vm/vm.h>
56 #include <vm/pmap.h>
57 #include <vm/uma.h>
58 #include <sys/lock.h>
59 #include <sys/sema.h>
60 #include <sys/sglist.h>
61 #include <sys/eventhandler.h>
62 #include <machine/bus.h>
63 #include <sys/bus_dma.h>
64
65 #include <cam/cam.h>
66 #include <cam/cam_ccb.h>
67 #include <cam/cam_periph.h>
68 #include <cam/cam_sim.h>
69 #include <cam/cam_xpt_sim.h>
70 #include <cam/cam_xpt_internal.h>
71 #include <cam/cam_debug.h>
72 #include <cam/scsi/scsi_all.h>
73 #include <cam/scsi/scsi_message.h>
74
75 #include <dev/hyperv/include/hyperv.h>
76 #include "hv_vstorage.h"
77
78 #define STORVSC_RINGBUFFER_SIZE         (20*PAGE_SIZE)
79 #define STORVSC_MAX_LUNS_PER_TARGET     (64)
80 #define STORVSC_MAX_IO_REQUESTS         (STORVSC_MAX_LUNS_PER_TARGET * 2)
81 #define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1)
82 #define BLKVSC_MAX_IO_REQUESTS          STORVSC_MAX_IO_REQUESTS
83 #define STORVSC_MAX_TARGETS             (2)
84
85 #define VSTOR_PKT_SIZE  (sizeof(struct vstor_packet) - vmscsi_size_delta)
86
87 #define HV_ALIGN(x, a) roundup2(x, a)
88
89 struct storvsc_softc;
90
/*
 * A node wrapping one pre-allocated scatter/gather list; nodes move
 * between the free and in-use lists of g_hv_sgl_page_pool below.
 */
struct hv_sgl_node {
	LIST_ENTRY(hv_sgl_node) link;	/* list linkage */
	struct sglist *sgl_data;	/* the S/G list itself */
};

/*
 * Global pool of scatter/gather lists, presumably serving as bounce
 * buffers for I/O with unaligned segments (see bounce_sgl in
 * struct hv_storvsc_request) -- TODO confirm against the allocator.
 */
struct hv_sgl_page_pool{
	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;	/* currently borrowed */
	LIST_HEAD(, hv_sgl_node) free_sgl_list;		/* available nodes */
	boolean_t                is_init;		/* pool initialized? */
} g_hv_sgl_page_pool;
101
/*
 * Upper bound on S/G pages the bounce pool must cover: one multi-page
 * buffer worth of pages per outstanding request.  The expansion is
 * parenthesized so the macro is safe inside larger expressions
 * (e.g. division or a tighter-binding operator to its left).
 */
#define STORVSC_MAX_SG_PAGE_CNT	(STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT)
103
/* Direction of a storvsc I/O request as seen from the guest. */
enum storvsc_request_type {
	WRITE_TYPE,
	READ_TYPE,
	UNKNOWN_TYPE
};
109
/*
 * Per-I/O tracking structure; one per outstanding command, recycled
 * through the adapter's hs_free_list.
 */
struct hv_storvsc_request {
	LIST_ENTRY(hv_storvsc_request) link;	/* free-list linkage */
	struct vstor_packet     vstor_packet;	/* wire-format VSCSI packet */
	hv_vmbus_multipage_buffer data_buf;	/* data pages for this I/O */
	void *sense_data;			/* autosense destination buffer */
	uint8_t sense_info_len;			/* capacity, then actual length */
	uint8_t retries;
	union ccb *ccb;				/* CAM CCB this request serves */
	struct storvsc_softc *softc;		/* owning adapter */
	struct callout callout;			/* per-request timeout timer */
	struct sema synch_sema; /*Synchronize the request/response if needed */
	struct sglist *bounce_sgl;		/* bounce-buffer S/G list */
	unsigned int bounce_sgl_count;		/* segments in bounce_sgl */
	uint64_t not_aligned_seg_bits;		/* bitmask of unaligned segments */
};
125
/* Per-adapter (per vmbus device) driver state. */
struct storvsc_softc {
	struct hv_device                *hs_dev;	/* backing vmbus device */
	LIST_HEAD(, hv_storvsc_request) hs_free_list;	/* free request pool */
	struct mtx                      hs_lock;	/* protects softc state */
	struct storvsc_driver_props     *hs_drv_props;	/* blkvsc/storvsc props */
	int                             hs_unit;	/* device unit number */
	uint32_t                        hs_frozen;	/* CAM queue frozen flag */
	struct cam_sim                  *hs_sim;	/* CAM SIM handle */
	struct cam_path                 *hs_path;	/* CAM path handle */
	uint32_t                        hs_num_out_reqs; /* outstanding I/O count */
	boolean_t                       hs_destroy;	/* detach in progress */
	boolean_t                       hs_drain_notify; /* someone waits for drain */
	boolean_t                       hs_open_multi_channel; /* sub-channels OK'd */
	struct sema                     hs_drain_sema;	/* posted when drained */
	struct hv_storvsc_request       hs_init_req;	/* init-handshake request */
	struct hv_storvsc_request       hs_reset_req;	/* bus-reset request */
};
143
144
145 /**
146  * HyperV storvsc timeout testing cases:
147  * a. IO returned after first timeout;
148  * b. IO returned after second timeout and queue freeze;
149  * c. IO returned while timer handler is running
150  * The first can be tested by "sg_senddiag -vv /dev/daX",
151  * and the second and third can be done by
152  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
153  */
154 #define HVS_TIMEOUT_TEST 0
155
156 /*
157  * Bus/adapter reset functionality on the Hyper-V host is
158  * buggy and it will be disabled until
159  * it can be further tested.
160  */
161 #define HVS_HOST_RESET 0
162
/* Static per-flavor (blkvsc vs. storvsc) names and limits. */
struct storvsc_driver_props {
	char            *drv_name;	/* short device name */
	char            *drv_desc;	/* human-readable description */
	uint8_t         drv_max_luns_per_target;
	uint8_t         drv_max_ios_per_target;
	uint32_t        drv_ringbuffer_size;	/* vmbus ring size, bytes */
};

/* Which flavor of Hyper-V storage device a vmbus channel represents. */
enum hv_storage_type {
	DRIVER_BLKVSC,		/* IDE storage interface */
	DRIVER_STORVSC,		/* SCSI storage interface */
	DRIVER_UNKNOWN
};
176
177 #define HS_MAX_ADAPTERS 10
178
179 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
180
/*
 * Device-type GUIDs the host advertises on storage channels; bytes are
 * in the GUID wire format (first three fields little-endian).
 */
/* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
static const hv_guid gStorVscDeviceType={
	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
};

/* {32412632-86cb-44a2-9b5c-50d1417354f5} */
static const hv_guid gBlkVscDeviceType={
	.data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
};
192
/*
 * Entry order matches enum hv_storage_type (BLKVSC first, then
 * STORVSC); presumably indexed by that enum -- confirm at use sites.
 */
static struct storvsc_driver_props g_drv_props_table[] = {
	{"blkvsc", "Hyper-V IDE Storage Interface",
	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
	 STORVSC_RINGBUFFER_SIZE},
	{"storvsc", "Hyper-V SCSI Storage Interface",
	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
	 STORVSC_RINGBUFFER_SIZE}
};
201
202 static eventhandler_tag storvsc_handler_tag;
203 /*
204  * Sense buffer size changed in win8; have a run-time
205  * variable to track the size we should use.
206  */
207 static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
208
209 /*
210  * The size of the vmscsi_request has changed in win8. The
211  * additional size is for the newly added elements in the
212  * structure. These elements are valid only when we are talking
213  * to a win8 host.
214  * Track the correct size we need to apply.
215  */
216 static int vmscsi_size_delta;
217 /*
218  * The storage protocol version is determined during the
219  * initial exchange with the host.  It will indicate which
220  * storage functionality is available in the host.
221 */
222 static int vmstor_proto_version;
223
/* One negotiable protocol version plus its version-dependent sizes. */
struct vmstor_proto {
	int proto_version;	/* VMSTOR_PROTOCOL_VERSION_* value */
	int sense_buffer_size;	/* sense-buffer size for this version */
	int vmscsi_size_delta;	/* unused tail of vmscsi_request, bytes */
};

/*
 * Versions we offer the host, newest first; the first one the host
 * accepts wins (see the negotiation loop in hv_storvsc_channel_init()).
 */
static const struct vmstor_proto vmstor_proto_list[] = {
	{
		VMSTOR_PROTOCOL_VERSION_WIN10,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN8_1,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN8,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN7,
		PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
		sizeof(struct vmscsi_win8_extension),
	},
	{
		VMSTOR_PROTOCOL_VERSION_WIN6,
		PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
		sizeof(struct vmscsi_win8_extension),
	}
};
257
258 /* static functions */
259 static int storvsc_probe(device_t dev);
260 static int storvsc_attach(device_t dev);
261 static int storvsc_detach(device_t dev);
262 static void storvsc_poll(struct cam_sim * sim);
263 static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
264 static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
265 static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
266 static enum hv_storage_type storvsc_get_storage_type(device_t dev);
267 static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
268 static void hv_storvsc_on_channel_callback(void *context);
269 static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
270                                         struct vstor_packet *vstor_packet,
271                                         struct hv_storvsc_request *request);
272 static int hv_storvsc_connect_vsp(struct hv_device *device);
273 static void storvsc_io_done(struct hv_storvsc_request *reqp);
274 static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
275                                 bus_dma_segment_t *orig_sgl,
276                                 unsigned int orig_sgl_count,
277                                 uint64_t seg_bits);
278 void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
279                                 unsigned int dest_sgl_count,
280                                 struct sglist* src_sgl,
281                                 uint64_t seg_bits);
282
/* newbus device methods for the storvsc driver */
static device_method_t storvsc_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,         storvsc_probe),
	DEVMETHOD(device_attach,        storvsc_attach),
	DEVMETHOD(device_detach,        storvsc_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD_END
};

static driver_t storvsc_driver = {
	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
};

static devclass_t storvsc_devclass;
/* Attach below the vmbus; no module event handler, no extra args. */
DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
MODULE_VERSION(storvsc, 1);
MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
300
301
302 /**
303  * The host is capable of sending messages to us that are
304  * completely unsolicited. So, we need to address the race
305  * condition where we may be in the process of unloading the
306  * driver when the host may send us an unsolicited message.
307  * We address this issue by implementing a sequentially
308  * consistent protocol:
309  *
310  * 1. Channel callback is invoked while holding the channel lock
311  *    and an unloading driver will reset the channel callback under
312  *    the protection of this channel lock.
313  *
314  * 2. To ensure bounded wait time for unloading a driver, we don't
315  *    permit outgoing traffic once the device is marked as being
316  *    destroyed.
317  *
318  * 3. Once the device is marked as being destroyed, we only
319  *    permit incoming traffic to properly account for
320  *    packets already sent out.
321  */
322 static inline struct storvsc_softc *
323 get_stor_device(struct hv_device *device,
324                                 boolean_t outbound)
325 {
326         struct storvsc_softc *sc;
327
328         sc = device_get_softc(device->device);
329         if (sc == NULL) {
330                 return NULL;
331         }
332
333         if (outbound) {
334                 /*
335                  * Here we permit outgoing I/O only
336                  * if the device is not being destroyed.
337                  */
338
339                 if (sc->hs_destroy) {
340                         sc = NULL;
341                 }
342         } else {
343                 /*
344                  * inbound case; if being destroyed
345                  * only permit to account for
346                  * messages already sent out.
347                  */
348                 if (sc->hs_destroy && (sc->hs_num_out_reqs == 0)) {
349                         sc = NULL;
350                 }
351         }
352         return sc;
353 }
354
355 /**
356  * @brief Callback handler, will be invoked when receive mutil-channel offer
357  *
358  * @param context  new multi-channel
359  */
360 static void
361 storvsc_handle_sc_creation(void *context)
362 {
363         hv_vmbus_channel *new_channel;
364         struct hv_device *device;
365         struct storvsc_softc *sc;
366         struct vmstor_chan_props props;
367         int ret = 0;
368
369         new_channel = (hv_vmbus_channel *)context;
370         device = new_channel->primary_channel->device;
371         sc = get_stor_device(device, TRUE);
372         if (sc == NULL)
373                 return;
374
375         if (FALSE == sc->hs_open_multi_channel)
376                 return;
377         
378         memset(&props, 0, sizeof(props));
379
380         ret = hv_vmbus_channel_open(new_channel,
381             sc->hs_drv_props->drv_ringbuffer_size,
382             sc->hs_drv_props->drv_ringbuffer_size,
383             (void *)&props,
384             sizeof(struct vmstor_chan_props),
385             hv_storvsc_on_channel_callback,
386             new_channel);
387
388         return;
389 }
390
391 /**
392  * @brief Send multi-channel creation request to host
393  *
394  * @param device  a Hyper-V device pointer
395  * @param max_chans  the max channels supported by vmbus
396  */
397 static void
398 storvsc_send_multichannel_request(struct hv_device *dev, int max_chans)
399 {
400         struct storvsc_softc *sc;
401         struct hv_storvsc_request *request;
402         struct vstor_packet *vstor_packet;      
403         int request_channels_cnt = 0;
404         int ret;
405
406         /* get multichannels count that need to create */
407         request_channels_cnt = MIN(max_chans, mp_ncpus);
408
409         sc = get_stor_device(dev, TRUE);
410         if (sc == NULL) {
411                 printf("Storvsc_error: get sc failed while send mutilchannel "
412                     "request\n");
413                 return;
414         }
415
416         request = &sc->hs_init_req;
417
418         /* Establish a handler for multi-channel */
419         dev->channel->sc_creation_callback = storvsc_handle_sc_creation;
420
421         /* request the host to create multi-channel */
422         memset(request, 0, sizeof(struct hv_storvsc_request));
423         
424         sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
425
426         vstor_packet = &request->vstor_packet;
427         
428         vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
429         vstor_packet->flags = REQUEST_COMPLETION_FLAG;
430         vstor_packet->u.multi_channels_cnt = request_channels_cnt;
431
432         ret = hv_vmbus_channel_send_packet(
433             dev->channel,
434             vstor_packet,
435             VSTOR_PKT_SIZE,
436             (uint64_t)(uintptr_t)request,
437             HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
438             HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
439
440         /* wait for 5 seconds */
441         ret = sema_timedwait(&request->synch_sema, 5 * hz);
442         if (ret != 0) {         
443                 printf("Storvsc_error: create multi-channel timeout, %d\n",
444                     ret);
445                 return;
446         }
447
448         if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
449             vstor_packet->status != 0) {                
450                 printf("Storvsc_error: create multi-channel invalid operation "
451                     "(%d) or statue (%u)\n",
452                     vstor_packet->operation, vstor_packet->status);
453                 return;
454         }
455
456         sc->hs_open_multi_channel = TRUE;
457
458         if (bootverbose)
459                 printf("Storvsc create multi-channel success!\n");
460 }
461
/**
 * @brief initialize channel connection to parent partition
 *
 * Runs the vsc/vsp handshake over the already-open channel:
 * BEGININITIALIZATION, protocol-version negotiation (newest offer
 * first), QUERYPROPERTIES, ENDINITIALIZATION, and finally a
 * multi-channel request when the host supports it.  Each step sends a
 * completion-requested packet and waits up to 5 s on hs_init_req's
 * semaphore, which the channel callback posts.
 *
 * @param dev  a Hyper-V device pointer
 * @returns  0 on success, non-zero error on failure
 */
static int
hv_storvsc_channel_init(struct hv_device *dev)
{
	int ret = 0, i;
	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;
	struct storvsc_softc *sc;
	uint16_t max_chans = 0;
	boolean_t support_multichannel = FALSE;

	max_chans = 0;
	support_multichannel = FALSE;

	sc = get_stor_device(dev, TRUE);
	if (sc == NULL)
		return (ENODEV);

	/* hs_init_req is reused (memset + refill) for every handshake step. */
	request = &sc->hs_init_req;
	memset(request, 0, sizeof(struct hv_storvsc_request));
	vstor_packet = &request->vstor_packet;
	request->softc = sc;

	/**
	 * Initiate the vsc/vsp initialization protocol on the open channel
	 */
	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));

	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;


	ret = hv_vmbus_channel_send_packet(
			dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)request,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0)
		goto cleanup;

	/* wait 5 seconds */
	ret = sema_timedwait(&request->synch_sema, 5 * hz);
	if (ret != 0)
		goto cleanup;

	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
		vstor_packet->status != 0) {
		goto cleanup;
	}

	/*
	 * Offer each protocol version in vmstor_proto_list (newest
	 * first); the first one the host accepts (status == 0) sets
	 * the globals used for all later I/O sizing.
	 */
	for (i = 0; i < nitems(vmstor_proto_list); i++) {
		/* reuse the packet for version range supported */

		memset(vstor_packet, 0, sizeof(struct vstor_packet));
		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
		vstor_packet->flags = REQUEST_COMPLETION_FLAG;

		vstor_packet->u.version.major_minor =
			vmstor_proto_list[i].proto_version;

		/* revision is only significant for Windows guests */
		vstor_packet->u.version.revision = 0;

		ret = hv_vmbus_channel_send_packet(
			dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)request,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

		if (ret != 0)
			goto cleanup;

		/* wait 5 seconds */
		ret = sema_timedwait(&request->synch_sema, 5 * hz);

		if (ret)
			goto cleanup;

		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
			ret = EINVAL;
			goto cleanup;
		}
		if (vstor_packet->status == 0) {
			vmstor_proto_version =
				vmstor_proto_list[i].proto_version;
			sense_buffer_size =
				vmstor_proto_list[i].sense_buffer_size;
			vmscsi_size_delta =
				vmstor_proto_list[i].vmscsi_size_delta;
			break;
		}
	}

	/* Host rejected every version we offered. */
	if (vstor_packet->status != 0) {
		ret = EINVAL;
		goto cleanup;
	}
	/**
	 * Query channel properties
	 */
	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = hv_vmbus_channel_send_packet(
				dev->channel,
				vstor_packet,
				VSTOR_PKT_SIZE,
				(uint64_t)(uintptr_t)request,
				HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
				HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if ( ret != 0)
		goto cleanup;

	/* wait 5 seconds */
	ret = sema_timedwait(&request->synch_sema, 5 * hz);

	if (ret != 0)
		goto cleanup;

	/* TODO: Check returned version */
	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0) {
		goto cleanup;
	}

	/* multi-channels feature is supported by WIN8 and above version */
	max_chans = vstor_packet->u.chan_props.max_channel_cnt;
	if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) &&
	    (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) &&
	    (vstor_packet->u.chan_props.flags &
	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
		support_multichannel = TRUE;
	}

	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = hv_vmbus_channel_send_packet(
			dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)request,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0) {
		goto cleanup;
	}

	/* wait 5 seconds */
	ret = sema_timedwait(&request->synch_sema, 5 * hz);

	if (ret != 0)
		goto cleanup;

	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0)
		goto cleanup;

	/*
	 * If multi-channel is supported, send multichannel create
	 * request to host.
	 */
	if (support_multichannel)
		storvsc_send_multichannel_request(dev, max_chans);

cleanup:
	/*
	 * Also tears down the semaphore re-initialized inside
	 * storvsc_send_multichannel_request() on the same hs_init_req.
	 */
	sema_destroy(&request->synch_sema);
	return (ret);
}
645
646 /**
647  * @brief Open channel connection to paraent partition StorVSP driver
648  *
649  * Open and initialize channel connection to parent partition StorVSP driver.
650  *
651  * @param pointer to a Hyper-V device
652  * @returns 0 on success, non-zero error on failure
653  */
654 static int
655 hv_storvsc_connect_vsp(struct hv_device *dev)
656 {       
657         int ret = 0;
658         struct vmstor_chan_props props;
659         struct storvsc_softc *sc;
660
661         sc = device_get_softc(dev->device);
662                 
663         memset(&props, 0, sizeof(struct vmstor_chan_props));
664
665         /*
666          * Open the channel
667          */
668
669         ret = hv_vmbus_channel_open(
670                 dev->channel,
671                 sc->hs_drv_props->drv_ringbuffer_size,
672                 sc->hs_drv_props->drv_ringbuffer_size,
673                 (void *)&props,
674                 sizeof(struct vmstor_chan_props),
675                 hv_storvsc_on_channel_callback,
676                 dev->channel);
677
678         if (ret != 0) {
679                 return ret;
680         }
681
682         ret = hv_storvsc_channel_init(dev);
683
684         return (ret);
685 }
686
687 #if HVS_HOST_RESET
/*
 * Ask the host to reset the (virtual) SCSI bus and wait up to 5 s for
 * completion.  Compiled out by default (HVS_HOST_RESET == 0) because
 * host-side reset is buggy; see the comment above that define.
 */
static int
hv_storvsc_host_reset(struct hv_device *dev)
{
	int ret = 0;
	struct storvsc_softc *sc;

	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	/* Outbound lookup: fails once the device is being destroyed. */
	sc = get_stor_device(dev, TRUE);
	if (sc == NULL) {
		return ENODEV;
	}

	request = &sc->hs_reset_req;
	request->softc = sc;
	vstor_packet = &request->vstor_packet;

	/* Posted by the channel callback when the reset completes. */
	sema_init(&request->synch_sema, 0, "stor synch sema");

	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = hv_vmbus_channel_send_packet(dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)&sc->hs_reset_req,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0) {
		goto cleanup;
	}

	ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */

	if (ret) {
		goto cleanup;
	}


	/*
	 * At this point, all outstanding requests in the adapter
	 * should have been flushed out and return to us
	 */

cleanup:
	sema_destroy(&request->synch_sema);
	return (ret);
}
738 #endif /* HVS_HOST_RESET */
739
740 /**
741  * @brief Function to initiate an I/O request
742  *
743  * @param device Hyper-V device pointer
744  * @param request pointer to a request structure
745  * @returns 0 on success, non-zero error on failure
746  */
747 static int
748 hv_storvsc_io_request(struct hv_device *device,
749                                           struct hv_storvsc_request *request)
750 {
751         struct storvsc_softc *sc;
752         struct vstor_packet *vstor_packet = &request->vstor_packet;
753         struct hv_vmbus_channel* outgoing_channel = NULL;
754         int ret = 0;
755
756         sc = get_stor_device(device, TRUE);
757
758         if (sc == NULL) {
759                 return ENODEV;
760         }
761
762         vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
763
764         vstor_packet->u.vm_srb.length = VSTOR_PKT_SIZE;
765         
766         vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
767
768         vstor_packet->u.vm_srb.transfer_len = request->data_buf.length;
769
770         vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
771
772         outgoing_channel = vmbus_select_outgoing_channel(device->channel);
773
774         mtx_unlock(&request->softc->hs_lock);
775         if (request->data_buf.length) {
776                 ret = hv_vmbus_channel_send_packet_multipagebuffer(
777                                 outgoing_channel,
778                                 &request->data_buf,
779                                 vstor_packet,
780                                 VSTOR_PKT_SIZE,
781                                 (uint64_t)(uintptr_t)request);
782
783         } else {
784                 ret = hv_vmbus_channel_send_packet(
785                         outgoing_channel,
786                         vstor_packet,
787                         VSTOR_PKT_SIZE,
788                         (uint64_t)(uintptr_t)request,
789                         HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
790                         HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
791         }
792         mtx_lock(&request->softc->hs_lock);
793
794         if (ret != 0) {
795                 printf("Unable to send packet %p ret %d", vstor_packet, ret);
796         } else {
797                 atomic_add_int(&sc->hs_num_out_reqs, 1);
798         }
799
800         return (ret);
801 }
802
803
/**
 * Process IO_COMPLETION_OPERATION and ready
 * the result to be completed for upper layer
 * processing by the CAM layer.
 */
static void
hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
			   struct vstor_packet *vstor_packet,
			   struct hv_storvsc_request *request)
{
	struct vmscsi_req *vm_srb;

	vm_srb = &vstor_packet->u.vm_srb;

	/*
	 * Copy some fields of the host's response into the request structure,
	 * because the fields will be used later in storvsc_io_done().
	 */
	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;

	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
		/* Autosense data available */

		/* The host's sense data must fit the request's buffer. */
		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
				("vm_srb->sense_info_len <= "
				 "request->sense_info_len"));

		memcpy(request->sense_data, vm_srb->u.sense_data,
			vm_srb->sense_info_len);

		/* Shrink to the length the host actually returned. */
		request->sense_info_len = vm_srb->sense_info_len;
	}

	/* Complete request by passing to the CAM layer */
	storvsc_io_done(request);
	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
	/* Wake a drain waiter once the last outstanding I/O is back. */
	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
		sema_post(&sc->hs_drain_sema);
	}
}
847
848 static void
849 hv_storvsc_rescan_target(struct storvsc_softc *sc)
850 {
851         path_id_t pathid;
852         target_id_t targetid;
853         union ccb *ccb;
854
855         pathid = cam_sim_path(sc->hs_sim);
856         targetid = CAM_TARGET_WILDCARD;
857
858         /*
859          * Allocate a CCB and schedule a rescan.
860          */
861         ccb = xpt_alloc_ccb_nowait();
862         if (ccb == NULL) {
863                 printf("unable to alloc CCB for rescan\n");
864                 return;
865         }
866
867         if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
868             CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
869                 printf("unable to create path for rescan, pathid: %d,"
870                     "targetid: %d\n", pathid, targetid);
871                 xpt_free_ccb(ccb);
872                 return;
873         }
874
875         if (targetid == CAM_TARGET_WILDCARD)
876                 ccb->ccb_h.func_code = XPT_SCAN_BUS;
877         else
878                 ccb->ccb_h.func_code = XPT_SCAN_TGT;
879
880         xpt_rescan(ccb);
881 }
882
/**
 * @brief VMBUS channel callback for storvsc
 *
 * Drains all pending inbound packets on the given VMBUS channel.
 * Completions for the driver's internal init/reset requests wake the
 * synchronous waiter; everything else is a VSCSI protocol message
 * dispatched on its operation code.
 *
 * @param context the hv_vmbus_channel that became readable
 */
static void
hv_storvsc_on_channel_callback(void *context)
{
	int ret = 0;
	hv_vmbus_channel *channel = (hv_vmbus_channel *)context;
	struct hv_device *device = NULL;
	struct storvsc_softc *sc;
	uint32_t bytes_recvd;
	uint64_t request_id;
	/* Stack buffer large enough for one rounded-up vstor packet. */
	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	/*
	 * A subchannel has no device pointer of its own; borrow the
	 * primary channel's device in that case.
	 */
	if (channel->primary_channel != NULL){
		device = channel->primary_channel->device;
	} else {
		device = channel->device;
	}

	KASSERT(device, ("device is NULL"));

	sc = get_stor_device(device, FALSE);
	if (sc == NULL) {
		printf("Storvsc_error: get stor device failed.\n");
		return;
	}

	ret = hv_vmbus_channel_recv_packet(
			channel,
			packet,
			roundup2(VSTOR_PKT_SIZE, 8),
			&bytes_recvd,
			&request_id);

	/* Loop until the ring buffer is empty (bytes_recvd == 0). */
	while ((ret == 0) && (bytes_recvd > 0)) {
		/*
		 * request_id is the driver-side pointer we supplied when
		 * the packet was sent; recover the originating request.
		 */
		request = (struct hv_storvsc_request *)(uintptr_t)request_id;

		if ((request == &sc->hs_init_req) ||
			(request == &sc->hs_reset_req)) {
			/*
			 * Internal (init/reset) request: copy the whole
			 * response packet back and wake the waiter blocked
			 * on the semaphore.
			 */
			memcpy(&request->vstor_packet, packet,
				   sizeof(struct vstor_packet));
			sema_post(&request->synch_sema);
		} else {
			vstor_packet = (struct vstor_packet *)packet;
			switch(vstor_packet->operation) {
			case VSTOR_OPERATION_COMPLETEIO:
				if (request == NULL)
					panic("VMBUS: storvsc received a "
					    "packet with NULL request id in "
					    "COMPLETEIO operation.");

				hv_storvsc_on_iocompletion(sc,
							vstor_packet, request);
				break;
			case VSTOR_OPERATION_REMOVEDEVICE:
				printf("VMBUS: storvsc operation %d not "
				    "implemented.\n", vstor_packet->operation);
				/* TODO: implement */
				break;
			case VSTOR_OPERATION_ENUMERATE_BUS:
				/* Host-side LUN set changed; schedule rescan. */
				hv_storvsc_rescan_target(sc);
				break;
			default:
				/* Unknown operations are silently ignored. */
				break;
			}
		}
		ret = hv_vmbus_channel_recv_packet(
				channel,
				packet,
				roundup2(VSTOR_PKT_SIZE, 8),
				&bytes_recvd,
				&request_id);
	}
}
957
958 /**
959  * @brief StorVSC probe function
960  *
961  * Device probe function.  Returns 0 if the input device is a StorVSC
962  * device.  Otherwise, a ENXIO is returned.  If the input device is
963  * for BlkVSC (paravirtual IDE) device and this support is disabled in
964  * favor of the emulated ATA/IDE device, return ENXIO.
965  *
966  * @param a device
967  * @returns 0 on success, ENXIO if not a matcing StorVSC device
968  */
969 static int
970 storvsc_probe(device_t dev)
971 {
972         int ret = ENXIO;
973         
974         switch (storvsc_get_storage_type(dev)) {
975         case DRIVER_BLKVSC:
976                 if(bootverbose)
977                         device_printf(dev, "Enlightened ATA/IDE detected\n");
978                 ret = BUS_PROBE_DEFAULT;
979                 break;
980         case DRIVER_STORVSC:
981                 if(bootverbose)
982                         device_printf(dev, "Enlightened SCSI device detected\n");
983                 ret = BUS_PROBE_DEFAULT;
984                 break;
985         default:
986                 ret = ENXIO;
987         }
988         return (ret);
989 }
990
991 /**
992  * @brief StorVSC attach function
993  *
994  * Function responsible for allocating per-device structures,
995  * setting up CAM interfaces and scanning for available LUNs to
996  * be used for SCSI device peripherals.
997  *
998  * @param a device
999  * @returns 0 on success or an error on failure
1000  */
1001 static int
1002 storvsc_attach(device_t dev)
1003 {
1004         struct hv_device *hv_dev = vmbus_get_devctx(dev);
1005         enum hv_storage_type stor_type;
1006         struct storvsc_softc *sc;
1007         struct cam_devq *devq;
1008         int ret, i, j;
1009         struct hv_storvsc_request *reqp;
1010         struct root_hold_token *root_mount_token = NULL;
1011         struct hv_sgl_node *sgl_node = NULL;
1012         void *tmp_buff = NULL;
1013
1014         /*
1015          * We need to serialize storvsc attach calls.
1016          */
1017         root_mount_token = root_mount_hold("storvsc");
1018
1019         sc = device_get_softc(dev);
1020         if (sc == NULL) {
1021                 ret = ENOMEM;
1022                 goto cleanup;
1023         }
1024
1025         stor_type = storvsc_get_storage_type(dev);
1026
1027         if (stor_type == DRIVER_UNKNOWN) {
1028                 ret = ENODEV;
1029                 goto cleanup;
1030         }
1031
1032         bzero(sc, sizeof(struct storvsc_softc));
1033
1034         /* fill in driver specific properties */
1035         sc->hs_drv_props = &g_drv_props_table[stor_type];
1036
1037         /* fill in device specific properties */
1038         sc->hs_unit     = device_get_unit(dev);
1039         sc->hs_dev      = hv_dev;
1040         device_set_desc(dev, g_drv_props_table[stor_type].drv_desc);
1041
1042         LIST_INIT(&sc->hs_free_list);
1043         mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
1044
1045         for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
1046                 reqp = malloc(sizeof(struct hv_storvsc_request),
1047                                  M_DEVBUF, M_WAITOK|M_ZERO);
1048                 reqp->softc = sc;
1049
1050                 LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
1051         }
1052
1053         /* create sg-list page pool */
1054         if (FALSE == g_hv_sgl_page_pool.is_init) {
1055                 g_hv_sgl_page_pool.is_init = TRUE;
1056                 LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
1057                 LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
1058
1059                 /*
1060                  * Pre-create SG list, each SG list with
1061                  * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each
1062                  * segment has one page buffer
1063                  */
1064                 for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) {
1065                         sgl_node = malloc(sizeof(struct hv_sgl_node),
1066                             M_DEVBUF, M_WAITOK|M_ZERO);
1067
1068                         sgl_node->sgl_data =
1069                             sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT,
1070                             M_WAITOK|M_ZERO);
1071
1072                         for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
1073                                 tmp_buff = malloc(PAGE_SIZE,
1074                                     M_DEVBUF, M_WAITOK|M_ZERO);
1075
1076                                 sgl_node->sgl_data->sg_segs[j].ss_paddr =
1077                                     (vm_paddr_t)tmp_buff;
1078                         }
1079
1080                         LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
1081                             sgl_node, link);
1082                 }
1083         }
1084
1085         sc->hs_destroy = FALSE;
1086         sc->hs_drain_notify = FALSE;
1087         sc->hs_open_multi_channel = FALSE;
1088         sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
1089
1090         ret = hv_storvsc_connect_vsp(hv_dev);
1091         if (ret != 0) {
1092                 goto cleanup;
1093         }
1094
1095         /*
1096          * Create the device queue.
1097          * Hyper-V maps each target to one SCSI HBA
1098          */
1099         devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
1100         if (devq == NULL) {
1101                 device_printf(dev, "Failed to alloc device queue\n");
1102                 ret = ENOMEM;
1103                 goto cleanup;
1104         }
1105
1106         sc->hs_sim = cam_sim_alloc(storvsc_action,
1107                                 storvsc_poll,
1108                                 sc->hs_drv_props->drv_name,
1109                                 sc,
1110                                 sc->hs_unit,
1111                                 &sc->hs_lock, 1,
1112                                 sc->hs_drv_props->drv_max_ios_per_target,
1113                                 devq);
1114
1115         if (sc->hs_sim == NULL) {
1116                 device_printf(dev, "Failed to alloc sim\n");
1117                 cam_simq_free(devq);
1118                 ret = ENOMEM;
1119                 goto cleanup;
1120         }
1121
1122         mtx_lock(&sc->hs_lock);
1123         /* bus_id is set to 0, need to get it from VMBUS channel query? */
1124         if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
1125                 cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1126                 mtx_unlock(&sc->hs_lock);
1127                 device_printf(dev, "Unable to register SCSI bus\n");
1128                 ret = ENXIO;
1129                 goto cleanup;
1130         }
1131
1132         if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
1133                  cam_sim_path(sc->hs_sim),
1134                 CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1135                 xpt_bus_deregister(cam_sim_path(sc->hs_sim));
1136                 cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1137                 mtx_unlock(&sc->hs_lock);
1138                 device_printf(dev, "Unable to create path\n");
1139                 ret = ENXIO;
1140                 goto cleanup;
1141         }
1142
1143         mtx_unlock(&sc->hs_lock);
1144
1145         root_mount_rel(root_mount_token);
1146         return (0);
1147
1148
1149 cleanup:
1150         root_mount_rel(root_mount_token);
1151         while (!LIST_EMPTY(&sc->hs_free_list)) {
1152                 reqp = LIST_FIRST(&sc->hs_free_list);
1153                 LIST_REMOVE(reqp, link);
1154                 free(reqp, M_DEVBUF);
1155         }
1156
1157         while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1158                 sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1159                 LIST_REMOVE(sgl_node, link);
1160                 for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
1161                         if (NULL !=
1162                             (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1163                                 free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1164                         }
1165                 }
1166                 sglist_free(sgl_node->sgl_data);
1167                 free(sgl_node, M_DEVBUF);
1168         }
1169
1170         return (ret);
1171 }
1172
1173 /**
1174  * @brief StorVSC device detach function
1175  *
1176  * This function is responsible for safely detaching a
1177  * StorVSC device.  This includes waiting for inbound responses
1178  * to complete and freeing associated per-device structures.
1179  *
1180  * @param dev a device
1181  * returns 0 on success
1182  */
static int
storvsc_detach(device_t dev)
{
	struct storvsc_softc *sc = device_get_softc(dev);
	struct hv_storvsc_request *reqp = NULL;
	struct hv_device *hv_device = vmbus_get_devctx(dev);
	struct hv_sgl_node *sgl_node = NULL;
	int j = 0;

	/*
	 * Mark the device as being destroyed under the inbound lock so
	 * the channel callback observes a consistent flag.
	 */
	mtx_lock(&hv_device->channel->inbound_lock);
	sc->hs_destroy = TRUE;
	mtx_unlock(&hv_device->channel->inbound_lock);

	/*
	 * At this point, all outbound traffic should be disabled. We
	 * only allow inbound traffic (responses) to proceed so that
	 * outstanding requests can be completed.
	 */

	/* Block until the completion path posts hs_drain_sema
	 * (hs_num_out_reqs reaching zero with hs_drain_notify set). */
	sc->hs_drain_notify = TRUE;
	sema_wait(&sc->hs_drain_sema);
	sc->hs_drain_notify = FALSE;

	/*
	 * Since we have already drained, we don't need to busy wait.
	 * The call to close the channel will reset the callback
	 * under the protection of the incoming channel lock.
	 */

	hv_vmbus_channel_close(hv_device->channel);

	/* Free the pre-allocated request pool. */
	mtx_lock(&sc->hs_lock);
	while (!LIST_EMPTY(&sc->hs_free_list)) {
		reqp = LIST_FIRST(&sc->hs_free_list);
		LIST_REMOVE(reqp, link);

		free(reqp, M_DEVBUF);
	}
	mtx_unlock(&sc->hs_lock);

	/*
	 * Tear down the global bounce-page pool.
	 * NOTE(review): this pool is shared across storvsc instances but
	 * is freed when ANY instance detaches — presumably safe only
	 * because all instances detach together; confirm against callers.
	 */
	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
		LIST_REMOVE(sgl_node, link);
		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){
			/* ss_paddr holds a kernel virtual address here. */
			if (NULL !=
			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
			}
		}
		sglist_free(sgl_node->sgl_data);
		free(sgl_node, M_DEVBUF);
	}

	return (0);
}
1238
1239 #if HVS_TIMEOUT_TEST
1240 /**
1241  * @brief unit test for timed out operations
1242  *
1243  * This function provides unit testing capability to simulate
1244  * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
1245  * is required.
1246  *
1247  * @param reqp pointer to a request structure
1248  * @param opcode SCSI operation being performed
1249  * @param wait if 1, wait for I/O to complete
1250  */
static void
storvsc_timeout_test(struct hv_storvsc_request *reqp,
		uint8_t opcode, int wait)
{
	int ret;
	union ccb *ccb = reqp->ccb;
	struct storvsc_softc *sc = reqp->softc;

	/* Only exercise requests carrying the opcode under test. */
	if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
		return;
	}

	/* Take the event mutex BEFORE issuing the I/O so the completion
	 * cannot signal the cv before we are waiting on it. */
	if (wait) {
		mtx_lock(&reqp->event.mtx);
	}
	ret = hv_storvsc_io_request(sc->hs_dev, reqp);
	if (ret != 0) {
		if (wait) {
			mtx_unlock(&reqp->event.mtx);
		}
		printf("%s: io_request failed with %d.\n",
				__func__, ret);
		/* Fail the CCB and recycle the request structure. */
		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
		mtx_lock(&sc->hs_lock);
		storvsc_free_request(sc, reqp);
		xpt_done(ccb);
		mtx_unlock(&sc->hs_lock);
		return;
	}

	if (wait) {
		xpt_print(ccb->ccb_h.path,
				"%u: %s: waiting for IO return.\n",
				ticks, __func__);
		/* Wait up to 60 seconds for the completion to signal us. */
		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
		mtx_unlock(&reqp->event.mtx);
		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
				ticks, __func__, (ret == 0)?
				"IO return detected" :
				"IO return not detected");
		/*
		 * Now both the timer handler and io done are running
		 * simultaneously. We want to confirm the io done always
		 * finishes after the timer handler exits. So reqp used by
		 * timer handler is not freed or stale. Do busy loop for
		 * another 1/10 second to make sure io done does
		 * wait for the timer handler to complete.
		 */
		DELAY(100*1000);
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
				"%u: %s: finishing, queue frozen %d, "
				"ccb status 0x%x scsi_status 0x%x.\n",
				ticks, __func__, sc->hs_frozen,
				ccb->ccb_h.status,
				ccb->csio.scsi_status);
		mtx_unlock(&sc->hs_lock);
	}
}
1310 #endif /* HVS_TIMEOUT_TEST */
1311
1312 #ifdef notyet
1313 /**
1314  * @brief timeout handler for requests
1315  *
1316  * This function is called as a result of a callout expiring.
1317  *
1318  * @param arg pointer to a request
1319  */
static void
storvsc_timeout(void *arg)
{
	struct hv_storvsc_request *reqp = arg;
	struct storvsc_softc *sc = reqp->softc;
	union ccb *ccb = reqp->ccb;

	/* First expiry: log, grant one more timeout period, re-arm. */
	if (reqp->retries == 0) {
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
		    ticks, reqp, ccb->ccb_h.timeout / 1000);
		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
		mtx_unlock(&sc->hs_lock);

		reqp->retries++;
		/* timeout is in milliseconds; convert via SBT_1MS. */
		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
		    0, storvsc_timeout, reqp, 0);
#if HVS_TIMEOUT_TEST
		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
#endif
		return;
	}

	/* Second expiry: freeze the SIM queue (once) to stop new I/O. */
	mtx_lock(&sc->hs_lock);
	xpt_print(ccb->ccb_h.path,
		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
		(sc->hs_frozen == 0)?
		"freezing the queue" : "the queue is already frozen");
	if (sc->hs_frozen == 0) {
		sc->hs_frozen = 1;
		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
	}
	mtx_unlock(&sc->hs_lock);

#if HVS_TIMEOUT_TEST
	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
#endif
}
1360 #endif
1361
1362 /**
1363  * @brief StorVSC device poll function
1364  *
1365  * This function is responsible for servicing requests when
1366  * interrupts are disabled (i.e when we are dumping core.)
1367  *
1368  * @param sim a pointer to a CAM SCSI interface module
1369  */
1370 static void
1371 storvsc_poll(struct cam_sim *sim)
1372 {
1373         struct storvsc_softc *sc = cam_sim_softc(sim);
1374
1375         mtx_assert(&sc->hs_lock, MA_OWNED);
1376         mtx_unlock(&sc->hs_lock);
1377         hv_storvsc_on_channel_callback(sc->hs_dev->channel);
1378         mtx_lock(&sc->hs_lock);
1379 }
1380
1381 /**
1382  * @brief StorVSC device action function
1383  *
1384  * This function is responsible for handling SCSI operations which
1385  * are passed from the CAM layer.  The requests are in the form of
1386  * CAM control blocks which indicate the action being performed.
1387  * Not all actions require converting the request to a VSCSI protocol
1388  * message - these actions can be responded to by this driver.
1389  * Requests which are destined for a backend storage device are converted
1390  * to a VSCSI protocol message and sent on the channel connection associated
1391  * with this device.
1392  *
1393  * @param sim pointer to a CAM SCSI interface module
1394  * @param ccb pointer to a CAM control block
1395  */
1396 static void
1397 storvsc_action(struct cam_sim *sim, union ccb *ccb)
1398 {
1399         struct storvsc_softc *sc = cam_sim_softc(sim);
1400         int res;
1401
1402         mtx_assert(&sc->hs_lock, MA_OWNED);
1403         switch (ccb->ccb_h.func_code) {
1404         case XPT_PATH_INQ: {
1405                 struct ccb_pathinq *cpi = &ccb->cpi;
1406
1407                 cpi->version_num = 1;
1408                 cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
1409                 cpi->target_sprt = 0;
1410                 cpi->hba_misc = PIM_NOBUSRESET;
1411                 cpi->hba_eng_cnt = 0;
1412                 cpi->max_target = STORVSC_MAX_TARGETS;
1413                 cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
1414                 cpi->initiator_id = cpi->max_target;
1415                 cpi->bus_id = cam_sim_bus(sim);
1416                 cpi->base_transfer_speed = 300000;
1417                 cpi->transport = XPORT_SAS;
1418                 cpi->transport_version = 0;
1419                 cpi->protocol = PROTO_SCSI;
1420                 cpi->protocol_version = SCSI_REV_SPC2;
1421                 strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
1422                 strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
1423                 strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1424                 cpi->unit_number = cam_sim_unit(sim);
1425
1426                 ccb->ccb_h.status = CAM_REQ_CMP;
1427                 xpt_done(ccb);
1428                 return;
1429         }
1430         case XPT_GET_TRAN_SETTINGS: {
1431                 struct  ccb_trans_settings *cts = &ccb->cts;
1432
1433                 cts->transport = XPORT_SAS;
1434                 cts->transport_version = 0;
1435                 cts->protocol = PROTO_SCSI;
1436                 cts->protocol_version = SCSI_REV_SPC2;
1437
1438                 /* enable tag queuing and disconnected mode */
1439                 cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
1440                 cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1441                 cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1442                 cts->xport_specific.valid = CTS_SPI_VALID_DISC;
1443                 cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
1444                         
1445                 ccb->ccb_h.status = CAM_REQ_CMP;
1446                 xpt_done(ccb);
1447                 return;
1448         }
1449         case XPT_SET_TRAN_SETTINGS:     {
1450                 ccb->ccb_h.status = CAM_REQ_CMP;
1451                 xpt_done(ccb);
1452                 return;
1453         }
1454         case XPT_CALC_GEOMETRY:{
1455                 cam_calc_geometry(&ccb->ccg, 1);
1456                 xpt_done(ccb);
1457                 return;
1458         }
1459         case  XPT_RESET_BUS:
1460         case  XPT_RESET_DEV:{
1461 #if HVS_HOST_RESET
1462                 if ((res = hv_storvsc_host_reset(sc->hs_dev)) != 0) {
1463                         xpt_print(ccb->ccb_h.path,
1464                                 "hv_storvsc_host_reset failed with %d\n", res);
1465                         ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1466                         xpt_done(ccb);
1467                         return;
1468                 }
1469                 ccb->ccb_h.status = CAM_REQ_CMP;
1470                 xpt_done(ccb);
1471                 return;
1472 #else
1473                 xpt_print(ccb->ccb_h.path,
1474                                   "%s reset not supported.\n",
1475                                   (ccb->ccb_h.func_code == XPT_RESET_BUS)?
1476                                   "bus" : "dev");
1477                 ccb->ccb_h.status = CAM_REQ_INVALID;
1478                 xpt_done(ccb);
1479                 return;
1480 #endif  /* HVS_HOST_RESET */
1481         }
1482         case XPT_SCSI_IO:
1483         case XPT_IMMED_NOTIFY: {
1484                 struct hv_storvsc_request *reqp = NULL;
1485
1486                 if (ccb->csio.cdb_len == 0) {
1487                         panic("cdl_len is 0\n");
1488                 }
1489
1490                 if (LIST_EMPTY(&sc->hs_free_list)) {
1491                         ccb->ccb_h.status = CAM_REQUEUE_REQ;
1492                         if (sc->hs_frozen == 0) {
1493                                 sc->hs_frozen = 1;
1494                                 xpt_freeze_simq(sim, /* count*/1);
1495                         }
1496                         xpt_done(ccb);
1497                         return;
1498                 }
1499
1500                 reqp = LIST_FIRST(&sc->hs_free_list);
1501                 LIST_REMOVE(reqp, link);
1502
1503                 bzero(reqp, sizeof(struct hv_storvsc_request));
1504                 reqp->softc = sc;
1505                 
1506                 ccb->ccb_h.status |= CAM_SIM_QUEUED;
1507                 if ((res = create_storvsc_request(ccb, reqp)) != 0) {
1508                         ccb->ccb_h.status = CAM_REQ_INVALID;
1509                         xpt_done(ccb);
1510                         return;
1511                 }
1512
1513 #ifdef notyet
1514                 if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1515                         callout_init(&reqp->callout, CALLOUT_MPSAFE);
1516                         callout_reset_sbt(&reqp->callout,
1517                             SBT_1MS * ccb->ccb_h.timeout, 0,
1518                             storvsc_timeout, reqp, 0);
1519 #if HVS_TIMEOUT_TEST
1520                         cv_init(&reqp->event.cv, "storvsc timeout cv");
1521                         mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
1522                                         NULL, MTX_DEF);
1523                         switch (reqp->vstor_packet.vm_srb.cdb[0]) {
1524                                 case MODE_SELECT_10:
1525                                 case SEND_DIAGNOSTIC:
1526                                         /* To have timer send the request. */
1527                                         return;
1528                                 default:
1529                                         break;
1530                         }
1531 #endif /* HVS_TIMEOUT_TEST */
1532                 }
1533 #endif
1534
1535                 if ((res = hv_storvsc_io_request(sc->hs_dev, reqp)) != 0) {
1536                         xpt_print(ccb->ccb_h.path,
1537                                 "hv_storvsc_io_request failed with %d\n", res);
1538                         ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1539                         storvsc_free_request(sc, reqp);
1540                         xpt_done(ccb);
1541                         return;
1542                 }
1543                 return;
1544         }
1545
1546         default:
1547                 ccb->ccb_h.status = CAM_REQ_INVALID;
1548                 xpt_done(ccb);
1549                 return;
1550         }
1551 }
1552
1553 /**
1554  * @brief destroy bounce buffer
1555  *
1556  * This function is responsible for destroy a Scatter/Gather list
1557  * that create by storvsc_create_bounce_buffer()
1558  *
1559  * @param sgl- the Scatter/Gather need be destroy
1560  * @param sg_count- page count of the SG list.
1561  *
1562  */
1563 static void
1564 storvsc_destroy_bounce_buffer(struct sglist *sgl)
1565 {
1566         struct hv_sgl_node *sgl_node = NULL;
1567
1568         sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1569         LIST_REMOVE(sgl_node, link);
1570         if (NULL == sgl_node) {
1571                 printf("storvsc error: not enough in use sgl\n");
1572                 return;
1573         }
1574         sgl_node->sgl_data = sgl;
1575         LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1576 }
1577
1578 /**
1579  * @brief create bounce buffer
1580  *
1581  * This function is responsible for create a Scatter/Gather list,
1582  * which hold several pages that can be aligned with page size.
1583  *
1584  * @param seg_count- SG-list segments count
1585  * @param write - if WRITE_TYPE, set SG list page used size to 0,
1586  * otherwise set used size to page size.
1587  *
1588  * return NULL if create failed
1589  */
1590 static struct sglist *
1591 storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1592 {
1593         int i = 0;
1594         struct sglist *bounce_sgl = NULL;
1595         unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1596         struct hv_sgl_node *sgl_node = NULL;    
1597
1598         /* get struct sglist from free_sgl_list */
1599         sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1600         LIST_REMOVE(sgl_node, link);
1601         if (NULL == sgl_node) {
1602                 printf("storvsc error: not enough free sgl\n");
1603                 return NULL;
1604         }
1605         bounce_sgl = sgl_node->sgl_data;
1606         LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1607
1608         bounce_sgl->sg_maxseg = seg_count;
1609
1610         if (write == WRITE_TYPE)
1611                 bounce_sgl->sg_nseg = 0;
1612         else
1613                 bounce_sgl->sg_nseg = seg_count;
1614
1615         for (i = 0; i < seg_count; i++)
1616                 bounce_sgl->sg_segs[i].ss_len = buf_len;
1617
1618         return bounce_sgl;
1619 }
1620
1621 /**
1622  * @brief copy data from SG list to bounce buffer
1623  *
1624  * This function is responsible for copy data from one SG list's segments
1625  * to another SG list which used as bounce buffer.
1626  *
1627  * @param bounce_sgl - the destination SG list
1628  * @param orig_sgl - the segment of the source SG list.
1629  * @param orig_sgl_count - the count of segments.
1630  * @param orig_sgl_count - indicate which segment need bounce buffer,
1631  *  set 1 means need.
1632  *
1633  */
1634 static void
1635 storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
1636                                bus_dma_segment_t *orig_sgl,
1637                                unsigned int orig_sgl_count,
1638                                uint64_t seg_bits)
1639 {
1640         int src_sgl_idx = 0;
1641
1642         for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
1643                 if (seg_bits & (1 << src_sgl_idx)) {
1644                         memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
1645                             (void*)orig_sgl[src_sgl_idx].ds_addr,
1646                             orig_sgl[src_sgl_idx].ds_len);
1647
1648                         bounce_sgl->sg_segs[src_sgl_idx].ss_len =
1649                             orig_sgl[src_sgl_idx].ds_len;
1650                 }
1651         }
1652 }
1653
1654 /**
1655  * @brief copy data from SG list which used as bounce to another SG list
1656  *
1657  * This function is responsible for copy data from one SG list with bounce
1658  * buffer to another SG list's segments.
1659  *
1660  * @param dest_sgl - the destination SG list's segments
1661  * @param dest_sgl_count - the count of destination SG list's segment.
1662  * @param src_sgl - the source SG list.
1663  * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
1664  *
1665  */
1666 void
1667 storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1668                                     unsigned int dest_sgl_count,
1669                                     struct sglist* src_sgl,
1670                                     uint64_t seg_bits)
1671 {
1672         int sgl_idx = 0;
1673         
1674         for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
1675                 if (seg_bits & (1 << sgl_idx)) {
1676                         memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1677                             (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
1678                             src_sgl->sg_segs[sgl_idx].ss_len);
1679                 }
1680         }
1681 }
1682
1683 /**
1684  * @brief check SG list with bounce buffer or not
1685  *
1686  * This function is responsible for check if need bounce buffer for SG list.
1687  *
1688  * @param sgl - the SG list's segments
1689  * @param sg_count - the count of SG list's segment.
1690  * @param bits - segmengs number that need bounce buffer
1691  *
1692  * return -1 if SG list needless bounce buffer
1693  */
1694 static int
1695 storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
1696                                 unsigned int sg_count,
1697                                 uint64_t *bits)
1698 {
1699         int i = 0;
1700         int offset = 0;
1701         uint64_t phys_addr = 0;
1702         uint64_t tmp_bits = 0;
1703         boolean_t found_hole = FALSE;
1704         boolean_t pre_aligned = TRUE;
1705
1706         if (sg_count < 2){
1707                 return -1;
1708         }
1709
1710         *bits = 0;
1711         
1712         phys_addr = vtophys(sgl[0].ds_addr);
1713         offset =  phys_addr - trunc_page(phys_addr);
1714
1715         if (offset != 0) {
1716                 pre_aligned = FALSE;
1717                 tmp_bits |= 1;
1718         }
1719
1720         for (i = 1; i < sg_count; i++) {
1721                 phys_addr = vtophys(sgl[i].ds_addr);
1722                 offset =  phys_addr - trunc_page(phys_addr);
1723
1724                 if (offset == 0) {
1725                         if (FALSE == pre_aligned){
1726                                 /*
1727                                  * This segment is aligned, if the previous
1728                                  * one is not aligned, find a hole
1729                                  */
1730                                 found_hole = TRUE;
1731                         }
1732                         pre_aligned = TRUE;
1733                 } else {
1734                         tmp_bits |= 1 << i;
1735                         if (!pre_aligned) {
1736                                 if (phys_addr != vtophys(sgl[i-1].ds_addr +
1737                                     sgl[i-1].ds_len)) {
1738                                         /*
1739                                          * Check whether connect to previous
1740                                          * segment,if not, find the hole
1741                                          */
1742                                         found_hole = TRUE;
1743                                 }
1744                         } else {
1745                                 found_hole = TRUE;
1746                         }
1747                         pre_aligned = FALSE;
1748                 }
1749         }
1750
1751         if (!found_hole) {
1752                 return (-1);
1753         } else {
1754                 *bits = tmp_bits;
1755                 return 0;
1756         }
1757 }
1758
1759 /**
1760  * @brief Fill in a request structure based on a CAM control block
1761  *
1762  * Fills in a request structure based on the contents of a CAM control
1763  * block.  The request structure holds the payload information for
1764  * VSCSI protocol request.
1765  *
1766  * @param ccb pointer to a CAM contorl block
1767  * @param reqp pointer to a request structure
1768  */
1769 static int
1770 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
1771 {
1772         struct ccb_scsiio *csio = &ccb->csio;
1773         uint64_t phys_addr;
1774         uint32_t bytes_to_copy = 0;
1775         uint32_t pfn_num = 0;
1776         uint32_t pfn;
1777         uint64_t not_aligned_seg_bits = 0;
1778         
1779         /* refer to struct vmscsi_req for meanings of these two fields */
1780         reqp->vstor_packet.u.vm_srb.port =
1781                 cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
1782         reqp->vstor_packet.u.vm_srb.path_id =
1783                 cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
1784
1785         reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
1786         reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
1787
1788         reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
1789         if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
1790                 memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
1791                         csio->cdb_len);
1792         } else {
1793                 memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
1794                         csio->cdb_len);
1795         }
1796
1797         switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1798         case CAM_DIR_OUT:
1799                 reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;       
1800                 break;
1801         case CAM_DIR_IN:
1802                 reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
1803                 break;
1804         case CAM_DIR_NONE:
1805                 reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1806                 break;
1807         default:
1808                 reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1809                 break;
1810         }
1811
1812         reqp->sense_data     = &csio->sense_data;
1813         reqp->sense_info_len = csio->sense_len;
1814
1815         reqp->ccb = ccb;
1816
1817         if (0 == csio->dxfer_len) {
1818                 return (0);
1819         }
1820
1821         reqp->data_buf.length = csio->dxfer_len;
1822
1823         switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
1824         case CAM_DATA_VADDR:
1825         {
1826                 bytes_to_copy = csio->dxfer_len;
1827                 phys_addr = vtophys(csio->data_ptr);
1828                 reqp->data_buf.offset = phys_addr & PAGE_MASK;
1829                 
1830                 while (bytes_to_copy != 0) {
1831                         int bytes, page_offset;
1832                         phys_addr =
1833                             vtophys(&csio->data_ptr[reqp->data_buf.length -
1834                             bytes_to_copy]);
1835                         pfn = phys_addr >> PAGE_SHIFT;
1836                         reqp->data_buf.pfn_array[pfn_num] = pfn;
1837                         page_offset = phys_addr & PAGE_MASK;
1838
1839                         bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
1840
1841                         bytes_to_copy -= bytes;
1842                         pfn_num++;
1843                 }
1844                 break;
1845         }
1846
1847         case CAM_DATA_SG:
1848         {
1849                 int i = 0;
1850                 int offset = 0;
1851                 int ret;
1852
1853                 bus_dma_segment_t *storvsc_sglist =
1854                     (bus_dma_segment_t *)ccb->csio.data_ptr;
1855                 u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
1856
1857                 printf("Storvsc: get SG I/O operation, %d\n",
1858                     reqp->vstor_packet.u.vm_srb.data_in);
1859
1860                 if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){
1861                         printf("Storvsc: %d segments is too much, "
1862                             "only support %d segments\n",
1863                             storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT);
1864                         return (EINVAL);
1865                 }
1866
1867                 /*
1868                  * We create our own bounce buffer function currently. Idealy
1869                  * we should use BUS_DMA(9) framework. But with current BUS_DMA
1870                  * code there is no callback API to check the page alignment of
1871                  * middle segments before busdma can decide if a bounce buffer
1872                  * is needed for particular segment. There is callback,
1873                  * "bus_dma_filter_t *filter", but the parrameters are not
1874                  * sufficient for storvsc driver.
1875                  * TODO:
1876                  *      Add page alignment check in BUS_DMA(9) callback. Once
1877                  *      this is complete, switch the following code to use
1878                  *      BUS_DMA(9) for storvsc bounce buffer support.
1879                  */
1880                 /* check if we need to create bounce buffer */
1881                 ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
1882                     storvsc_sg_count, &not_aligned_seg_bits);
1883                 if (ret != -1) {
1884                         reqp->bounce_sgl =
1885                             storvsc_create_bounce_buffer(storvsc_sg_count,
1886                             reqp->vstor_packet.u.vm_srb.data_in);
1887                         if (NULL == reqp->bounce_sgl) {
1888                                 printf("Storvsc_error: "
1889                                     "create bounce buffer failed.\n");
1890                                 return (ENOMEM);
1891                         }
1892
1893                         reqp->bounce_sgl_count = storvsc_sg_count;
1894                         reqp->not_aligned_seg_bits = not_aligned_seg_bits;
1895
1896                         /*
1897                          * if it is write, we need copy the original data
1898                          *to bounce buffer
1899                          */
1900                         if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
1901                                 storvsc_copy_sgl_to_bounce_buf(
1902                                     reqp->bounce_sgl,
1903                                     storvsc_sglist,
1904                                     storvsc_sg_count,
1905                                     reqp->not_aligned_seg_bits);
1906                         }
1907
1908                         /* transfer virtual address to physical frame number */
1909                         if (reqp->not_aligned_seg_bits & 0x1){
1910                                 phys_addr =
1911                                     vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
1912                         }else{
1913                                 phys_addr =
1914                                         vtophys(storvsc_sglist[0].ds_addr);
1915                         }
1916                         reqp->data_buf.offset = phys_addr & PAGE_MASK;
1917
1918                         pfn = phys_addr >> PAGE_SHIFT;
1919                         reqp->data_buf.pfn_array[0] = pfn;
1920                         
1921                         for (i = 1; i < storvsc_sg_count; i++) {
1922                                 if (reqp->not_aligned_seg_bits & (1 << i)) {
1923                                         phys_addr =
1924                                             vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
1925                                 } else {
1926                                         phys_addr =
1927                                             vtophys(storvsc_sglist[i].ds_addr);
1928                                 }
1929
1930                                 pfn = phys_addr >> PAGE_SHIFT;
1931                                 reqp->data_buf.pfn_array[i] = pfn;
1932                         }
1933                 } else {
1934                         phys_addr = vtophys(storvsc_sglist[0].ds_addr);
1935
1936                         reqp->data_buf.offset = phys_addr & PAGE_MASK;
1937
1938                         for (i = 0; i < storvsc_sg_count; i++) {
1939                                 phys_addr = vtophys(storvsc_sglist[i].ds_addr);
1940                                 pfn = phys_addr >> PAGE_SHIFT;
1941                                 reqp->data_buf.pfn_array[i] = pfn;
1942                         }
1943
1944                         /* check the last segment cross boundary or not */
1945                         offset = phys_addr & PAGE_MASK;
1946                         if (offset) {
1947                                 phys_addr =
1948                                     vtophys(storvsc_sglist[i-1].ds_addr +
1949                                     PAGE_SIZE - offset);
1950                                 pfn = phys_addr >> PAGE_SHIFT;
1951                                 reqp->data_buf.pfn_array[i] = pfn;
1952                         }
1953                         
1954                         reqp->bounce_sgl_count = 0;
1955                 }
1956                 break;
1957         }
1958         default:
1959                 printf("Unknow flags: %d\n", ccb->ccb_h.flags);
1960                 return(EINVAL);
1961         }
1962
1963         return(0);
1964 }
1965
1966 static uint32_t
1967 is_scsi_valid(const struct scsi_inquiry_data *inq_data)
1968 {
1969         u_int8_t type;
1970         type = SID_TYPE(inq_data);
1971         if (type == T_NODEVICE)
1972                 return (0);
1973         if (SID_QUAL(inq_data) == SID_QUAL_BAD_LU)
1974                 return (0);
1975         return (1);
1976 }
1977 /**
1978  * @brief completion function before returning to CAM
1979  *
1980  * I/O process has been completed and the result needs
1981  * to be passed to the CAM layer.
1982  * Free resources related to this request.
1983  *
1984  * @param reqp pointer to a request structure
1985  */
1986 static void
1987 storvsc_io_done(struct hv_storvsc_request *reqp)
1988 {
1989         union ccb *ccb = reqp->ccb;
1990         struct ccb_scsiio *csio = &ccb->csio;
1991         struct storvsc_softc *sc = reqp->softc;
1992         struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
1993         bus_dma_segment_t *ori_sglist = NULL;
1994         int ori_sg_count = 0;
1995
1996         /* destroy bounce buffer if it is used */
1997         if (reqp->bounce_sgl_count) {
1998                 ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
1999                 ori_sg_count = ccb->csio.sglist_cnt;
2000
2001                 /*
2002                  * If it is READ operation, we should copy back the data
2003                  * to original SG list.
2004                  */
2005                 if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2006                         storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
2007                             ori_sg_count,
2008                             reqp->bounce_sgl,
2009                             reqp->not_aligned_seg_bits);
2010                 }
2011
2012                 storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
2013                 reqp->bounce_sgl_count = 0;
2014         }
2015                 
2016         if (reqp->retries > 0) {
2017                 mtx_lock(&sc->hs_lock);
2018 #if HVS_TIMEOUT_TEST
2019                 xpt_print(ccb->ccb_h.path,
2020                         "%u: IO returned after timeout, "
2021                         "waking up timer handler if any.\n", ticks);
2022                 mtx_lock(&reqp->event.mtx);
2023                 cv_signal(&reqp->event.cv);
2024                 mtx_unlock(&reqp->event.mtx);
2025 #endif
2026                 reqp->retries = 0;
2027                 xpt_print(ccb->ccb_h.path,
2028                         "%u: IO returned after timeout, "
2029                         "stopping timer if any.\n", ticks);
2030                 mtx_unlock(&sc->hs_lock);
2031         }
2032
2033 #ifdef notyet
2034         /*
2035          * callout_drain() will wait for the timer handler to finish
2036          * if it is running. So we don't need any lock to synchronize
2037          * between this routine and the timer handler.
2038          * Note that we need to make sure reqp is not freed when timer
2039          * handler is using or will use it.
2040          */
2041         if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
2042                 callout_drain(&reqp->callout);
2043         }
2044 #endif
2045         ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
2046         ccb->ccb_h.status &= ~CAM_STATUS_MASK;
2047         if (vm_srb->scsi_status == SCSI_STATUS_OK) {
2048                 const struct scsi_generic *cmd;
2049                 cmd = (const struct scsi_generic *)
2050                     ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
2051                      csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
2052                 if (vm_srb->srb_status != SRB_STATUS_SUCCESS) {
2053                         /*
2054                          * If there are errors, for example, invalid LUN,
2055                          * host will inform VM through SRB status.
2056                          */
2057                         if (bootverbose) {
2058                                 if (vm_srb->srb_status == SRB_STATUS_INVALID_LUN) {
2059                                         xpt_print(ccb->ccb_h.path,
2060                                             "invalid LUN %d for op: %s\n",
2061                                             vm_srb->lun,
2062                                             scsi_op_desc(cmd->opcode, NULL));
2063                                 } else {
2064                                         xpt_print(ccb->ccb_h.path,
2065                                             "Unknown SRB flag: %d for op: %s\n",
2066                                             vm_srb->srb_status,
2067                                             scsi_op_desc(cmd->opcode, NULL));
2068                                 }
2069                         }
2070
2071                         /*
2072                          * XXX For a selection timeout, all of the LUNs
2073                          * on the target will be gone.  It works for SCSI
2074                          * disks, but does not work for IDE disks.
2075                          *
2076                          * For CAM_DEV_NOT_THERE, CAM will only get
2077                          * rid of the device(s) specified by the path.
2078                          */
2079                         if (storvsc_get_storage_type(sc->hs_dev->device) ==
2080                             DRIVER_STORVSC)
2081                                 ccb->ccb_h.status |= CAM_SEL_TIMEOUT;
2082                         else
2083                                 ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
2084                 } else {
2085                         ccb->ccb_h.status |= CAM_REQ_CMP;
2086                 }
2087
2088                 if (cmd->opcode == INQUIRY &&
2089                     vm_srb->srb_status == SRB_STATUS_SUCCESS) {
2090                         int resp_xfer_len, resp_buf_len, data_len;
2091                         struct scsi_inquiry_data *inq_data =
2092                             (struct scsi_inquiry_data *)csio->data_ptr;
2093                         /* Get the buffer length reported by host */
2094                         resp_xfer_len = vm_srb->transfer_len;
2095                         uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
2096
2097                         /* Get the available buffer length */
2098                         resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
2099                         data_len = (resp_buf_len < resp_xfer_len) ?
2100                             resp_buf_len : resp_xfer_len;
2101                         if (bootverbose && data_len >= 5) {
2102                                 xpt_print(ccb->ccb_h.path, "storvsc inquiry "
2103                                     "(%d) [%x %x %x %x %x ... ]\n", data_len,
2104                                     resp_buf[0], resp_buf[1], resp_buf[2],
2105                                     resp_buf[3], resp_buf[4]);
2106                         }
2107                         /*
2108                          * XXX: Manually fix the wrong response returned from WS2012
2109                          */
2110                         if (!is_scsi_valid(inq_data) &&
2111                             (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2112                             vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8 ||
2113                             vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN7)) {
2114                                 if (data_len >= 4 &&
2115                                     (resp_buf[2] == 0 || resp_buf[3] == 0)) {
2116                                         resp_buf[2] = 5; // verion=5 means SPC-3
2117                                         resp_buf[3] = 2; // resp fmt must be 2
2118                                         if (bootverbose)
2119                                                 xpt_print(ccb->ccb_h.path,
2120                                                     "fix version and resp fmt for 0x%x\n",
2121                                                     vmstor_proto_version);
2122                                 }
2123                         } else if (data_len >= SHORT_INQUIRY_LENGTH) {
2124                                 char vendor[16];
2125
2126                                 cam_strvis(vendor, inq_data->vendor,
2127                                     sizeof(inq_data->vendor), sizeof(vendor));
2128                                 /*
2129                                  * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
2130                                  * WIN2012 R2 in order to support UNMAP feature.
2131                                  */
2132                                 if (!strncmp(vendor, "Msft", 4) &&
2133                                     SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
2134                                     (vmstor_proto_version ==
2135                                      VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2136                                      vmstor_proto_version ==
2137                                      VMSTOR_PROTOCOL_VERSION_WIN8)) {
2138                                         inq_data->version = SCSI_REV_SPC3;
2139                                         if (bootverbose) {
2140                                                 xpt_print(ccb->ccb_h.path,
2141                                                     "storvsc upgrades "
2142                                                     "SPC2 to SPC3\n");
2143                                         }
2144                                 }
2145                         }
2146                 }
2147         } else {
2148                 mtx_lock(&sc->hs_lock);
2149                 xpt_print(ccb->ccb_h.path,
2150                         "storvsc scsi_status = %d\n",
2151                         vm_srb->scsi_status);
2152                 mtx_unlock(&sc->hs_lock);
2153                 ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
2154         }
2155
2156         ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
2157         ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
2158
2159         if (reqp->sense_info_len != 0) {
2160                 csio->sense_resid = csio->sense_len - reqp->sense_info_len;
2161                 ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
2162         }
2163
2164         mtx_lock(&sc->hs_lock);
2165         if (reqp->softc->hs_frozen == 1) {
2166                 xpt_print(ccb->ccb_h.path,
2167                         "%u: storvsc unfreezing softc 0x%p.\n",
2168                         ticks, reqp->softc);
2169                 ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
2170                 reqp->softc->hs_frozen = 0;
2171         }
2172         storvsc_free_request(sc, reqp);
2173         xpt_done(ccb);
2174         mtx_unlock(&sc->hs_lock);
2175 }
2176
2177 /**
2178  * @brief Free a request structure
2179  *
2180  * Free a request structure by returning it to the free list
2181  *
2182  * @param sc pointer to a softc
2183  * @param reqp pointer to a request structure
2184  */     
2185 static void
2186 storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
2187 {
2188
2189         LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
2190 }
2191
2192 /**
2193  * @brief Determine type of storage device from GUID
2194  *
2195  * Using the type GUID, determine if this is a StorVSC (paravirtual
2196  * SCSI or BlkVSC (paravirtual IDE) device.
2197  *
2198  * @param dev a device
2199  * returns an enum
2200  */
2201 static enum hv_storage_type
2202 storvsc_get_storage_type(device_t dev)
2203 {
2204         const char *p = vmbus_get_type(dev);
2205
2206         if (!memcmp(p, &gBlkVscDeviceType, sizeof(hv_guid))) {
2207                 return DRIVER_BLKVSC;
2208         } else if (!memcmp(p, &gStorVscDeviceType, sizeof(hv_guid))) {
2209                 return DRIVER_STORVSC;
2210         }
2211         return (DRIVER_UNKNOWN);
2212 }
2213
2214 #define PCI_VENDOR_INTEL        0x8086
2215 #define PCI_PRODUCT_PIIX4       0x7111
2216
2217 static void
2218 storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path,
2219     struct ata_params *ident_buf __unused, int *veto)
2220 {
2221         /*
2222          * Hyper-V should ignore ATA
2223          */
2224         if (path->device->protocol == PROTO_ATA) {
2225                 struct ccb_pathinq cpi;
2226
2227                 bzero(&cpi, sizeof(cpi));
2228                 xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE);
2229                 cpi.ccb_h.func_code = XPT_PATH_INQ;
2230                 xpt_action((union ccb *)&cpi);
2231                 if (cpi.ccb_h.status == CAM_REQ_CMP &&
2232                     cpi.hba_vendor == PCI_VENDOR_INTEL &&
2233                     cpi.hba_device == PCI_PRODUCT_PIIX4) {
2234                         (*veto)++;
2235                         xpt_print(path,
2236                             "Disable ATA for vendor: %x, device: %x\n",
2237                             cpi.hba_vendor, cpi.hba_device);
2238                 }
2239         }
2240 }
2241
2242 static void
2243 storvsc_sysinit(void *arg __unused)
2244 {
2245         if (vm_guest == VM_GUEST_HV) {
2246                 storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto,
2247                     storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY);
2248         }
2249 }
2250 SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit,
2251     NULL);
2252
2253 static void
2254 storvsc_sysuninit(void *arg __unused)
2255 {
2256         if (storvsc_handler_tag != NULL) {
2257                 EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag);
2258         }
2259 }
2260 SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND,
2261     storvsc_sysuninit, NULL);