]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - sys/dev/hyperv/utilities/hv_snapshot.c
MFC 308664,308742,308743
[FreeBSD/stable/10.git] / sys / dev / hyperv / utilities / hv_snapshot.c
1 /*-
2  * Copyright (c) 2016 Microsoft Corp.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <sys/param.h>
31 #include <sys/kernel.h>
32 #include <sys/conf.h>
33 #include <sys/uio.h>
34 #include <sys/bus.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/module.h>
38 #include <sys/lock.h>
39 #include <sys/taskqueue.h>
40 #include <sys/selinfo.h>
41 #include <sys/sysctl.h>
42 #include <sys/poll.h>
43 #include <sys/proc.h>
44 #include <sys/queue.h>
45 #include <sys/kthread.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysproto.h>
48 #include <sys/un.h>
49 #include <sys/endian.h>
50 #include <sys/sema.h>
51 #include <sys/signal.h>
52 #include <sys/syslog.h>
53 #include <sys/systm.h>
54 #include <sys/mutex.h>
55 #include <sys/callout.h>
56
57 #include <dev/hyperv/include/hyperv.h>
58 #include <dev/hyperv/utilities/hv_utilreg.h>
59 #include <dev/hyperv/utilities/vmbus_icreg.h>
60
61 #include "hv_util.h"
62 #include "hv_snapshot.h"
63 #include "vmbus_if.h"
64
/* Supported VSS protocol (message) version: 5.0. */
#define VSS_MAJOR		5
#define VSS_MINOR		0
#define VSS_MSGVER		VMBUS_IC_VERSION(VSS_MAJOR, VSS_MINOR)

/* Supported VSS framework (IC negotiation) version: 3.0. */
#define VSS_FWVER_MAJOR		3
#define VSS_FWVER		VMBUS_IC_VERSION(VSS_FWVER_MAJOR, 0)

/* How long to wait for daemon/app to answer before failing a host request. */
#define TIMEOUT_LIMIT		(15)	// seconds
/*
 * VSS operation codes carried in hv_vss_hdr.operation.
 * Values must match the Hyper-V host-side definitions.
 */
enum hv_vss_op {
	VSS_OP_CREATE = 0,
	VSS_OP_DELETE,
	VSS_OP_HOT_BACKUP,	/* feature check; mapped to HV_VSS_CHECK */
	VSS_OP_GET_DM_INFO,
	VSS_OP_BU_COMPLETE,
	/*
	 * Following operations are only supported with IC version >= 5.0
	 */
	VSS_OP_FREEZE, /* Freeze the file systems in the VM */
	VSS_OP_THAW, /* Unfreeze the file systems */
	VSS_OP_AUTO_RECOVER,
	VSS_OP_COUNT /* Number of operations, must be last */
};
87
/*
 * Header for all VSS messages: the common IC message header plus the
 * VSS operation code.
 */
struct hv_vss_hdr {
	struct vmbus_icmsg_hdr	ic_hdr;
	uint8_t			operation;	/* enum hv_vss_op */
	uint8_t			reserved[7];	/* padding */
} __packed;
96
97
/*
 * Flag values for the hv_vss_check_feature. Here supports only
 * one value.
 */
#define VSS_HBU_NO_AUTO_RECOVERY		0x00000005

/* Body of a VSS_OP_HOT_BACKUP (feature check) message. */
struct hv_vss_check_feature {
	uint32_t flags;	/* replies report VSS_HBU_NO_AUTO_RECOVERY */
} __packed;
107
/* Body of a VSS_OP_GET_DM_INFO message. */
struct hv_vss_check_dm_info {
	uint32_t flags;
} __packed;
111
/*
 * On-the-wire VSS message exchanged with the host:
 * header followed by an operation-specific body.
 */
struct hv_vss_msg {
	union {
		struct hv_vss_hdr vss_hdr;
	} hdr;
	union {
		struct hv_vss_check_feature vss_cf;
		struct hv_vss_check_dm_info dm_info;
	} body;
} __packed;
121
/*
 * Combined request record: the userland-facing part and the host-facing
 * part of one transaction, kept side by side.
 */
struct hv_vss_req {
	struct hv_vss_opt_msg	opt_msg;	/* used to communicate with daemon */
	struct hv_vss_msg	msg;		/* used to communicate with host */
} __packed;
126
/* hv_vss debug control: 0 = quiet, 1 = errors, 2 = errors + info. */
static int hv_vss_log = 0;

#define hv_vss_log_error(...)	do {				\
	if (hv_vss_log > 0)					\
		log(LOG_ERR, "hv_vss: " __VA_ARGS__);		\
} while (0)

#define hv_vss_log_info(...) do {				\
	if (hv_vss_log > 1)					\
		log(LOG_INFO, "hv_vss: " __VA_ARGS__);		\
} while (0)
139
/*
 * GUID of the Hyper-V VSS synthetic device; used to match this driver
 * against the vmbus channel offered by the host.
 */
static const struct vmbus_ic_desc vmbus_vss_descs[] = {
	{
		.ic_guid = { .hv_guid = {
		    0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42,
		    0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4,  0x40} },
		.ic_desc = "Hyper-V VSS"
	},
	VMBUS_IC_DESC_END
};
149
/* Human-readable names indexed by the HV_VSS_* option codes (logging only). */
static const char * vss_opt_name[] = {"None", "VSSCheck", "Freeze", "Thaw"};
151
/* character device prototypes */
static d_open_t		hv_vss_dev_open;
static d_close_t	hv_vss_dev_close;
static d_poll_t		hv_vss_dev_daemon_poll;
static d_ioctl_t	hv_vss_dev_daemon_ioctl;

static d_open_t		hv_appvss_dev_open;
static d_close_t	hv_appvss_dev_close;
static d_poll_t		hv_appvss_dev_poll;
static d_ioctl_t	hv_appvss_dev_ioctl;

/* cdev switch for the file-system freeze/thaw daemon device. */
static struct cdevsw hv_vss_cdevsw =
{
	.d_version	= D_VERSION,
	.d_open		= hv_vss_dev_open,
	.d_close	= hv_vss_dev_close,
	.d_poll		= hv_vss_dev_daemon_poll,
	.d_ioctl	= hv_vss_dev_daemon_ioctl,
	.d_name		= FS_VSS_DEV_NAME,
};

/* cdev switch for the application-level freeze/thaw device. */
static struct cdevsw hv_appvss_cdevsw =
{
	.d_version	= D_VERSION,
	.d_open		= hv_appvss_dev_open,
	.d_close	= hv_appvss_dev_close,
	.d_poll		= hv_appvss_dev_poll,
	.d_ioctl	= hv_appvss_dev_ioctl,
	.d_name		= APP_VSS_DEV_NAME,
};
183
struct hv_vss_sc;
/*
 * Per-cdev state tracking one consumer (the VSS daemon or the app agent).
 *
 * A msg is transferred from host to the notify queue, then (once read by
 * the consumer) to the ack queue. Finally, it is recycled to the free list.
 */
struct hv_vss_dev_sc {
	STAILQ_HEAD(, hv_vss_req_internal)	to_notify_queue;	/* awaiting read */
	STAILQ_HEAD(, hv_vss_req_internal)	to_ack_queue;		/* awaiting ack */
	struct hv_vss_sc			*sc;		/* back-pointer to driver sc */
	struct proc				*proc_task;	/* consumer process, set at open */
	struct selinfo				hv_vss_selinfo;	/* poll/select support */
};
/*
 * Global state to track and synchronize the transaction requests from the host.
 * The VSS allows user to register their function to do freeze/thaw for application.
 * VSS kernel will notify both vss daemon and user application if it is registered.
 * The implementation state transition is illustrated by:
 * https://clovertrail.github.io/assets/vssdot.png
 */
typedef struct hv_vss_sc {
	struct hv_util_sc			util_sc;	/* common IC utility state */
	device_t				dev;

	/* Deferred processing of host messages on taskqueue_thread. */
	struct task				task;

	/*
	 * mutex is used to protect access of list/queue;
	 * the callout in each request also uses this mutex.
	 */
	struct mtx				pending_mutex;
	/*
	 * req_free_list contains all free items
	 */
	LIST_HEAD(, hv_vss_req_internal)	req_free_list;

	/* Indicates if daemon registered with driver */
	boolean_t				register_done;

	/* Indicates if the application agent registered with driver */
	boolean_t				app_register_done;

	/* cdev for file system freeze/thaw */
	struct cdev				*hv_vss_dev;
	/* cdev for application freeze/thaw */
	struct cdev				*hv_appvss_dev;

	/* sc for app */
	struct hv_vss_dev_sc			app_sc;
	/* sc for daemon */
	struct hv_vss_dev_sc			daemon_sc;
} hv_vss_sc;
237
/*
 * One in-flight host request. Lives on req_free_list when idle and on a
 * notify/ack queue while being serviced; protected by sc->pending_mutex.
 */
typedef struct hv_vss_req_internal {
	LIST_ENTRY(hv_vss_req_internal)		link;	/* linkage on req_free_list */
	STAILQ_ENTRY(hv_vss_req_internal)	slink;	/* linkage on notify/ack queues */
	struct hv_vss_req			vss_req;

	/* Rcv buffer for communicating with the host*/
	uint8_t					*rcv_buf;
	/* Length of host message */
	uint32_t				host_msg_len;
	/* Host message id */
	uint64_t				host_msg_id;

	hv_vss_sc				*sc;	/* back-pointer to driver sc */

	/* Per-request timeout; fails the request back to the host on expiry. */
	struct callout				callout;
} hv_vss_req_internal;
254
/*
 * Find the request whose msgid equals `id' on `queue', unlink it, and
 * leave it in `reqp' (`reqp' is NULL after the loop when not found).
 * Caller must hold pending_mutex.
 */
#define SEARCH_REMOVE_REQ_LOCKED(reqp, queue, link, tmp, id)		\
	do {								\
		STAILQ_FOREACH_SAFE(reqp, queue, link, tmp) {		\
			if (reqp->vss_req.opt_msg.msgid == id) {	\
				STAILQ_REMOVE(queue,			\
				    reqp, hv_vss_req_internal, link);	\
				break;					\
			}						\
		}							\
	} while (0)
265
266 static bool
267 hv_vss_is_daemon_killed_after_launch(hv_vss_sc *sc)
268 {
269         return (!sc->register_done && sc->daemon_sc.proc_task);
270 }
271
272 /*
273  * Callback routine that gets called whenever there is a message from host
274  */
275 static void
276 hv_vss_callback(struct vmbus_channel *chan __unused, void *context)
277 {
278         hv_vss_sc *sc = (hv_vss_sc*)context;
279         if (hv_vss_is_daemon_killed_after_launch(sc))
280                 hv_vss_log_info("%s: daemon was killed!\n", __func__);
281         if (sc->register_done || sc->daemon_sc.proc_task) {
282                 hv_vss_log_info("%s: Queuing work item\n", __func__);
283                 if (hv_vss_is_daemon_killed_after_launch(sc))
284                         hv_vss_log_info("%s: daemon was killed!\n", __func__);
285                 taskqueue_enqueue(taskqueue_thread, &sc->task);
286         } else {
287                 hv_vss_log_info("%s: daemon has never been registered\n", __func__);
288         }
289         hv_vss_log_info("%s: received msg from host\n", __func__);
290 }
/*
 * Send the response back to the host.
 *
 * Reuses the receive buffer: stamps the IC status and response flags
 * into its header, then sends it back on the channel as an inband packet.
 */
static void
hv_vss_respond_host(uint8_t *rcv_buf, struct vmbus_channel *ch,
    uint32_t recvlen, uint64_t requestid, uint32_t error)
{
	struct vmbus_icmsg_hdr *hv_icmsg_hdrp;

	hv_icmsg_hdrp = (struct vmbus_icmsg_hdr *)rcv_buf;

	hv_icmsg_hdrp->ic_status = error;
	hv_icmsg_hdrp->ic_flags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;

	/* NOTE: `error' is reused below to hold the send result. */
	error = vmbus_chan_send(ch, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    rcv_buf, recvlen, requestid);
	if (error)
		hv_vss_log_info("%s: hv_vss_respond_host: sendpacket error:%d\n",
		    __func__, error);
}
311
/*
 * Complete a request: send `status' back to the host and recycle the
 * request onto the free list. Caller must hold sc->pending_mutex.
 */
static void
hv_vss_notify_host_result_locked(struct hv_vss_req_internal *reqp, uint32_t status)
{
	struct hv_vss_msg* msg = (struct hv_vss_msg *)reqp->rcv_buf;
	hv_vss_sc *sc = reqp->sc;

	if (reqp->vss_req.opt_msg.opt == HV_VSS_CHECK) {
		/* Feature-check replies advertise no auto-recovery support. */
		msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY;
	}
	hv_vss_log_info("%s, %s response %s to host\n", __func__,
	    vss_opt_name[reqp->vss_req.opt_msg.opt],
	    status == HV_S_OK ? "Success" : "Fail");
	hv_vss_respond_host(reqp->rcv_buf, vmbus_get_channel(reqp->sc->dev),
	    reqp->host_msg_len, reqp->host_msg_id, status);
	/* recycle the request */
	LIST_INSERT_HEAD(&sc->req_free_list, reqp, link);
}
328
329 static void
330 hv_vss_notify_host_result(struct hv_vss_req_internal *reqp, uint32_t status)
331 {
332         mtx_lock(&reqp->sc->pending_mutex);
333         hv_vss_notify_host_result_locked(reqp, status);
334         mtx_unlock(&reqp->sc->pending_mutex);
335 }
336
/*
 * Translate the host operation code of `reqp' into the userland-visible
 * HV_VSS_* option and copy the opt_msg out to `userdata'.
 */
static void
hv_vss_cp_vssreq_to_user(struct hv_vss_req_internal *reqp,
    struct hv_vss_opt_msg *userdata)
{
	struct hv_vss_req *hv_vss_dev_buf;
	hv_vss_dev_buf = &reqp->vss_req;
	hv_vss_dev_buf->opt_msg.opt = HV_VSS_NONE;
	switch (reqp->vss_req.msg.hdr.vss_hdr.operation) {
	case VSS_OP_FREEZE:
		hv_vss_dev_buf->opt_msg.opt = HV_VSS_FREEZE;
		break;
	case VSS_OP_THAW:
		hv_vss_dev_buf->opt_msg.opt = HV_VSS_THAW;
		break;
	case VSS_OP_HOT_BACKUP:
		hv_vss_dev_buf->opt_msg.opt = HV_VSS_CHECK;
		break;
	}
	/* Struct copy into the caller-supplied ioctl buffer. */
	*userdata = hv_vss_dev_buf->opt_msg;
	hv_vss_log_info("%s, read data from user for "
	    "%s (%ju) \n", __func__, vss_opt_name[userdata->opt],
	    (uintmax_t)userdata->msgid);
}
360
/**
 * Remove the request id from app/daemon notify or ack queue,
 * and let the caller recycle the request to the free list.
 *
 * When app was notified but not yet sending ack, the request
 * should locate in either notify queue or ack queue.
 *
 * Returns the removed request, or NULL if `req_id' is on no queue.
 * Caller must hold sc->pending_mutex.
 */
static struct hv_vss_req_internal*
hv_vss_drain_req_queue_locked(hv_vss_sc *sc, uint64_t req_id)
{
	struct hv_vss_req_internal *reqp, *tmp;

	/* Probe the four queues in turn until the id is found. */
	SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_notify_queue,
	    slink, tmp, req_id);
	if (reqp == NULL)
		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_ack_queue,
		    slink, tmp, req_id);
	if (reqp == NULL)
		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_notify_queue,
		    slink, tmp, req_id);
	if (reqp == NULL)
		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_ack_queue, slink,
		    tmp, req_id);
	return (reqp);
}
/**
 * Actions for daemon who has been notified.
 *
 * IOCHVVSSREAD handler: hand the oldest pending request to the consumer
 * and move it from the notify queue to the ack queue.
 */
static void
hv_vss_notified(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
{
	struct hv_vss_req_internal *reqp;

	mtx_lock(&dev_sc->sc->pending_mutex);
	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) {
		reqp = STAILQ_FIRST(&dev_sc->to_notify_queue);
		hv_vss_cp_vssreq_to_user(reqp, userdata);
		STAILQ_REMOVE_HEAD(&dev_sc->to_notify_queue, slink);
		/* insert the msg to queue for write */
		STAILQ_INSERT_TAIL(&dev_sc->to_ack_queue, reqp, slink);
		userdata->status = VSS_SUCCESS;
	} else {
		/* Timeout occur, thus request was removed from queue. */
		hv_vss_log_info("%s: notify queue is empty!\n", __func__);
		userdata->status = VSS_FAIL;
	}
	mtx_unlock(&dev_sc->sc->pending_mutex);
}
407
/*
 * Queue `reqp' on the given consumer's notify queue and wake up any
 * poll()/select() sleeper on that device.
 */
static void
hv_vss_notify(struct hv_vss_dev_sc *dev_sc, struct hv_vss_req_internal *reqp)
{
	uint32_t opt = reqp->vss_req.opt_msg.opt;

	mtx_lock(&dev_sc->sc->pending_mutex);
	STAILQ_INSERT_TAIL(&dev_sc->to_notify_queue, reqp, slink);
	hv_vss_log_info("%s: issuing query %s (%ju) to %s\n", __func__,
	    vss_opt_name[opt], (uintmax_t)reqp->vss_req.opt_msg.msgid,
	    &dev_sc->sc->app_sc == dev_sc ? "app" : "daemon");
	mtx_unlock(&dev_sc->sc->pending_mutex);
	selwakeup(&dev_sc->hv_vss_selinfo);
}
420
/**
 * Actions for daemon who has acknowledged.
 *
 * IOCHVVSSWRITE handler for the daemon device: look up the acked request
 * on the ack queue, then either complete it back to the host, or (for a
 * successful THAW with a registered app) forward it to the app agent.
 */
static void
hv_vss_daemon_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
{
	struct hv_vss_req_internal	*reqp, *tmp;
	uint64_t			req_id;
	int				opt;
	uint32_t			status;

	opt = userdata->opt;
	req_id = userdata->msgid;
	status = userdata->status;
	/* make sure the reserved fields are all zeros. */
	memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) -
	    __offsetof(struct hv_vss_opt_msg, reserved));
	mtx_lock(&dev_sc->sc->pending_mutex);
	SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id);
	mtx_unlock(&dev_sc->sc->pending_mutex);
	if (reqp == NULL) {
		/* Request already timed out and was completed by the callout. */
		hv_vss_log_info("%s Timeout: fail to find daemon ack request\n",
		    __func__);
		userdata->status = VSS_FAIL;
		return;
	}
	KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!"));
	hv_vss_log_info("%s, get response %d from daemon for %s (%ju) \n", __func__,
	    status, vss_opt_name[opt], (uintmax_t)req_id);
	switch (opt) {
	case HV_VSS_CHECK:
	case HV_VSS_FREEZE:
		/* Daemon is the last hop for CHECK/FREEZE: report to host. */
		callout_drain(&reqp->callout);
		hv_vss_notify_host_result(reqp,
		    status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
		break;
	case HV_VSS_THAW:
		if (dev_sc->sc->app_register_done) {
			if (status == VSS_SUCCESS) {
				/* THAW continues to the app after the fs thaw. */
				hv_vss_notify(&dev_sc->sc->app_sc, reqp);
			} else {
				/* handle error */
				callout_drain(&reqp->callout);
				hv_vss_notify_host_result(reqp, HV_E_FAIL);
			}
		} else {
			callout_drain(&reqp->callout);
			hv_vss_notify_host_result(reqp,
			    status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
		}
		break;
	}
}
474
475 /**
476  * Actions for app who has acknowledged.
477  */
478 static void
479 hv_vss_app_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
480 {
481         struct hv_vss_req_internal      *reqp, *tmp;
482         uint64_t                        req_id;
483         int                             opt;
484         uint8_t                         status;
485
486         opt = userdata->opt;
487         req_id = userdata->msgid;
488         status = userdata->status;
489         /* make sure the reserved fields are all zeros. */
490         memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) -
491             __offsetof(struct hv_vss_opt_msg, reserved));
492         mtx_lock(&dev_sc->sc->pending_mutex);
493         SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id);
494         mtx_unlock(&dev_sc->sc->pending_mutex);
495         if (reqp == NULL) {
496                 hv_vss_log_info("%s Timeout: fail to find app ack request\n",
497                     __func__);
498                 userdata->status = VSS_FAIL;
499                 return;
500         }
501         KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!"));
502         hv_vss_log_info("%s, get response %d from app for %s (%ju) \n",
503             __func__, status, vss_opt_name[opt], (uintmax_t)req_id);
504         if (dev_sc->sc->register_done) {
505                 switch (opt) {
506                 case HV_VSS_CHECK:
507                 case HV_VSS_FREEZE:
508                         if (status == VSS_SUCCESS) {
509                                 hv_vss_notify(&dev_sc->sc->daemon_sc, reqp);
510                         } else {
511                                 /* handle error */
512                                 callout_drain(&reqp->callout);
513                                 hv_vss_notify_host_result(reqp, HV_E_FAIL);
514                         }
515                         break;
516                 case HV_VSS_THAW:
517                         callout_drain(&reqp->callout);
518                         hv_vss_notify_host_result(reqp,
519                             status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
520                         break;
521                 }
522         } else {
523                 hv_vss_log_info("%s, Fatal: vss daemon was killed\n", __func__);
524         }
525 }
526
/*
 * Open of the daemon device: registers the VSS daemon with the driver.
 * Only one daemon may be registered at a time (EBUSY otherwise).
 */
static int
hv_vss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	struct proc	*td_proc;
	td_proc = td->td_proc;

	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
	hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n",
	    __func__, td_proc->p_comm, FS_VSS_DEV_NAME);

	if (dev_sc->sc->register_done)
		return (EBUSY);

	dev_sc->sc->register_done = true;
	/* Kick processing in case host messages arrived before the open. */
	hv_vss_callback(vmbus_get_channel(dev_sc->sc->dev), dev_sc->sc);

	dev_sc->proc_task = curproc;
	return (0);
}
546
547 static int
548 hv_vss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused,
549                                  struct thread *td)
550 {
551         struct proc     *td_proc;
552         td_proc = td->td_proc;
553
554         struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
555
556         hv_vss_log_info("%s: %s closes device \"%s\"\n",
557             __func__, td_proc->p_comm, FS_VSS_DEV_NAME);
558         dev_sc->sc->register_done = false;
559         return (0);
560 }
561
562 static int
563 hv_vss_dev_daemon_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
564     struct thread *td)
565 {
566         struct proc                     *td_proc;
567         struct hv_vss_dev_sc            *sc;
568
569         td_proc = td->td_proc;
570         sc = (struct hv_vss_dev_sc*)dev->si_drv1;
571
572         hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm);
573
574         struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data;
575         switch(cmd) {
576         case IOCHVVSSREAD:
577                 hv_vss_notified(sc, userdata);
578                 break;
579         case IOCHVVSSWRITE:
580                 hv_vss_daemon_acked(sc, userdata);
581                 break;
582         }
583         return (0);
584 }
585
/*
 * hv_vss_daemon poll invokes this function to check if data is available
 * for daemon to read.
 *
 * Returns POLLIN when a request is pending on the notify queue; otherwise
 * records the thread via selrecord() for a later selwakeup().
 */
static int
hv_vss_dev_daemon_poll(struct cdev *dev, int events, struct thread *td)
{
	int revent = 0;
	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;

	mtx_lock(&dev_sc->sc->pending_mutex);
	/**
	 * if there is data ready, inform daemon's poll
	 */
	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue))
		revent = POLLIN;
	if (revent == 0)
		selrecord(td, &dev_sc->hv_vss_selinfo);
	hv_vss_log_info("%s return 0x%x\n", __func__, revent);
	mtx_unlock(&dev_sc->sc->pending_mutex);
	return (revent);
}
608
609 static int
610 hv_appvss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
611 {
612         struct proc     *td_proc;
613         td_proc = td->td_proc;
614
615         struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
616         hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n",
617             __func__, td_proc->p_comm, APP_VSS_DEV_NAME);
618
619         if (dev_sc->sc->app_register_done)
620                 return (EBUSY);
621
622         dev_sc->sc->app_register_done = true;
623         dev_sc->proc_task = curproc;
624         return (0);
625 }
626
627 static int
628 hv_appvss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused,
629                                  struct thread *td)
630 {
631         struct proc     *td_proc;
632         td_proc = td->td_proc;
633
634         struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
635
636         hv_vss_log_info("%s: %s closes device \"%s\".\n",
637             __func__, td_proc->p_comm, APP_VSS_DEV_NAME);
638         dev_sc->sc->app_register_done = false;
639         return (0);
640 }
641
642 static int
643 hv_appvss_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
644     struct thread *td)
645 {
646         struct proc                     *td_proc;
647         struct hv_vss_dev_sc            *dev_sc;
648
649         td_proc = td->td_proc;
650         dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
651
652         hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm);
653
654         struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data;
655         switch(cmd) {
656         case IOCHVVSSREAD:
657                 hv_vss_notified(dev_sc, userdata);
658                 break;
659         case IOCHVVSSWRITE:
660                 hv_vss_app_acked(dev_sc, userdata);
661                 break;
662         }
663         return (0);
664 }
665
/*
 * The application agent's poll invokes this function to check if data is
 * available for the app to read (comment previously mis-copied from the
 * daemon poll handler).
 *
 * Returns POLLIN when a request is pending on the notify queue; otherwise
 * records the thread via selrecord() for a later selwakeup().
 */
static int
hv_appvss_dev_poll(struct cdev *dev, int events, struct thread *td)
{
	int revent = 0;
	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;

	mtx_lock(&dev_sc->sc->pending_mutex);
	/**
	 * if there is data ready, inform app's poll
	 */
	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue))
		revent = POLLIN;
	if (revent == 0)
		selrecord(td, &dev_sc->hv_vss_selinfo);
	hv_vss_log_info("%s return 0x%x\n", __func__, revent);
	mtx_unlock(&dev_sc->sc->pending_mutex);
	return (revent);
}
688
/*
 * Callout handler: the consumer did not respond within TIMEOUT_LIMIT
 * seconds. Pull the request off whichever queue holds it and fail it
 * back to the host. Runs with pending_mutex held (asserted below).
 */
static void
hv_vss_timeout(void *arg)
{
	hv_vss_req_internal *reqp = arg;
	hv_vss_req_internal *request;
	hv_vss_sc* sc = reqp->sc;
	uint64_t req_id = reqp->vss_req.opt_msg.msgid;
	/* This thread is locked */
	KASSERT(mtx_owned(&sc->pending_mutex), ("mutex lock is not owned!"));
	request = hv_vss_drain_req_queue_locked(sc, req_id);
	KASSERT(request != NULL, ("timeout but fail to find request"));
	hv_vss_notify_host_result_locked(reqp, HV_E_FAIL);
}
702
/*
 * This routine is called whenever a message is received from the host:
 * initialize `reqp' from the raw host buffer and assign a fresh msgid.
 */
static void
hv_vss_init_req(hv_vss_req_internal *reqp,
    uint32_t recvlen, uint64_t requestid, uint8_t *vss_buf, hv_vss_sc *sc)
{
	struct timespec vm_ts;
	struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;

	/* Clear everything except the embedded callout state. */
	memset(reqp, 0, __offsetof(hv_vss_req_internal, callout));
	reqp->host_msg_len = recvlen;
	reqp->host_msg_id = requestid;
	reqp->rcv_buf = vss_buf;
	reqp->sc = sc;
	memcpy(&reqp->vss_req.msg,
	    (struct hv_vss_msg *)vss_buf, sizeof(struct hv_vss_msg));
	/* set the opt for users */
	switch (msg->hdr.vss_hdr.operation) {
	case VSS_OP_FREEZE:
		reqp->vss_req.opt_msg.opt = HV_VSS_FREEZE;
		break;
	case VSS_OP_THAW:
		reqp->vss_req.opt_msg.opt = HV_VSS_THAW;
		break;
	case VSS_OP_HOT_BACKUP:
		reqp->vss_req.opt_msg.opt = HV_VSS_CHECK;
		break;
	}
	/* Use a timestamp as msg request ID */
	nanotime(&vm_ts);
	reqp->vss_req.opt_msg.msgid = (vm_ts.tv_sec * NANOSEC) + vm_ts.tv_nsec;
}
736
/*
 * Pop a free request from req_free_list, or return NULL when a previous
 * transaction is still in flight or the free list is exhausted.
 * Caller must hold sc->pending_mutex.
 */
static hv_vss_req_internal*
hv_vss_get_new_req_locked(hv_vss_sc *sc)
{
	hv_vss_req_internal *reqp;

	/* Only one host transaction may be outstanding at a time. */
	if (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue) ||
	    !STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue) ||
	    !STAILQ_EMPTY(&sc->app_sc.to_notify_queue) ||
	    !STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) {
		/*
		 * There is request coming from host before
		 * finishing previous requests
		 */
		hv_vss_log_info("%s: Warning: there is new request "
		    "coming before finishing previous requests\n", __func__);
		return (NULL);
	}
	if (LIST_EMPTY(&sc->req_free_list)) {
		/* TODO Error: no buffer */
		hv_vss_log_info("Error: No buffer\n");
		return (NULL);
	}
	reqp = LIST_FIRST(&sc->req_free_list);
	LIST_REMOVE(reqp, link);
	return (reqp);
}
762
/*
 * Route a freshly-initialized request to its first consumer and arm the
 * per-request timeout.
 */
static void
hv_vss_start_notify(hv_vss_req_internal *reqp, uint32_t opt)
{
	hv_vss_sc *sc = reqp->sc;
	/*
	 * Freeze/Check notification sequence: kernel -> app -> daemon(fs)
	 * Thaw notification sequence:         kernel -> daemon(fs) -> app
	 *
	 * We should wake up the daemon, in case it's doing poll().
	 * The response should be received within TIMEOUT_LIMIT seconds,
	 * otherwise, trigger timeout.
	 */
	switch (opt) {
	case VSS_OP_FREEZE:
	case VSS_OP_HOT_BACKUP:
		if (sc->app_register_done)
			hv_vss_notify(&sc->app_sc, reqp);
		else
			hv_vss_notify(&sc->daemon_sc, reqp);
		callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz,
		    hv_vss_timeout, reqp);
		break;
	case VSS_OP_THAW:
		hv_vss_notify(&sc->daemon_sc, reqp);
		callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz,
		    hv_vss_timeout, reqp);
		break;
	}
}
791
/*
 * Taskqueue handler: drain the VMBus channel and process every pending
 * VSS message from the host.
 *
 * NEGOTIATE and GET_DM_INFO messages are answered inline.
 * FREEZE/THAW/HOT_BACKUP are wrapped in an internal request and handed
 * to the user-space daemon/application via hv_vss_start_notify().  If
 * the daemon died after it had registered, every operation is failed
 * back to the host with HV_E_FAIL so the host does not block waiting
 * for a response that will never arrive.
 */
static void
hv_vss_process_request(void *context, int pending __unused)
{
	uint8_t *vss_buf;
	struct vmbus_channel *channel;
	uint32_t recvlen = 0;
	uint64_t requestid;
	struct vmbus_icmsg_hdr *icmsghdrp;
	int ret = 0;
	hv_vss_sc *sc;
	hv_vss_req_internal *reqp;

	hv_vss_log_info("%s: entering hv_vss_process_request\n", __func__);

	sc = (hv_vss_sc*)context;
	vss_buf = sc->util_sc.receive_buffer;
	channel = vmbus_get_channel(sc->dev);

	/* Read the first message; recvlen is in/out (buffer size in). */
	recvlen = sc->util_sc.ic_buflen;
	ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid);
	KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough"));
	/* XXX check recvlen to make sure that it contains enough data */

	while ((ret == 0) && (recvlen > 0)) {
		icmsghdrp = (struct vmbus_icmsg_hdr *)vss_buf;

		if (icmsghdrp->ic_type == HV_ICMSGTYPE_NEGOTIATE) {
			/*
			 * Version negotiation rewrites vss_buf in place;
			 * the (possibly updated) buffer is echoed back.
			 */
			ret = vmbus_ic_negomsg(&sc->util_sc, vss_buf,
			    &recvlen, VSS_FWVER, VSS_MSGVER);
			hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
			    recvlen, requestid, ret);
			hv_vss_log_info("%s: version negotiated\n", __func__);
		} else if (!hv_vss_is_daemon_killed_after_launch(sc)) {
			struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;
			switch(msg->hdr.vss_hdr.operation) {
			case VSS_OP_FREEZE:
			case VSS_OP_THAW:
			case VSS_OP_HOT_BACKUP:
				/* Free-list access requires pending_mutex. */
				mtx_lock(&sc->pending_mutex);
				reqp = hv_vss_get_new_req_locked(sc);
				mtx_unlock(&sc->pending_mutex);
				if (reqp == NULL) {
					/* ignore this request from host */
					break;
				}
				hv_vss_init_req(reqp, recvlen, requestid, vss_buf, sc);
				hv_vss_log_info("%s: receive %s (%ju) from host\n",
				    __func__,
				    vss_opt_name[reqp->vss_req.opt_msg.opt],
				    (uintmax_t)reqp->vss_req.opt_msg.msgid);
				/*
				 * Response to the host is deferred until the
				 * daemon/app acks or the request times out.
				 */
				hv_vss_start_notify(reqp, msg->hdr.vss_hdr.operation);
				break;
			case VSS_OP_GET_DM_INFO:
				hv_vss_log_info("%s: receive GET_DM_INFO from host\n",
				    __func__);
				msg->body.dm_info.flags = 0;
				hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
				    recvlen, requestid, HV_S_OK);
				break;
			default:
				device_printf(sc->dev, "Unknown opt from host: %d\n",
				    msg->hdr.vss_hdr.operation);
				break;
			}
		} else {
			/* daemon was killed for some reason after it was launched */
			struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;
			switch(msg->hdr.vss_hdr.operation) {
			case VSS_OP_FREEZE:
				hv_vss_log_info("%s: response fail for FREEZE\n",
				    __func__);
				break;
			case VSS_OP_THAW:
				hv_vss_log_info("%s: response fail for THAW\n",
				    __func__);
				break;
			case VSS_OP_HOT_BACKUP:
				hv_vss_log_info("%s: response fail for HOT_BACKUP\n",
				    __func__);
				msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY;
				break;
			case VSS_OP_GET_DM_INFO:
				hv_vss_log_info("%s: response fail for GET_DM_INFO\n",
				    __func__);
				msg->body.dm_info.flags = 0;
				break;
			default:
				device_printf(sc->dev, "Unknown opt from host: %d\n",
				    msg->hdr.vss_hdr.operation);
				break;
			}
			/* Fail every operation back to the host. */
			hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
			    recvlen, requestid, HV_E_FAIL);
		}
		/*
		 * Try reading next buffer
		 */
		recvlen = sc->util_sc.ic_buflen;
		ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid);
		KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough"));
		/* XXX check recvlen to make sure that it contains enough data */

		hv_vss_log_info("%s: read: context %p, ret =%d, recvlen=%d\n",
		    __func__, context, ret, recvlen);
	}
}
902
903 static int
904 hv_vss_probe(device_t dev)
905 {
906         return (vmbus_ic_probe(dev, vmbus_vss_descs));
907 }
908
909 static int
910 hv_vss_init_send_receive_queue(device_t dev)
911 {
912         hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
913         int i;
914         const int max_list = 4; /* It is big enough for the list */
915         struct hv_vss_req_internal* reqp;
916
917         LIST_INIT(&sc->req_free_list);
918         STAILQ_INIT(&sc->daemon_sc.to_notify_queue);
919         STAILQ_INIT(&sc->daemon_sc.to_ack_queue);
920         STAILQ_INIT(&sc->app_sc.to_notify_queue);
921         STAILQ_INIT(&sc->app_sc.to_ack_queue);
922
923         for (i = 0; i < max_list; i++) {
924                 reqp = malloc(sizeof(struct hv_vss_req_internal),
925                     M_DEVBUF, M_WAITOK|M_ZERO);
926                 LIST_INSERT_HEAD(&sc->req_free_list, reqp, link);
927                 callout_init_mtx(&reqp->callout, &sc->pending_mutex, 0);
928         }
929         return (0);
930 }
931
932 static int
933 hv_vss_destroy_send_receive_queue(device_t dev)
934 {
935         hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
936         hv_vss_req_internal* reqp;
937
938         while (!LIST_EMPTY(&sc->req_free_list)) {
939                 reqp = LIST_FIRST(&sc->req_free_list);
940                 LIST_REMOVE(reqp, link);
941                 free(reqp, M_DEVBUF);
942         }
943
944         while (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue)) {
945                 reqp = STAILQ_FIRST(&sc->daemon_sc.to_notify_queue);
946                 STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_notify_queue, slink);
947                 free(reqp, M_DEVBUF);
948         }
949
950         while (!STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue)) {
951                 reqp = STAILQ_FIRST(&sc->daemon_sc.to_ack_queue);
952                 STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_ack_queue, slink);
953                 free(reqp, M_DEVBUF);
954         }
955
956         while (!STAILQ_EMPTY(&sc->app_sc.to_notify_queue)) {
957                 reqp = STAILQ_FIRST(&sc->app_sc.to_notify_queue);
958                 STAILQ_REMOVE_HEAD(&sc->app_sc.to_notify_queue, slink);
959                 free(reqp, M_DEVBUF);
960         }
961
962         while (!STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) {
963                 reqp = STAILQ_FIRST(&sc->app_sc.to_ack_queue);
964                 STAILQ_REMOVE_HEAD(&sc->app_sc.to_ack_queue, slink);
965                 free(reqp, M_DEVBUF);
966         }
967         return (0);
968 }
969
970 static int
971 hv_vss_attach(device_t dev)
972 {
973         int error;
974         struct sysctl_oid_list *child;
975         struct sysctl_ctx_list *ctx;
976
977         hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
978
979         sc->dev = dev;
980         mtx_init(&sc->pending_mutex, "hv_vss pending mutex", NULL, MTX_DEF);
981
982         ctx = device_get_sysctl_ctx(dev);
983         child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
984
985         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_vss_log",
986             CTLFLAG_RWTUN, &hv_vss_log, 0, "Hyperv VSS service log level");
987
988         TASK_INIT(&sc->task, 0, hv_vss_process_request, sc);
989         hv_vss_init_send_receive_queue(dev);
990         /* create character device for file system freeze/thaw */
991         error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
992                     &sc->hv_vss_dev,
993                     &hv_vss_cdevsw,
994                     0,
995                     UID_ROOT,
996                     GID_WHEEL,
997                     0640,
998                     FS_VSS_DEV_NAME);
999
1000         if (error != 0) {
1001                 hv_vss_log_info("Fail to create '%s': %d\n", FS_VSS_DEV_NAME, error);
1002                 return (error);
1003         }
1004         sc->hv_vss_dev->si_drv1 = &sc->daemon_sc;
1005         sc->daemon_sc.sc = sc;
1006         /* create character device for application freeze/thaw */
1007         error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
1008                     &sc->hv_appvss_dev,
1009                     &hv_appvss_cdevsw,
1010                     0,
1011                     UID_ROOT,
1012                     GID_WHEEL,
1013                     0640,
1014                     APP_VSS_DEV_NAME);
1015
1016         if (error != 0) {
1017                 hv_vss_log_info("Fail to create '%s': %d\n", APP_VSS_DEV_NAME, error);
1018                 return (error);
1019         }
1020         sc->hv_appvss_dev->si_drv1 = &sc->app_sc;
1021         sc->app_sc.sc = sc;
1022
1023         return hv_util_attach(dev, hv_vss_callback);
1024 }
1025
1026 static int
1027 hv_vss_detach(device_t dev)
1028 {
1029         hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
1030         mtx_destroy(&sc->pending_mutex);
1031         if (sc->daemon_sc.proc_task != NULL) {
1032                 PROC_LOCK(sc->daemon_sc.proc_task);
1033                 kern_psignal(sc->daemon_sc.proc_task, SIGKILL);
1034                 PROC_UNLOCK(sc->daemon_sc.proc_task);
1035         }
1036         if (sc->app_sc.proc_task != NULL) {
1037                 PROC_LOCK(sc->app_sc.proc_task);
1038                 kern_psignal(sc->app_sc.proc_task, SIGKILL);
1039                 PROC_UNLOCK(sc->app_sc.proc_task);
1040         }
1041         hv_vss_destroy_send_receive_queue(dev);
1042         destroy_dev(sc->hv_vss_dev);
1043         destroy_dev(sc->hv_appvss_dev);
1044         return hv_util_detach(dev);
1045 }
1046
1047 static device_method_t vss_methods[] = {
1048         /* Device interface */
1049         DEVMETHOD(device_probe, hv_vss_probe),
1050         DEVMETHOD(device_attach, hv_vss_attach),
1051         DEVMETHOD(device_detach, hv_vss_detach),
1052         { 0, 0 }
1053 };
1054
/* newbus glue: "hvvss" driver instance backed by hv_vss_sc softc. */
static driver_t vss_driver = { "hvvss", vss_methods, sizeof(hv_vss_sc)};

static devclass_t vss_devclass;

/* Attach the driver to the vmbus bus; no event handler arguments. */
DRIVER_MODULE(hv_vss, vmbus, vss_driver, vss_devclass, NULL, NULL);
MODULE_VERSION(hv_vss, 1);
MODULE_DEPEND(hv_vss, vmbus, 1, 1, 1);