1 /******************************************************************************
4 * This is the kernel equivalent of the "xs" library. We don't need everything
5 * and we use xenbus_comms for communication.
7 * Copyright (C) 2005 Rusty Russell, IBM Corporation
9 * This file may be distributed separately from the Linux kernel, or
10 * incorporated into other software packages, subject to the following license:
12 * Permission is hereby granted, free of charge, to any person obtaining a copy
13 * of this source file (the "Software"), to deal in the Software without
14 * restriction, including without limitation the rights to use, copy, modify,
15 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16 * and to permit persons to whom the Software is furnished to do so, subject to
17 * the following conditions:
19 * The above copyright notice and this permission notice shall be included in
20 * all copies or substantial portions of the Software.
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/kernel.h>
39 #include <sys/mutex.h>
41 #include <sys/syslog.h>
42 #include <sys/malloc.h>
43 #include <sys/systm.h>
45 #include <sys/kthread.h>
46 #include <sys/unistd.h>
48 #include <machine/xen/xen-os.h>
49 #include <xen/hypervisor.h>
50 #include <machine/stdarg.h>
52 #include <xen/xenbus/xenbusvar.h>
53 #include <xen/xenbus/xenbus_comms.h>
54 #include <xen/interface/hvm/params.h>
/* Forward declaration; the definition appears later in this file. */
59 static int xs_process_msg(enum xsd_sockmsg_type *type);
/*
 * xenwatch_running is set to 1 right before the xenwatch kernel process is
 * created, and xs_talkv() tests it to decide whether queued watch events
 * have to be dispatched inline by the requesting thread.
 */
61 int xenwatch_running = 0;
/* NOTE(review): no writer of xenbus_running is visible nearby -- confirm its use. */
62 int xenbus_running = 0;
/*
 * One message received from the store.  The same structure is linked, via
 * `list`, on either the reply list or the watch-event list.  Code in this
 * file reaches the payload as u.reply.body for replies and as
 * u.watch.handle / u.watch.vec / u.watch.vec_size for watch events.
 */
65 struct xs_stored_msg {
66 TAILQ_ENTRY(xs_stored_msg) list;
/* Wire header filled in by xs_process_msg(); hdr.len is the body length in bytes. */
68 struct xsd_sockmsg hdr;
76 /* Queued watch events. */
/* Registered watch this event belongs to, as resolved by find_watch(). */
78 struct xenbus_watch *handle;
/* Entry count of the string vector that split() produced for this event. */
80 unsigned int vec_size;
86 /* A list of replies. Currently only one will ever be outstanding. */
87 TAILQ_HEAD(xs_handle_list, xs_stored_msg) reply_list;
/* Guards reply_list; xs_read_reply() sleeps on reply_waitq with this mutex held. */
88 struct mtx reply_lock;
91 /* One request at a time. */
92 struct sx request_mutex;
94 /* Protect transactions against save/restore. */
95 struct sx suspend_mutex;
/* The single, file-private instance of the store connection state. */
98 static struct xs_handle xs_state;
100 /* List of registered watches, and a lock to protect it. */
101 static LIST_HEAD(watch_list_head, xenbus_watch) watches;
102 static struct mtx watches_lock;
103 /* List of pending watch callback events, and a lock to protect it. */
104 static TAILQ_HEAD(event_list_head, xs_stored_msg) watch_events;
105 static struct mtx watch_events_lock;
108 * Details of the xenwatch callback kernel thread. The thread waits on the
109 * watch_events_waitq for work to do (queued on watch_events list). When it
110 * wakes up it acquires the xenwatch_mutex before reading the list and
/*
 * PID of the xenwatch kernel process.  unregister_xenbus_watch() compares it
 * with curproc->p_pid so the xenwatch process does not wait on itself.
 */
113 static pid_t xenwatch_pid;
/* Held exclusively by xenwatch_thread() while it dequeues and runs one watch callback. */
114 struct sx xenwatch_mutex;
/* Only the address is used: it is the sleep/wakeup channel for the watch_events queue. */
115 static int watch_events_waitq;
/* Number of entries in the xsd_errors table (array size / element size). */
117 #define xsd_error_count (sizeof(xsd_errors) / sizeof(xsd_errors[0]))
/*
 * Translate an error string into its numeric code.
 *
 * Performs a linear search of the xsd_errors table and returns the errnum
 * of the first entry whose errstring compares equal (strcmp) to
 * errorstring.  If the loop finishes without a match, a LOG_WARNING
 * message naming the unknown string is emitted.
 */
120 xs_get_error(const char *errorstring)
124 for (i = 0; i < xsd_error_count; i++) {
125 if (!strcmp(errorstring, xsd_errors[i].errstring))
126 return (xsd_errors[i].errnum);
128 log(LOG_WARNING, "XENBUS xen store gave: unknown error %s",
133 extern void kdb_backtrace(void);
/*
 * Dequeue the next reply from xs_state.reply_list.
 *
 * Sleeps on reply_waitq, with reply_lock held, until the list is
 * non-empty; then removes the head message, drops the lock and reports the
 * header type through *type.  The reply body is taken from
 * msg->u.reply.body.
 */
136 xs_read_reply(enum xsd_sockmsg_type *type, unsigned int *len, void **result)
138 struct xs_stored_msg *msg;
142 mtx_lock(&xs_state.reply_lock);
/*
 * The outer and inner loops test the same condition, so the outer loop
 * only re-checks once the inner one has exited.
 */
144 while (TAILQ_EMPTY(&xs_state.reply_list)) {
145 while (TAILQ_EMPTY(&xs_state.reply_list)) {
/*
 * Interruptible sleep (PCATCH) with a timeout of hz/10 ticks.  The
 * timeout status EWOULDBLOCK is tolerated and simply loops again; any
 * other non-zero status drops the lock before leaving.
 */
146 error = mtx_sleep(&xs_state.reply_waitq,
147 &xs_state.reply_lock,
148 PCATCH, "xswait", hz/10);
149 if (error && error != EWOULDBLOCK) {
150 mtx_unlock(&xs_state.reply_lock);
/* A reply is available: unlink it while still holding reply_lock. */
156 msg = TAILQ_FIRST(&xs_state.reply_list);
157 TAILQ_REMOVE(&xs_state.reply_list, msg, list);
159 mtx_unlock(&xs_state.reply_lock);
161 *type = msg->hdr.type;
164 body = msg->u.reply.body;
172 /* Emergency write. UNUSED*/
/*
 * Write a "print" message followed by `count` bytes of `str` to the store
 * ring while holding request_mutex exclusively.
 *
 * NOTE(review): xb_write() is called here with two arguments, whereas the
 * other callers in this file pass a third lock-object argument -- confirm
 * this function still builds / is compiled in.
 */
173 void xenbus_debug_write(const char *str, unsigned int count)
175 struct xsd_sockmsg msg = { 0 };
/* sizeof("print") already counts the terminating NUL of the literal. */
178 msg.len = sizeof("print") + count + 1;
180 sx_xlock(&xs_state.request_mutex);
181 xb_write(&msg, sizeof(msg));
182 xb_write("print", sizeof("print"));
183 xb_write(str, count);
185 sx_xunlock(&xs_state.request_mutex);
/*
 * Send a caller-built request and wait for the reply.
 *
 * The request is written with one xb_write() of sizeof(*msg) + msg->len
 * bytes starting at msg, so the payload is expected to follow the header
 * contiguously in memory.  The reply's type and length overwrite
 * msg->type and msg->len, and the reply body is returned through *result.
 */
191 xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
/* Keep a copy of the request header: *msg is overwritten by the reply. */
193 struct xsd_sockmsg req_msg = *msg;
/* A transaction start takes suspend_mutex shared before the request is sent. */
196 if (req_msg.type == XS_TRANSACTION_START)
197 sx_slock(&xs_state.suspend_mutex);
199 sx_xlock(&xs_state.request_mutex);
201 error = xb_write(msg, sizeof(*msg) + msg->len,
202 &xs_state.request_mutex.lock_object);
204 msg->type = XS_ERROR;
206 error = xs_read_reply(&msg->type, &msg->len, result);
209 sx_xunlock(&xs_state.request_mutex);
/*
 * Drop the shared suspend_mutex when the reply is of type
 * XS_TRANSACTION_END, or when a transaction start came back as XS_ERROR.
 */
211 if ((msg->type == XS_TRANSACTION_END) ||
212 ((req_msg.type == XS_TRANSACTION_START) &&
213 (msg->type == XS_ERROR)))
214 sx_sunlock(&xs_state.suspend_mutex);
220 * Send message to xs. The reply is returned in *result and should be
221 * freed with free(*result, M_DEVBUF). Return zero on success or an
222 * error code on failure.
/*
 * Core request/reply primitive: send a header plus num_vecs payload
 * segments and read one reply, all under the exclusive request_mutex.
 *
 *   t, type  - transaction and message type for the request
 *   iovec    - payload segments, written in order after the header
 *   num_vecs - number of entries in iovec
 *   len      - passed through to xs_read_reply()
 *   result   - reply body destination
 */
225 xs_talkv(struct xenbus_transaction t, enum xsd_sockmsg_type type,
226 const struct iovec *iovec, unsigned int num_vecs,
227 unsigned int *len, void **result)
229 struct xsd_sockmsg msg;
/* The header's len field is the sum of all segment lengths. */
238 for (i = 0; i < num_vecs; i++)
239 msg.len += iovec[i].iov_len;
241 sx_xlock(&xs_state.request_mutex);
243 error = xb_write(&msg, sizeof(msg),
244 &xs_state.request_mutex.lock_object);
246 sx_xunlock(&xs_state.request_mutex);
247 printf("xs_talkv failed %d\n", error);
251 for (i = 0; i < num_vecs; i++) {
252 error = xb_write(iovec[i].iov_base, iovec[i].iov_len,
253 &xs_state.request_mutex.lock_object);
255 sx_xunlock(&xs_state.request_mutex);
256 printf("xs_talkv failed %d\n", error);
261 error = xs_read_reply(&msg.type, len, &ret);
263 sx_xunlock(&xs_state.request_mutex);
/* An XS_ERROR reply carries an error string; map it to a numeric code. */
268 if (msg.type == XS_ERROR) {
269 error = xs_get_error(ret);
/*
 * While the xenwatch process is not running (and xenwatch_inline is 0),
 * drain the pending watch events here and run their callbacks directly.
 * NOTE(review): unlike xenwatch_thread(), this path calls
 * handle->callback without testing it for NULL first.
 */
275 if ((xenwatch_running == 0) && (xenwatch_inline == 0)) {
277 while (!TAILQ_EMPTY(&watch_events)
278 && xenwatch_running == 0) {
280 struct xs_stored_msg *wmsg = TAILQ_FIRST(&watch_events);
281 TAILQ_REMOVE(&watch_events, wmsg, list);
283 wmsg->u.watch.handle->callback(
284 wmsg->u.watch.handle,
285 (const char **)wmsg->u.watch.vec,
286 wmsg->u.watch.vec_size);
287 free(wmsg->u.watch.vec, M_DEVBUF);
288 free(wmsg, M_DEVBUF);
/* A non-error reply must echo the request's message type. */
293 KASSERT(msg.type == type, ("bad xenstore message type"));
303 /* Simplified version of xs_talkv: single message. */
/*
 * Convenience wrapper around xs_talkv() for requests whose payload is a
 * single C string.  The string is sent including its terminating NUL
 * (iov_len = strlen + 1).
 */
305 xs_single(struct xenbus_transaction t, enum xsd_sockmsg_type type,
306 const char *string, unsigned int *len, void **result)
/* The uintptr_t round-trip drops const so the pointer fits iov_base. */
310 iovec.iov_base = (void *)(uintptr_t) string;
311 iovec.iov_len = strlen(string) + 1;
313 return (xs_talkv(t, type, &iovec, 1, len, result));
/*
 * Walk a buffer of `len` bytes holding back-to-back NUL-terminated strings.
 * Each iteration advances p past one string and its NUL (strlen(p) + 1)
 * until p reaches strings + len; num starts at 0.
 */
317 count_strings(const char *strings, unsigned int len)
322 for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
328 /* Return the path to dir with /name appended. Buffer must be freed with free(buffer, M_DEVBUF). */
/*
 * Build a store path from a directory and a node name in a freshly
 * allocated buffer.  Callers in this file release the result with
 * free(path, M_DEVBUF).
 */
330 join(const char *dir, const char *name)
/* Room for dir, a "/" separator, name and the terminating NUL. */
334 buffer = malloc(strlen(dir) + strlen("/") + strlen(name) + 1,
/* strcmp() is non-zero when name is not the empty string; only then is it appended. */
338 if (strcmp(name, "")) {
340 strcat(buffer, name);
/*
 * Convert a buffer of back-to-back NUL-terminated strings into a pointer
 * vector.
 *
 * A single allocation holds the pointer array followed by a copy of the
 * string bytes, so callers free everything with one free(ret, M_DEVBUF).
 * The input buffer `strings` is consumed: it is freed here after being
 * copied.  *num is written twice -- first as the slot count used for
 * sizing, then reset to 0 and used as the index by the fill loop.
 */
347 split(char *strings, unsigned int len, unsigned int *num)
351 /* Count the strings. */
/* One slot more than the string count; the extra slot is filled at the end. */
352 *num = count_strings(strings, len) + 1;
354 /* Transfer to one big alloc for easy freeing. */
355 ret = malloc(*num * sizeof(char *) + len, M_DEVBUF, M_WAITOK);
356 memcpy(&ret[*num], strings, len);
357 free(strings, M_DEVBUF);
/* From here on `strings` refers to the copy placed right after the pointer array. */
359 strings = (char *)&ret[*num];
360 for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
/* The final slot points just past the copied string data. */
363 ret[*num] = strings + len;
369 * Return the contents of a directory in *result which should be freed
370 * with free(*result, M_DEVBUF).
/*
 * List the entries of dir/node.  Issues XS_DIRECTORY on the joined path
 * and turns the reply buffer into a string vector with split(), which
 * also stores the entry count in *num.
 */
373 xenbus_directory(struct xenbus_transaction t, const char *dir,
374 const char *node, unsigned int *num, char ***result)
376 char *strings, *path;
377 unsigned int len = 0;
380 path = join(dir, node);
381 error = xs_single(t, XS_DIRECTORY, path, &len, (void **) &strings);
/* The joined path is only needed for the request itself. */
382 free(path, M_DEVBUF);
/* split() takes ownership of `strings` and frees it. */
386 *result = split(strings, len, num);
391 * Check if a path exists. Return 1 if it does.
/* Existence is probed by attempting a directory listing of dir/node. */
394 xenbus_exists(struct xenbus_transaction t, const char *dir, const char *node)
399 error = xenbus_directory(t, dir, node, &dir_n, &d);
407 * Get the value of a single file. Returns the contents in *result
408 * which should be freed with free(*result, M_DEVBUF) after use.
409 * The length of the value in bytes is returned in *len.
/*
 * Read the value stored at dir/node with an XS_READ request on the joined
 * path.  `len` is handed straight to xs_single(); the reply body is
 * received in the local `ret`.
 */
412 xenbus_read(struct xenbus_transaction t, const char *dir, const char *node,
413 unsigned int *len, void **result)
419 path = join(dir, node);
420 error = xs_single(t, XS_READ, path, len, &ret);
421 free(path, M_DEVBUF);
429 * Write the value of a single file. Returns error on failure.
/*
 * Store `string` at dir/node with a two-segment XS_WRITE request: the
 * joined path, then the value.
 */
432 xenbus_write(struct xenbus_transaction t, const char *dir, const char *node,
436 struct iovec iovec[2];
439 path = join(dir, node);
/* Segment 0: the path, sent including its terminating NUL. */
441 iovec[0].iov_base = (void *)(uintptr_t) path;
442 iovec[0].iov_len = strlen(path) + 1;
/* Segment 1: the value, sent without a terminating NUL. */
443 iovec[1].iov_base = (void *)(uintptr_t) string;
444 iovec[1].iov_len = strlen(string);
/* No reply length or body is requested (both NULL). */
446 error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
447 free(path, M_DEVBUF);
453 * Create a new directory.
/*
 * Issue XS_MKDIR for the joined path dir/node.  The reply body is not
 * requested; only the status from xs_single() is kept in `ret`.
 */
456 xenbus_mkdir(struct xenbus_transaction t, const char *dir, const char *node)
461 path = join(dir, node);
462 ret = xs_single(t, XS_MKDIR, path, NULL, NULL);
463 free(path, M_DEVBUF);
469 * Destroy a file or directory (directories must be empty).
/*
 * Issue XS_RM for the joined path dir/node.  The reply body is not
 * requested; only the status from xs_single() is kept in `ret`.
 */
472 xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
477 path = join(dir, node);
478 ret = xs_single(t, XS_RM, path, NULL, NULL);
479 free(path, M_DEVBUF);
485 * Start a transaction: changes by others will not be seen during this
486 * transaction, and changes will not be visible to others until end.
/*
 * Begin a transaction and store its identifier in t->id.
 *
 * suspend_mutex is taken shared before the XS_TRANSACTION_START request is
 * sent; xenbus_transaction_end() contains the matching sx_sunlock().
 */
489 xenbus_transaction_start(struct xenbus_transaction *t)
494 sx_slock(&xs_state.suspend_mutex);
/* The request payload is the empty string and is sent outside any transaction (XBT_NIL). */
495 error = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL,
498 sx_sunlock(&xs_state.suspend_mutex);
/* The reply is the id as text; base 0 lets strtoul() infer the radix. */
502 t->id = strtoul(id_str, NULL, 0);
503 free(id_str, M_DEVBUF);
509 * End a transaction. If abort is true, transaction is discarded
510 * instead of committed.
/*
 * Finish transaction t.  The XS_TRANSACTION_END payload is the
 * one-character string "F" or "T", selected according to `abort`.
 * Afterwards the shared hold on suspend_mutex is released; the matching
 * sx_slock() is in xenbus_transaction_start().
 */
512 int xenbus_transaction_end(struct xenbus_transaction t, int abort)
518 strcpy(abortstr, "F");
520 strcpy(abortstr, "T");
522 error = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL);
524 sx_sunlock(&xs_state.suspend_mutex);
529 /* Single read and scanf: returns zero or errno. */
/*
 * Read dir/node as a string and parse it with a scanf-style format.
 * The value is fetched with xenbus_read() (length not requested) and
 * scanned with vsscanf() using the caller's variadic arguments; the
 * conversion count is kept in `ns`.
 */
531 xenbus_scanf(struct xenbus_transaction t,
532 const char *dir, const char *node, int *scancountp, const char *fmt, ...)
538 error = xenbus_read(t, dir, node, NULL, (void **) &val);
543 ns = vsscanf(val, fmt, ap);
546 /* Distinctive errno. */
554 /* Single printf and write: returns zero or errno. */
/*
 * Format a value printf-style into a temporary heap buffer and store it at
 * dir/node through xenbus_write().  The buffer is freed before returning.
 */
556 xenbus_printf(struct xenbus_transaction t,
557 const char *dir, const char *node, const char *fmt, ...)
/* Upper bound, in bytes, on the formatted value including its NUL. */
561 #define PRINTF_BUFFER_SIZE 4096
564 printf_buffer = malloc(PRINTF_BUFFER_SIZE, M_DEVBUF, M_WAITOK);
567 ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
/*
 * vsnprintf() never writes past the buffer; the assertion flags output
 * that would not have fit.
 * NOTE(review): if KASSERT is compiled out, a truncated value is written
 * silently -- confirm that is acceptable.
 */
570 KASSERT(ret <= PRINTF_BUFFER_SIZE-1, ("xenbus_printf: message too large"));
571 error = xenbus_write(t, dir, node, printf_buffer);
573 free(printf_buffer, M_DEVBUF);
578 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
/*
 * Read several nodes below `dir` in one call.  The variadic arguments are
 * consumed as (name, fmt, result) triples until a NULL name is reached or
 * an error occurs.
 */
580 xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
/* NOTE(review): the purpose of this fixed 10000-iteration loop is not evident here -- confirm. */
586 for (i = 0; i < 10000; i++)
591 while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
592 const char *fmt = va_arg(ap, char *);
593 void *result = va_arg(ap, void *);
596 error = xenbus_read(t, dir, name, NULL, (void **) &p);
/* Parse the value with the caller's format; zero conversions is tested for explicitly. */
601 if (sscanf(p, fmt, result) == 0)
/* Hand the raw string itself to the caller through result. */
605 *(char **)result = p;
/*
 * Send an XS_WATCH request for `path`, tagged with `token`.  Both strings
 * are sent including their terminating NULs, outside any transaction, and
 * no reply body is requested.
 */
613 xs_watch(const char *path, const char *token)
617 iov[0].iov_base = (void *)(uintptr_t) path;
618 iov[0].iov_len = strlen(path) + 1;
619 iov[1].iov_base = (void *)(uintptr_t) token;
620 iov[1].iov_len = strlen(token) + 1;
622 return (xs_talkv(XBT_NIL, XS_WATCH, iov, 2, NULL, NULL));
/*
 * Send an XS_UNWATCH request for the (path, token) pair.  The request has
 * the same layout as in xs_watch(): two NUL-terminated strings, no
 * transaction, no reply body requested.
 */
626 xs_unwatch(const char *path, const char *token)
630 iov[0].iov_base = (void *)(uintptr_t) path;
631 iov[0].iov_len = strlen(path) + 1;
632 iov[1].iov_base = (void *)(uintptr_t) token;
633 iov[1].iov_len = strlen(token) + 1;
635 return (xs_talkv(XBT_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
/*
 * Resolve a watch token back to its registered watch.
 *
 * Tokens are the watch structure's address printed in hexadecimal (see
 * the "%lX" formatting in register_xenbus_watch()), so the token is
 * parsed back into a pointer and looked up in the `watches` list.  The
 * callers in this file hold watches_lock around the call.
 */
638 static struct xenbus_watch *
639 find_watch(const char *token)
641 struct xenbus_watch *i, *cmp;
/* Base-16 parse mirrors the hexadecimal encoding of the token. */
643 cmp = (void *)strtoul(token, NULL, 16);
645 LIST_FOREACH(i, &watches, list)
652 /* Register callback to watch this node. */
/*
 * Add `watch` to the local watch list and ask the store to start
 * reporting changes on watch->node.  The whole operation runs with
 * suspend_mutex held shared.
 */
654 register_xenbus_watch(struct xenbus_watch *watch)
656 /* Pointer in ascii is the token. */
/* Two hex digits per pointer byte, plus the terminating NUL. */
657 char token[sizeof(watch) * 2 + 1];
660 sprintf(token, "%lX", (long)watch);
662 sx_slock(&xs_state.suspend_mutex);
/* Link the watch locally before the request is sent to the store. */
664 mtx_lock(&watches_lock);
665 KASSERT(find_watch(token) == NULL, ("watch already registered"));
666 LIST_INSERT_HEAD(&watches, watch, list);
667 mtx_unlock(&watches_lock);
669 error = xs_watch(watch->node, token);
671 /* Ignore errors due to multiple registration. */
/*
 * NOTE(review): the comment above says EEXIST is ignored, yet this branch
 * unlinks the watch exactly when error == EEXIST, and nothing visible
 * unlinks it for other errors.  Confirm whether the condition is inverted.
 */
672 if (error == EEXIST) {
673 mtx_lock(&watches_lock);
674 LIST_REMOVE(watch, list);
675 mtx_unlock(&watches_lock);
678 sx_sunlock(&xs_state.suspend_mutex);
/*
 * Remove `watch` from the local watch list, ask the store to stop
 * reporting on it, discard events already queued for it, and wait for any
 * callback that is currently executing.
 */
684 unregister_xenbus_watch(struct xenbus_watch *watch)
686 struct xs_stored_msg *msg, *tmp;
/* Same token encoding as in register_xenbus_watch(): the pointer in hex. */
687 char token[sizeof(watch) * 2 + 1];
690 sprintf(token, "%lX", (long)watch);
692 sx_slock(&xs_state.suspend_mutex);
694 mtx_lock(&watches_lock);
695 KASSERT(find_watch(token), ("watch not registered"));
696 LIST_REMOVE(watch, list);
697 mtx_unlock(&watches_lock);
699 error = xs_unwatch(watch->node, token);
/* A failed unwatch is only logged; the local teardown continues below. */
701 log(LOG_WARNING, "XENBUS Failed to release watch %s: %i\n",
704 sx_sunlock(&xs_state.suspend_mutex);
706 /* Cancel pending watch events. */
707 mtx_lock(&watch_events_lock);
/* The _SAFE iterator is required because entries are unlinked during the walk. */
708 TAILQ_FOREACH_SAFE(msg, &watch_events, list, tmp) {
709 if (msg->u.watch.handle != watch)
711 TAILQ_REMOVE(&watch_events, msg, list);
712 free(msg->u.watch.vec, M_DEVBUF);
715 mtx_unlock(&watch_events_lock);
717 /* Flush any currently-executing callback, unless we are it. :-) */
/*
 * xenwatch_thread() holds xenwatch_mutex while a callback runs, so taking
 * and releasing it here waits for that callback to finish.
 */
718 if (curproc->p_pid != xenwatch_pid) {
719 sx_xlock(&xenwatch_mutex);
720 sx_xunlock(&xenwatch_mutex);
728 sx_xlock(&xs_state.suspend_mutex);
729 sx_xlock(&xs_state.request_mutex);
735 struct xenbus_watch *watch;
736 char token[sizeof(watch) * 2 + 1];
738 sx_xunlock(&xs_state.request_mutex);
740 /* No need for watches_lock: the suspend_mutex is sufficient. */
741 LIST_FOREACH(watch, &watches, list) {
742 sprintf(token, "%lX", (long)watch);
743 xs_watch(watch->node, token);
746 sx_xunlock(&xs_state.suspend_mutex);
/*
 * Body of the xenwatch kernel process: wait for queued watch events and
 * run their callbacks one at a time with xenwatch_mutex held exclusively.
 */
750 xenwatch_thread(void *unused)
752 struct xs_stored_msg *msg;
/* Sleep, waking at least every hz/10 ticks, until an event is queued. */
756 mtx_lock(&watch_events_lock);
757 while (TAILQ_EMPTY(&watch_events))
758 mtx_sleep(&watch_events_waitq,
760 PWAIT | PCATCH, "waitev", hz/10);
/*
 * The queue lock is dropped before xenwatch_mutex is taken and then
 * re-acquired, so the head of the queue is fetched afresh below.
 */
762 mtx_unlock(&watch_events_lock);
763 sx_xlock(&xenwatch_mutex);
765 mtx_lock(&watch_events_lock);
766 msg = TAILQ_FIRST(&watch_events);
768 TAILQ_REMOVE(&watch_events, msg, list);
769 mtx_unlock(&watch_events_lock);
773 * XXX There are messages coming in with a NULL callback.
774 * XXX This deserves further investigation; the workaround
775 * XXX here simply prevents the kernel from panic'ing
778 if (msg->u.watch.handle->callback != NULL)
779 msg->u.watch.handle->callback(
781 (const char **)msg->u.watch.vec,
782 msg->u.watch.vec_size);
783 free(msg->u.watch.vec, M_DEVBUF);
787 sx_xunlock(&xenwatch_mutex);
/*
 * Read one complete message (header, then body) from the store ring and
 * queue it.  Watch events go onto watch_events for the xenwatch thread;
 * other messages go onto xs_state.reply_list for xs_read_reply().  The
 * message type is also reported through *type.
 */
792 xs_process_msg(enum xsd_sockmsg_type *type)
794 struct xs_stored_msg *msg;
798 msg = malloc(sizeof(*msg), M_DEVBUF, M_WAITOK);
/* reply_lock is held only around each xb_read() call, not across the allocations. */
799 mtx_lock(&xs_state.reply_lock);
800 error = xb_read(&msg->hdr, sizeof(msg->hdr),
801 &xs_state.reply_lock.lock_object);
802 mtx_unlock(&xs_state.reply_lock);
/*
 * One extra byte is allocated so the body can be NUL-terminated.
 * NOTE(review): hdr.len comes from the ring; no bound check on it is
 * visible before this allocation -- confirm.
 */
808 body = malloc(msg->hdr.len + 1, M_DEVBUF, M_WAITOK);
809 mtx_lock(&xs_state.reply_lock);
810 error = xb_read(body, msg->hdr.len,
811 &xs_state.reply_lock.lock_object);
812 mtx_unlock(&xs_state.reply_lock);
814 free(body, M_DEVBUF);
818 body[msg->hdr.len] = '\0';
820 *type = msg->hdr.type;
821 if (msg->hdr.type == XS_WATCH_EVENT) {
/* split() consumes `body` and returns the event's string vector. */
822 msg->u.watch.vec = split(body, msg->hdr.len,
823 &msg->u.watch.vec_size);
/*
 * watches_lock stays held across the lookup and the enqueue, and
 * watch_events_lock is taken inside it.
 */
825 mtx_lock(&watches_lock);
826 msg->u.watch.handle = find_watch(
827 msg->u.watch.vec[XS_WATCH_TOKEN]);
828 if (msg->u.watch.handle != NULL) {
829 mtx_lock(&watch_events_lock);
830 TAILQ_INSERT_TAIL(&watch_events, msg, list);
831 wakeup(&watch_events_waitq);
832 mtx_unlock(&watch_events_lock);
/* No registered watch matches the token: the event's vector is released. */
834 free(msg->u.watch.vec, M_DEVBUF);
837 mtx_unlock(&watches_lock);
/* Not a watch event: queue it as a reply and wake the waiting requester. */
839 msg->u.reply.body = body;
840 mtx_lock(&xs_state.reply_lock);
841 TAILQ_INSERT_TAIL(&xs_state.reply_list, msg, list);
842 wakeup(&xs_state.reply_waitq);
843 mtx_unlock(&xs_state.reply_lock);
/*
 * Body of the xenbus kernel process: calls xs_process_msg() to pull
 * messages off the store ring and prints a diagnostic when a read fails.
 */
850 xenbus_thread(void *unused)
853 enum xsd_sockmsg_type type;
857 error = xs_process_msg(&type);
859 printf("XENBUS error %d while reading message\n",
/*
 * Frame number of the store page.  Initialisation code in this file sets
 * it from HVM_PARAM_STORE_PFN and maps that page with pmap_mapdev().
 */
865 static unsigned long xen_store_mfn;
/*
 * Query one HVM parameter of the current domain (DOMID_SELF) with the
 * HVMOP_get_param hypercall.  A failure is reported with printf().
 */
868 static inline unsigned long
869 hvm_get_parameter(int index)
871 struct xen_hvm_param xhv;
874 xhv.domid = DOMID_SELF;
876 error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
878 printf("hvm_get_parameter: failed to get %d, error %d\n",
894 xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
895 xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
896 xen_store = pmap_mapdev(xen_store_mfn * PAGE_SIZE, PAGE_SIZE);
898 xen_store_evtchn = xen_start_info->store_evtchn;
901 TAILQ_INIT(&xs_state.reply_list);
902 TAILQ_INIT(&watch_events);
903 sx_init(&xenwatch_mutex, "xenwatch");
906 mtx_init(&xs_state.reply_lock, "state reply", NULL, MTX_DEF);
907 sx_init(&xs_state.request_mutex, "xenstore request");
908 sx_init(&xs_state.suspend_mutex, "xenstore suspend");
912 mtx_init(&xs_state.suspend_mutex, "xenstore suspend", NULL, MTX_DEF);
913 sema_init(&xs_state.request_mutex, 1, "xenstore request");
914 sema_init(&xenwatch_mutex, 1, "xenwatch");
916 mtx_init(&watches_lock, "watches", NULL, MTX_DEF);
917 mtx_init(&watch_events_lock, "watch events", NULL, MTX_DEF);
919 /* Initialize the shared memory rings to talk to xenstored */
920 error = xb_init_comms();
924 xenwatch_running = 1;
925 error = kproc_create(xenwatch_thread, NULL, &p,
926 RFHIGHPID, 0, "xenwatch");
929 xenwatch_pid = p->p_pid;
931 error = kproc_create(xenbus_thread, NULL, NULL,
932 RFHIGHPID, 0, "xenbus");