1 /******************************************************************************
4 * Low-level kernel interface to the XenStore.
6 * Copyright (C) 2005 Rusty Russell, IBM Corporation
7 * Copyright (C) 2009,2010 Spectra Logic Corporation
9 * This file may be distributed separately from the Linux kernel, or
10 * incorporated into other software packages, subject to the following license:
12 * Permission is hereby granted, free of charge, to any person obtaining a copy
13 * of this source file (the "Software"), to deal in the Software without
14 * restriction, including without limitation the rights to use, copy, modify,
15 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16 * and to permit persons to whom the Software is furnished to do so, subject to
17 * the following conditions:
19 * The above copyright notice and this permission notice shall be included in
20 * all copies or substantial portions of the Software.
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/kernel.h>
39 #include <sys/module.h>
40 #include <sys/mutex.h>
42 #include <sys/syslog.h>
43 #include <sys/malloc.h>
44 #include <sys/systm.h>
46 #include <sys/kthread.h>
48 #include <sys/sysctl.h>
50 #include <sys/unistd.h>
51 #include <sys/queue.h>
52 #include <sys/taskqueue.h>
54 #include <machine/stdarg.h>
56 #include <xen/xen-os.h>
57 #include <xen/hypervisor.h>
58 #include <xen/xen_intr.h>
60 #include <xen/interface/hvm/params.h>
63 #include <xen/xenstore/xenstorevar.h>
64 #include <xen/xenstore/xenstore_internal.h>
71 * \brief XenStore interface
73 * The XenStore interface is a simple storage system that is a means of
74 * communicating state and configuration data between the Xen Domain 0
75 * and the various guest domains. All configuration data other than
76 * a small amount of essential information required during the early
77 * boot process of launching a Xen aware guest, is managed using the
80 * The XenStore is ASCII string based, and has a structure and semantics
81 * similar to a filesystem. There are files and directories, the directories
82 * able to contain files or other directories. The depth of the hierarchy
83 * is only limited by the XenStore's maximum path length.
85 * The communication channel between the XenStore service and other
86 * domains is via two, guest specific, ring buffers in a shared memory
87 * area. One ring buffer is used for communicating in each direction.
88 * The grant table references for this shared memory are given to the
89 * guest either via the xen_start_info structure for a fully para-
90 * virtualized guest, or via HVM hypercalls for a hardware virtualized
93 * The XenStore communication relies on an event channel and thus
94 * interrupts. For this reason, the attachment of the XenStore
95 * relies on an interrupt driven configuration hook to hold off
96 * boot processing until communication with the XenStore service
99 * Several Xen services depend on the XenStore, most notably the
100 * XenBus used to discover and manage Xen devices. These services
101 * are implemented as NewBus child attachments to a bus exported
102 * by this XenStore driver.
105 static struct xs_watch *find_watch(const char *token);
107 MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results");
110 * Pointer to shared memory communication structures allowing us
111 * to communicate with the XenStore service.
113 * When operating in full PV mode, this pointer is set early in kernel
114 * startup from within xen_machdep.c. In HVM mode, we use hypercalls
115 * to get the guest frame number for the shared page and then map it
116 * into kva. See xs_init() for details.
118 static struct xenstore_domain_interface *xen_store;
120 /*-------------------------- Private Data Structures ------------------------*/
123 * Structure capturing messages received from the XenStore service.
125 struct xs_stored_msg {
126 TAILQ_ENTRY(xs_stored_msg) list;
128 struct xsd_sockmsg hdr;
131 /* Queued replies. */
136 /* Queued watch events. */
138 struct xs_watch *handle;
144 TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg);
147 * Container for all XenStore related state.
150 /** Newbus device for the XenStore. */
154 * Lock serializing access to ring producer/consumer
155 * indexes. Use of this lock guarantees that wakeups
156 * of blocking readers/writers are not missed due to
157 * races with the XenStore service.
159 struct mtx ring_lock;
162 * Mutex used to ensure exclusive access to the outgoing
163 * communication ring. We use a lock type that can be
164 * held while sleeping so that xs_write_store() can block waiting
165 * for space in the ring to free up, without allowing another
166 * writer to come in and corrupt a partial message write.
168 struct sx request_mutex;
171 * A list of replies to our requests.
173 * The reply list is filled by xs_rcv_thread(). It
174 * is consumed by the context that issued the request
175 * to which a reply is made. The requester blocks in
178 * \note Only one requesting context can be active at a time.
179 * This is guaranteed by the request_mutex and ensures
180 * that the requester sees replies matching the order
183 struct xs_stored_msg_list reply_list;
185 /** Lock protecting the reply list. */
186 struct mtx reply_lock;
189 * List of registered watches.
191 struct xs_watch_list registered_watches;
193 /** Lock protecting the registered watches list. */
194 struct mtx registered_watches_lock;
197 * List of pending watch callback events.
199 struct xs_stored_msg_list watch_events;
201 /** Lock protecting the watch callback list. */
202 struct mtx watch_events_lock;
205 * The process ID of the xenwatch thread.
210 * Sleepable mutex used to gate the execution of XenStore
211 * watch event callbacks.
213 * xenwatch_thread holds an exclusive lock on this mutex
214 * while delivering event callbacks, and xenstore_unregister_watch()
215 * uses an exclusive lock of this mutex to guarantee that no
216 * callbacks of the just unregistered watch are pending
217 * before returning to its caller.
219 struct sx xenwatch_mutex;
222 * The HVM guest pseudo-physical frame number. This is Xen's mapping
223 * of the true machine frame number into our "physical address space".
228 * The event channel for communicating with the
233 /** Handle for XenStore interrupts. */
234 xen_intr_handle_t xen_intr_handle;
237 * Interrupt driven config hook allowing us to defer
238 * attaching children until interrupts (and thus communication
239 * with the XenStore service) are available.
241 struct intr_config_hook xs_attachcb;
244 * Xenstore is a user-space process that usually runs in Dom0,
245 * so if this domain is booting as Dom0, xenstore won't be accessible,
246 * and we have to defer the initialization of xenstore related
247 * devices to later (when xenstore is started).
252 * Task to run when xenstore is initialized (Dom0 only), will
253 * take care of attaching xenstore related devices.
255 struct task xs_late_init;
258 /*-------------------------------- Global Data ------------------------------*/
259 static struct xs_softc xs;
261 /*------------------------- Private Utility Functions -----------------------*/
264 * Count and optionally record pointers to a number of NUL terminated
265 * strings in a buffer.
267 * \param strings A pointer to a contiguous buffer of NUL terminated strings.
268 * \param dest An array to store pointers to each string found in strings.
269 * \param len The length of the buffer pointed to by strings.
271 * \return A count of the number of strings found.
274 extract_strings(const char *strings, const char **dest, u_int len)
279 for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) {
289 * Convert a contiguous buffer containing a series of NUL terminated
290 * strings into an array of pointers to strings.
292 * The returned pointer references the array of string pointers which
293 * is followed by the storage for the string data. It is the client's
294 * responsibility to free this storage.
296 * The storage addressed by strings is free'd prior to split returning.
298 * \param strings A pointer to a contiguous buffer of NUL terminated strings.
299 * \param len The length of the buffer pointed to by strings.
300 * \param num The number of strings found and returned in the strings
303 * \return An array of pointers to the strings found in the input buffer.
306 split(char *strings, u_int len, u_int *num)
310 /* Protect against unterminated buffers. */
312 strings[len - 1] = '\0';
314 /* Count the strings. */
315 *num = extract_strings(strings, /*dest*/NULL, len);
317 /* Transfer to one big alloc for easy freeing by the caller. */
318 ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK);
319 memcpy(&ret[*num], strings, len);
320 free(strings, M_XENSTORE);
322 /* Extract pointers to newly allocated array. */
323 strings = (char *)&ret[*num];
324 (void)extract_strings(strings, /*dest*/ret, len);
329 /*------------------------- Public Utility Functions -------------------------*/
330 /*------- API comments for these methods can be found in xenstorevar.h -------*/
332 xs_join(const char *dir, const char *name)
336 sb = sbuf_new_auto();
338 if (name[0] != '\0') {
347 /*-------------------- Low Level Communication Management --------------------*/
349 * Interrupt handler for the XenStore event channel.
351 * XenStore reads and writes block on "xen_store" for buffer
352 * space. Wakeup any blocking operations when the XenStore
353 * service has modified the queues.
356 xs_intr(void * arg __unused /*__attribute__((unused))*/)
359 /* If xenstore has not been initialized, initialize it now */
360 if (!xs.initialized) {
361 xs.initialized = true;
363 * Since this task is probing and attaching devices we
364 * have to hold the Giant lock.
366 taskqueue_enqueue(taskqueue_swi_giant, &xs.xs_late_init);
370 * Hold ring lock across wakeup so that clients
371 * cannot miss a wakeup.
373 mtx_lock(&xs.ring_lock);
375 mtx_unlock(&xs.ring_lock);
379 * Verify that the indexes for a ring are valid.
381 * The difference between the producer and consumer cannot
382 * exceed the size of the ring.
384 * \param cons The consumer index for the ring to test.
385 * \param prod The producer index for the ring to test.
387 * \retval 1 If indexes are in range.
388 * \retval 0 If the indexes are out of range.
391 xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
394 return ((prod - cons) <= XENSTORE_RING_SIZE);
398 * Return a pointer to, and the length of, the contiguous
399 * free region available for output in a ring buffer.
401 * \param cons The consumer index for the ring.
402 * \param prod The producer index for the ring.
403 * \param buf The base address of the ring's storage.
404 * \param len The amount of contiguous storage available.
406 * \return A pointer to the start location of the free region.
409 xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
410 char *buf, uint32_t *len)
413 *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
414 if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
415 *len = XENSTORE_RING_SIZE - (prod - cons);
416 return (buf + MASK_XENSTORE_IDX(prod));
420 * Return a pointer to, and the length of, the contiguous
421 * data available to read from a ring buffer.
423 * \param cons The consumer index for the ring.
424 * \param prod The producer index for the ring.
425 * \param buf The base address of the ring's storage.
426 * \param len The amount of contiguous data available to read.
428 * \return A pointer to the start location of the available data.
431 xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
432 const char *buf, uint32_t *len)
435 *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
436 if ((prod - cons) < *len)
438 return (buf + MASK_XENSTORE_IDX(cons));
442 * Transmit data to the XenStore service.
444 * \param tdata A pointer to the contiguous data to send.
445 * \param len The amount of data to send.
447 * \return On success 0, otherwise an errno value indicating the
450 * \invariant Called from thread context.
451 * \invariant The buffer pointed to by tdata is at least len bytes
453 * \invariant xs.request_mutex exclusively locked.
456 xs_write_store(const void *tdata, unsigned len)
458 XENSTORE_RING_IDX cons, prod;
459 const char *data = (const char *)tdata;
462 sx_assert(&xs.request_mutex, SX_XLOCKED);
467 /* Hold lock so we can't miss wakeups should we block. */
468 mtx_lock(&xs.ring_lock);
469 cons = xen_store->req_cons;
470 prod = xen_store->req_prod;
471 if ((prod - cons) == XENSTORE_RING_SIZE) {
473 * Output ring is full. Wait for a ring event.
475 * Note that the events from both queues
476 * are combined, so being woken does not
477 * guarantee that data exist in the read
480 * To simplify error recovery and the retry,
481 * we specify PDROP so our lock is *not* held
482 * when msleep returns.
484 error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
485 "xbwrite", /*timeout*/0);
486 if (error && error != EWOULDBLOCK)
492 mtx_unlock(&xs.ring_lock);
494 /* Verify queue sanity. */
495 if (!xs_check_indexes(cons, prod)) {
496 xen_store->req_cons = xen_store->req_prod = 0;
500 dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail);
504 memcpy(dst, data, avail);
509 * The store to the producer index, which indicates
510 * to the other side that new data has arrived, must
511 * be visible only after our copy of the data into the
512 * ring has completed.
515 xen_store->req_prod += avail;
518 * xen_intr_signal() implies mb(). The other side will see
519 * the change to req_prod at the time of the interrupt.
521 xen_intr_signal(xs.xen_intr_handle);
528 * Receive data from the XenStore service.
530 * \param tdata A pointer to the contiguous buffer to receive the data.
531 * \param len The amount of data to receive.
533 * \return On success 0, otherwise an errno value indicating the
536 * \invariant Called from thread context.
537 * \invariant The buffer pointed to by tdata is at least len bytes
540 * \note xs_read_store does not perform any internal locking to guarantee
541 * serial access to the incoming ring buffer. However, there
542 * is only one context processing reads: xs_rcv_thread().
545 xs_read_store(void *tdata, unsigned len)
547 XENSTORE_RING_IDX cons, prod;
548 char *data = (char *)tdata;
555 /* Hold lock so we can't miss wakeups should we block. */
556 mtx_lock(&xs.ring_lock);
557 cons = xen_store->rsp_cons;
558 prod = xen_store->rsp_prod;
561 * Nothing to read. Wait for a ring event.
563 * Note that the events from both queues
564 * are combined, so being woken does not
565 * guarantee that data exist in the read
568 * To simplify error recovery and the retry,
569 * we specify PDROP so our lock is *not* held
570 * when msleep returns.
572 error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
573 "xbread", /*timeout*/0);
574 if (error && error != EWOULDBLOCK)
578 mtx_unlock(&xs.ring_lock);
580 /* Verify queue sanity. */
581 if (!xs_check_indexes(cons, prod)) {
582 xen_store->rsp_cons = xen_store->rsp_prod = 0;
586 src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail);
591 * Ensure the data we read is related to the indexes
596 memcpy(data, src, avail);
601 * Ensure that the producer of this ring does not see
602 * the ring space as free until after we have copied it
606 xen_store->rsp_cons += avail;
609 * xen_intr_signal() implies mb(). The producer will see
610 * the updated consumer index when the event is delivered.
612 xen_intr_signal(xs.xen_intr_handle);
618 /*----------------------- Received Message Processing ------------------------*/
620 * Block reading the next message from the XenStore service and
621 * process the result.
623 * \param type The returned type of the XenStore message received.
625 * \return 0 on success. Otherwise an errno value indicating the
626 * type of failure encountered.
629 xs_process_msg(enum xsd_sockmsg_type *type)
631 struct xs_stored_msg *msg;
635 msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK);
636 error = xs_read_store(&msg->hdr, sizeof(msg->hdr));
638 free(msg, M_XENSTORE);
642 body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK);
643 error = xs_read_store(body, msg->hdr.len);
645 free(body, M_XENSTORE);
646 free(msg, M_XENSTORE);
649 body[msg->hdr.len] = '\0';
651 *type = msg->hdr.type;
652 if (msg->hdr.type == XS_WATCH_EVENT) {
653 msg->u.watch.vec = split(body, msg->hdr.len,
654 &msg->u.watch.vec_size);
656 mtx_lock(&xs.registered_watches_lock);
657 msg->u.watch.handle = find_watch(
658 msg->u.watch.vec[XS_WATCH_TOKEN]);
659 mtx_lock(&xs.watch_events_lock);
660 if (msg->u.watch.handle != NULL &&
661 (!msg->u.watch.handle->max_pending ||
662 msg->u.watch.handle->pending <
663 msg->u.watch.handle->max_pending)) {
664 msg->u.watch.handle->pending++;
665 TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
666 wakeup(&xs.watch_events);
667 mtx_unlock(&xs.watch_events_lock);
669 mtx_unlock(&xs.watch_events_lock);
670 free(msg->u.watch.vec, M_XENSTORE);
671 free(msg, M_XENSTORE);
673 mtx_unlock(&xs.registered_watches_lock);
675 msg->u.reply.body = body;
676 mtx_lock(&xs.reply_lock);
677 TAILQ_INSERT_TAIL(&xs.reply_list, msg, list);
678 wakeup(&xs.reply_list);
679 mtx_unlock(&xs.reply_lock);
686 * Thread body of the XenStore receive thread.
688 * This thread blocks waiting for data from the XenStore service
689 * and processes any received messages.
692 xs_rcv_thread(void *arg __unused)
695 enum xsd_sockmsg_type type;
698 error = xs_process_msg(&type);
700 printf("XENSTORE error %d while reading message\n",
705 /*---------------- XenStore Message Request/Reply Processing -----------------*/
706 #define xsd_error_count (sizeof(xsd_errors) / sizeof(xsd_errors[0]))
709 * Convert a XenStore error string into an errno number.
711 * \param errorstring The error string to convert.
713 * \return The errno best matching the input string.
715 * \note Unknown error strings are converted to EINVAL.
718 xs_get_error(const char *errorstring)
722 for (i = 0; i < xsd_error_count; i++) {
723 if (!strcmp(errorstring, xsd_errors[i].errstring))
724 return (xsd_errors[i].errnum);
726 log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s",
732 * Block waiting for a reply to a message request.
734 * \param type The returned type of the reply.
735 * \param len The returned body length of the reply.
736 * \param result The returned body of the reply.
738 * \return 0 on success. Otherwise an errno indicating the
742 xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result)
744 struct xs_stored_msg *msg;
748 mtx_lock(&xs.reply_lock);
749 while (TAILQ_EMPTY(&xs.reply_list)) {
750 error = mtx_sleep(&xs.reply_list, &xs.reply_lock, 0, "xswait",
752 if (error && error != EWOULDBLOCK) {
753 mtx_unlock(&xs.reply_lock);
757 msg = TAILQ_FIRST(&xs.reply_list);
758 TAILQ_REMOVE(&xs.reply_list, msg, list);
759 mtx_unlock(&xs.reply_lock);
761 *type = msg->hdr.type;
764 body = msg->u.reply.body;
766 free(msg, M_XENSTORE);
772 * Pass-thru interface for XenStore access by userland processes
773 * via the XenStore device.
775 * Reply type and length data are returned by overwriting these
776 * fields in the passed in request message.
778 * \param msg A properly formatted message to transmit to
779 * the XenStore service.
780 * \param result The returned body of the reply.
782 * \return 0 on success. Otherwise an errno indicating the cause
785 * \note The returned result is provided in malloced storage and thus
786 * must be free'd by the caller with 'free(*result, M_XENSTORE);'
789 xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
791 uint32_t request_type;
794 request_type = msg->type;
796 sx_xlock(&xs.request_mutex);
797 if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
798 error = xs_read_reply(&msg->type, &msg->len, result);
799 sx_xunlock(&xs.request_mutex);
805 * Send a message with an optionally multi-part body to the XenStore service.
807 * \param t The transaction to use for this request.
808 * \param request_type The type of message to send.
809 * \param iovec Pointers to the body sections of the request.
810 * \param num_vecs The number of body sections in the request.
811 * \param len The returned length of the reply.
812 * \param result The returned body of the reply.
814 * \return 0 on success. Otherwise an errno indicating
815 * the cause of failure.
817 * \note The returned result is provided in malloced storage and thus
818 * must be free'd by the caller with 'free(*result, M_XENSTORE);'
821 xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
822 const struct iovec *iovec, u_int num_vecs, u_int *len, void **result)
824 struct xsd_sockmsg msg;
831 msg.type = request_type;
833 for (i = 0; i < num_vecs; i++)
834 msg.len += iovec[i].iov_len;
836 sx_xlock(&xs.request_mutex);
837 error = xs_write_store(&msg, sizeof(msg));
839 printf("xs_talkv failed %d\n", error);
840 goto error_lock_held;
843 for (i = 0; i < num_vecs; i++) {
844 error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len);
846 printf("xs_talkv failed %d\n", error);
847 goto error_lock_held;
851 error = xs_read_reply(&msg.type, len, &ret);
854 sx_xunlock(&xs.request_mutex);
858 if (msg.type == XS_ERROR) {
859 error = xs_get_error(ret);
860 free(ret, M_XENSTORE);
864 /* Reply is either error or an echo of our request message type. */
865 KASSERT(msg.type == request_type, ("bad xenstore message type"));
870 free(ret, M_XENSTORE);
876 * Wrapper for xs_talkv allowing easy transmission of a message with
877 * a single, contiguous, message body.
879 * \param t The transaction to use for this request.
880 * \param request_type The type of message to send.
881 * \param body The body of the request.
882 * \param len The returned length of the reply.
883 * \param result The returned body of the reply.
885 * \return 0 on success. Otherwise an errno indicating
886 * the cause of failure.
888 * \note The returned result is provided in malloced storage and thus
889 * must be free'd by the caller with 'free(*result, M_XENSTORE);'
892 xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
893 const char *body, u_int *len, void **result)
897 iovec.iov_base = (void *)(uintptr_t)body;
898 iovec.iov_len = strlen(body) + 1;
900 return (xs_talkv(t, request_type, &iovec, 1, len, result));
903 /*------------------------- XenStore Watch Support ---------------------------*/
905 * Transmit a watch request to the XenStore service.
907 * \param path The path in the XenStore to watch.
908 * \param token A unique identifier for this watch.
910 * \return 0 on success. Otherwise an errno indicating the
914 xs_watch(const char *path, const char *token)
918 iov[0].iov_base = (void *)(uintptr_t) path;
919 iov[0].iov_len = strlen(path) + 1;
920 iov[1].iov_base = (void *)(uintptr_t) token;
921 iov[1].iov_len = strlen(token) + 1;
923 return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
927 * Transmit an unwatch request to the XenStore service.
929 * \param path The path in the XenStore to stop watching.
930 * \param token A unique identifier for this watch.
932 * \return 0 on success. Otherwise an errno indicating the
936 xs_unwatch(const char *path, const char *token)
940 iov[0].iov_base = (void *)(uintptr_t) path;
941 iov[0].iov_len = strlen(path) + 1;
942 iov[1].iov_base = (void *)(uintptr_t) token;
943 iov[1].iov_len = strlen(token) + 1;
945 return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
949 * Convert from watch token (unique identifier) to the associated
950 * internal tracking structure for this watch.
952 * \param token The unique identifier for the watch to find.
954 * \return A pointer to the found watch structure or NULL.
956 static struct xs_watch *
957 find_watch(const char *token)
959 struct xs_watch *i, *cmp;
961 cmp = (void *)strtoul(token, NULL, 16);
963 LIST_FOREACH(i, &xs.registered_watches, list)
971 * Thread body of the XenStore watch event dispatch thread.
974 xenwatch_thread(void *unused)
976 struct xs_stored_msg *msg;
980 mtx_lock(&xs.watch_events_lock);
981 while (TAILQ_EMPTY(&xs.watch_events))
982 mtx_sleep(&xs.watch_events,
983 &xs.watch_events_lock,
984 PWAIT | PCATCH, "waitev", hz/10);
986 mtx_unlock(&xs.watch_events_lock);
987 sx_xlock(&xs.xenwatch_mutex);
989 mtx_lock(&xs.watch_events_lock);
990 msg = TAILQ_FIRST(&xs.watch_events);
992 TAILQ_REMOVE(&xs.watch_events, msg, list);
993 msg->u.watch.handle->pending--;
995 mtx_unlock(&xs.watch_events_lock);
999 * XXX There are messages coming in with a NULL
1000 * XXX callback. This deserves further investigation;
1001 * XXX the workaround here simply prevents the kernel
1002 * XXX from panic'ing on startup.
1004 if (msg->u.watch.handle->callback != NULL)
1005 msg->u.watch.handle->callback(
1006 msg->u.watch.handle,
1007 (const char **)msg->u.watch.vec,
1008 msg->u.watch.vec_size);
1009 free(msg->u.watch.vec, M_XENSTORE);
1010 free(msg, M_XENSTORE);
1013 sx_xunlock(&xs.xenwatch_mutex);
1017 /*----------- XenStore Configuration, Initialization, and Control ------------*/
1019 * Setup communication channels with the XenStore service.
1021 * \return On success, 0. Otherwise an errno value indicating the
1029 if (xen_store->rsp_prod != xen_store->rsp_cons) {
1030 log(LOG_WARNING, "XENSTORE response ring is not quiescent "
1031 "(%08x:%08x): fixing up\n",
1032 xen_store->rsp_cons, xen_store->rsp_prod);
1033 xen_store->rsp_cons = xen_store->rsp_prod;
1036 xen_intr_unbind(&xs.xen_intr_handle);
1038 error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn,
1039 /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE,
1040 &xs.xen_intr_handle);
1042 log(LOG_WARNING, "XENSTORE request irq failed %i\n", error);
1049 /*------------------ Private Device Attachment Functions --------------------*/
1051 xs_identify(driver_t *driver, device_t parent)
1054 BUS_ADD_CHILD(parent, 0, "xenstore", 0);
1058 * Probe for the existence of the XenStore.
1063 xs_probe(device_t dev)
1066 * We are either operating within a PV kernel or being probed
1067 * as the child of the successfully attached xenpci device.
1068 * Thus we are in a Xen environment and there will be a XenStore.
1069 * Unconditionally return success.
1071 device_set_desc(dev, "XenStore");
1072 return (BUS_PROBE_NOWILDCARD);
1076 xs_attach_deferred(void *arg)
1079 bus_generic_probe(xs.xs_dev);
1080 bus_generic_attach(xs.xs_dev);
1082 config_intrhook_disestablish(&xs.xs_attachcb);
1086 xs_attach_late(void *arg, int pending)
1089 KASSERT((pending == 1), ("xs late attach queued several times"));
1090 bus_generic_probe(xs.xs_dev);
1091 bus_generic_attach(xs.xs_dev);
1095 * Attach to the XenStore.
1097 * This routine also prepares for the probe/attach of drivers that rely
1101 xs_attach(device_t dev)
1105 /* Allow us to get device_t from softc and vice-versa. */
1107 device_set_softc(dev, &xs);
1109 /* Initialize the interface to xenstore. */
1112 xs.initialized = false;
1113 xs.evtchn = xen_get_xenstore_evtchn();
1114 if (xs.evtchn == 0) {
1115 struct evtchn_alloc_unbound alloc_unbound;
1117 /* Allocate a local event channel for xenstore */
1118 alloc_unbound.dom = DOMID_SELF;
1119 alloc_unbound.remote_dom = DOMID_SELF;
1120 error = HYPERVISOR_event_channel_op(
1121 EVTCHNOP_alloc_unbound, &alloc_unbound);
1124 "unable to alloc event channel for Dom0: %d",
1127 xs.evtchn = alloc_unbound.port;
1129 /* Allocate memory for the xs shared ring */
1130 xen_store = malloc(PAGE_SIZE, M_XENSTORE, M_WAITOK | M_ZERO);
1131 xs.gpfn = atop(pmap_kextract((vm_offset_t)xen_store));
1133 xs.gpfn = xen_get_xenstore_mfn();
1134 xen_store = pmap_mapdev_attr(ptoa(xs.gpfn), PAGE_SIZE,
1136 xs.initialized = true;
1139 TAILQ_INIT(&xs.reply_list);
1140 TAILQ_INIT(&xs.watch_events);
1142 mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF);
1143 mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
1144 sx_init(&xs.xenwatch_mutex, "xenwatch");
1145 sx_init(&xs.request_mutex, "xenstore request");
1146 mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
1147 mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);
1149 /* Initialize the shared memory rings to talk to xenstored */
1150 error = xs_init_comms();
1154 error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID,
1158 xs.xenwatch_pid = p->p_pid;
1160 error = kproc_create(xs_rcv_thread, NULL, NULL,
1161 RFHIGHPID, 0, "xenstore_rcv");
1163 xs.xs_attachcb.ich_func = xs_attach_deferred;
1164 xs.xs_attachcb.ich_arg = NULL;
1165 if (xs.initialized) {
1166 config_intrhook_establish(&xs.xs_attachcb);
1168 TASK_INIT(&xs.xs_late_init, 0, xs_attach_late, NULL);
1175 * Prepare for suspension of this VM by halting XenStore access after
1176 * all transactions and individual requests have completed.
1179 xs_suspend(device_t dev)
1183 /* Suspend child Xen devices. */
1184 error = bus_generic_suspend(dev);
1188 sx_xlock(&xs.request_mutex);
1194 * Resume XenStore operations after this VM is resumed.
1197 xs_resume(device_t dev __unused)
1199 struct xs_watch *watch;
1200 char token[sizeof(watch) * 2 + 1];
1204 sx_xunlock(&xs.request_mutex);
1207 * NB: since xenstore children have not been resumed yet, there's
1208 * no need to hold any watch mutex. Having clients try to add or
1209 * remove watches at this point (before xenstore is resumed) is
1210 * clearly a violation of the resume order.
1212 LIST_FOREACH(watch, &xs.registered_watches, list) {
1213 sprintf(token, "%lX", (long)watch);
1214 xs_watch(watch->node, token);
1217 /* Resume child Xen devices. */
1218 bus_generic_resume(dev);
1223 /*-------------------- Private Device Attachment Data -----------------------*/
1224 static device_method_t xenstore_methods[] = {
1225 /* Device interface */
1226 DEVMETHOD(device_identify, xs_identify),
1227 DEVMETHOD(device_probe, xs_probe),
1228 DEVMETHOD(device_attach, xs_attach),
1229 DEVMETHOD(device_detach, bus_generic_detach),
1230 DEVMETHOD(device_shutdown, bus_generic_shutdown),
1231 DEVMETHOD(device_suspend, xs_suspend),
1232 DEVMETHOD(device_resume, xs_resume),
1235 DEVMETHOD(bus_add_child, bus_generic_add_child),
1236 DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource),
1237 DEVMETHOD(bus_release_resource, bus_generic_release_resource),
1238 DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
1239 DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
1244 DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0);
1245 static devclass_t xenstore_devclass;
1247 DRIVER_MODULE(xenstore, xenpv, xenstore_driver, xenstore_devclass, 0, 0);
1249 /*------------------------------- Sysctl Data --------------------------------*/
1250 /* XXX Shouldn't the node be somewhere else? */
1251 SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen");
1252 SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, "");
1253 SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");
1255 /*-------------------------------- Public API --------------------------------*/
1256 /*------- API comments for these methods can be found in xenstorevar.h -------*/
1258 xs_initialized(void)
1261 return (xs.initialized);
1275 return (ptoa(xs.gpfn));
1279 xs_directory(struct xs_transaction t, const char *dir, const char *node,
1280 u_int *num, const char ***result)
1287 path = xs_join(dir, node);
1288 error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len,
1294 *result = split(strings, len, num);
1300 xs_exists(struct xs_transaction t, const char *dir, const char *node)
1305 error = xs_directory(t, dir, node, &dir_n, &d);
1308 free(d, M_XENSTORE);
/*
 * Read the value stored at a XenStore node.
 *
 * t       transaction the request is issued under.
 * dir     leading path component.
 * node    trailing path component, joined to dir with xs_join().
 * len     forwarded unchanged to xs_single() as the reply-length slot.
 * result  NOTE(review): the store into *result is not visible in this
 *         listing; presumably it receives ret - confirm.
 */
1313 xs_read(struct xs_transaction t, const char *dir, const char *node,
1314 u_int *len, void **result)
/* Build the full path and send it as the body of an XS_READ request. */
1320 path = xs_join(dir, node);
1321 error = xs_single(t, XS_READ, sbuf_data(path), len, &ret);
/*
 * Write a string value to a XenStore node.
 *
 * The request body is sent as two segments: the joined path and then the
 * value.  No reply payload is requested (both reply arguments are NULL).
 *
 * NOTE(review): the remainder of the parameter list, the release of path
 * and the return statement are not visible in this listing.
 */
1330 xs_write(struct xs_transaction t, const char *dir, const char *node,
1334 struct iovec iovec[2];
1337 path = xs_join(dir, node);
/* Segment 0: the path; the +1 extends the length by one byte past sbuf_len(). */
/* NOTE(review): presumably this covers the terminating NUL - confirm. */
1339 iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path);
1340 iovec[0].iov_len = sbuf_len(path) + 1;
/* Segment 1: the value, sized by strlen() so no terminator is included. */
1341 iovec[1].iov_base = (void *)(uintptr_t) string;
1342 iovec[1].iov_len = strlen(string);
1344 error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
/*
 * Create a XenStore directory node.  The path is dir joined with node and
 * is sent as an XS_MKDIR request; no reply payload is requested.
 *
 * NOTE(review): release of path and the return statement are not visible
 * in this listing.
 */
1351 xs_mkdir(struct xs_transaction t, const char *dir, const char *node)
1356 path = xs_join(dir, node);
1357 ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL);
/*
 * Remove a XenStore node.  The path is dir joined with node and is sent as
 * an XS_RM request; no reply payload is requested.
 *
 * NOTE(review): release of path and the return statement are not visible
 * in this listing.
 */
1364 xs_rm(struct xs_transaction t, const char *dir, const char *node)
1369 path = xs_join(dir, node);
1370 ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL);
/*
 * Remove a XenStore node together with everything beneath it.
 *
 * Visible outline:
 *  - Two copies of the joined base/node path are kept: root_path marks the
 *    top of the tree and cur_path tracks the directory being emptied.
 *  - Each pass lists cur_path and tries xs_rm() on every entry.  An entry
 *    that fails with ENOTEMPTY becomes the new cur_path so that it can be
 *    emptied first.
 *  - After the per-entry loop, xs_rm() is applied to cur_path itself with
 *    an empty node name.  cur_path is then compared against root_path, and
 *    the last slash in cur_path is located in order to step back to the
 *    parent directory.
 *  - On exit both path buffers and the listing are released, and a locally
 *    started transaction is ended, requesting abort when error is nonzero.
 *
 * NOTE(review): loop exits, labels, gotos, the condition guarding the local
 * transaction and the return statement are not visible in this listing, so
 * the exact control flow between the steps above must be confirmed against
 * the full source.
 */
1377 xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node)
1379 struct xs_transaction local_xbt;
1380 struct sbuf *root_path_sbuf;
1381 struct sbuf *cur_path_sbuf;
/* Both buffers start out holding the same joined path. */
1388 root_path_sbuf = xs_join(base, node);
1389 cur_path_sbuf = xs_join(base, node);
1390 root_path = sbuf_data(root_path_sbuf);
1391 cur_path = sbuf_data(cur_path_sbuf);
/* NOTE(review): presumably only done when the caller supplied no transaction. */
1396 error = xs_transaction_start(&local_xbt);
/* An empty node name makes the listing target cur_path itself. */
1406 error = xs_directory(xbt, cur_path, "", &count, &dir);
1410 for (i = 0; i < count; i++) {
1411 error = xs_rm(xbt, cur_path, dir[i]);
1412 if (error == ENOTEMPTY) {
1413 struct sbuf *push_dir;
1416 * Descend to clear out this sub directory.
1417 * We'll return to cur_dir once push_dir
1420 push_dir = xs_join(cur_path, dir[i]);
1421 sbuf_delete(cur_path_sbuf);
1422 cur_path_sbuf = push_dir;
1423 cur_path = sbuf_data(cur_path_sbuf);
1425 } else if (error != 0) {
1430 free(dir, M_XENSTORE);
1436 /* Directory is empty. It is now safe to remove. */
1437 error = xs_rm(xbt, cur_path, "");
1441 if (!strcmp(cur_path, root_path))
1444 /* Return to processing the parent directory. */
1445 last_slash = strrchr(cur_path, '/');
1446 KASSERT(last_slash != NULL,
1447 ("xs_rm_tree: mangled path %s", cur_path));
/* Cleanup: release both path buffers and any outstanding listing. */
1453 sbuf_delete(cur_path_sbuf);
1454 sbuf_delete(root_path_sbuf);
1456 free(dir, M_XENSTORE);
/* A nonzero id means the transaction was started by this function. */
1458 if (local_xbt.id != 0) {
1461 terror = xs_transaction_end(local_xbt, /*abort*/error != 0);
/* NOTE(review): the action taken on EAGAIN is not visible; presumably a retry. */
1463 if (terror == EAGAIN && error == 0)
/*
 * Begin a XenStore transaction and record its identifier in t->id.
 *
 * NOTE(review): the rest of the xs_single() argument list, the error check
 * and the return statement are not visible in this listing.
 */
1470 xs_transaction_start(struct xs_transaction *t)
/* Issued outside any transaction (XST_NIL) with an empty request body. */
1475 error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL,
/* The reply is a numeric string; base 0 lets strtoul() detect the radix. */
1478 t->id = strtoul(id_str, NULL, 0);
1479 free(id_str, M_XENSTORE);
/*
 * End the transaction t by sending XS_TRANSACTION_END with a one-letter
 * body, either F or T.
 *
 * NOTE(review): the condition choosing between the two letters is not
 * visible in this listing; presumably it tests the abort argument, with F
 * used when aborting - confirm.
 */
1485 xs_transaction_end(struct xs_transaction t, int abort)
1490 strcpy(abortstr, "F");
1492 strcpy(abortstr, "T");
/* No reply payload is requested. */
1494 return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL));
/*
 * Read a XenStore node and parse its contents with a scanf-style format.
 *
 * t           transaction the read is issued under.
 * dir, node   path components handed to xs_read().
 * scancountp  NOTE(review): its use is not visible in this listing;
 *             presumably it receives the vsscanf() conversion count.
 * fmt, ...    format and destination pointers consumed by vsscanf().
 */
1498 xs_scanf(struct xs_transaction t, const char *dir, const char *node,
1499 int *scancountp, const char *fmt, ...)
/* The reply length is not needed, so NULL is passed for it. */
1505 error = xs_read(t, dir, node, NULL, (void **) &val);
1510 ns = vsscanf(val, fmt, ap);
/* The value buffer returned by xs_read() is released after parsing. */
1512 free(val, M_XENSTORE);
1513 /* Distinctive errno. */
/*
 * Format a string from fmt and ap and write it to a XenStore node.
 *
 * NOTE(review): finishing and releasing the sbuf and the return statement
 * are not visible in this listing.
 */
1522 xs_vprintf(struct xs_transaction t,
1523 const char *dir, const char *node, const char *fmt, va_list ap)
/* Format into an sbuf obtained from sbuf_new_auto(). */
1528 sb = sbuf_new_auto();
1529 sbuf_vprintf(sb, fmt, ap);
/* The formatted text becomes the value written by xs_write(). */
1531 error = xs_write(t, dir, node, sbuf_data(sb));
/*
 * Variadic front end for xs_vprintf(): forwards t, dir, node, fmt and the
 * argument list ap to it.
 *
 * NOTE(review): setup and teardown of ap and the return statement are not
 * visible in this listing.
 */
1538 xs_printf(struct xs_transaction t, const char *dir, const char *node,
1539 const char *fmt, ...)
1545 error = xs_vprintf(t, dir, node, fmt, ap);
/*
 * Read several nodes beneath dir in a single call.
 *
 * The variable arguments are consumed as (name, fmt, result) triples and
 * the list ends at a NULL name.  Processing also stops once error becomes
 * nonzero.
 *
 * NOTE(review): the test that selects between parsing with sscanf() and
 * storing the raw buffer is not visible in this listing; presumably it
 * checks whether fmt is NULL - confirm.
 */
1552 xs_gather(struct xs_transaction t, const char *dir, ...)
1560 while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
1561 const char *fmt = va_arg(ap, char *);
1562 void *result = va_arg(ap, void *);
1565 error = xs_read(t, dir, name, NULL, (void **) &p);
/* Parsing path: convert the value into result, then release the buffer. */
1570 if (sscanf(p, fmt, result) == 0)
1572 free(p, M_XENSTORE);
/* Raw path: result is treated as a char ** and receives the buffer itself. */
1574 *(char **)result = p;
/*
 * Register a watch: add it to xs.registered_watches and request it through
 * xs_watch(), using the address of the watch printed in hex as the token.
 *
 * NOTE(review): the conditions guarding the EEXIST handling and the removal
 * from the list, and the return statement, are not visible in this listing.
 */
1582 xs_register_watch(struct xs_watch *watch)
1584 /* Pointer in ascii is the token. */
/* Two hex digits per byte of the pointer plus a terminator. */
1585 char token[sizeof(watch) * 2 + 1];
/* NOTE(review): the cast assumes long is as wide as a pointer - confirm. */
1589 sprintf(token, "%lX", (long)watch);
/* The list is modified only while holding registered_watches_lock. */
1591 mtx_lock(&xs.registered_watches_lock);
1592 KASSERT(find_watch(token) == NULL, ("watch already registered"));
1593 LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
1594 mtx_unlock(&xs.registered_watches_lock);
/* The request is sent after the lock has been dropped. */
1596 error = xs_watch(watch->node, token);
1598 /* Ignore errors due to multiple registration. */
1599 if (error == EEXIST)
/* Undo the insertion; presumably reached only when xs_watch() failed. */
1603 mtx_lock(&xs.registered_watches_lock);
1604 LIST_REMOVE(watch, list);
1605 mtx_unlock(&xs.registered_watches_lock);
/*
 * Unregister a watch: remove it from xs.registered_watches, cancel it
 * through xs_unwatch(), discard queued events that refer to it, and then
 * synchronize with the watch callback context.
 *
 * NOTE(review): several short statements (early return, the condition
 * guarding the log call, the loop continue) are not visible in this listing.
 */
1612 xs_unregister_watch(struct xs_watch *watch)
1614 struct xs_stored_msg *msg, *tmp;
/* Same token format as registration: the watch address printed in hex. */
1615 char token[sizeof(watch) * 2 + 1];
1618 sprintf(token, "%lX", (long)watch);
1620 mtx_lock(&xs.registered_watches_lock);
/* Unknown token: drop the lock; presumably the function returns here. */
1621 if (find_watch(token) == NULL) {
1622 mtx_unlock(&xs.registered_watches_lock);
1625 LIST_REMOVE(watch, list);
1626 mtx_unlock(&xs.registered_watches_lock);
1628 error = xs_unwatch(watch->node, token);
/* NOTE(review): presumably logged only when xs_unwatch() reported an error. */
1630 log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
1631 watch->node, error);
1633 /* Cancel pending watch events. */
1634 mtx_lock(&xs.watch_events_lock);
/* The _SAFE iterator is used because entries are unlinked inside the loop. */
1635 TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {
/* NOTE(review): events for other watches are presumably skipped here. */
1636 if (msg->u.watch.handle != watch)
/* Unlink the event and free both its vector and the message itself. */
1638 TAILQ_REMOVE(&xs.watch_events, msg, list);
1639 free(msg->u.watch.vec, M_XENSTORE);
1640 free(msg, M_XENSTORE);
1642 mtx_unlock(&xs.watch_events_lock);
1644 /* Flush any currently-executing callback, unless we are it. :-) */
/*
 * The exclusive lock is taken and dropped at once with nothing in between;
 * presumably this only waits for a current holder to finish.  It is skipped
 * when the current pid equals xs.xenwatch_pid.
 */
1645 if (curproc->p_pid != xs.xenwatch_pid) {
1646 sx_xlock(&xs.xenwatch_mutex);
1647 sx_xunlock(&xs.xenwatch_mutex);
/*
 * NOTE(review): the headers of the functions owning these two statements
 * are not visible in this listing.  The first takes xs.request_mutex
 * exclusively and the second releases it.
 */
1655 sx_xlock(&xs.request_mutex);
1663 sx_xunlock(&xs.request_mutex);