2 * Coda: an Experimental Distributed File System
5 * Copyright (c) 1987-1998 Carnegie Mellon University
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation, and
13 * that credit is given to Carnegie Mellon University in all documents
14 * and publicity pertaining to direct or indirect use of this code or its
17 * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
18 * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
19 * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
20 * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
21 * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
22 * ANY DERIVATIVE WORK.
24 * Carnegie Mellon encourages users of this software to return any
25 * improvements or extensions that they make, and to grant Carnegie
26 * Mellon the rights to redistribute these changes without encumbrance.
28 * @(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
31 * Mach Operating System
32 * Copyright (c) 1989 Carnegie-Mellon University
33 * All rights reserved. The CMU software License Agreement specifies
34 * the terms and conditions for use and redistribution.
38 * This code was written for the Coda filesystem at Carnegie Mellon
39 * University. Contributors include David Steere, James Kistler, and
40 * M. Satyanarayanan. */
43 * These routines define the pseudo device for communication between Coda's
44 * Venus and Minicache in Mach 2.6. They used to be in cfs_subr.c, but I
45 * moved them to make it easier to port the Minicache without porting coda.
50 * These routines are the device entry points for Venus.
53 #include <sys/cdefs.h>
54 __FBSDID("$FreeBSD$");
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/capability.h>
60 #include <sys/ioccom.h>
61 #include <sys/kernel.h>
63 #include <sys/malloc.h>
64 #include <sys/file.h> /* must come after sys/malloc.h */
65 #include <sys/mount.h>
66 #include <sys/mutex.h>
69 #include <sys/filedesc.h>
71 #include <fs/coda/coda.h>
72 #include <fs/coda/cnode.h>
73 #include <fs/coda/coda_io.h>
74 #include <fs/coda/coda_psdev.h>
77 * Variables to determine how Coda sleeps and whether or not it is
78 * interruptible when it does sleep waiting for Venus.
83 #include <sys/signalvar.h>
/*
 * Global state for the Coda pseudo-device.
 * NOTE(review): partial extract; some surrounding declarations are missing.
 */
86 int coda_psdev_print_entry = 0;	/* Debug knob: log entry into each psdev routine. */
87 static int outstanding_upcalls = 0;	/* Upcalls still pending at device close; vc_close sleeps on this. */
88 int coda_call_sleep = PZERO - 1;	/* Sleep priority used by coda_call()/tsleep. */
90 int coda_pcatch = PCATCH;	/* When set, sleeps in coda_call() are signal-interruptible. */
95 if (coda_psdev_print_entry) \
96 myprintf(("Entered %s\n", __func__)); \
/*
 * Fields of the vmsg upcall message record exchanged with Venus.
 * NOTE(review): fragment only -- the struct header and several fields
 * (vm_outSize, vm_unique, vm_data, vm_flags) are not visible in this extract.
 */
100 TAILQ_ENTRY(vmsg) vm_chain;	/* Linkage on vc_requests / vc_replies queues. */
103 u_short vm_inSize; /* Size is at most 5000 bytes */
105 u_short vm_opcode; /* Copied from data to save ptr deref */
107 caddr_t vm_sleep; /* Not used by Mach. */
112 #define VM_INTR 4 /* Unused. */
/*
 * vc_open: cdev open entry point for the Coda psdev.  Resets the
 * communication channel state (select info, request and reply queues)
 * for a fresh Venus session and clears the cached root vnode.
 * NOTE(review): extract is incomplete -- return type, braces, vcp
 * declaration and return statement are missing from this view.
 */
115 vc_open(struct cdev *dev, int flag, int mode, struct thread *td)
118 struct coda_mntinfo *mnt;
121 mnt = dev2coda_mntinfo(dev);
122 KASSERT(mnt, ("Coda: tried to open uninitialized cfs device"));
123 vcp = &mnt->mi_vcomm;
126 bzero(&(vcp->vc_selproc), sizeof (struct selinfo));	/* Clear stale select registration. */
127 TAILQ_INIT(&vcp->vc_requests);
128 TAILQ_INIT(&vcp->vc_replies);
131 mnt->mi_rootvp = NULL;
/*
 * vc_close: cdev close entry point (Venus has exited or closed the
 * device).  Marks the mount as unmounting, wakes every sleeper on the
 * request and reply queues so their coda_call()s can fail out, waits
 * for those upcalls to drain, then force-unmounts the filesystem.
 * NOTE(review): extract is incomplete -- braces, vcp initialization and
 * several statements are missing from this view.
 */
136 vc_close(struct cdev *dev, int flag, int mode, struct thread *td)
139 struct vmsg *vmp, *nvmp = NULL;
140 struct coda_mntinfo *mi;
144 mi = dev2coda_mntinfo(dev);
145 KASSERT(mi, ("Coda: closing unknown cfs device"));
147 KASSERT(VC_OPEN(vcp), ("Coda: closing unopened cfs device"));
150 * Prevent future operations on this vfs from succeeding by
151 * auto-unmounting any vfs mounted via this device. This frees user
152 * or sysadm from having to remember where all mount points are
153 * located. Put this before WAKEUPs to avoid queuing new messages
154 * between the WAKEUP and the unmount (which can happen if we're
157 if (mi->mi_rootvp == NULL) {
159 * Just a simple open/close with no mount.
166 * Let unmount know this is for real.
168 VTOC(mi->mi_rootvp)->c_flags |= C_UNMOUNTING;
169 coda_unmounting(mi->mi_vfsp);
172 * Wakeup clients so they can return.
174 outstanding_upcalls = 0;
175 TAILQ_FOREACH_SAFE(vmp, &vcp->vc_requests, vm_chain, nvmp) {	/* SAFE: entries may be freed below. */
177 * Free signal request messages and don't wakeup cause no one
180 if (vmp->vm_opcode == CODA_SIGNAL) {
181 CODA_FREE((caddr_t)vmp->vm_data,
182 (u_int)VC_IN_NO_DATA);
183 CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
186 outstanding_upcalls++;
187 wakeup(&vmp->vm_sleep);	/* Unblock the caller sleeping in coda_call(). */
189 TAILQ_FOREACH(vmp, &vcp->vc_replies, vm_chain) {
190 outstanding_upcalls++;
191 wakeup(&vmp->vm_sleep);
194 if (outstanding_upcalls) {
196 printf("presleep: outstanding_upcalls = %d\n",
197 outstanding_upcalls);
199 (void) tsleep(&outstanding_upcalls, coda_call_sleep,	/* Wait for coda_call() side to drain. */
202 printf("postsleep: outstanding_upcalls = %d\n",
203 outstanding_upcalls);
206 err = dounmount(mi->mi_vfsp, flag, td);	/* Force-unmount the vfs served by this device. */
208 myprintf(("Error %d unmounting vfs in vcclose(%s)\n", err,
/*
 * vc_read: Venus reads the next pending upcall request.  Copies the
 * message at the head of the request queue to userspace, then moves it
 * to the reply queue (or frees it if it was a CODA_SIGNAL, which gets
 * no reply).
 * NOTE(review): extract is incomplete -- braces, local declarations and
 * return statements are missing from this view.
 */
214 vc_read(struct cdev *dev, struct uio *uiop, int flag)
221 vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
224 * Get message at head of request queue.
226 vmp = TAILQ_FIRST(&vcp->vc_requests);
228 return (0); /* Nothing to read */
231 * Move the input args into userspace.
233 * XXXRW: This is not safe in the presence of >1 reader, as vmp is
234 * still on the head of the list.
236 uiop->uio_rw = UIO_READ;
237 error = uiomove(vmp->vm_data, vmp->vm_inSize, uiop);
239 myprintf(("vcread: error (%d) on uiomove\n", error));
242 TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);
245 * If request was a signal, free up the message and don't enqueue it
246 * in the reply queue.
248 if (vmp->vm_opcode == CODA_SIGNAL) {
250 myprintf(("vcread: signal msg (%d, %d)\n",
251 vmp->vm_opcode, vmp->vm_unique));
252 CODA_FREE((caddr_t)vmp->vm_data, (u_int)VC_IN_NO_DATA);
253 CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
256 vmp->vm_flags |= VM_READ;	/* Mark: Venus has consumed this request. */
257 TAILQ_INSERT_TAIL(&vcp->vc_replies, vmp, vm_chain);	/* Await Venus's answer via vc_write. */
/*
 * vc_write: Venus writes back a reply (or an unsolicited downcall).
 * Peeks at the opcode/sequence header: downcalls are dispatched to
 * handleDownCall(); replies are matched by sequence number against the
 * reply queue, copied into the waiting caller's buffer, and the caller
 * is woken.  CODA_OPEN_BY_FD replies additionally resolve the returned
 * file descriptor to a vnode.
 * NOTE(review): extract is incomplete -- braces, local declarations
 * (buf, opcode, seq, fp) and return statements are missing from this view.
 */
262 vc_write(struct cdev *dev, struct uio *uiop, int flag)
266 struct coda_out_hdr *out;
273 vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
276 * Peek at the opcode, unique without transferring the data.
278 uiop->uio_rw = UIO_WRITE;
279 error = uiomove((caddr_t)buf, sizeof(int) * 2, uiop);	/* First two ints: opcode, uniquifier. */
281 myprintf(("vcwrite: error (%d) on uiomove\n", error));
287 myprintf(("vcwrite got a call for %ld.%ld\n", opcode, seq));
288 if (DOWNCALL(opcode)) {
289 union outputArgs pbuf;
292 * Get the rest of the data.
294 uiop->uio_rw = UIO_WRITE;
295 error = uiomove((caddr_t)&pbuf.coda_purgeuser.oh.result,
296 sizeof(pbuf) - (sizeof(int)*2), uiop);
298 myprintf(("vcwrite: error (%d) on uiomove (Op %ld "
299 "seq %ld)\n", error, opcode, seq));
302 return (handleDownCall(dev2coda_mntinfo(dev), opcode, &pbuf));
306 * Look for the message on the (waiting for) reply queue.
308 TAILQ_FOREACH(vmp, &vcp->vc_replies, vm_chain) {
309 if (vmp->vm_unique == seq)
314 myprintf(("vcwrite: msg (%ld, %ld) not found\n",
320 * Remove the message from the reply queue.
322 TAILQ_REMOVE(&vcp->vc_replies, vmp, vm_chain);
325 * Move data into response buffer.
327 out = (struct coda_out_hdr *)vmp->vm_data;
330 * Don't need to copy opcode and uniquifier.
332 * Get the rest of the data.
334 if (vmp->vm_outSize < uiop->uio_resid) {	/* Reply larger than caller's buffer: reject. */
335 myprintf(("vcwrite: more data than asked for (%d < %zd)\n",
336 vmp->vm_outSize, uiop->uio_resid));
339 * Notify caller of the error.
341 wakeup(&vmp->vm_sleep);
348 buf[0] = uiop->uio_resid;	/* Remember actual reply size before uiomove consumes resid. */
349 uiop->uio_rw = UIO_WRITE;
350 error = uiomove((caddr_t) &out->result, vmp->vm_outSize -
351 (sizeof(int) * 2), uiop);
353 myprintf(("vcwrite: error (%d) on uiomove (op %ld seq %ld)\n",
354 error, opcode, seq));
359 * I don't think these are used, but just in case.
361 * XXX - aren't these two already correct? -bnoble
363 out->opcode = opcode;
365 vmp->vm_outSize = buf[0]; /* Amount of data transferred? */
366 vmp->vm_flags |= VM_WRITE;	/* Mark: reply delivered; coda_call() checks this. */
368 if (opcode == CODA_OPEN_BY_FD) {
369 struct coda_open_by_fd_out *tmp =
370 (struct coda_open_by_fd_out *)out;
372 struct vnode *vp = NULL;
374 if (tmp->oh.result == 0) {
375 error = getvnode(uiop->uio_td->td_proc->p_fd, CAP_WRITE,
379 * XXX: Since the whole driver runs with
380 * Giant, don't actually need to acquire it
381 * explicitly here yet.
386 fdrop(fp, uiop->uio_td);	/* getvnode took a file ref; drop it after grabbing vp. */
392 wakeup(&vmp->vm_sleep);	/* Wake the caller blocked in coda_call(). */
/*
 * vc_ioctl: cdev ioctl entry point.  Visible here: CIOC_KERNEL_VERSION
 * handling, which reports or validates the kernel<->Venus protocol
 * version (coda_kernel_version) against the value Venus supplies.
 * NOTE(review): extract is incomplete -- remaining parameters, braces,
 * other cases and return statements are missing from this view.
 */
397 vc_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
412 case CIOC_KERNEL_VERSION:
413 switch (*(u_int *)addr) {
415 *(u_int *)addr = coda_kernel_version;	/* Report our protocol version to Venus. */
420 if (coda_kernel_version != *(u_int *)addr)	/* Reject mismatched Venus. */
/*
 * vc_poll: cdev poll entry point.  Reports the device readable when the
 * request queue is non-empty; otherwise registers the thread via
 * selrecord() so selwakeuppri() in coda_call() can notify it.
 * NOTE(review): extract is incomplete -- braces and return statements
 * are missing from this view.
 */
435 vc_poll(struct cdev *dev, int events, struct thread *td)
441 vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
442 event_msk = events & (POLLIN|POLLRDNORM);	/* Only input readiness is supported. */
445 if (!TAILQ_EMPTY(&vcp->vc_requests))
446 return (events & (POLLIN|POLLRDNORM));
447 selrecord(td, &(vcp->vc_selproc));
454 struct coda_clstat coda_clstat;	/* Per-opcode upcall statistics, updated in coda_call(). */
457 * Key question: whether to sleep interruptibly or uninterruptibly when waiting
458 * for Venus. The former seems better (cause you can ^C a job), but then
459 * GNU-EMACS completion breaks. Use tsleep with no timeout, and no longjmp
460 * happens. But, when sleeping "uninterruptibly", we don't get told if it
461 * returns abnormally (e.g. kill -9).
/*
 * coda_call: issue an upcall to Venus and wait for the reply.  Builds a
 * vmsg around the caller's buffer, queues it on vc_requests, pokes any
 * selecting Venus thread, then sleeps (interruptibly, per coda_pcatch)
 * until vc_write delivers the reply or the sleep is interrupted.  On
 * interruption the request is dequeued and, if Venus already read it, a
 * CODA_SIGNAL message is enqueued so Venus can abort the operation.
 * NOTE(review): extract is incomplete -- braces, several locals (vmp,
 * svmp, vcp, i, tempset, error) and statements are missing from this view.
 */
464 coda_call(struct coda_mntinfo *mntinfo, int inSize, int *outSize,
471 struct thread *td = curthread;
472 struct proc *p = td->td_proc;
479 * Unlikely, but could be a race condition with a dying warden.
483 vcp = &(mntinfo->mi_vcomm);
484 coda_clstat.ncalls++;	/* Per-opcode accounting. */
485 coda_clstat.reqs[((struct coda_in_hdr *)buffer)->opcode]++;
488 CODA_ALLOC(vmp,struct vmsg *,sizeof(struct vmsg));
491 * Format the request message.
493 vmp->vm_data = buffer;
495 vmp->vm_inSize = inSize;
497 = *outSize ? *outSize : inSize; /* |buffer| >= inSize */
498 vmp->vm_opcode = ((struct coda_in_hdr *)buffer)->opcode;
499 vmp->vm_unique = ++vcp->vc_seq;	/* Sequence number used by vc_write to match the reply. */
501 myprintf(("Doing a call for %d.%d\n", vmp->vm_opcode,
505 * Fill in the common input args.
507 ((struct coda_in_hdr *)buffer)->unique = vmp->vm_unique;
510 * Append msg to request queue and poke Venus.
512 TAILQ_INSERT_TAIL(&vcp->vc_requests, vmp, vm_chain);
513 selwakeuppri(&(vcp->vc_selproc), coda_call_sleep);	/* Wake Venus if it is in poll/select. */
516 * We can be interrupted while we wait for Venus to process our
517 * request. If the interrupt occurs before Venus has read the
518 * request, we dequeue and return. If it occurs after the read but
519 * before the reply, we dequeue, send a signal message, and return.
520 * If it occurs after the reply we ignore it. In no case do we want
521 * to restart the syscall. If it was interrupted by a venus shutdown
522 * (vcclose), return ENODEV.
524 * Ignore return, we have to check anyway.
528 * This is work in progress. Setting coda_pcatch lets tsleep
529 * reawaken on a ^c or ^z. The problem is that emacs sets certain
530 * interrupts as SA_RESTART. This means that we should exit sleep
531 * handle the "signal" and then go to sleep again. Mostly this is
532 * done by letting the syscall complete and be restarted. We are not
533 * idempotent and can not do this. A better solution is necessary.
537 psig_omask = td->td_sigmask;	/* Saved; restored after the sleep loop below. */
539 error = msleep(&vmp->vm_sleep, &p->p_mtx,
540 (coda_call_sleep|coda_pcatch), "coda_call", hz*2);	/* 2-second timeout per iteration. */
543 else if (error == EWOULDBLOCK) {
545 printf("coda_call: tsleep TIMEOUT %d sec\n", 2+2*i);
549 SIGEMPTYSET(tempset);
550 SIGADDSET(tempset, SIGIO);
551 if (SIGSETEQ(td->td_siglist, tempset)) {	/* Only SIGIO pending: mask it and keep waiting. */
552 SIGADDSET(td->td_sigmask, SIGIO);
554 printf("coda_call: tsleep returns %d SIGIO, "
555 "cnt %d\n", error, i);
558 SIGDELSET(tempset, SIGIO);
559 SIGADDSET(tempset, SIGALRM);
560 if (SIGSETEQ(td->td_siglist, tempset)) {	/* Only SIGALRM pending: likewise mask and retry. */
561 SIGADDSET(td->td_sigmask, SIGALRM);
563 printf("coda_call: tsleep returns "
564 "%d SIGALRM, cnt %d\n", error, i);
568 printf("coda_call: tsleep returns "
569 "%d, cnt %d\n", error, i);
573 tempset = td->td_siglist;
574 SIGSETNAND(tempset, td->td_sigmask);
575 printf("coda_call: siglist = %p, "
576 "sigmask = %p, mask %p\n",
577 td->td_siglist, td->td_sigmask,
580 SIGSETOR(td->td_sigmask, td->td_siglist);	/* Mask all pending signals and retry. */
581 tempset = td->td_siglist;
582 SIGSETNAND(tempset, td->td_sigmask);
583 printf("coda_call: new mask, "
584 "siglist = %p, sigmask = %p, "
585 "mask %p\n", td->td_siglist,
586 td->td_sigmask, tempset);
591 } while (error && i++ < 128 && VC_OPEN(vcp));	/* Give up after 128 retries or device close. */
592 td->td_sigmask = psig_omask;	/* Restore the caller's original signal mask. */
596 (void)tsleep(&vmp->vm_sleep, coda_call_sleep, "coda_call", 0);	/* Uninterruptible wait variant. */
600 * Venus is still alive.
602 * Op went through, interrupt or not...
604 if (vmp->vm_flags & VM_WRITE) {	/* Reply was delivered by vc_write. */
606 *outSize = vmp->vm_outSize;
607 } else if (!(vmp->vm_flags & VM_READ)) {
608 /* Interrupted before venus read it. */
614 myprintf(("interrupted before read: op = "
615 "%d.%d, flags = %x\n", vmp->vm_opcode,
616 vmp->vm_unique, vmp->vm_flags));
617 TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);
621 * (!(vmp->vm_flags & VM_WRITE)) means interrupted
622 * after upcall started.
624 * Interrupted after start of upcall, send venus a
627 struct coda_in_hdr *dog;
635 myprintf(("Sending Venus a signal: op = "
636 "%d.%d, flags = %x\n", vmp->vm_opcode,
637 vmp->vm_unique, vmp->vm_flags));
638 TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);
640 CODA_ALLOC(svmp, struct vmsg *, sizeof(struct vmsg));
641 CODA_ALLOC((svmp->vm_data), char *,
642 sizeof(struct coda_in_hdr));
643 dog = (struct coda_in_hdr *)svmp->vm_data;
645 dog->opcode = svmp->vm_opcode = CODA_SIGNAL;	/* Tell Venus to abort the interrupted op. */
646 dog->unique = svmp->vm_unique = vmp->vm_unique;
647 svmp->vm_inSize = sizeof (struct coda_in_hdr);
648 /*??? rvb */ svmp->vm_outSize = sizeof (struct coda_in_hdr);
650 myprintf(("coda_call: enqueing signal msg "
651 "(%d, %d)\n", svmp->vm_opcode,
655 * Insert at head of queue!
657 * XXXRW: Actually, the tail.
659 TAILQ_INSERT_TAIL(&vcp->vc_requests, svmp, vm_chain);
660 selwakeuppri(&(vcp->vc_selproc), coda_call_sleep);
663 /* If venus died (!VC_OPEN(vcp)) */
665 myprintf(("vcclose woke op %d.%d flags %d\n",
666 vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
669 CODA_FREE(vmp, sizeof(struct vmsg));
670 if (outstanding_upcalls > 0 && (--outstanding_upcalls == 0))
671 wakeup(&outstanding_upcalls);	/* Last drained upcall: unblock vc_close. */
673 error = ((struct coda_out_hdr *)buffer)->result;	/* Return Venus's result code. */