2 * Coda: an Experimental Distributed File System
5 * Copyright (c) 1987-1998 Carnegie Mellon University
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation, and
13 * that credit is given to Carnegie Mellon University in all documents
14 * and publicity pertaining to direct or indirect use of this code or its
17 * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
18 * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
19 * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
20 * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
21 * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
22 * ANY DERIVATIVE WORK.
24 * Carnegie Mellon encourages users of this software to return any
25 * improvements or extensions that they make, and to grant Carnegie
26 * Mellon the rights to redistribute these changes without encumbrance.
28 * @(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
31 * Mach Operating System
32 * Copyright (c) 1989 Carnegie-Mellon University
33 * All rights reserved. The CMU software License Agreement specifies
34 * the terms and conditions for use and redistribution.
38 * This code was written for the Coda filesystem at Carnegie Mellon
39 * University. Contributors include David Steere, James Kistler, and
40 * M. Satyanarayanan. */
43 * These routines define the pseudo device for communication between Coda's
44 * Venus and Minicache in Mach 2.6. They used to be in cfs_subr.c, but I
45 * moved them to make it easier to port the Minicache without porting coda.
50 * These routines are the device entry points for Venus.
53 #include <sys/cdefs.h>
54 __FBSDID("$FreeBSD$");
56 #include <sys/param.h>
57 #include <sys/systm.h>
59 #include <sys/ioccom.h>
60 #include <sys/kernel.h>
62 #include <sys/malloc.h>
63 #include <sys/file.h> /* must come after sys/malloc.h */
64 #include <sys/mount.h>
65 #include <sys/mutex.h>
68 #include <sys/filedesc.h>
70 #include <fs/coda/coda.h>
71 #include <fs/coda/cnode.h>
72 #include <fs/coda/coda_io.h>
73 #include <fs/coda/coda_psdev.h>
76 * Variables to determine how Coda sleeps and whether or not it is
77 * interruptible when it does sleep waiting for Venus.
82 #include <sys/signalvar.h>
/*
 * When non-zero, the entry trace that follows these variables prints
 * "Entered <function>" through myprintf().
 */
85 int coda_psdev_print_entry = 0;
/*
 * Number of sleepers vc_close() has woken and is still waiting on:
 * vc_close() resets it, bumps it once per message it wakes and then sleeps
 * on its address; coda_call() decrements it and issues the wakeup when it
 * reaches zero.
 */
86 static int outstanding_upcalls = 0;
/*
 * Priority value handed to tsleep()/msleep() and selwakeuppri() throughout
 * this file.
 */
87 int coda_call_sleep = PZERO - 1;
/* OR'ed into the msleep() priority in coda_call(). */
89 int coda_pcatch = PCATCH;
94 if (coda_psdev_print_entry) \
95 myprintf(("Entered %s\n", __func__)); \
99 TAILQ_ENTRY(vmsg) vm_chain;
102 u_short vm_inSize; /* Size is at most 5000 bytes */
104 u_short vm_opcode; /* Copied from data to save ptr deref */
106 caddr_t vm_sleep; /* Not used by Mach. */
111 #define VM_INTR 4 /* Unused. */
/*
 * vc_open: open routine for the device.
 *
 * The statements visible here look up the coda_mntinfo for dev, assert
 * that it exists, zero the vcomm's selinfo, initialise the request and
 * reply queues as empty, and reset mi_rootvp to NULL.
 *
 * NOTE(review): some lines of this function (return type, braces, a local
 * declaration, the return statement) are absent from this listing; only
 * the visible code is described.
 */
114 vc_open(struct cdev *dev, int flag, int mode, struct thread *td)
117 struct coda_mntinfo *mnt;
120 mnt = dev2coda_mntinfo(dev);
121 KASSERT(mnt, ("Coda: tried to open uninitialized cfs device"));
122 vcp = &mnt->mi_vcomm;
125 bzero(&(vcp->vc_selproc), sizeof (struct selinfo));
126 TAILQ_INIT(&vcp->vc_requests);
127 TAILQ_INIT(&vcp->vc_replies);
130 mnt->mi_rootvp = NULL;
/*
 * vc_close: close routine for the device.
 *
 * From the statements visible in this listing: looks up the coda_mntinfo
 * for dev and asserts that the device is known and open.  A NULL mi_rootvp
 * is treated as an open/close with nothing mounted.  Otherwise the root
 * cnode is flagged C_UNMOUNTING and coda_unmounting() is called, the
 * sleepers behind the messages on the request and reply queues are woken
 * (signal messages are freed; per the existing comment nobody waits on
 * them), the routine sleeps on &outstanding_upcalls if any sleeper was
 * woken, and finally dounmount() is called on the mounted vfs.
 *
 * NOTE(review): several lines of this function (return type, braces, some
 * locals and statements) are absent from this listing; only the visible
 * code is described.
 */
135 vc_close(struct cdev *dev, int flag, int mode, struct thread *td)
138 struct vmsg *vmp, *nvmp = NULL;
139 struct coda_mntinfo *mi;
143 mi = dev2coda_mntinfo(dev);
144 KASSERT(mi, ("Coda: closing unknown cfs device"));
146 KASSERT(VC_OPEN(vcp), ("Coda: closing unopened cfs device"));
149 * Prevent future operations on this vfs from succeeding by
150 * auto-unmounting any vfs mounted via this device. This frees user
151 * or sysadm from having to remember where all mount points are
152 * located. Put this before WAKEUPs to avoid queuing new messages
153 * between the WAKEUP and the unmount (which can happen if we're
156 if (mi->mi_rootvp == NULL) {
158 * Just a simple open/close with no mount.
165 * Let unmount know this is for real.
167 VTOC(mi->mi_rootvp)->c_flags |= C_UNMOUNTING;
168 coda_unmounting(mi->mi_vfsp);
171 * Wakeup clients so they can return.
/* Recount from zero: every sleeper woken below is added back in. */
173 outstanding_upcalls = 0;
/* The _SAFE iterator is used because signal messages are freed mid-walk. */
174 TAILQ_FOREACH_SAFE(vmp, &vcp->vc_requests, vm_chain, nvmp) {
176 * Free signal request messages and don't wakeup cause no one
179 if (vmp->vm_opcode == CODA_SIGNAL) {
180 CODA_FREE((caddr_t)vmp->vm_data,
181 (u_int)VC_IN_NO_DATA);
182 CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
185 outstanding_upcalls++;
186 wakeup(&vmp->vm_sleep);
188 TAILQ_FOREACH(vmp, &vcp->vc_replies, vm_chain) {
189 outstanding_upcalls++;
190 wakeup(&vmp->vm_sleep);
/*
 * coda_call() decrements outstanding_upcalls and wakes this address when
 * the count reaches zero.
 */
193 if (outstanding_upcalls) {
195 printf("presleep: outstanding_upcalls = %d\n",
196 outstanding_upcalls);
198 (void) tsleep(&outstanding_upcalls, coda_call_sleep,
201 printf("postsleep: outstanding_upcalls = %d\n",
202 outstanding_upcalls);
205 err = dounmount(mi->mi_vfsp, flag, td);
207 myprintf(("Error %d unmounting vfs in vcclose(%s)\n", err,
/*
 * vc_read: read routine for the device; hands the oldest queued request
 * to the reader.
 *
 * From the visible statements: takes the message at the head of
 * vc_requests, copies vm_inSize bytes of its vm_data out through uiop with
 * uiomove(), and unlinks it from the request queue.  A CODA_SIGNAL message
 * is freed after being read; any other message is flagged VM_READ and
 * appended to vc_replies.
 *
 * NOTE(review): several lines of this function (return type, braces,
 * locals, the conditions guarding the early returns) are absent from this
 * listing; only the visible code is described.
 */
213 vc_read(struct cdev *dev, struct uio *uiop, int flag)
220 vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
223 * Get message at head of request queue.
225 vmp = TAILQ_FIRST(&vcp->vc_requests);
227 return (0); /* Nothing to read */
230 * Move the input args into userspace.
232 * XXXRW: This is not safe in the presence of >1 reader, as vmp is
233 * still on the head of the list.
235 uiop->uio_rw = UIO_READ;
236 error = uiomove(vmp->vm_data, vmp->vm_inSize, uiop);
238 myprintf(("vcread: error (%d) on uiomove\n", error));
241 TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);
244 * If request was a signal, free up the message and don't enqueue it
245 * in the reply queue.
247 if (vmp->vm_opcode == CODA_SIGNAL) {
249 myprintf(("vcread: signal msg (%d, %d)\n",
250 vmp->vm_opcode, vmp->vm_unique));
251 CODA_FREE((caddr_t)vmp->vm_data, (u_int)VC_IN_NO_DATA);
252 CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
/* coda_call() tests VM_READ to tell whether the request was picked up. */
255 vmp->vm_flags |= VM_READ;
256 TAILQ_INSERT_TAIL(&vcp->vc_replies, vmp, vm_chain);
/*
 * vc_write: write routine for the device; accepts either a downcall or the
 * reply to an earlier request.
 *
 * From the visible statements: the first two ints of the written data are
 * copied into buf with uiomove().  When DOWNCALL(opcode) is true the rest
 * of the data is copied into a local union outputArgs and passed to
 * handleDownCall().  Otherwise the reply queue is searched for the message
 * whose vm_unique equals seq; that message is unlinked, the remaining data
 * is copied into its buffer starting at the result field, the message is
 * flagged VM_WRITE and the thread sleeping on vm_sleep is woken.  A
 * successful CODA_OPEN_BY_FD reply additionally goes through getvnode()
 * and fdrop().
 *
 * NOTE(review): several lines of this function (return type, braces,
 * locals, error returns, and the assignments that load opcode and seq)
 * are absent from this listing; only the visible code is described.
 */
261 vc_write(struct cdev *dev, struct uio *uiop, int flag)
265 struct coda_out_hdr *out;
272 vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
275 * Peek at the opcode, unique without transfering the data.
277 uiop->uio_rw = UIO_WRITE;
278 error = uiomove((caddr_t)buf, sizeof(int) * 2, uiop);
280 myprintf(("vcwrite: error (%d) on uiomove\n", error));
286 myprintf(("vcwrite got a call for %ld.%ld\n", opcode, seq));
287 if (DOWNCALL(opcode)) {
288 union outputArgs pbuf;
291 * Get the rest of the data.
293 uiop->uio_rw = UIO_WRITE;
294 error = uiomove((caddr_t)&pbuf.coda_purgeuser.oh.result,
295 sizeof(pbuf) - (sizeof(int)*2), uiop);
297 myprintf(("vcwrite: error (%d) on uiomove (Op %ld "
298 "seq %ld)\n", error, opcode, seq));
301 return (handleDownCall(dev2coda_mntinfo(dev), opcode, &pbuf));
305 * Look for the message on the (waiting for) reply queue.
307 TAILQ_FOREACH(vmp, &vcp->vc_replies, vm_chain) {
308 if (vmp->vm_unique == seq)
313 myprintf(("vcwrite: msg (%ld, %ld) not found\n",
319 * Remove the message from the reply queue.
321 TAILQ_REMOVE(&vcp->vc_replies, vmp, vm_chain);
324 * Move data into response buffer.
326 out = (struct coda_out_hdr *)vmp->vm_data;
329 * Don't need to copy opcode and uniquifier.
331 * Get the rest of the data.
333 if (vmp->vm_outSize < uiop->uio_resid) {
334 myprintf(("vcwrite: more data than asked for (%d < %d)\n",
335 vmp->vm_outSize, uiop->uio_resid));
338 * Notify caller of the error.
340 wakeup(&vmp->vm_sleep);
/* Save the residual count before the copy; it becomes vm_outSize below. */
347 buf[0] = uiop->uio_resid;
348 uiop->uio_rw = UIO_WRITE;
349 error = uiomove((caddr_t) &out->result, vmp->vm_outSize -
350 (sizeof(int) * 2), uiop);
352 myprintf(("vcwrite: error (%d) on uiomove (op %ld seq %ld)\n",
353 error, opcode, seq));
358 * I don't think these are used, but just in case.
360 * XXX - aren't these two already correct? -bnoble
362 out->opcode = opcode;
364 vmp->vm_outSize = buf[0]; /* Amount of data transferred? */
/* coda_call() tests VM_WRITE to tell whether a reply has arrived. */
365 vmp->vm_flags |= VM_WRITE;
367 if (opcode == CODA_OPEN_BY_FD) {
368 struct coda_open_by_fd_out *tmp =
369 (struct coda_open_by_fd_out *)out;
371 struct vnode *vp = NULL;
373 if (tmp->oh.result == 0) {
374 error = getvnode(uiop->uio_td->td_proc->p_fd,
378 * XXX: Since the whole driver runs with
379 * Giant, don't actually need to acquire it
380 * explicitly here yet.
385 fdrop(fp, uiop->uio_td);
391 wakeup(&vmp->vm_sleep);
/*
 * vc_ioctl: ioctl routine for the device.
 *
 * Only the CIOC_KERNEL_VERSION command is visible in this listing.  It
 * switches on the u_int stored at addr: one visible arm writes
 * coda_kernel_version back through addr, another compares
 * coda_kernel_version against the value at addr.
 *
 * NOTE(review): the remaining parameters, the case labels of the inner
 * switch, the return values and any other commands are on lines absent
 * from this listing.
 */
396 vc_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
411 case CIOC_KERNEL_VERSION:
412 switch (*(u_int *)addr) {
414 *(u_int *)addr = coda_kernel_version;
419 if (coda_kernel_version != *(u_int *)addr)
/*
 * vc_poll: poll routine for the device.
 *
 * From the visible statements: masks the requested events down to
 * POLLIN|POLLRDNORM; when the request queue is non-empty it returns those
 * requested read events, otherwise it registers the thread on the vcomm's
 * selinfo with selrecord().  coda_call() calls selwakeuppri() on the same
 * selinfo after queueing a request.
 *
 * NOTE(review): several lines of this function (return type, braces,
 * locals, the use of event_msk and the final return) are absent from this
 * listing; only the visible code is described.
 */
434 vc_poll(struct cdev *dev, int events, struct thread *td)
440 vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
441 event_msk = events & (POLLIN|POLLRDNORM);
444 if (!TAILQ_EMPTY(&vcp->vc_requests))
445 return (events & (POLLIN|POLLRDNORM));
446 selrecord(td, &(vcp->vc_selproc));
/*
 * Call statistics: coda_call() increments ncalls once per call and the
 * reqs[] slot indexed by the request's opcode.
 */
453 struct coda_clstat coda_clstat;
456 * Key question: whether to sleep interruptibly or uninterruptibly when waiting
457 * for Venus. The former seems better (cause you can ^C a job), but then
458 * GNU-EMACS completion breaks. Use tsleep with no timeout, and no longjmp
459 * happens. But, when sleeping "uninterruptibly", we don't get told if it
460 * returns abnormally (e.g. kill -9).
/*
 * coda_call: queue a request for Venus and sleep until it is answered,
 * interrupted, or the device is closed.
 *
 * mntinfo - mount info whose mi_vcomm holds the request and reply queues.
 * inSize  - stored as the message's vm_inSize (the byte count vc_read()
 *           copies out to the reader).
 * outSize - on entry, *outSize (or inSize when *outSize is 0) is recorded
 *           in the message; when a reply was written, *outSize is set to
 *           the message's vm_outSize.
 * buffer  - request/reply buffer; begins with a struct coda_in_hdr on the
 *           way in and is read as a struct coda_out_hdr afterwards.
 *           NOTE(review): its declaration is on a line absent from this
 *           listing.
 *
 * After waking, three cases are distinguished by vm_flags:
 *   VM_WRITE set              - reply arrived; vc_write() already unlinked
 *                               the message from vc_replies.
 *   neither VM_READ/VM_WRITE  - Venus never read it; it is still on
 *                               vc_requests and is unlinked from there.
 *   VM_READ only              - Venus read it but has not replied; it is on
 *                               vc_replies, is unlinked from there, and a
 *                               CODA_SIGNAL message carrying the same
 *                               unique id is queued for Venus.
 *
 * NOTE(review): several lines of this function (return type, braces, some
 * locals, conditions and the return statement) are absent from this
 * listing; only the visible code is described.
 */
463 coda_call(struct coda_mntinfo *mntinfo, int inSize, int *outSize,
470 struct thread *td = curthread;
471 struct proc *p = td->td_proc;
478 * Unlikely, but could be a race condition with a dying warden.
482 vcp = &(mntinfo->mi_vcomm);
483 coda_clstat.ncalls++;
484 coda_clstat.reqs[((struct coda_in_hdr *)buffer)->opcode]++;
487 CODA_ALLOC(vmp,struct vmsg *,sizeof(struct vmsg));
490 * Format the request message.
492 vmp->vm_data = buffer;
494 vmp->vm_inSize = inSize;
496 = *outSize ? *outSize : inSize; /* |buffer| >= inSize */
497 vmp->vm_opcode = ((struct coda_in_hdr *)buffer)->opcode;
498 vmp->vm_unique = ++vcp->vc_seq;
500 myprintf(("Doing a call for %d.%d\n", vmp->vm_opcode,
504 * Fill in the common input args.
506 ((struct coda_in_hdr *)buffer)->unique = vmp->vm_unique;
509 * Append msg to request queue and poke Venus.
511 TAILQ_INSERT_TAIL(&vcp->vc_requests, vmp, vm_chain);
512 selwakeuppri(&(vcp->vc_selproc), coda_call_sleep);
515 * We can be interrupted while we wait for Venus to process our
516 * request. If the interrupt occurs before Venus has read the
517 * request, we dequeue and return. If it occurs after the read but
518 * before the reply, we dequeue, send a signal message, and return.
519 * If it occurs after the reply we ignore it. In no case do we want
520 * to restart the syscall. If it was interrupted by a venus shutdown
521 * (vcclose), return ENODEV.
523 * Ignore return, we have to check anyway.
527 * This is work in progress. Setting coda_pcatch lets tsleep
528 * reawaken on a ^c or ^z. The problem is that emacs sets certain
529 * interrupts as SA_RESTART. This means that we should exit sleep
530 * handle the "signal" and then go to sleep again. Mostly this is
531 * done by letting the syscall complete and be restarted. We are not
532 * idempotent and can not do this. A better solution is necessary.
/* Saved here and restored after the sleep loop, which edits td_sigmask. */
536 psig_omask = td->td_sigmask;
538 error = msleep(&vmp->vm_sleep, &p->p_mtx,
539 (coda_call_sleep|coda_pcatch), "coda_call", hz*2);
542 else if (error == EWOULDBLOCK) {
544 printf("coda_call: tsleep TIMEOUT %d sec\n", 2+2*i);
548 SIGEMPTYSET(tempset);
549 SIGADDSET(tempset, SIGIO);
550 if (SIGSETEQ(td->td_siglist, tempset)) {
551 SIGADDSET(td->td_sigmask, SIGIO);
553 printf("coda_call: tsleep returns %d SIGIO, "
554 "cnt %d\n", error, i);
557 SIGDELSET(tempset, SIGIO);
558 SIGADDSET(tempset, SIGALRM);
559 if (SIGSETEQ(td->td_siglist, tempset)) {
560 SIGADDSET(td->td_sigmask, SIGALRM);
562 printf("coda_call: tsleep returns "
563 "%d SIGALRM, cnt %d\n", error, i);
567 printf("coda_call: tsleep returns "
568 "%d, cnt %d\n", error, i);
572 tempset = td->td_siglist;
573 SIGSETNAND(tempset, td->td_sigmask);
574 printf("coda_call: siglist = %p, "
575 "sigmask = %p, mask %p\n",
576 td->td_siglist, td->td_sigmask,
579 SIGSETOR(td->td_sigmask, td->td_siglist);
580 tempset = td->td_siglist;
581 SIGSETNAND(tempset, td->td_sigmask);
582 printf("coda_call: new mask, "
583 "siglist = %p, sigmask = %p, "
584 "mask %p\n", td->td_siglist,
585 td->td_sigmask, tempset);
590 } while (error && i++ < 128 && VC_OPEN(vcp));
591 td->td_sigmask = psig_omask;
595 (void)tsleep(&vmp->vm_sleep, coda_call_sleep, "coda_call", 0);
599 * Venus is still alive.
601 * Op went through, interrupt or not...
603 if (vmp->vm_flags & VM_WRITE) {
605 *outSize = vmp->vm_outSize;
606 } else if (!(vmp->vm_flags & VM_READ)) {
607 /* Interrupted before venus read it. */
613 myprintf(("interrupted before read: op = "
614 "%d.%d, flags = %x\n", vmp->vm_opcode,
615 vmp->vm_unique, vmp->vm_flags));
/* Not yet read, so the message is still linked on the request queue. */
616 TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);
620 * (!(vmp->vm_flags & VM_WRITE)) means interrupted
621 * after upcall started.
623 * Interrupted after start of upcall, send venus a
626 struct coda_in_hdr *dog;
634 myprintf(("Sending Venus a signal: op = "
635 "%d.%d, flags = %x\n", vmp->vm_opcode,
636 vmp->vm_unique, vmp->vm_flags));
/*
 * VM_READ is set and VM_WRITE is not, so vc_read() has already moved this
 * message onto vc_replies and vc_write() has not taken it off.  It must be
 * unlinked through that queue's head: TAILQ_REMOVE() rewrites the tail
 * pointer of the head it is given when the element is last, so naming
 * vc_requests here would corrupt both queues.
 */
637 TAILQ_REMOVE(&vcp->vc_replies, vmp, vm_chain);
639 CODA_ALLOC(svmp, struct vmsg *, sizeof(struct vmsg));
640 CODA_ALLOC((svmp->vm_data), char *,
641 sizeof(struct coda_in_hdr));
642 dog = (struct coda_in_hdr *)svmp->vm_data;
/* The signal reuses the interrupted request's unique id. */
644 dog->opcode = svmp->vm_opcode = CODA_SIGNAL;
645 dog->unique = svmp->vm_unique = vmp->vm_unique;
646 svmp->vm_inSize = sizeof (struct coda_in_hdr);
647 /*??? rvb */ svmp->vm_outSize = sizeof (struct coda_in_hdr);
649 myprintf(("coda_call: enqueing signal msg "
650 "(%d, %d)\n", svmp->vm_opcode,
654 * Insert at head of queue!
656 * XXXRW: Actually, the tail.
658 TAILQ_INSERT_TAIL(&vcp->vc_requests, svmp, vm_chain);
659 selwakeuppri(&(vcp->vc_selproc), coda_call_sleep);
662 /* If venus died (!VC_OPEN(vcp)) */
664 myprintf(("vcclose woke op %d.%d flags %d\n",
665 vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
668 CODA_FREE(vmp, sizeof(struct vmsg));
/* Last woken sleeper to finish releases vc_close(), which sleeps on this. */
669 if (outstanding_upcalls > 0 && (--outstanding_upcalls == 0))
670 wakeup(&outstanding_upcalls);
672 error = ((struct coda_out_hdr *)buffer)->result;