lib/libc_r/uthread/uthread_kern.c
/*
 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */
#include <errno.h>
#include <poll.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <setjmp.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <pthread.h>
#include "pthread_private.h"

/* #define DEBUG_THREAD_KERN */
#ifdef DEBUG_THREAD_KERN
#define DBG_MSG         stdout_debug
#else
#define DBG_MSG(x...)
#endif

/* Static function prototype definitions: */
static void
thread_kern_poll(int wait_reqd);

static void
dequeue_signals(void);

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);

/* Static variables: */
static int      last_tick = 0;
static int      called_from_handler = 0;

/*
 * This is called when a signal handler finishes and wants to
 * return to a previous frame.
 */
void
_thread_kern_sched_frame(struct pthread_signal_frame *psf)
{
        struct pthread  *curthread = _get_curthread();

        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a signal from interrupting this execution and
         * corrupting the (soon-to-be) current frame.
         */
        _thread_kern_in_sched = 1;

        /* Restore the signal frame: */
        _thread_sigframe_restore(curthread, psf);

        /* The signal mask was restored; check for any pending signals: */
        curthread->check_pending = 1;

        /* Switch to the thread scheduler: */
        ___longjmp(_thread_kern_sched_jb, 1);
}


void
_thread_kern_sched(ucontext_t *ucp)
{
        struct pthread  *curthread = _get_curthread();

        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /* Check if this function was called from the signal handler: */
        if (ucp != NULL) {
                called_from_handler = 1;
                DBG_MSG("Entering scheduler due to signal\n");
        }

        /* Save the state of the current thread: */
        if (_setjmp(curthread->ctx.jb) != 0) {
                DBG_MSG("Returned from ___longjmp, thread %p\n",
                    curthread);
                /*
                 * This point is reached when a longjmp() is called
                 * to restore the state of a thread.
                 *
                 * This is the normal way out of the scheduler.
                 */
                _thread_kern_in_sched = 0;

                if (curthread->sig_defer_count == 0) {
                        if (((curthread->cancelflags &
                            PTHREAD_AT_CANCEL_POINT) == 0) &&
                            ((curthread->cancelflags &
                            PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
                                /*
                                 * Cancellations override signals.
                                 *
                                 * Stick a cancellation point at the
                                 * start of each async-cancellable
                                 * thread's resumption.
                                 *
                                 * We allow threads woken at cancel
                                 * points to do their own checks.
                                 */
                                pthread_testcancel();
                }

                if (_sched_switch_hook != NULL) {
                        /* Run the installed switch hook: */
                        thread_run_switch_hook(_last_user_thread, curthread);
                }
                if (ucp == NULL)
                        return;
                else {
                        /*
                         * Set the process signal mask in the context; it
                         * may have been changed by the handler.
                         */
                        ucp->uc_sigmask = _process_sigmask;

                        /* Resume the interrupted thread: */
                        __sys_sigreturn(ucp);
                }
        }
        /* Switch to the thread scheduler: */
        ___longjmp(_thread_kern_sched_jb, 1);
}

void
_thread_kern_sched_sig(void)
{
        struct pthread  *curthread = _get_curthread();

        curthread->check_pending = 1;
        _thread_kern_sched(NULL);
}


void
_thread_kern_scheduler(void)
{
        struct timespec ts;
        struct timeval  tv;
        struct pthread  *curthread = _get_curthread();
        pthread_t       pthread, pthread_h;
        unsigned int    current_tick;
        int             add_to_prioq;

        /* If the currently running thread is a user thread, save it: */
        if ((curthread->flags & PTHREAD_FLAGS_PRIVATE) == 0)
                _last_user_thread = curthread;

        if (called_from_handler != 0) {
                called_from_handler = 0;

                /*
                 * We were called from a signal handler; restore the process
                 * signal mask.
                 */
                if (__sys_sigprocmask(SIG_SETMASK,
                    &_process_sigmask, NULL) != 0)
                        PANIC("Unable to restore process mask after signal");
        }

        /*
         * Enter a scheduling loop that finds the next thread that is
         * ready to run. This loop completes when there are no more threads
         * in the global list or when a thread has its state restored by
         * either a sigreturn (if the state was saved as a sigcontext) or a
         * longjmp (if the state was saved by a setjmp).
         */
        while (!(TAILQ_EMPTY(&_thread_list))) {
                /* Get the current time of day: */
                GET_CURRENT_TOD(tv);
                TIMEVAL_TO_TIMESPEC(&tv, &ts);
                current_tick = _sched_ticks;

                /*
                 * Protect the scheduling queues from access by the signal
                 * handler.
                 */
                _queue_signals = 1;
                add_to_prioq = 0;

                if (curthread != &_thread_kern_thread) {
                        /*
                         * This thread no longer needs to yield the CPU.
                         */
                        curthread->yield_on_sig_undefer = 0;

                        if (curthread->state != PS_RUNNING) {
                                /*
                                 * Save the current time as the time that the
                                 * thread became inactive:
                                 */
                                curthread->last_inactive = (long)current_tick;
                                if (curthread->last_inactive <
                                    curthread->last_active) {
                                        /* Account for a rollover: */
                                        curthread->last_inactive +=
                                            UINT_MAX + 1;
                                }
                        }

                        /*
                         * Place the currently running thread into the
                         * appropriate queue(s).
                         */
                        switch (curthread->state) {
                        case PS_DEAD:
                        case PS_STATE_MAX: /* to silence -Wall */
                        case PS_SUSPENDED:
                                /*
                                 * Dead and suspended threads are not placed
                                 * in any queue:
                                 */
                                break;

                        case PS_RUNNING:
                                /*
                                 * Runnable threads can't be placed in the
                                 * priority queue until after waiting threads
                                 * are polled (to preserve round-robin
                                 * scheduling).
                                 */
                                add_to_prioq = 1;
                                break;

                        /*
                         * States which do not depend on file descriptor I/O
                         * operations or timeouts:
                         */
                        case PS_DEADLOCK:
                        case PS_FDLR_WAIT:
                        case PS_FDLW_WAIT:
                        case PS_FILE_WAIT:
                        case PS_JOIN:
                        case PS_MUTEX_WAIT:
                        case PS_SIGSUSPEND:
                        case PS_SIGTHREAD:
                        case PS_SIGWAIT:
                        case PS_WAIT_WAIT:
                                /* No timeouts for these states: */
                                curthread->wakeup_time.tv_sec = -1;
                                curthread->wakeup_time.tv_nsec = -1;

                                /* Restart the time slice: */
                                curthread->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(curthread);
                                break;

                        /* States which can timeout: */
                        case PS_COND_WAIT:
                        case PS_SLEEP_WAIT:
                                /* Restart the time slice: */
                                curthread->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(curthread);
                                break;

                        /* States that require periodic work: */
                        case PS_SPINBLOCK:
                                /* No timeouts for this state: */
                                curthread->wakeup_time.tv_sec = -1;
                                curthread->wakeup_time.tv_nsec = -1;

                                /* Increment spinblock count: */
                                _spinblock_count++;

                                /* FALLTHROUGH */
                        case PS_FDR_WAIT:
                        case PS_FDW_WAIT:
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                /* Restart the time slice: */
                                curthread->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(curthread);

                                /* Insert into the work queue: */
                                PTHREAD_WORKQ_INSERT(curthread);
                                break;
                        }

                        /*
                         * Are there pending signals for this thread?
                         *
                         * This check has to be performed after the thread
                         * has been placed in the queue(s) appropriate for
                         * its state.  The process of adding pending signals
                         * can change a thread's state, which in turn will
                         * attempt to add or remove the thread from any
                         * scheduling queue to which it belongs.
                         */
                        if (curthread->check_pending != 0) {
                                curthread->check_pending = 0;
                                _thread_sig_check_pending(curthread);
                        }
                }

                /*
                 * Avoid polling file descriptors if there are none
                 * waiting:
                 */
                if (TAILQ_EMPTY(&_workq) != 0) {
                }
                /*
                 * Poll file descriptors only if a new scheduling signal
                 * has occurred or if we have no more runnable threads.
                 */
                else if (((current_tick = _sched_ticks) != last_tick) ||
                    ((curthread->state != PS_RUNNING) &&
                    (PTHREAD_PRIOQ_FIRST() == NULL))) {
                        /* Unprotect the scheduling queues: */
                        _queue_signals = 0;

                        /*
                         * Poll file descriptors to update the state of threads
                         * waiting on file I/O where data may be available:
                         */
                        thread_kern_poll(0);

                        /* Protect the scheduling queues: */
                        _queue_signals = 1;
                }
                last_tick = current_tick;

                /*
                 * Wake up threads that have timed out.  This has to be
                 * done after polling in case a thread does a poll or
                 * select with zero time.
                 */
                PTHREAD_WAITQ_SETACTIVE();
                while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
                    (pthread->wakeup_time.tv_sec != -1) &&
                    (((pthread->wakeup_time.tv_sec == 0) &&
                    (pthread->wakeup_time.tv_nsec == 0)) ||
                    (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
                    ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
                    (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
                        switch (pthread->state) {
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                /* Return zero file descriptors ready: */
                                pthread->data.poll_data->nfds = 0;
                                /* FALLTHROUGH */
                        default:
                                /*
                                 * Remove this thread from the waiting queue
                                 * (and work queue if necessary) and place it
                                 * in the ready queue.
                                 */
                                PTHREAD_WAITQ_CLEARACTIVE();
                                if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                PTHREAD_WAITQ_SETACTIVE();
                                break;
                        }
                        /*
                         * Flag the timeout in the thread structure:
                         */
                        pthread->timeout = 1;
                }
                PTHREAD_WAITQ_CLEARACTIVE();

                /*
                 * Check to see if the current thread needs to be added
                 * to the priority queue:
                 */
                if (add_to_prioq != 0) {
                        /*
                         * Save the current time as the time that the
                         * thread became inactive:
                         */
                        current_tick = _sched_ticks;
                        curthread->last_inactive = (long)current_tick;
                        if (curthread->last_inactive <
                            curthread->last_active) {
                                /* Account for a rollover: */
                                curthread->last_inactive += UINT_MAX + 1;
                        }

                        if ((curthread->slice_usec != -1) &&
                           (curthread->attr.sched_policy != SCHED_FIFO)) {
                                /*
                                 * Accumulate the number of microseconds for
                                 * which the current thread has run:
                                 */
                                curthread->slice_usec +=
                                    (curthread->last_inactive -
                                    curthread->last_active) *
                                    (long)_clock_res_usec;
                                /* Check for time quantum exceeded: */
                                if (curthread->slice_usec > TIMESLICE_USEC)
                                        curthread->slice_usec = -1;
                        }

                        if (curthread->slice_usec == -1) {
                                /*
                                 * The thread exceeded its time
                                 * quantum or it yielded the CPU;
                                 * place it at the tail of the
                                 * queue for its priority.
                                 */
                                PTHREAD_PRIOQ_INSERT_TAIL(curthread);
                        } else {
                                /*
                                 * The thread hasn't exceeded its
                                 * interval.  Place it at the head
                                 * of the queue for its priority.
                                 */
                                PTHREAD_PRIOQ_INSERT_HEAD(curthread);
                        }
                }

                /*
                 * Get the highest priority thread in the ready queue.
                 */
                pthread_h = PTHREAD_PRIOQ_FIRST();

                /* Check if there are no threads ready to run: */
                if (pthread_h == NULL) {
                        /*
                         * Lock the pthread kernel by changing the pointer to
                         * the running thread to point to the global kernel
                         * thread structure:
                         */
                        _set_curthread(&_thread_kern_thread);
                        curthread = &_thread_kern_thread;

                        DBG_MSG("No runnable threads, using kernel thread %p\n",
                            curthread);

                        /* Unprotect the scheduling queues: */
                        _queue_signals = 0;

                        /*
                         * There are no threads ready to run, so wait until
                         * something happens that changes this condition:
                         */
                        thread_kern_poll(1);

                        /*
                         * This process' usage will likely be very small
                         * while waiting in a poll.  Since the scheduling
                         * clock is based on the profiling timer, it is
                         * unlikely that the profiling timer will fire
                         * and update the time of day.  To account for this,
                         * get the time of day after polling with a timeout.
                         */
                        gettimeofday((struct timeval *) &_sched_tod, NULL);

                        /* Check once more for a runnable thread: */
                        _queue_signals = 1;
                        pthread_h = PTHREAD_PRIOQ_FIRST();
                        _queue_signals = 0;
                }

                if (pthread_h != NULL) {
                        /* Remove the thread from the ready queue: */
                        PTHREAD_PRIOQ_REMOVE(pthread_h);

                        /* Unprotect the scheduling queues: */
                        _queue_signals = 0;

                        /*
                         * Check for signals queued while the scheduling
                         * queues were protected:
                         */
                        while (_sigq_check_reqd != 0) {
                                /* Clear before handling queued signals: */
                                _sigq_check_reqd = 0;

                                /* Protect the scheduling queues again: */
                                _queue_signals = 1;

                                dequeue_signals();

                                /*
                                 * Check for a higher priority thread that
                                 * became runnable due to signal handling.
                                 */
                                if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
                                    (pthread->active_priority > pthread_h->active_priority)) {
                                        /* Remove the thread from the ready queue: */
                                        PTHREAD_PRIOQ_REMOVE(pthread);

                                        /*
                                         * Insert the lower priority thread
                                         * at the head of its priority list:
                                         */
                                        PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);

                                        /* There's a new thread in town: */
                                        pthread_h = pthread;
                                }

                                /* Unprotect the scheduling queues: */
                                _queue_signals = 0;
                        }

                        /* Make the selected thread the current thread: */
                        _set_curthread(pthread_h);
                        curthread = pthread_h;

                        /*
                         * Save the current time as the time that the thread
                         * became active:
                         */
                        current_tick = _sched_ticks;
                        curthread->last_active = (long) current_tick;

                        /*
                         * Check if this thread is running for the first time
                         * or running again after using its full time slice
                         * allocation:
                         */
                        if (curthread->slice_usec == -1) {
                                /* Reset the accumulated time slice period: */
                                curthread->slice_usec = 0;
                        }

                        /*
                         * If we had a context switch, run any
                         * installed switch hooks.
                         */
                        if ((_sched_switch_hook != NULL) &&
                            (_last_user_thread != curthread)) {
                                thread_run_switch_hook(_last_user_thread,
                                    curthread);
                        }
                        /*
                         * Continue the thread at its current frame:
                         */
#if NOT_YET
                        _setcontext(&curthread->ctx.uc);
#else
                        ___longjmp(curthread->ctx.jb, 1);
#endif
                        /* This point should not be reached. */
                        PANIC("Thread has returned from sigreturn or longjmp");
                }
        }

        /* There are no more threads, so exit this process: */
        exit(0);
}

void
_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
{
        struct pthread  *curthread = _get_curthread();

        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /*
         * Prevent the signal handler from fiddling with this thread
         * before its state is set and it is placed into the proper queue.
         */
        _queue_signals = 1;

        /* Change the state of the current thread: */
        curthread->state = state;
        curthread->fname = fname;
        curthread->lineno = lineno;

        /* Schedule the next thread that is ready: */
        _thread_kern_sched(NULL);
}
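
/*
 * Usage sketch (illustrative): callers elsewhere in libc_r typically put a
 * thread to sleep by naming the new state and the call site, for example
 *
 *      _thread_kern_sched_state(PS_SLEEP_WAIT, __FILE__, __LINE__);
 *
 * so that the fname/lineno fields recorded above identify where a thread
 * last blocked when debugging one that never wakes up.
 */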

void
_thread_kern_sched_state_unlock(enum pthread_state state,
    spinlock_t *lock, char *fname, int lineno)
{
        struct pthread  *curthread = _get_curthread();

        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /*
         * Prevent the signal handler from fiddling with this thread
         * before its state is set and it is placed into the proper
         * queue(s).
         */
        _queue_signals = 1;

        /* Change the state of the current thread: */
        curthread->state = state;
        curthread->fname = fname;
        curthread->lineno = lineno;

        _SPINUNLOCK(lock);

        /* Schedule the next thread that is ready: */
        _thread_kern_sched(NULL);
}
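
/*
 * Usage sketch (obj is a made-up name for illustration): the unlock variant
 * lets a caller record its new state and then drop a low-level spinlock
 * without a wakeup slipping in between, since signals remain queued until
 * the scheduler runs, e.g.
 *
 *      _SPINLOCK(&obj->lock);
 *      ...queue this thread on the object...
 *      _thread_kern_sched_state_unlock(PS_MUTEX_WAIT, &obj->lock,
 *          __FILE__, __LINE__);
 */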

static void
thread_kern_poll(int wait_reqd)
{
        int             count = 0;
        int             i, found;
        int             kern_pipe_added = 0;
        int             nfds = 0;
        int             timeout_ms = 0;
        struct pthread  *pthread;
        struct timespec ts;
        struct timeval  tv;

        /* Check if the caller wants to wait: */
        if (wait_reqd == 0) {
                timeout_ms = 0;
        }
        else {
                /* Get the current time of day: */
                GET_CURRENT_TOD(tv);
                TIMEVAL_TO_TIMESPEC(&tv, &ts);

                _queue_signals = 1;
                pthread = TAILQ_FIRST(&_waitingq);
                _queue_signals = 0;

                if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
                        /*
                         * Either there are no threads in the waiting queue,
                         * or there are no threads that can timeout.
                         */
                        timeout_ms = INFTIM;
                }
                else if (pthread->wakeup_time.tv_sec - ts.tv_sec > 60000)
                        /* Limit maximum timeout to prevent rollover. */
                        timeout_ms = 60000;
                else {
                        /*
                         * Calculate the time left for the next thread to
                         * timeout:
                         */
                        timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
                            1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec) /
                            1000000);
                        /*
                         * Don't allow negative timeouts:
                         */
                        if (timeout_ms < 0)
                                timeout_ms = 0;
                }
        }
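
        /*
         * Worked example (hypothetical values): a wakeup time of
         * { 103 s, 250000000 ns } with a current time of
         * { 101 s, 750000000 ns } gives
         * (2 * 1000) + (-500000000 / 1000000) = 2000 - 500 = 1500 ms.
         */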

        /* Protect the scheduling queues: */
        _queue_signals = 1;

        /*
         * Check to see if the signal queue needs to be walked to look
         * for threads awoken by a signal while in the scheduler.
         */
        if (_sigq_check_reqd != 0) {
                /* Reset flag before handling queued signals: */
                _sigq_check_reqd = 0;

                dequeue_signals();
        }

        /*
         * Check for a thread that became runnable due to a signal:
         */
        if (PTHREAD_PRIOQ_FIRST() != NULL) {
                /*
                 * Since there is at least one runnable thread,
                 * disable the wait.
                 */
                timeout_ms = 0;
        }

        /*
         * Form the poll table:
         */
        nfds = 0;
        if (timeout_ms != 0) {
                /* Add the kernel pipe to the poll table: */
                _thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
                _thread_pfd_table[nfds].events = POLLRDNORM;
                _thread_pfd_table[nfds].revents = 0;
                nfds++;
                kern_pipe_added = 1;
        }

        PTHREAD_WAITQ_SETACTIVE();
        TAILQ_FOREACH(pthread, &_workq, qe) {
                switch (pthread->state) {
                case PS_SPINBLOCK:
                        /*
                         * If the lock is available, let the thread run.
                         */
                        if (pthread->data.spinlock->access_lock == 0) {
                                PTHREAD_WAITQ_CLEARACTIVE();
                                PTHREAD_WORKQ_REMOVE(pthread);
                                PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                PTHREAD_WAITQ_SETACTIVE();
                                /* One less thread in a spinblock state: */
                                _spinblock_count--;
                                /*
                                 * Since there is at least one runnable
                                 * thread, disable the wait.
                                 */
                                timeout_ms = 0;
                        }
                        break;

                /* File descriptor read wait: */
                case PS_FDR_WAIT:
                        /* Limit number of polled files to table size: */
                        if (nfds < _thread_dtablesize) {
                                _thread_pfd_table[nfds].events = POLLRDNORM;
                                _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
                                nfds++;
                        }
                        break;

                /* File descriptor write wait: */
                case PS_FDW_WAIT:
                        /* Limit number of polled files to table size: */
                        if (nfds < _thread_dtablesize) {
                                _thread_pfd_table[nfds].events = POLLWRNORM;
                                _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
                                nfds++;
                        }
                        break;

                /* File descriptor poll or select wait: */
                case PS_POLL_WAIT:
                case PS_SELECT_WAIT:
                        /* Limit number of polled files to table size: */
                        if (pthread->data.poll_data->nfds + nfds <
                            _thread_dtablesize) {
                                for (i = 0; i < pthread->data.poll_data->nfds; i++) {
                                        _thread_pfd_table[nfds + i].fd =
                                            pthread->data.poll_data->fds[i].fd;
                                        _thread_pfd_table[nfds + i].events =
                                            pthread->data.poll_data->fds[i].events;
                                }
                                nfds += pthread->data.poll_data->nfds;
                        }
                        break;

                /* Other states do not depend on file I/O. */
                default:
                        break;
                }
        }
        PTHREAD_WAITQ_CLEARACTIVE();

        /*
         * Wait for a file descriptor to be ready for read, write, or
         * an exception, or a timeout to occur:
         */
        count = __sys_poll(_thread_pfd_table, nfds, timeout_ms);

        if (kern_pipe_added != 0)
                /*
                 * Remove the pthread kernel pipe file descriptor
                 * from the pollfd table:
                 */
                nfds = 1;
        else
                nfds = 0;

        /*
         * Check if it is possible that there are bytes in the kernel
         * read pipe waiting to be read:
         */
        if (count < 0 || ((kern_pipe_added != 0) &&
            (_thread_pfd_table[0].revents & POLLRDNORM))) {
                /*
                 * If the kernel read pipe was included in the
                 * count:
                 */
                if (count > 0) {
                        /* Decrement the count of file descriptors: */
                        count--;
                }

                if (_sigq_check_reqd != 0) {
                        /* Reset flag before handling signals: */
                        _sigq_check_reqd = 0;

                        dequeue_signals();
                }
        }

        /*
         * Check if any file descriptors are ready:
         */
        if (count > 0) {
                /*
                 * Enter a loop to look for threads waiting on file
                 * descriptors that are flagged as available by the
                 * _poll syscall:
                 */
                PTHREAD_WAITQ_SETACTIVE();
                TAILQ_FOREACH(pthread, &_workq, qe) {
                        switch (pthread->state) {
                        case PS_SPINBLOCK:
                                /*
                                 * If the lock is available, let the thread run.
                                 */
                                if (pthread->data.spinlock->access_lock == 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();

                                        /*
                                         * One less thread in a spinblock state:
                                         */
                                        _spinblock_count--;
                                }
                                break;

                        /* File descriptor read wait: */
                        case PS_FDR_WAIT:
                                if ((nfds < _thread_dtablesize) &&
                                    (_thread_pfd_table[nfds].revents
                                       & (POLLRDNORM|POLLERR|POLLHUP|POLLNVAL))
                                      != 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();
                                }
                                nfds++;
                                break;

                        /* File descriptor write wait: */
                        case PS_FDW_WAIT:
                                if ((nfds < _thread_dtablesize) &&
                                    (_thread_pfd_table[nfds].revents
                                       & (POLLWRNORM|POLLERR|POLLHUP|POLLNVAL))
                                      != 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();
                                }
                                nfds++;
                                break;

                        /* File descriptor poll or select wait: */
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                if (pthread->data.poll_data->nfds + nfds <
                                    _thread_dtablesize) {
                                        /*
                                         * Enter a loop looking for I/O
                                         * readiness:
                                         */
                                        found = 0;
                                        for (i = 0; i < pthread->data.poll_data->nfds; i++) {
                                                if (_thread_pfd_table[nfds + i].revents != 0) {
                                                        pthread->data.poll_data->fds[i].revents =
                                                            _thread_pfd_table[nfds + i].revents;
                                                        found++;
                                                }
                                        }

                                        /* Increment before destroying: */
                                        nfds += pthread->data.poll_data->nfds;

                                        if (found != 0) {
                                                pthread->data.poll_data->nfds = found;
                                                PTHREAD_WAITQ_CLEARACTIVE();
                                                PTHREAD_WORKQ_REMOVE(pthread);
                                                PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                                PTHREAD_WAITQ_SETACTIVE();
                                        }
                                }
                                else
                                        nfds += pthread->data.poll_data->nfds;
                                break;

                        /* Other states do not depend on file I/O. */
                        default:
                                break;
                        }
                }
                PTHREAD_WAITQ_CLEARACTIVE();
        }
        else if (_spinblock_count != 0) {
                /*
                 * Enter a loop to look for threads waiting on a spinlock
                 * that is now available.
                 */
                PTHREAD_WAITQ_SETACTIVE();
                TAILQ_FOREACH(pthread, &_workq, qe) {
                        if (pthread->state == PS_SPINBLOCK) {
                                /*
                                 * If the lock is available, let the thread run.
                                 */
                                if (pthread->data.spinlock->access_lock == 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();

                                        /*
                                         * One less thread in a spinblock state:
                                         */
                                        _spinblock_count--;
                                }
                        }
                }
                PTHREAD_WAITQ_CLEARACTIVE();
        }

        /* Unprotect the scheduling queues: */
        _queue_signals = 0;

        while (_sigq_check_reqd != 0) {
                /* Handle queued signals: */
                _sigq_check_reqd = 0;

                /* Protect the scheduling queues: */
                _queue_signals = 1;

                dequeue_signals();

                /* Unprotect the scheduling queues: */
                _queue_signals = 0;
        }
}

void
_thread_kern_set_timeout(const struct timespec * timeout)
{
        struct pthread  *curthread = _get_curthread();
        struct timespec current_time;
        struct timeval  tv;

        /* Reset the timeout flag for the running thread: */
        curthread->timeout = 0;

        /* Check if the thread is to wait forever: */
        if (timeout == NULL) {
                /*
                 * Set the wakeup time to something that can be recognised as
                 * different to an actual time of day:
                 */
                curthread->wakeup_time.tv_sec = -1;
                curthread->wakeup_time.tv_nsec = -1;
        }
        /* Check if no waiting is required: */
        else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
                /* Set the wake up time to 'immediately': */
                curthread->wakeup_time.tv_sec = 0;
                curthread->wakeup_time.tv_nsec = 0;
        } else {
                /* Get the current time: */
                GET_CURRENT_TOD(tv);
                TIMEVAL_TO_TIMESPEC(&tv, &current_time);

                /* Calculate the time for the current thread to wake up: */
                curthread->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
                curthread->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;

                /* Check if the nanosecond field needs to wrap: */
                if (curthread->wakeup_time.tv_nsec >= 1000000000) {
                        /* Wrap the nanosecond field: */
                        curthread->wakeup_time.tv_sec += 1;
                        curthread->wakeup_time.tv_nsec -= 1000000000;
                }
        }
}
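
/*
 * Worked example (hypothetical values): a current time of
 * { 100 s, 500000000 ns } plus a relative timeout of { 1 s, 600000000 ns }
 * sums to 101 s and 1100000000 ns; the nanosecond field wraps, leaving a
 * wakeup time of { 102 s, 100000000 ns }.
 */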

void
_thread_kern_sig_defer(void)
{
        struct pthread  *curthread = _get_curthread();

        /* Allow signal deferral to be recursive. */
        curthread->sig_defer_count++;
}

void
_thread_kern_sig_undefer(void)
{
        struct pthread  *curthread = _get_curthread();

        /*
         * Perform checks to yield only if we are about to undefer
         * signals.
         */
        if (curthread->sig_defer_count > 1) {
                /* Decrement the signal deferral count. */
                curthread->sig_defer_count--;
        }
        else if (curthread->sig_defer_count == 1) {
                /* Reenable signals: */
                curthread->sig_defer_count = 0;

                /*
                 * Check if there are queued signals:
                 */
                if (_sigq_check_reqd != 0)
                        _thread_kern_sched(NULL);

                /*
                 * Check for asynchronous cancellation before delivering any
                 * pending signals:
                 */
                if (((curthread->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
                    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
                        pthread_testcancel();

                /*
                 * If there are pending signals or this thread has
                 * to yield the CPU, call the kernel scheduler:
                 *
                 * XXX - Come back and revisit the pending signal problem
                 */
                if ((curthread->yield_on_sig_undefer != 0) ||
                    SIGNOTEMPTY(curthread->sigpend)) {
                        curthread->yield_on_sig_undefer = 0;
                        _thread_kern_sched(NULL);
                }
        }
}
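
/*
 * Usage sketch (illustrative): elsewhere in libc_r these calls typically
 * bracket short critical sections that touch the scheduling queues, e.g.
 *
 *      _thread_kern_sig_defer();
 *      ...manipulate a mutex or condition variable queue...
 *      _thread_kern_sig_undefer();
 *
 * so that an asynchronous scheduling signal cannot run the scheduler while
 * those queues are inconsistent.
 */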

static void
dequeue_signals(void)
{
        char    bufr[128];
        int     num;

        /*
         * Enter a loop to clear the pthread kernel pipe:
         */
        while (((num = __sys_read(_thread_kern_pipe[0], bufr,
            sizeof(bufr))) > 0) || (num == -1 && errno == EINTR)) {
        }
        if ((num < 0) && (errno != EAGAIN)) {
                /*
                 * The only error we should expect is if there is
                 * no data to read.
                 */
                PANIC("Unable to read from thread kernel pipe");
        }
        /* Handle any pending signals: */
        _thread_sig_handle_pending();
}
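
/*
 * Note: treating EAGAIN as the normal "pipe drained" case above assumes
 * _thread_kern_pipe[0] was made non-blocking when the pipe was created
 * during library initialization.
 */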

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
{
        pthread_t tid_out = thread_out;
        pthread_t tid_in = thread_in;

        if ((tid_out != NULL) &&
            (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
                tid_out = NULL;
        if ((tid_in != NULL) &&
            (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
                tid_in = NULL;

        if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
                /* Run the scheduler switch hook: */
                _sched_switch_hook(tid_out, tid_in);
        }
}
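
/*
 * Usage sketch (my_switch_routine is a hypothetical name): _sched_switch_hook
 * is normally installed by the application through the non-portable
 * interface, e.g.
 *
 *      pthread_switch_add_np(my_switch_routine);
 *
 * where my_switch_routine receives the outgoing and incoming thread ids on
 * every context switch between user threads.
 */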

struct pthread *
_get_curthread(void)
{
        if (_thread_initial == NULL)
                _thread_init();

        return (_thread_run);
}

void
_set_curthread(struct pthread *newthread)
{
        _thread_run = newthread;
}