/*
 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */
#include <errno.h>
#include <poll.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <setjmp.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/syscall.h>
#include <fcntl.h>
#ifdef _THREAD_SAFE
#include <pthread.h>
#include "pthread_private.h"

/* Static function prototype definitions: */
static void
_thread_kern_poll(int wait_reqd);

static void
dequeue_signals(void);

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);

void
_thread_kern_sched(ucontext_t * scp)
{
#ifndef __alpha__
        char           *fdata;
#endif
        pthread_t       pthread, pthread_h = NULL;
        struct itimerval itimer;
        struct timespec ts, ts1;
        struct timeval  tv, tv1;
        int             set_timer = 0;

        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /* Check if this function was called from the signal handler: */
        if (scp != NULL) {
                /*
                 * Copy the signal context into the current thread's
                 * saved signal context:
                 */
                memcpy(&_thread_run->saved_sigcontext, scp, sizeof(_thread_run->saved_sigcontext));

#ifndef __alpha__
                /* Point to the floating point data in the running thread: */
                fdata = _thread_run->saved_fp;

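                /*
                 * Note: on the i386, fnsave both stores the FPU state
                 * and reinitializes the FPU, so the saved state must be
                 * reloaded with frstor before this thread runs again
                 * (as is done below, before the sigreturn).
                 */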
                /* Save the floating point data: */
                __asm__("fnsave %0": :"m"(*fdata));
#endif

                /* Flag the signal context as the last state saved: */
                _thread_run->sig_saved = 1;
        }
        /* Save the state of the current thread: */
        else if (setjmp(_thread_run->saved_jmp_buf) != 0) {
                /*
                 * This point is reached when a longjmp() is called to
                 * restore the state of a thread.
                 *
                 * This is the normal way out of the scheduler.
                 */
                _thread_kern_in_sched = 0;

                if (((_thread_run->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
                    ((_thread_run->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0)) {
                        /*
                         * Cancellations override signals.
                         *
                         * Stick a cancellation point at the start of
                         * each async-cancellable thread's resumption.
                         *
                         * We allow threads woken at cancel points to do their
                         * own checks.
                         */
                        pthread_testcancel();
                }

                if (_sched_switch_hook != NULL) {
                        /* Run the installed switch hook: */
                        thread_run_switch_hook(_last_user_thread, _thread_run);
                }

                return;
        } else
                /* Flag that the jump buffer was the last state saved: */
                _thread_run->sig_saved = 0;

        /* If the currently running thread is a user thread, save it: */
        if ((_thread_run->flags & PTHREAD_FLAGS_PRIVATE) == 0)
                _last_user_thread = _thread_run;

        /*
         * Enter a scheduling loop that finds the next thread that is
         * ready to run. This loop completes when there are no more threads
         * in the global list or when a thread has its state restored by
         * either a sigreturn (if the state was saved as a sigcontext) or a
         * longjmp (if the state was saved by a setjmp).
         */
        while (!(TAILQ_EMPTY(&_thread_list))) {
                /* Get the current time of day: */
                gettimeofday(&tv, NULL);
                TIMEVAL_TO_TIMESPEC(&tv, &ts);

                /*
                 * Protect the scheduling queues from access by the signal
                 * handler.
                 */
                _queue_signals = 1;

                if (_thread_run != &_thread_kern_thread) {

                        /*
                         * This thread no longer needs to yield the CPU.
                         */
                        _thread_run->yield_on_sig_undefer = 0;

                        /*
                         * Save the current time as the time that the thread
                         * became inactive:
                         */
                        _thread_run->last_inactive.tv_sec = tv.tv_sec;
                        _thread_run->last_inactive.tv_usec = tv.tv_usec;

                        /*
                         * Place the currently running thread into the
                         * appropriate queue(s).
                         */
                        switch (_thread_run->state) {
                        case PS_DEAD:
                        case PS_STATE_MAX: /* to silence -Wall */
                                /*
                                 * Dead threads are not placed in any queue:
                                 */
                                break;

                        case PS_RUNNING:
                                /*
                                 * Runnable threads can't be placed in the
                                 * priority queue until after waiting threads
                                 * are polled (to preserve round-robin
                                 * scheduling).
                                 */
                                if ((_thread_run->slice_usec != -1) &&
                                    (_thread_run->attr.sched_policy != SCHED_FIFO)) {
                                        /*
                                         * Accumulate the number of microseconds that
                                         * this thread has run for:
                                         */
                                        _thread_run->slice_usec +=
                                            (_thread_run->last_inactive.tv_sec -
                                            _thread_run->last_active.tv_sec) * 1000000 +
                                            _thread_run->last_inactive.tv_usec -
                                            _thread_run->last_active.tv_usec;

                                        /* Check for time quantum exceeded: */
                                        if (_thread_run->slice_usec > TIMESLICE_USEC)
                                                _thread_run->slice_usec = -1;
                                }
                                break;

                        /*
                         * States which do not depend on file descriptor I/O
                         * operations or timeouts:
                         */
                        case PS_DEADLOCK:
                        case PS_FDLR_WAIT:
                        case PS_FDLW_WAIT:
                        case PS_FILE_WAIT:
                        case PS_JOIN:
                        case PS_MUTEX_WAIT:
                        case PS_SIGSUSPEND:
                        case PS_SIGTHREAD:
                        case PS_SIGWAIT:
                        case PS_SUSPENDED:
                        case PS_WAIT_WAIT:
                                /* No timeouts for these states: */
                                _thread_run->wakeup_time.tv_sec = -1;
                                _thread_run->wakeup_time.tv_nsec = -1;

                                /* Restart the time slice: */
                                _thread_run->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(_thread_run);
                                break;
                        /* States which can time out: */
                        case PS_COND_WAIT:
                        case PS_SLEEP_WAIT:
                                /* Restart the time slice: */
                                _thread_run->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(_thread_run);
                                break;

                        /* States that require periodic work: */
                        case PS_SPINBLOCK:
                                /* No timeouts for this state: */
                                _thread_run->wakeup_time.tv_sec = -1;
                                _thread_run->wakeup_time.tv_nsec = -1;

                                /* Increment spinblock count: */
                                _spinblock_count++;

                                /* fall through */
                        case PS_FDR_WAIT:
                        case PS_FDW_WAIT:
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                /* Restart the time slice: */
                                _thread_run->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(_thread_run);

                                /* Insert into the work queue: */
                                PTHREAD_WORKQ_INSERT(_thread_run);
                                break;
                        }
                }

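                /*
                 * Threads placed on the work queue above are revisited
                 * by _thread_kern_poll(), which checks their file
                 * descriptors and spinlocks for readiness.
                 */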
                /* Unprotect the scheduling queues: */
                _queue_signals = 0;

                /*
                 * Poll file descriptors to update the state of threads
                 * waiting on file I/O where data may be available:
                 */
                _thread_kern_poll(0);

                /* Protect the scheduling queues: */
                _queue_signals = 1;

                /*
                 * Wake up threads that have timed out.  This has to be
                 * done after polling in case a thread does a poll or
                 * select with zero time.
                 */
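                /*
                 * A wakeup_time of {-1, -1} marks a thread with no
                 * timeout and {0, 0} a thread that should wake
                 * immediately; anything else is compared against the
                 * current time (ts).
                 */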
                PTHREAD_WAITQ_SETACTIVE();
                while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
                    (pthread->wakeup_time.tv_sec != -1) &&
                    (((pthread->wakeup_time.tv_sec == 0) &&
                    (pthread->wakeup_time.tv_nsec == 0)) ||
                    (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
                    ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
                    (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
                        switch (pthread->state) {
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                /* Return zero file descriptors ready: */
                                pthread->data.poll_data->nfds = 0;
                                /* fall through */
                        default:
                                /*
                                 * Remove this thread from the waiting queue
                                 * (and work queue if necessary) and place it
                                 * in the ready queue.
                                 */
                                PTHREAD_WAITQ_CLEARACTIVE();
                                if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                PTHREAD_WAITQ_SETACTIVE();
                                break;
                        }
                        /*
                         * Flag the timeout in the thread structure:
                         */
                        pthread->timeout = 1;
                }
                PTHREAD_WAITQ_CLEARACTIVE();

                /*
                 * Check if there is a current runnable thread that isn't
                 * already in the ready queue:
                 */
                if ((_thread_run != &_thread_kern_thread) &&
                    (_thread_run->state == PS_RUNNING) &&
                    ((_thread_run->flags & PTHREAD_FLAGS_IN_PRIOQ) == 0)) {
                        if (_thread_run->slice_usec == -1) {
                                /*
                                 * The thread exceeded its time
                                 * quantum or it yielded the CPU;
                                 * place it at the tail of the
                                 * queue for its priority.
                                 */
                                PTHREAD_PRIOQ_INSERT_TAIL(_thread_run);
                        } else {
                                /*
                                 * The thread hasn't exceeded its
                                 * interval.  Place it at the head
                                 * of the queue for its priority.
                                 */
                                PTHREAD_PRIOQ_INSERT_HEAD(_thread_run);
                        }
                }

                /*
                 * Get the highest priority thread in the ready queue.
                 */
                pthread_h = PTHREAD_PRIOQ_FIRST();

                /* Check if there are no threads ready to run: */
                if (pthread_h == NULL) {
                        /*
                         * Lock the pthread kernel by changing the pointer to
                         * the running thread to point to the global kernel
                         * thread structure:
                         */
                        _thread_run = &_thread_kern_thread;

                        /* Unprotect the scheduling queues: */
                        _queue_signals = 0;

                        /*
                         * There are no threads ready to run, so wait until
                         * something happens that changes this condition:
                         */
                        _thread_kern_poll(1);
                }
                else {
                        /* Remove the thread from the ready queue: */
                        PTHREAD_PRIOQ_REMOVE(pthread_h);

                        /* Get first thread on the waiting list: */
                        pthread = TAILQ_FIRST(&_waitingq);

                        /* Check to see if there is more than one thread: */
                        if (pthread_h != TAILQ_FIRST(&_thread_list) ||
                            TAILQ_NEXT(pthread_h, tle) != NULL)
                                set_timer = 1;
                        else
                                set_timer = 0;
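                        /*
                         * The scheduling itimer armed further below is
                         * only needed when another thread could run
                         * once this thread's time slice expires.
                         */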

                        /* Unprotect the scheduling queues: */
                        _queue_signals = 0;

                        /*
                         * Check for signals queued while the scheduling
                         * queues were protected:
                         */
                        while (_sigq_check_reqd != 0) {
                                /* Clear before handling queued signals: */
                                _sigq_check_reqd = 0;

                                /* Protect the scheduling queues again: */
                                _queue_signals = 1;

                                dequeue_signals();

                                /*
                                 * Check for a higher priority thread that
                                 * became runnable due to signal handling.
                                 */
                                if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
                                    (pthread->active_priority > pthread_h->active_priority)) {
                                        /*
                                         * Insert the lower priority thread
                                         * at the head of its priority list:
                                         */
                                        PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);

                                        /* Remove the thread from the ready queue: */
                                        PTHREAD_PRIOQ_REMOVE(pthread);

                                        /* There's a new thread in town: */
                                        pthread_h = pthread;
                                }

                                /* Get first thread on the waiting list: */
                                pthread = TAILQ_FIRST(&_waitingq);

                                /*
                                 * Check to see if there is more than one
                                 * thread:
                                 */
                                if (pthread_h != TAILQ_FIRST(&_thread_list) ||
                                    TAILQ_NEXT(pthread_h, tle) != NULL)
                                        set_timer = 1;
                                else
                                        set_timer = 0;

                                /* Unprotect the scheduling queues: */
                                _queue_signals = 0;
                        }

                        /* Make the selected thread the current thread: */
                        _thread_run = pthread_h;

                        /*
                         * Save the current time as the time that the thread
                         * became active:
                         */
                        _thread_run->last_active.tv_sec = tv.tv_sec;
                        _thread_run->last_active.tv_usec = tv.tv_usec;

                        /*
                         * Define the maximum time before a scheduling signal
                         * is required:
                         */
                        itimer.it_value.tv_sec = 0;
                        itimer.it_value.tv_usec = TIMESLICE_USEC;

                        /*
                         * The interval timer is not reloaded when it
                         * times out. The interval time needs to be
                         * calculated every time.
                         */
                        itimer.it_interval.tv_sec = 0;
                        itimer.it_interval.tv_usec = 0;

                        /* Check the first thread on the waiting list: */
                        if ((pthread != NULL) &&
                            (pthread->wakeup_time.tv_sec != -1)) {
                                /*
                                 * Calculate the time until this thread
                                 * is ready, allowing for the clock
                                 * resolution:
                                 */
                                ts1.tv_sec = pthread->wakeup_time.tv_sec
                                    - ts.tv_sec;
                                ts1.tv_nsec = pthread->wakeup_time.tv_nsec
                                    - ts.tv_nsec + _clock_res_nsec;

                                /*
                                 * Check for underflow of the nanosecond field:
                                 */
                                while (ts1.tv_nsec < 0) {
                                        /*
                                         * Allow for the underflow of the
                                         * nanosecond field:
                                         */
                                        ts1.tv_sec--;
                                        ts1.tv_nsec += 1000000000;
                                }
                                /*
                                 * Check for overflow of the nanosecond field:
                                 */
                                while (ts1.tv_nsec >= 1000000000) {
                                        /*
                                         * Allow for the overflow of the
                                         * nanosecond field:
                                         */
                                        ts1.tv_sec++;
                                        ts1.tv_nsec -= 1000000000;
                                }
                                /*
                                 * Convert the timespec structure to a
                                 * timeval structure:
                                 */
                                TIMESPEC_TO_TIMEVAL(&tv1, &ts1);

                                /*
                                 * Check if the thread will be ready
                                 * sooner than the earliest ones found
                                 * so far:
                                 */
                                if (timercmp(&tv1, &itimer.it_value, <)) {
                                        /*
                                         * Update the time value:
                                         */
                                        itimer.it_value.tv_sec = tv1.tv_sec;
                                        itimer.it_value.tv_usec = tv1.tv_usec;
                                }
                        }

                        /*
                         * Check if this thread is running for the first time
                         * or running again after using its full time slice
                         * allocation:
                         */
                        if (_thread_run->slice_usec == -1) {
                                /* Reset the accumulated time slice period: */
                                _thread_run->slice_usec = 0;
                        }

                        /* Check if there is more than one thread: */
                        if (set_timer != 0) {
                                /*
                                 * Start the interval timer for the
                                 * calculated time interval:
                                 */
                                if (setitimer(_ITIMER_SCHED_TIMER, &itimer, NULL) != 0) {
                                        /*
                                         * Cannot initialise the timer, so
                                         * abort this process:
                                         */
                                        PANIC("Cannot set scheduling timer");
                                }
                        }

                        /* Check if a signal context was saved: */
                        if (_thread_run->sig_saved == 1) {
#ifndef __alpha__
                                /*
                                 * Point to the floating point data in the
                                 * running thread:
                                 */
                                fdata = _thread_run->saved_fp;

                                /* Restore the floating point state: */
                                __asm__("frstor %0": :"m"(*fdata));
#endif
                                /*
                                 * Do a sigreturn to restart the thread that
                                 * was interrupted by a signal:
                                 */
                                _thread_kern_in_sched = 0;

                                /*
                                 * If we had a context switch, run any
                                 * installed switch hooks.
                                 */
                                if ((_sched_switch_hook != NULL) &&
                                    (_last_user_thread != _thread_run)) {
                                        thread_run_switch_hook(_last_user_thread,
                                            _thread_run);
                                }
                                _thread_sys_sigreturn(&_thread_run->saved_sigcontext);
                        } else {
                                /*
                                 * Do a longjmp to restart the thread that
                                 * was context switched out (by a longjmp to
                                 * a different thread):
                                 */
                                longjmp(_thread_run->saved_jmp_buf, 1);
                        }

                        /* This point should not be reached. */
                        PANIC("Thread has returned from sigreturn or longjmp");
                }
        }

        /* There are no more threads, so exit this process: */
        exit(0);
}

void
_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
{
        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /*
         * Prevent the signal handler from fiddling with this thread
         * before its state is set and it is placed into the proper
         * queue.
         */
        _queue_signals = 1;

        /* Change the state of the current thread: */
        _thread_run->state = state;
        _thread_run->fname = fname;
        _thread_run->lineno = lineno;

        /* Schedule the next thread that is ready: */
        _thread_kern_sched(NULL);
        return;
}

void
_thread_kern_sched_state_unlock(enum pthread_state state,
    spinlock_t *lock, char *fname, int lineno)
{
        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /*
         * Prevent the signal handler from fiddling with this thread
         * before its state is set and it is placed into the proper
         * queue(s).
         */
        _queue_signals = 1;

        /* Change the state of the current thread: */
        _thread_run->state = state;
        _thread_run->fname = fname;
        _thread_run->lineno = lineno;

        _SPINUNLOCK(lock);

        /* Schedule the next thread that is ready: */
        _thread_kern_sched(NULL);
        return;
}

static void
_thread_kern_poll(int wait_reqd)
{
        int             count = 0;
        int             i, found;
        int             kern_pipe_added = 0;
        int             nfds = 0;
        int             timeout_ms = 0;
        struct pthread  *pthread;
        struct timespec ts;
        struct timeval  tv;

        /* Check if the caller wants to wait: */
        if (wait_reqd == 0) {
                timeout_ms = 0;
        }
        else {
                /* Get the current time of day: */
                gettimeofday(&tv, NULL);
                TIMEVAL_TO_TIMESPEC(&tv, &ts);

                _queue_signals = 1;
                pthread = TAILQ_FIRST(&_waitingq);
                _queue_signals = 0;

                if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
                        /*
                         * Either there are no threads in the waiting queue,
                         * or there are no threads that can time out.
                         */
                        timeout_ms = INFTIM;
                }
                else {
                        /*
                         * Calculate the time left for the next thread to
                         * time out, allowing for the clock resolution:
                         */
                        timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
                            1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec +
                            _clock_res_nsec) / 1000000);
                        /*
                         * Don't allow negative timeouts:
                         */
                        if (timeout_ms < 0)
                                timeout_ms = 0;
                }
        }

        /* Protect the scheduling queues: */
        _queue_signals = 1;

        /*
         * Check to see if the signal queue needs to be walked to look
         * for threads awoken by a signal while in the scheduler.
         */
        if (_sigq_check_reqd != 0) {
                /* Reset flag before handling queued signals: */
                _sigq_check_reqd = 0;

                dequeue_signals();
        }

        /*
         * Check for a thread that became runnable due to a signal:
         */
        if (PTHREAD_PRIOQ_FIRST() != NULL) {
                /*
                 * Since there is at least one runnable thread,
                 * disable the wait.
                 */
                timeout_ms = 0;
        }

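        /*
         * Queued signals arrive as single bytes on the pthread kernel
         * pipe (see dequeue_signals()), so polling its read end lets a
         * queued signal end the wait below.
         */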
        /*
         * Form the poll table:
         */
        nfds = 0;
        if (timeout_ms != 0) {
                /* Add the kernel pipe to the poll table: */
                _thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
                _thread_pfd_table[nfds].events = POLLRDNORM;
                _thread_pfd_table[nfds].revents = 0;
                nfds++;
                kern_pipe_added = 1;
        }

        PTHREAD_WAITQ_SETACTIVE();
        TAILQ_FOREACH(pthread, &_workq, qe) {
                switch (pthread->state) {
                case PS_SPINBLOCK:
                        /*
                         * If the lock is available, let the thread run.
                         */
                        if (pthread->data.spinlock->access_lock == 0) {
                                PTHREAD_WAITQ_CLEARACTIVE();
                                PTHREAD_WORKQ_REMOVE(pthread);
                                PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                PTHREAD_WAITQ_SETACTIVE();
                                /* One less thread in a spinblock state: */
                                _spinblock_count--;
                                /*
                                 * Since there is at least one runnable
                                 * thread, disable the wait.
                                 */
                                timeout_ms = 0;
                        }
                        break;

                /* File descriptor read wait: */
                case PS_FDR_WAIT:
                        /* Limit number of polled files to table size: */
                        if (nfds < _thread_dtablesize) {
                                _thread_pfd_table[nfds].events = POLLRDNORM;
                                _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
                                nfds++;
                        }
                        break;

                /* File descriptor write wait: */
                case PS_FDW_WAIT:
                        /* Limit number of polled files to table size: */
                        if (nfds < _thread_dtablesize) {
                                _thread_pfd_table[nfds].events = POLLWRNORM;
                                _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
                                nfds++;
                        }
                        break;

                /* File descriptor poll or select wait: */
                case PS_POLL_WAIT:
                case PS_SELECT_WAIT:
                        /* Limit number of polled files to table size: */
                        if (pthread->data.poll_data->nfds + nfds <
                            _thread_dtablesize) {
                                for (i = 0; i < pthread->data.poll_data->nfds; i++) {
                                        _thread_pfd_table[nfds + i].fd =
                                            pthread->data.poll_data->fds[i].fd;
                                        _thread_pfd_table[nfds + i].events =
                                            pthread->data.poll_data->fds[i].events;
                                }
                                nfds += pthread->data.poll_data->nfds;
                        }
                        break;

                /* Other states do not depend on file I/O. */
                default:
                        break;
                }
        }
        PTHREAD_WAITQ_CLEARACTIVE();

        /*
         * Wait for a file descriptor to be ready for read, write, or
         * an exception, or a timeout to occur:
         */
        count = _thread_sys_poll(_thread_pfd_table, nfds, timeout_ms);

        if (kern_pipe_added != 0)
                /*
                 * The pthread kernel pipe occupies the first slot in
                 * the pollfd table, so start rescanning after it:
                 */
                nfds = 1;
        else
                nfds = 0;

        /*
         * Check if it is possible that there are bytes in the kernel
         * read pipe waiting to be read:
         */
        if (count < 0 || ((kern_pipe_added != 0) &&
            (_thread_pfd_table[0].revents & POLLRDNORM))) {
                /*
                 * If the kernel read pipe was included in the
                 * count:
                 */
                if (count > 0) {
                        /* Decrement the count of file descriptors: */
                        count--;
                }

                if (_sigq_check_reqd != 0) {
                        /* Reset flag before handling signals: */
                        _sigq_check_reqd = 0;

                        dequeue_signals();
                }
        }

        /*
         * Check if any file descriptors are ready:
         */
        if (count > 0) {
                /*
                 * Enter a loop to look for threads waiting on file
                 * descriptors that are flagged as available by the
                 * _poll syscall:
                 */
                PTHREAD_WAITQ_SETACTIVE();
                TAILQ_FOREACH(pthread, &_workq, qe) {
                        switch (pthread->state) {
                        case PS_SPINBLOCK:
                                /*
                                 * If the lock is available, let the thread run.
                                 */
                                if (pthread->data.spinlock->access_lock == 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();

                                        /*
                                         * One less thread in a spinblock state:
                                         */
                                        _spinblock_count--;
                                }
                                break;

                        /* File descriptor read wait: */
                        case PS_FDR_WAIT:
                                if ((nfds < _thread_dtablesize) &&
                                    (_thread_pfd_table[nfds].revents & POLLRDNORM)) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();
                                }
                                nfds++;
                                break;

                        /* File descriptor write wait: */
                        case PS_FDW_WAIT:
                                if ((nfds < _thread_dtablesize) &&
                                    (_thread_pfd_table[nfds].revents & POLLWRNORM)) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();
                                }
                                nfds++;
                                break;

                        /* File descriptor poll or select wait: */
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                if (pthread->data.poll_data->nfds + nfds <
                                    _thread_dtablesize) {
                                        /*
                                         * Enter a loop looking for I/O
                                         * readiness:
                                         */
                                        found = 0;
                                        for (i = 0; i < pthread->data.poll_data->nfds; i++) {
                                                if (_thread_pfd_table[nfds + i].revents != 0) {
                                                        pthread->data.poll_data->fds[i].revents =
                                                            _thread_pfd_table[nfds + i].revents;
                                                        found++;
                                                }
                                        }

                                        /*
                                         * Advance nfds before poll_data->nfds
                                         * is overwritten below:
                                         */
                                        nfds += pthread->data.poll_data->nfds;

                                        if (found != 0) {
                                                pthread->data.poll_data->nfds = found;
                                                PTHREAD_WAITQ_CLEARACTIVE();
                                                PTHREAD_WORKQ_REMOVE(pthread);
                                                PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                                PTHREAD_WAITQ_SETACTIVE();
                                        }
                                }
                                else
                                        nfds += pthread->data.poll_data->nfds;
                                break;

                        /* Other states do not depend on file I/O. */
                        default:
                                break;
                        }
                }
                PTHREAD_WAITQ_CLEARACTIVE();
        }
        else if (_spinblock_count != 0) {
                /*
                 * Enter a loop to look for threads waiting on a spinlock
                 * that is now available.
                 */
                PTHREAD_WAITQ_SETACTIVE();
                TAILQ_FOREACH(pthread, &_workq, qe) {
                        if (pthread->state == PS_SPINBLOCK) {
                                /*
                                 * If the lock is available, let the thread run.
                                 */
                                if (pthread->data.spinlock->access_lock == 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();

                                        /*
                                         * One less thread in a spinblock state:
                                         */
                                        _spinblock_count--;
                                }
                        }
                }
                PTHREAD_WAITQ_CLEARACTIVE();
        }

        /* Unprotect the scheduling queues: */
        _queue_signals = 0;

        while (_sigq_check_reqd != 0) {
                /* Handle queued signals: */
                _sigq_check_reqd = 0;

                /* Protect the scheduling queues: */
                _queue_signals = 1;

                dequeue_signals();

                /* Unprotect the scheduling queues: */
                _queue_signals = 0;
        }

        /* Nothing to return. */
        return;
}

void
_thread_kern_set_timeout(struct timespec * timeout)
{
        struct timespec current_time;
        struct timeval  tv;

        /* Reset the timeout flag for the running thread: */
        _thread_run->timeout = 0;

        /* Check if the thread is to wait forever: */
        if (timeout == NULL) {
                /*
                 * Set the wakeup time to something that can be recognised as
                 * different to an actual time of day:
                 */
                _thread_run->wakeup_time.tv_sec = -1;
                _thread_run->wakeup_time.tv_nsec = -1;
        }
        /* Check if no waiting is required: */
        else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
                /* Set the wake up time to 'immediately': */
                _thread_run->wakeup_time.tv_sec = 0;
                _thread_run->wakeup_time.tv_nsec = 0;
        } else {
                /* Get the current time: */
                gettimeofday(&tv, NULL);
                TIMEVAL_TO_TIMESPEC(&tv, &current_time);

                /* Calculate the time for the current thread to wake up: */
                _thread_run->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
                _thread_run->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;

                /* Check if the nanosecond field needs to wrap: */
                if (_thread_run->wakeup_time.tv_nsec >= 1000000000) {
                        /* Wrap the nanosecond field: */
                        _thread_run->wakeup_time.tv_sec += 1;
                        _thread_run->wakeup_time.tv_nsec -= 1000000000;
                }
        }
        return;
}

void
_thread_kern_sig_defer(void)
{
        /* Allow signal deferral to be recursive. */
        _thread_run->sig_defer_count++;
}

void
_thread_kern_sig_undefer(void)
{
        pthread_t pthread;
        int need_resched = 0;

        /*
         * Perform checks to yield only if we are about to undefer
         * signals.
         */
        if (_thread_run->sig_defer_count > 1) {
                /* Decrement the signal deferral count. */
                _thread_run->sig_defer_count--;
        }
        else if (_thread_run->sig_defer_count == 1) {
                /* Reenable signals: */
                _thread_run->sig_defer_count = 0;

                /*
                 * Check if there are queued signals:
                 */
                while (_sigq_check_reqd != 0) {
                        /* Defer scheduling while we process queued signals: */
                        _thread_run->sig_defer_count = 1;

                        /* Clear the flag before checking the signal queue: */
                        _sigq_check_reqd = 0;

                        /* Dequeue and handle signals: */
                        dequeue_signals();

                        /*
                         * Unless a reschedule is already needed, check
                         * whether signal handling made a higher priority
                         * thread ready to run.
                         */
                        if ((need_resched == 0) &&
                            (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
                            (pthread->active_priority > _thread_run->active_priority))) {
                                need_resched = 1;
                        }

                        /* Reenable signals: */
                        _thread_run->sig_defer_count = 0;
                }

                /* Yield the CPU if necessary: */
                if (need_resched || _thread_run->yield_on_sig_undefer != 0) {
                        _thread_run->yield_on_sig_undefer = 0;
                        _thread_kern_sched(NULL);
                }
        }
}

static void
dequeue_signals(void)
{
        char    bufr[128];
        int     i, num;
        pthread_t pthread;

        /*
         * Enter a loop to read and handle queued signals from the
         * pthread kernel pipe:
         */
        while (((num = _thread_sys_read(_thread_kern_pipe[0], bufr,
            sizeof(bufr))) > 0) || (num == -1 && errno == EINTR)) {
                /*
                 * The buffer read contains one byte per signal and
                 * each byte is the signal number.
                 */
                for (i = 0; i < num; i++) {
                        if ((int) bufr[i] == _SCHED_SIGNAL) {
                                /*
                                 * Scheduling signals shouldn't ever be
                                 * queued; just ignore it for now.
                                 */
                        }
                        else {
                                /* Handle this signal: */
                                pthread = _thread_sig_handle((int) bufr[i],
                                    NULL);
                                if (pthread != NULL)
                                        _thread_sig_deliver(pthread,
                                            (int) bufr[i]);
                        }
                }
        }
        if ((num < 0) && (errno != EAGAIN)) {
                /*
                 * The only error we should expect is if there is
                 * no data to read.
                 */
                PANIC("Unable to read from thread kernel pipe");
        }
}

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
{
        pthread_t tid_out = thread_out;
        pthread_t tid_in = thread_in;

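        /*
         * Private (library-internal) threads are not reported to the
         * switch hook; pass them as NULL instead:
         */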
        if ((tid_out != NULL) &&
            (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
                tid_out = NULL;
        if ((tid_in != NULL) &&
            (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
                tid_in = NULL;

        if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
                /* Run the scheduler switch hook: */
                _sched_switch_hook(tid_out, tid_in);
        }
}
#endif