/*
 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */
#include <errno.h>
#include <limits.h>
#include <poll.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <setjmp.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/syscall.h>
#include <fcntl.h>
#ifdef _THREAD_SAFE
#include <pthread.h>
#include "pthread_private.h"

/* #define DEBUG_THREAD_KERN */
#ifdef DEBUG_THREAD_KERN
#define DBG_MSG		stdout_debug
#else
#define DBG_MSG(x...)
#endif

/* Static function prototype definitions: */
static void
thread_kern_poll(int wait_reqd);

static void
dequeue_signals(void);

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);

/* Static variables: */
static int	last_tick = 0;
static int	called_from_handler = 0;

/*
 * This is called when a signal handler finishes and wants to
 * return to a previous frame.
 */
void
_thread_kern_sched_frame(struct pthread_signal_frame *psf)
{
	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid a signal from interrupting this execution and
	 * corrupting the (soon-to-be) current frame.
	 */
	_thread_kern_in_sched = 1;

	/* Restore the signal frame: */
	_thread_sigframe_restore(_thread_run, psf);

	/* Switch to the thread scheduler: */
	___longjmp(_thread_kern_sched_jb, 1);
}


void
_thread_kern_sched(ucontext_t *scp)
{
	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid a scheduler signal from interrupting this
	 * execution and calling the scheduler again.
	 */
	_thread_kern_in_sched = 1;

	/* Check if this function was called from the signal handler: */
	if (scp != NULL) {
		called_from_handler = 1;
		/*
		 * We're running on the signal stack; just call the
		 * kernel scheduler directly.
		 */
		DBG_MSG("Entering scheduler due to signal\n");
	} else {
		/* Save the state of the current thread: */
		if (_setjmp(_thread_run->ctx.jb) == 0) {
			/* Flag the jump buffer was the last state saved: */
			_thread_run->ctxtype = CTX_JB_NOSIG;
			_thread_run->longjmp_val = 1;
		} else {
			DBG_MSG("Returned from ___longjmp, thread %p\n",
			    _thread_run);
			/*
			 * This point is reached when a longjmp() is called
			 * to restore the state of a thread.
			 *
			 * This is the normal way out of the scheduler.
			 */
			_thread_kern_in_sched = 0;

			if (_thread_run->sig_defer_count == 0) {
				if (((_thread_run->cancelflags &
				    PTHREAD_AT_CANCEL_POINT) == 0) &&
				    ((_thread_run->cancelflags &
				    PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
					/*
					 * Cancellations override signals.
					 *
					 * Stick a cancellation point at the
					 * start of each async-cancellable
					 * thread's resumption.
					 *
					 * We allow threads woken at cancel
					 * points to do their own checks.
					 */
					pthread_testcancel();
			}

			if (_sched_switch_hook != NULL) {
				/* Run the installed switch hook: */
				thread_run_switch_hook(_last_user_thread,
				    _thread_run);
			}
			return;
		}
	}
	/* Switch to the thread scheduler: */
	___longjmp(_thread_kern_sched_jb, 1);
}

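/*
 * Enter the scheduler after flagging that the current thread has
 * signals pending that must be checked when it resumes.
 */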
void
_thread_kern_sched_sig(void)
{
	_thread_run->check_pending = 1;
	_thread_kern_sched(NULL);
}


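/*
 * The scheduler proper.  This is entered via longjmp from
 * _thread_kern_sched() and friends; it loops selecting the next
 * runnable thread and resumes it directly via longjmp or sigreturn,
 * so it never returns to its caller.  When the thread list is empty,
 * the process exits.
 */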
void
_thread_kern_scheduler(void)
{
	struct timespec	ts;
	struct timeval	tv;
	pthread_t	pthread, pthread_h;
	unsigned int	current_tick;
	int		add_to_prioq;

	/* If the currently running thread is a user thread, save it: */
	if ((_thread_run->flags & PTHREAD_FLAGS_PRIVATE) == 0)
		_last_user_thread = _thread_run;

	if (called_from_handler != 0) {
		called_from_handler = 0;

		/*
		 * The signal handler should have saved the state of
		 * the current thread.  Restore the process signal
		 * mask.
		 */
		if (_thread_sys_sigprocmask(SIG_SETMASK,
		    &_process_sigmask, NULL) != 0)
			PANIC("Unable to restore process mask after signal");

		/*
		 * Since the signal handler didn't return normally, we
		 * have to tell the kernel to reuse the signal stack.
		 */
		if (_thread_sys_sigaltstack(&_thread_sigstack, NULL) != 0)
			PANIC("Unable to restore alternate signal stack");
	}

	/* Are there pending signals for this thread? */
	if (_thread_run->check_pending != 0) {
		_thread_run->check_pending = 0;
		_thread_sig_check_pending(_thread_run);
	}

	/*
	 * Enter a scheduling loop that finds the next thread that is
	 * ready to run. This loop completes when there are no more threads
	 * in the global list or when a thread has its state restored by
	 * either a sigreturn (if the state was saved as a sigcontext) or a
	 * longjmp (if the state was saved by a setjmp).
	 */
	while (!(TAILQ_EMPTY(&_thread_list))) {
		/* Get the current time of day: */
		GET_CURRENT_TOD(tv);
		TIMEVAL_TO_TIMESPEC(&tv, &ts);
		current_tick = _sched_ticks;

		/*
		 * Protect the scheduling queues from access by the signal
		 * handler.
		 */
		_queue_signals = 1;
		add_to_prioq = 0;

		if (_thread_run != &_thread_kern_thread) {
			/*
			 * This thread no longer needs to yield the CPU.
			 */
			_thread_run->yield_on_sig_undefer = 0;

			if (_thread_run->state != PS_RUNNING) {
				/*
				 * Save the current time as the time that the
				 * thread became inactive:
				 */
				_thread_run->last_inactive = (long)current_tick;
				if (_thread_run->last_inactive <
				    _thread_run->last_active) {
					/* Account for a rollover: */
					_thread_run->last_inactive +=
					    UINT_MAX + 1;
				}
			}

			/*
			 * Place the currently running thread into the
			 * appropriate queue(s).
			 */
			switch (_thread_run->state) {
			case PS_DEAD:
			case PS_STATE_MAX: /* to silence -Wall */
			case PS_SUSPENDED:
				/*
				 * Dead and suspended threads are not placed
				 * in any queue:
				 */
				break;

			case PS_RUNNING:
				/*
				 * Runnable threads can't be placed in the
				 * priority queue until after waiting threads
				 * are polled (to preserve round-robin
				 * scheduling).
				 */
				add_to_prioq = 1;
				break;

			/*
			 * States which do not depend on file descriptor I/O
			 * operations or timeouts:
			 */
			case PS_DEADLOCK:
			case PS_FDLR_WAIT:
			case PS_FDLW_WAIT:
			case PS_FILE_WAIT:
			case PS_JOIN:
			case PS_MUTEX_WAIT:
			case PS_SIGSUSPEND:
			case PS_SIGTHREAD:
			case PS_SIGWAIT:
			case PS_WAIT_WAIT:
				/* No timeouts for these states: */
				_thread_run->wakeup_time.tv_sec = -1;
				_thread_run->wakeup_time.tv_nsec = -1;

				/* Restart the time slice: */
				_thread_run->slice_usec = -1;

				/* Insert into the waiting queue: */
				PTHREAD_WAITQ_INSERT(_thread_run);
				break;

			/* States which can time out: */
			case PS_COND_WAIT:
			case PS_SLEEP_WAIT:
				/* Restart the time slice: */
				_thread_run->slice_usec = -1;

				/* Insert into the waiting queue: */
				PTHREAD_WAITQ_INSERT(_thread_run);
				break;

			/* States that require periodic work: */
			case PS_SPINBLOCK:
				/* No timeouts for this state: */
				_thread_run->wakeup_time.tv_sec = -1;
				_thread_run->wakeup_time.tv_nsec = -1;

				/* Increment spinblock count: */
				_spinblock_count++;

				/* FALLTHROUGH */
			case PS_FDR_WAIT:
			case PS_FDW_WAIT:
			case PS_POLL_WAIT:
			case PS_SELECT_WAIT:
				/* Restart the time slice: */
				_thread_run->slice_usec = -1;

				/* Insert into the waiting queue: */
				PTHREAD_WAITQ_INSERT(_thread_run);

				/* Insert into the work queue: */
				PTHREAD_WORKQ_INSERT(_thread_run);
				break;
			}
		}

		/*
		 * Avoid polling file descriptors if there are none
		 * waiting.  Otherwise, poll them only if a new
		 * scheduling signal has occurred or if there are no
		 * more runnable threads.
		 */
		if ((TAILQ_EMPTY(&_workq) == 0) &&
		    (((current_tick = _sched_ticks) != last_tick) ||
		    ((_thread_run->state != PS_RUNNING) &&
		    (PTHREAD_PRIOQ_FIRST() == NULL)))) {
			/* Unprotect the scheduling queues: */
			_queue_signals = 0;

			/*
			 * Poll file descriptors to update the state of threads
			 * waiting on file I/O where data may be available:
			 */
			thread_kern_poll(0);

			/* Protect the scheduling queues: */
			_queue_signals = 1;
		}
		last_tick = current_tick;

		/*
		 * Wake up threads that have timed out.  This has to be
		 * done after polling in case a thread does a poll or
		 * select with zero time.
		 */
		PTHREAD_WAITQ_SETACTIVE();
		while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
		    (pthread->wakeup_time.tv_sec != -1) &&
		    (((pthread->wakeup_time.tv_sec == 0) &&
		    (pthread->wakeup_time.tv_nsec == 0)) ||
		    (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
		    ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
		    (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
			switch (pthread->state) {
			case PS_POLL_WAIT:
			case PS_SELECT_WAIT:
				/* Return zero file descriptors ready: */
				pthread->data.poll_data->nfds = 0;
				/* fall through */
			default:
				/*
				 * Remove this thread from the waiting queue
				 * (and work queue if necessary) and place it
				 * in the ready queue.
				 */
				PTHREAD_WAITQ_CLEARACTIVE();
				if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
					PTHREAD_WORKQ_REMOVE(pthread);
				PTHREAD_NEW_STATE(pthread, PS_RUNNING);
				PTHREAD_WAITQ_SETACTIVE();
				break;
			}
			/*
			 * Flag the timeout in the thread structure:
			 */
			pthread->timeout = 1;
		}
		PTHREAD_WAITQ_CLEARACTIVE();

		/*
		 * Check to see if the current thread needs to be added
		 * to the priority queue:
		 */
		if (add_to_prioq != 0) {
			/*
			 * Save the current time as the time that the
			 * thread became inactive:
			 */
			current_tick = _sched_ticks;
			_thread_run->last_inactive = (long)current_tick;
			if (_thread_run->last_inactive <
			    _thread_run->last_active) {
				/* Account for a rollover: */
				_thread_run->last_inactive += UINT_MAX + 1;
			}

			if ((_thread_run->slice_usec != -1) &&
			    (_thread_run->attr.sched_policy != SCHED_FIFO)) {
				/*
				 * Accumulate the number of microseconds for
				 * which the current thread has run:
				 */
				_thread_run->slice_usec +=
				    (_thread_run->last_inactive -
				    _thread_run->last_active) *
				    (long)_clock_res_usec;
				/* Check for time quantum exceeded: */
				if (_thread_run->slice_usec > TIMESLICE_USEC)
					_thread_run->slice_usec = -1;
			}

			if (_thread_run->slice_usec == -1) {
				/*
				 * The thread exceeded its time
				 * quantum or it yielded the CPU;
				 * place it at the tail of the
				 * queue for its priority.
				 */
				PTHREAD_PRIOQ_INSERT_TAIL(_thread_run);
			} else {
				/*
				 * The thread hasn't exceeded its
				 * interval.  Place it at the head
				 * of the queue for its priority.
				 */
				PTHREAD_PRIOQ_INSERT_HEAD(_thread_run);
			}
		}

		/*
		 * Get the highest priority thread in the ready queue.
		 */
		pthread_h = PTHREAD_PRIOQ_FIRST();

		/* Check if there are no threads ready to run: */
		if (pthread_h == NULL) {
			/*
			 * Lock the pthread kernel by changing the pointer to
			 * the running thread to point to the global kernel
			 * thread structure:
			 */
			_thread_run = &_thread_kern_thread;
			DBG_MSG("No runnable threads, using kernel thread %p\n",
			    _thread_run);

			/* Unprotect the scheduling queues: */
			_queue_signals = 0;

			/*
			 * There are no threads ready to run, so wait until
			 * something happens that changes this condition:
			 */
			thread_kern_poll(1);

			/*
			 * This process' usage will likely be very small
			 * while waiting in a poll.  Since the scheduling
			 * clock is based on the profiling timer, it is
			 * unlikely that the profiling timer will fire
			 * and update the time of day.  To account for this,
			 * get the time of day after polling with a timeout.
			 */
			gettimeofday((struct timeval *) &_sched_tod, NULL);

			/* Check once more for a runnable thread: */
			_queue_signals = 1;
			pthread_h = PTHREAD_PRIOQ_FIRST();
			_queue_signals = 0;
		}

		if (pthread_h != NULL) {
			/* Remove the thread from the ready queue: */
			PTHREAD_PRIOQ_REMOVE(pthread_h);

			/* Unprotect the scheduling queues: */
			_queue_signals = 0;

			/*
			 * Check for signals queued while the scheduling
			 * queues were protected:
			 */
			while (_sigq_check_reqd != 0) {
				/* Clear before handling queued signals: */
				_sigq_check_reqd = 0;

				/* Protect the scheduling queues again: */
				_queue_signals = 1;

				dequeue_signals();

				/*
				 * Check for a higher priority thread that
				 * became runnable due to signal handling.
				 */
				if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
				    (pthread->active_priority > pthread_h->active_priority)) {
					/* Remove the thread from the ready queue: */
					PTHREAD_PRIOQ_REMOVE(pthread);

					/*
					 * Insert the lower priority thread
					 * at the head of its priority list:
					 */
					PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);

					/* There's a new thread in town: */
					pthread_h = pthread;
				}

				/* Unprotect the scheduling queues: */
				_queue_signals = 0;
			}

			/* Make the selected thread the current thread: */
			_thread_run = pthread_h;

			/*
			 * Save the current time as the time that the thread
			 * became active:
			 */
			current_tick = _sched_ticks;
			_thread_run->last_active = (long)current_tick;

			/*
			 * Check if this thread is running for the first time
			 * or running again after using its full time slice
			 * allocation:
			 */
			if (_thread_run->slice_usec == -1) {
				/* Reset the accumulated time slice period: */
				_thread_run->slice_usec = 0;
			}

			/*
			 * If we had a context switch, run any
			 * installed switch hooks.
			 */
			if ((_sched_switch_hook != NULL) &&
			    (_last_user_thread != _thread_run)) {
				thread_run_switch_hook(_last_user_thread,
				    _thread_run);
			}
			/*
			 * Continue the thread at its current frame:
			 */
			switch (_thread_run->ctxtype) {
			case CTX_JB_NOSIG:
				___longjmp(_thread_run->ctx.jb,
				    _thread_run->longjmp_val);
				break;
			case CTX_JB:
				__longjmp(_thread_run->ctx.jb,
				    _thread_run->longjmp_val);
				break;
			case CTX_SJB:
				__siglongjmp(_thread_run->ctx.sigjb,
				    _thread_run->longjmp_val);
				break;
			case CTX_UC:
				/* XXX - Restore FP registers? */
				FP_RESTORE_UC(&_thread_run->ctx.uc);

				/*
				 * Do a sigreturn to restart the thread that
				 * was interrupted by a signal:
				 */
				_thread_kern_in_sched = 0;

#if NOT_YET
				_setcontext(&_thread_run->ctx.uc);
#else
				/*
				 * Ensure the process signal mask is set
				 * correctly:
				 */
				_thread_run->ctx.uc.uc_sigmask =
				    _process_sigmask;
				_thread_sys_sigreturn(&_thread_run->ctx.uc);
#endif
				break;
			}
			/* This point should not be reached. */
			PANIC("Thread has returned from sigreturn or longjmp");
		}
	}

	/* There are no more threads, so exit this process: */
	exit(0);
}

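/*
 * Change the state of the current thread, recording the caller's
 * file name and line number for debugging, and enter the scheduler.
 */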
void
_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
{
	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid a scheduler signal from interrupting this
	 * execution and calling the scheduler again.
	 */
	_thread_kern_in_sched = 1;

	/*
	 * Prevent the signal handler from fiddling with this thread
	 * before its state is set and it is placed into the proper queue.
	 */
	_queue_signals = 1;

	/* Change the state of the current thread: */
	_thread_run->state = state;
	_thread_run->fname = fname;
	_thread_run->lineno = lineno;

	/* Schedule the next thread that is ready: */
	_thread_kern_sched(NULL);
}

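/*
 * As for _thread_kern_sched_state(), but also release the given
 * spinlock once the state change is protected from the signal
 * handler, so no wakeup can be missed between the unlock and the
 * reschedule.
 */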
void
_thread_kern_sched_state_unlock(enum pthread_state state,
    spinlock_t *lock, char *fname, int lineno)
{
	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid a scheduler signal from interrupting this
	 * execution and calling the scheduler again.
	 */
	_thread_kern_in_sched = 1;

	/*
	 * Prevent the signal handler from fiddling with this thread
	 * before its state is set and it is placed into the proper
	 * queue(s).
	 */
	_queue_signals = 1;

	/* Change the state of the current thread: */
	_thread_run->state = state;
	_thread_run->fname = fname;
	_thread_run->lineno = lineno;

	_SPINUNLOCK(lock);

	/* Schedule the next thread that is ready: */
	_thread_kern_sched(NULL);
}

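/*
 * Poll the file descriptors that threads in the work queue are
 * waiting on.  If wait_reqd is nonzero and nothing is immediately
 * runnable, block until the earliest thread timeout, or forever if
 * no waiting thread can time out.
 */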
static void
thread_kern_poll(int wait_reqd)
{
	int		count = 0;
	int		i, found;
	int		kern_pipe_added = 0;
	int		nfds = 0;
	int		timeout_ms = 0;
	struct pthread	*pthread;
	struct timespec	ts;
	struct timeval	tv;

	/* Check if the caller wants to wait: */
	if (wait_reqd == 0) {
		timeout_ms = 0;
	}
	else {
		/* Get the current time of day: */
		GET_CURRENT_TOD(tv);
		TIMEVAL_TO_TIMESPEC(&tv, &ts);

		_queue_signals = 1;
		pthread = TAILQ_FIRST(&_waitingq);
		_queue_signals = 0;

		if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
			/*
			 * Either there are no threads in the waiting queue,
			 * or there are no threads that can time out.
			 */
			timeout_ms = INFTIM;
		}
		else {
			/*
			 * Calculate the time left for the next thread to
			 * time out:
			 */
			timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
			    1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec) /
			    1000000);
			/*
			 * Don't allow negative timeouts:
			 */
			if (timeout_ms < 0)
				timeout_ms = 0;
		}
	}

	/* Protect the scheduling queues: */
	_queue_signals = 1;

	/*
	 * Check to see if the signal queue needs to be walked to look
	 * for threads awoken by a signal while in the scheduler.
	 */
	if (_sigq_check_reqd != 0) {
		/* Reset flag before handling queued signals: */
		_sigq_check_reqd = 0;

		dequeue_signals();
	}

	/*
	 * Check for a thread that became runnable due to a signal:
	 */
	if (PTHREAD_PRIOQ_FIRST() != NULL) {
		/*
		 * Since there is at least one runnable thread,
		 * disable the wait.
		 */
		timeout_ms = 0;
	}

	/*
	 * Form the poll table:
	 */
	nfds = 0;
742         nfds = 0;
743         if (timeout_ms != 0) {
744                 /* Add the kernel pipe to the poll table: */
745                 _thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
746                 _thread_pfd_table[nfds].events = POLLRDNORM;
747                 _thread_pfd_table[nfds].revents = 0;
748                 nfds++;
749                 kern_pipe_added = 1;
750         }
751
752         PTHREAD_WAITQ_SETACTIVE();
753         TAILQ_FOREACH(pthread, &_workq, qe) {
754                 switch (pthread->state) {
755                 case PS_SPINBLOCK:
756                         /*
757                          * If the lock is available, let the thread run.
758                          */
759                         if (pthread->data.spinlock->access_lock == 0) {
760                                 PTHREAD_WAITQ_CLEARACTIVE();
761                                 PTHREAD_WORKQ_REMOVE(pthread);
762                                 PTHREAD_NEW_STATE(pthread,PS_RUNNING);
763                                 PTHREAD_WAITQ_SETACTIVE();
764                                 /* One less thread in a spinblock state: */
765                                 _spinblock_count--;
766                                 /*
767                                  * Since there is at least one runnable
768                                  * thread, disable the wait.
769                                  */
770                                 timeout_ms = 0;
771                         }
772                         break;
773
774                 /* File descriptor read wait: */
775                 case PS_FDR_WAIT:
776                         /* Limit number of polled files to table size: */
777                         if (nfds < _thread_dtablesize) {
778                                 _thread_pfd_table[nfds].events = POLLRDNORM;
779                                 _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
780                                 nfds++;
781                         }
782                         break;
783
784                 /* File descriptor write wait: */
785                 case PS_FDW_WAIT:
786                         /* Limit number of polled files to table size: */
787                         if (nfds < _thread_dtablesize) {
788                                 _thread_pfd_table[nfds].events = POLLWRNORM;
789                                 _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
790                                 nfds++;
791                         }
792                         break;
793
794                 /* File descriptor poll or select wait: */
795                 case PS_POLL_WAIT:
796                 case PS_SELECT_WAIT:
797                         /* Limit number of polled files to table size: */
798                         if (pthread->data.poll_data->nfds + nfds <
799                             _thread_dtablesize) {
800                                 for (i = 0; i < pthread->data.poll_data->nfds; i++) {
801                                         _thread_pfd_table[nfds + i].fd =
802                                             pthread->data.poll_data->fds[i].fd;
803                                         _thread_pfd_table[nfds + i].events =
804                                             pthread->data.poll_data->fds[i].events;
805                                 }
806                                 nfds += pthread->data.poll_data->nfds;
807                         }
808                         break;
809
810                 /* Other states do not depend on file I/O. */
811                 default:
812                         break;
813                 }
814         }
815         PTHREAD_WAITQ_CLEARACTIVE();
816
817         /*
818          * Wait for a file descriptor to be ready for read, write, or
819          * an exception, or a timeout to occur:
820          */
821         count = _thread_sys_poll(_thread_pfd_table, nfds, timeout_ms);
822
823         if (kern_pipe_added != 0)
824                 /*
825                  * Remove the pthread kernel pipe file descriptor
826                  * from the pollfd table:
827                  */
828                 nfds = 1;
829         else
830                 nfds = 0;
831
832         /*
833          * Check if it is possible that there are bytes in the kernel
834          * read pipe waiting to be read:
835          */
836         if (count < 0 || ((kern_pipe_added != 0) &&
837             (_thread_pfd_table[0].revents & POLLRDNORM))) {
838                 /*
839                  * If the kernel read pipe was included in the
840                  * count:
841                  */
842                 if (count > 0) {
843                         /* Decrement the count of file descriptors: */
844                         count--;
845                 }
846
847                 if (_sigq_check_reqd != 0) {
848                         /* Reset flag before handling signals: */
849                         _sigq_check_reqd = 0;
850
851                         dequeue_signals();
852                 }
853         }
854
855         /*
856          * Check if any file descriptors are ready:
857          */
858         if (count > 0) {
859                 /*
860                  * Enter a loop to look for threads waiting on file
861                  * descriptors that are flagged as available by the
862                  * _poll syscall:
863                  */
864                 PTHREAD_WAITQ_SETACTIVE();
865                 TAILQ_FOREACH(pthread, &_workq, qe) {
866                         switch (pthread->state) {
867                         case PS_SPINBLOCK:
868                                 /*
869                                  * If the lock is available, let the thread run.
870                                  */
871                                 if (pthread->data.spinlock->access_lock == 0) {
872                                         PTHREAD_WAITQ_CLEARACTIVE();
873                                         PTHREAD_WORKQ_REMOVE(pthread);
874                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
875                                         PTHREAD_WAITQ_SETACTIVE();
876
877                                         /*
878                                          * One less thread in a spinblock state:
879                                          */
880                                         _spinblock_count--;
881                                 }
882                                 break;
883
884                         /* File descriptor read wait: */
885                         case PS_FDR_WAIT:
886                                 if ((nfds < _thread_dtablesize) &&
887                                     (_thread_pfd_table[nfds].revents & POLLRDNORM)) {
888                                         PTHREAD_WAITQ_CLEARACTIVE();
889                                         PTHREAD_WORKQ_REMOVE(pthread);
890                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
891                                         PTHREAD_WAITQ_SETACTIVE();
892                                 }
893                                 nfds++;
894                                 break;
895
896                         /* File descriptor write wait: */
897                         case PS_FDW_WAIT:
898                                 if ((nfds < _thread_dtablesize) &&
899                                     (_thread_pfd_table[nfds].revents & POLLWRNORM)) {
900                                         PTHREAD_WAITQ_CLEARACTIVE();
901                                         PTHREAD_WORKQ_REMOVE(pthread);
902                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
903                                         PTHREAD_WAITQ_SETACTIVE();
904                                 }
905                                 nfds++;
906                                 break;
907
908                         /* File descriptor poll or select wait: */
909                         case PS_POLL_WAIT:
910                         case PS_SELECT_WAIT:
911                                 if (pthread->data.poll_data->nfds + nfds <
912                                     _thread_dtablesize) {
913                                         /*
914                                          * Enter a loop looking for I/O
915                                          * readiness:
916                                          */
917                                         found = 0;
918                                         for (i = 0; i < pthread->data.poll_data->nfds; i++) {
919                                                 if (_thread_pfd_table[nfds + i].revents != 0) {
920                                                         pthread->data.poll_data->fds[i].revents =
921                                                             _thread_pfd_table[nfds + i].revents;
922                                                         found++;
923                                                 }
924                                         }
925
926                                         /* Increment before destroying: */
927                                         nfds += pthread->data.poll_data->nfds;
928
929                                         if (found != 0) {
930                                                 pthread->data.poll_data->nfds = found;
931                                                 PTHREAD_WAITQ_CLEARACTIVE();
932                                                 PTHREAD_WORKQ_REMOVE(pthread);
933                                                 PTHREAD_NEW_STATE(pthread,PS_RUNNING);
934                                                 PTHREAD_WAITQ_SETACTIVE();
935                                         }
936                                 }
937                                 else
938                                         nfds += pthread->data.poll_data->nfds;
939                                 break;
940
941                         /* Other states do not depend on file I/O. */
942                         default:
943                                 break;
944                         }
945                 }
946                 PTHREAD_WAITQ_CLEARACTIVE();
947         }
948         else if (_spinblock_count != 0) {
949                 /*
950                  * Enter a loop to look for threads waiting on a spinlock
951                  * that is now available.
952                  */
953                 PTHREAD_WAITQ_SETACTIVE();
954                 TAILQ_FOREACH(pthread, &_workq, qe) {
955                         if (pthread->state == PS_SPINBLOCK) {
956                                 /*
957                                  * If the lock is available, let the thread run.
958                                  */
959                                 if (pthread->data.spinlock->access_lock == 0) {
960                                         PTHREAD_WAITQ_CLEARACTIVE();
961                                         PTHREAD_WORKQ_REMOVE(pthread);
962                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
963                                         PTHREAD_WAITQ_SETACTIVE();
964
965                                         /*
966                                          * One less thread in a spinblock state:
967                                          */
968                                         _spinblock_count--;
969                                 }
970                         }
971                 }
972                 PTHREAD_WAITQ_CLEARACTIVE();
973         }
974
975         /* Unprotect the scheduling queues: */
976         _queue_signals = 0;
977
978         while (_sigq_check_reqd != 0) {
979                 /* Handle queued signals: */
980                 _sigq_check_reqd = 0;
981
982                 /* Protect the scheduling queues: */
983                 _queue_signals = 1;
984
985                 dequeue_signals();
986
987                 /* Unprotect the scheduling queues: */
988                 _queue_signals = 0;
989         }
990 }
991
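/*
 * Convert a relative timeout into an absolute wakeup time for the
 * running thread.  A NULL timeout means wait forever; a zero timeout
 * means wake immediately.
 */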
void
_thread_kern_set_timeout(const struct timespec *timeout)
{
	struct timespec current_time;
	struct timeval	tv;

	/* Reset the timeout flag for the running thread: */
	_thread_run->timeout = 0;

	/* Check if the thread is to wait forever: */
	if (timeout == NULL) {
		/*
		 * Set the wakeup time to something that can be recognised as
		 * different to an actual time of day:
		 */
		_thread_run->wakeup_time.tv_sec = -1;
		_thread_run->wakeup_time.tv_nsec = -1;
	}
	/* Check if no waiting is required: */
	else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
		/* Set the wake up time to 'immediately': */
		_thread_run->wakeup_time.tv_sec = 0;
		_thread_run->wakeup_time.tv_nsec = 0;
	} else {
		/* Get the current time: */
		GET_CURRENT_TOD(tv);
		TIMEVAL_TO_TIMESPEC(&tv, &current_time);

		/* Calculate the time for the current thread to wake up: */
		_thread_run->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
		_thread_run->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;

		/* Check if the nanosecond field needs to wrap: */
		if (_thread_run->wakeup_time.tv_nsec >= 1000000000) {
			/* Wrap the nanosecond field: */
			_thread_run->wakeup_time.tv_sec += 1;
			_thread_run->wakeup_time.tv_nsec -= 1000000000;
		}
	}
}

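/*
 * Defer signal handling for the current thread.  Deferral nests;
 * each call must be matched by a call to _thread_kern_sig_undefer().
 */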
void
_thread_kern_sig_defer(void)
{
	/* Allow signal deferral to be recursive. */
	_thread_run->sig_defer_count++;
}

void
_thread_kern_sig_undefer(void)
{
	/*
	 * Perform checks to yield only if we are about to undefer
	 * signals.
	 */
	if (_thread_run->sig_defer_count > 1) {
		/* Decrement the signal deferral count. */
		_thread_run->sig_defer_count--;
	}
	else if (_thread_run->sig_defer_count == 1) {
		/* Reenable signals: */
		_thread_run->sig_defer_count = 0;

		/*
		 * Check if there are queued signals:
		 */
		if (_sigq_check_reqd != 0)
			_thread_kern_sched(NULL);

		/*
		 * Check for asynchronous cancellation before delivering any
		 * pending signals:
		 */
		if (((_thread_run->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
		    ((_thread_run->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
			pthread_testcancel();

		/*
		 * If there are pending signals or this thread has
		 * to yield the CPU, call the kernel scheduler:
		 *
		 * XXX - Come back and revisit the pending signal problem
		 */
		if ((_thread_run->yield_on_sig_undefer != 0) ||
		    SIGNOTEMPTY(_thread_run->sigpend)) {
			_thread_run->yield_on_sig_undefer = 0;
			_thread_kern_sched(NULL);
		}
	}
}

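/*
 * Drain the pthread kernel pipe, through which the signal handler
 * notes queued signals, and then handle any signals left pending.
 */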
static void
dequeue_signals(void)
{
	char	bufr[128];
	int	num;

	/*
	 * Enter a loop to clear the pthread kernel pipe:
	 */
	while (((num = _thread_sys_read(_thread_kern_pipe[0], bufr,
	    sizeof(bufr))) > 0) || (num == -1 && errno == EINTR)) {
	}
	if ((num < 0) && (errno != EAGAIN)) {
		/*
		 * The only error we should expect is if there is
		 * no data to read.
		 */
		PANIC("Unable to read from thread kernel pipe");
	}
	/* Handle any pending signals: */
	_thread_sig_handle_pending();
}

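/*
 * Run the installed scheduler switch hook, substituting NULL for
 * library-private threads so they remain invisible to the hook.
 */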
static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
{
	pthread_t tid_out = thread_out;
	pthread_t tid_in = thread_in;

	if ((tid_out != NULL) &&
	    (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
		tid_out = NULL;
	if ((tid_in != NULL) &&
	    (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
		tid_in = NULL;

	if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
		/* Run the scheduler switch hook: */
		_sched_switch_hook(tid_out, tid_in);
	}
}
#endif