/*
 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */
#include <errno.h>
#include <poll.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <setjmp.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <pthread.h>
#include "pthread_private.h"

/* #define DEBUG_THREAD_KERN */
#ifdef DEBUG_THREAD_KERN
#define DBG_MSG         stdout_debug
#else
#define DBG_MSG(x...)
#endif
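
/*
 * Illustrative note (not part of the original build glue): defining
 * DEBUG_THREAD_KERN at compile time, e.g.
 *
 *      cc -DDEBUG_THREAD_KERN -c uthread_kern.c
 *
 * routes the DBG_MSG() traces in this file to stdout_debug().
 */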

/* Static function prototype definitions: */
static void
thread_kern_poll(int wait_reqd);

static void
dequeue_signals(void);

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);

/* Static variables: */
static int      last_tick = 0;
static int      called_from_handler = 0;

/*
 * This is called when a signal handler finishes and wants to
 * return to a previous frame.
 */
void
_thread_kern_sched_frame(struct pthread_signal_frame *psf)
{
        struct pthread  *curthread = _get_curthread();

        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a signal from interrupting this execution and
         * corrupting the (soon-to-be) current frame.
         */
        _thread_kern_in_sched = 1;

        /* Restore the signal frame: */
        _thread_sigframe_restore(curthread, psf);

        /* The signal mask was restored; check for any pending signals: */
        curthread->check_pending = 1;

        /* Switch to the thread scheduler: */
        ___longjmp(_thread_kern_sched_jb, 1);
}


void
_thread_kern_sched(ucontext_t *ucp)
{
        struct pthread  *curthread = _get_curthread();

        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /* Check if this function was called from the signal handler: */
        if (ucp != NULL) {
                called_from_handler = 1;
                DBG_MSG("Entering scheduler due to signal\n");
        }

        /* Save the state of the current thread: */
        if (_setjmp(curthread->ctx.jb) != 0) {
                DBG_MSG("Returned from ___longjmp, thread %p\n",
                    curthread);
                /*
                 * This point is reached when a longjmp() is called
                 * to restore the state of a thread.
                 *
                 * This is the normal way out of the scheduler.
                 */
                _thread_kern_in_sched = 0;

                if (curthread->sig_defer_count == 0) {
                        if (((curthread->cancelflags &
                            PTHREAD_AT_CANCEL_POINT) == 0) &&
                            ((curthread->cancelflags &
                            PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
                                /*
                                 * Cancellations override signals.
                                 *
                                 * Stick a cancellation point at the
                                 * start of each async-cancellable
                                 * thread's resumption.
                                 *
                                 * We allow threads woken at cancel
                                 * points to do their own checks.
                                 */
                                pthread_testcancel();
                }

                if (_sched_switch_hook != NULL) {
                        /* Run the installed switch hook: */
                        thread_run_switch_hook(_last_user_thread, curthread);
                }
                if (ucp == NULL)
                        return;
                else {
                        /*
                         * Set the process signal mask in the context; it
                         * could have been changed by the handler.
                         */
                        ucp->uc_sigmask = _process_sigmask;

                        /* Resume the interrupted thread: */
                        __sys_sigreturn(ucp);
                }
        }
        /* Switch to the thread scheduler: */
        ___longjmp(_thread_kern_sched_jb, 1);
}
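
/*
 * How the switch works (a sketch for orientation, matching the code
 * above): a thread's CPU state is captured in a jmp_buf, and the
 * save/resume pair is, in essence,
 *
 *      if (_setjmp(curthread->ctx.jb) == 0)
 *              ___longjmp(_thread_kern_sched_jb, 1);   -- enter scheduler
 *      -- execution resumes here when the scheduler later runs
 *      -- ___longjmp(thread->ctx.jb, 1) for this thread
 *
 * so a non-zero return from _setjmp() above is the normal way a thread
 * comes back out of the scheduler.
 */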

void
_thread_kern_sched_sig(void)
{
        struct pthread  *curthread = _get_curthread();

        curthread->check_pending = 1;
        _thread_kern_sched(NULL);
}


void
_thread_kern_scheduler(void)
{
        struct timespec ts;
        struct timeval  tv;
        struct pthread  *curthread = _get_curthread();
        pthread_t       pthread, pthread_h;
        unsigned int    current_tick;
        int             add_to_prioq;

        /* If the currently running thread is a user thread, save it: */
        if ((curthread->flags & PTHREAD_FLAGS_PRIVATE) == 0)
                _last_user_thread = curthread;

        if (called_from_handler != 0) {
                called_from_handler = 0;

                /*
                 * We were called from a signal handler; restore the process
                 * signal mask.
                 */
                if (__sys_sigprocmask(SIG_SETMASK,
                    &_process_sigmask, NULL) != 0)
                        PANIC("Unable to restore process mask after signal");
        }

        /*
         * Enter a scheduling loop that finds the next thread that is
         * ready to run. This loop completes when there are no more threads
         * in the global list or when a thread has its state restored by
         * either a sigreturn (if the state was saved as a sigcontext) or a
         * longjmp (if the state was saved by a setjmp).
         */
        while (!(TAILQ_EMPTY(&_thread_list))) {
                /* Get the current time of day: */
                GET_CURRENT_TOD(tv);
                TIMEVAL_TO_TIMESPEC(&tv, &ts);
                current_tick = _sched_ticks;

                /*
                 * Protect the scheduling queues from access by the signal
                 * handler.
                 */
                _queue_signals = 1;
                add_to_prioq = 0;

                if (curthread != &_thread_kern_thread) {
                        /*
                         * This thread no longer needs to yield the CPU.
                         */
                        curthread->yield_on_sig_undefer = 0;

                        if (curthread->state != PS_RUNNING) {
                                /*
                                 * Save the current time as the time that the
                                 * thread became inactive:
                                 */
                                curthread->last_inactive = (long)current_tick;
                                if (curthread->last_inactive <
                                    curthread->last_active) {
                                        /* Account for a rollover: */
                                        curthread->last_inactive +=
                                            UINT_MAX + 1;
                                }
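                                /*
                                 * Worked example: if last_active is
                                 * UINT_MAX - 5 and the tick counter has
                                 * wrapped around to 3, last_inactive
                                 * becomes 3 + UINT_MAX + 1, so
                                 * last_inactive - last_active is 9
                                 * ticks, as expected across the
                                 * rollover.
                                 */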
                        }

                        /*
                         * Place the currently running thread into the
                         * appropriate queue(s).
                         */
                        switch (curthread->state) {
                        case PS_DEAD:
                        case PS_STATE_MAX: /* to silence -Wall */
                        case PS_SUSPENDED:
                                /*
                                 * Dead and suspended threads are not placed
                                 * in any queue:
                                 */
                                break;

                        case PS_RUNNING:
                                /*
                                 * Runnable threads can't be placed in the
                                 * priority queue until after waiting threads
                                 * are polled (to preserve round-robin
                                 * scheduling).
                                 */
                                add_to_prioq = 1;
                                break;

                        /*
                         * States which do not depend on file descriptor I/O
                         * operations or timeouts:
                         */
                        case PS_DEADLOCK:
                        case PS_FDLR_WAIT:
                        case PS_FDLW_WAIT:
                        case PS_FILE_WAIT:
                        case PS_JOIN:
                        case PS_MUTEX_WAIT:
                        case PS_SIGSUSPEND:
                        case PS_SIGTHREAD:
                        case PS_SIGWAIT:
                        case PS_WAIT_WAIT:
                                /* No timeouts for these states: */
                                curthread->wakeup_time.tv_sec = -1;
                                curthread->wakeup_time.tv_nsec = -1;

                                /* Restart the time slice: */
                                curthread->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(curthread);
                                break;

                        /* States which can timeout: */
                        case PS_COND_WAIT:
                        case PS_SLEEP_WAIT:
                                /* Restart the time slice: */
                                curthread->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(curthread);
                                break;

                        /* States that require periodic work: */
                        case PS_SPINBLOCK:
                                /* No timeouts for this state: */
                                curthread->wakeup_time.tv_sec = -1;
                                curthread->wakeup_time.tv_nsec = -1;

                                /* Increment spinblock count: */
                                _spinblock_count++;

                                /* FALLTHROUGH */
                        case PS_FDR_WAIT:
                        case PS_FDW_WAIT:
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                /* Restart the time slice: */
                                curthread->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(curthread);

                                /* Insert into the work queue: */
                                PTHREAD_WORKQ_INSERT(curthread);
                                break;
                        }

                        /*
                         * Are there pending signals for this thread?
                         *
                         * This check has to be performed after the thread
                         * has been placed in the queue(s) appropriate for
                         * its state.  The process of adding pending signals
                         * can change a thread's state, which in turn will
                         * attempt to add or remove the thread from any
                         * scheduling queue to which it belongs.
                         */
                        if (curthread->check_pending != 0) {
                                curthread->check_pending = 0;
                                _thread_sig_check_pending(curthread);
                        }
                }

                /*
                 * Avoid polling file descriptors if there are none
                 * waiting:
                 */
                if (TAILQ_EMPTY(&_workq) != 0) {
                }
                /*
                 * Poll file descriptors only if a new scheduling signal
                 * has occurred or if we have no more runnable threads.
                 */
                else if (((current_tick = _sched_ticks) != last_tick) ||
                    ((curthread->state != PS_RUNNING) &&
                    (PTHREAD_PRIOQ_FIRST() == NULL))) {
                        /* Unprotect the scheduling queues: */
                        _queue_signals = 0;

                        /*
                         * Poll file descriptors to update the state of threads
                         * waiting on file I/O where data may be available:
                         */
                        thread_kern_poll(0);

                        /* Protect the scheduling queues: */
                        _queue_signals = 1;
                }
                last_tick = current_tick;

                /*
                 * Wake up threads that have timed out.  This has to be
                 * done after polling in case a thread does a poll or
                 * select with zero time.
                 */
                PTHREAD_WAITQ_SETACTIVE();
                while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
                    (pthread->wakeup_time.tv_sec != -1) &&
                    (((pthread->wakeup_time.tv_sec == 0) &&
                    (pthread->wakeup_time.tv_nsec == 0)) ||
                    (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
                    ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
                    (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
                        switch (pthread->state) {
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                /* Return zero file descriptors ready: */
                                pthread->data.poll_data->nfds = 0;
                                /* FALLTHROUGH */
                        default:
                                /*
                                 * Remove this thread from the waiting queue
                                 * (and work queue if necessary) and place it
                                 * in the ready queue.
                                 */
                                PTHREAD_WAITQ_CLEARACTIVE();
                                if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                PTHREAD_WAITQ_SETACTIVE();
                                break;
                        }
                        /*
                         * Flag the timeout in the thread structure:
                         */
                        pthread->timeout = 1;
                }
                PTHREAD_WAITQ_CLEARACTIVE();

                /*
                 * Check to see if the current thread needs to be added
                 * to the priority queue:
                 */
                if (add_to_prioq != 0) {
                        /*
                         * Save the current time as the time that the
                         * thread became inactive:
                         */
                        current_tick = _sched_ticks;
                        curthread->last_inactive = (long)current_tick;
                        if (curthread->last_inactive <
                            curthread->last_active) {
                                /* Account for a rollover: */
                                curthread->last_inactive += UINT_MAX + 1;
                        }

                        if ((curthread->slice_usec != -1) &&
                           (curthread->attr.sched_policy != SCHED_FIFO)) {
                                /*
                                 * Accumulate the number of microseconds for
                                 * which the current thread has run:
                                 */
                                curthread->slice_usec +=
                                    (curthread->last_inactive -
                                    curthread->last_active) *
                                    (long)_clock_res_usec;
                                /* Check for time quantum exceeded: */
                                if (curthread->slice_usec > TIMESLICE_USEC)
                                        curthread->slice_usec = -1;
                        }

                        if (curthread->slice_usec == -1) {
                                /*
                                 * The thread exceeded its time
                                 * quantum or it yielded the CPU;
                                 * place it at the tail of the
                                 * queue for its priority.
                                 */
                                PTHREAD_PRIOQ_INSERT_TAIL(curthread);
                        } else {
                                /*
                                 * The thread hasn't exceeded its
                                 * interval.  Place it at the head
                                 * of the queue for its priority.
                                 */
                                PTHREAD_PRIOQ_INSERT_HEAD(curthread);
                        }
                }

                /*
                 * Get the highest priority thread in the ready queue.
                 */
                pthread_h = PTHREAD_PRIOQ_FIRST();

                /* Check if there are no threads ready to run: */
                if (pthread_h == NULL) {
                        /*
                         * Lock the pthread kernel by changing the pointer to
                         * the running thread to point to the global kernel
                         * thread structure:
                         */
                        _set_curthread(&_thread_kern_thread);
                        curthread = &_thread_kern_thread;

                        DBG_MSG("No runnable threads, using kernel thread %p\n",
                            curthread);

                        /* Unprotect the scheduling queues: */
                        _queue_signals = 0;

                        /*
                         * There are no threads ready to run, so wait until
                         * something happens that changes this condition:
                         */
                        thread_kern_poll(1);

                        /*
                         * This process' usage will likely be very small
                         * while waiting in a poll.  Since the scheduling
                         * clock is based on the profiling timer, it is
                         * unlikely that the profiling timer will fire
                         * and update the time of day.  To account for this,
                         * get the time of day after polling with a timeout.
                         */
                        gettimeofday((struct timeval *) &_sched_tod, NULL);

                        /* Check once more for a runnable thread: */
                        _queue_signals = 1;
                        pthread_h = PTHREAD_PRIOQ_FIRST();
                        _queue_signals = 0;
                }

                if (pthread_h != NULL) {
                        /* Remove the thread from the ready queue: */
                        PTHREAD_PRIOQ_REMOVE(pthread_h);

                        /* Unprotect the scheduling queues: */
                        _queue_signals = 0;

                        /*
                         * Check for signals queued while the scheduling
                         * queues were protected:
                         */
                        while (_sigq_check_reqd != 0) {
                                /* Clear before handling queued signals: */
                                _sigq_check_reqd = 0;

                                /* Protect the scheduling queues again: */
                                _queue_signals = 1;

                                dequeue_signals();

                                /*
                                 * Check for a higher priority thread that
                                 * became runnable due to signal handling.
                                 */
                                if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
                                    (pthread->active_priority > pthread_h->active_priority)) {
                                        /* Remove the thread from the ready queue: */
                                        PTHREAD_PRIOQ_REMOVE(pthread);

                                        /*
                                         * Insert the lower priority thread
                                         * at the head of its priority list:
                                         */
                                        PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);

                                        /* There's a new thread in town: */
                                        pthread_h = pthread;
                                }

                                /* Unprotect the scheduling queues: */
                                _queue_signals = 0;
                        }

                        /* Make the selected thread the current thread: */
                        _set_curthread(pthread_h);
                        curthread = pthread_h;

                        /*
                         * Save the current time as the time that the thread
                         * became active:
                         */
                        current_tick = _sched_ticks;
                        curthread->last_active = (long) current_tick;

                        /*
                         * Check if this thread is running for the first time
                         * or running again after using its full time slice
                         * allocation:
                         */
                        if (curthread->slice_usec == -1) {
                                /* Reset the accumulated time slice period: */
                                curthread->slice_usec = 0;
                        }

                        /*
                         * If we had a context switch, run any
                         * installed switch hooks.
                         */
                        if ((_sched_switch_hook != NULL) &&
                            (_last_user_thread != curthread)) {
                                thread_run_switch_hook(_last_user_thread,
                                    curthread);
                        }
                        /*
                         * Continue the thread at its current frame:
                         */
#if NOT_YET
                        _setcontext(&curthread->ctx.uc);
#else
                        ___longjmp(curthread->ctx.jb, 1);
#endif
                        /* This point should not be reached. */
                        PANIC("Thread has returned from sigreturn or longjmp");
                }
        }

        /* There are no more threads, so exit this process: */
        exit(0);
}

void
_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
{
        struct pthread  *curthread = _get_curthread();

        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /*
         * Prevent the signal handler from fiddling with this thread
         * before its state is set and it is placed into the proper
         * queue.
         */
        _queue_signals = 1;

        /* Change the state of the current thread: */
        curthread->state = state;
        curthread->fname = fname;
        curthread->lineno = lineno;

        /* Schedule the next thread that is ready: */
        _thread_kern_sched(NULL);
}
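
/*
 * Typical caller pattern (an illustrative sketch, not code from this
 * file): a blocking primitive arms a wakeup time, then parks the
 * thread in the matching wait state:
 *
 *      _thread_kern_set_timeout(&ts);
 *      _thread_kern_sched_state(PS_SLEEP_WAIT, __FILE__, __LINE__);
 *
 * The fname/lineno pair is recorded in the thread purely as a
 * debugging aid.
 */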

void
_thread_kern_sched_state_unlock(enum pthread_state state,
    spinlock_t *lock, char *fname, int lineno)
{
        struct pthread  *curthread = _get_curthread();

        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /*
         * Prevent the signal handler from fiddling with this thread
         * before its state is set and it is placed into the proper
         * queue(s).
         */
        _queue_signals = 1;

        /* Change the state of the current thread: */
        curthread->state = state;
        curthread->fname = fname;
        curthread->lineno = lineno;

        _SPINUNLOCK(lock);

        /* Schedule the next thread that is ready: */
        _thread_kern_sched(NULL);
}
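
/*
 * The unlock variant closes a lost-wakeup race: the caller tests a
 * condition under a spinlock and must not drop that lock before its
 * new state is visible to the scheduler.  Hypothetical caller (q is
 * an illustrative wait queue, not a structure in this library):
 *
 *      _SPINLOCK(&q->lock);
 *      ...enqueue curthread on q...
 *      _thread_kern_sched_state_unlock(PS_MUTEX_WAIT, &q->lock,
 *          __FILE__, __LINE__);
 *
 * The state is changed with signals queued, the lock is released, and
 * only then does the thread enter the scheduler, so a wakeup cannot
 * slip into the gap.
 */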

static void
thread_kern_poll(int wait_reqd)
{
        int             count = 0;
        int             i, found;
        int             kern_pipe_added = 0;
        int             nfds = 0;
        int             timeout_ms = 0;
        struct pthread  *pthread;
        struct timespec ts;
        struct timeval  tv;

        /* Check if the caller wants to wait: */
        if (wait_reqd == 0) {
                timeout_ms = 0;
        }
        else {
                /* Get the current time of day: */
                GET_CURRENT_TOD(tv);
                TIMEVAL_TO_TIMESPEC(&tv, &ts);

                _queue_signals = 1;
                pthread = TAILQ_FIRST(&_waitingq);
                _queue_signals = 0;

                if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
                        /*
                         * Either there are no threads in the waiting queue,
                         * or there are no threads that can timeout.
                         */
                        timeout_ms = INFTIM;
                }
                else if (pthread->wakeup_time.tv_sec - ts.tv_sec > 60000)
                        /* Limit maximum timeout to prevent rollover. */
                        timeout_ms = 60000;
                else {
                        /*
                         * Calculate the time left for the next thread to
                         * timeout:
                         */
                        timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
                            1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec) /
                            1000000);
                        /*
                         * Don't allow negative timeouts:
                         */
                        if (timeout_ms < 0)
                                timeout_ms = 0;
                }
        }
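
        /*
         * Example of the calculation above: a wakeup time of
         * {12, 500000000} with a current time of {10, 600000000}
         * gives (12 - 10) * 1000 + (500000000 - 600000000) / 1000000,
         * i.e. 2000 + (-100) = 1900 ms.
         */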

        /* Protect the scheduling queues: */
        _queue_signals = 1;

        /*
         * Check to see if the signal queue needs to be walked to look
         * for threads awoken by a signal while in the scheduler.
         */
        if (_sigq_check_reqd != 0) {
                /* Reset flag before handling queued signals: */
                _sigq_check_reqd = 0;

                dequeue_signals();
        }

        /*
         * Check for a thread that became runnable due to a signal:
         */
        if (PTHREAD_PRIOQ_FIRST() != NULL) {
                /*
                 * Since there is at least one runnable thread,
                 * disable the wait.
                 */
                timeout_ms = 0;
        }

        /*
         * Form the poll table:
         */
        nfds = 0;
        if (timeout_ms != 0) {
                /* Add the kernel pipe to the poll table: */
                _thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
                _thread_pfd_table[nfds].events = POLLRDNORM;
                _thread_pfd_table[nfds].revents = 0;
                nfds++;
                kern_pipe_added = 1;
        }

        PTHREAD_WAITQ_SETACTIVE();
        TAILQ_FOREACH(pthread, &_workq, qe) {
                switch (pthread->state) {
                case PS_SPINBLOCK:
                        /*
                         * If the lock is available, let the thread run.
                         */
                        if (pthread->data.spinlock->access_lock == 0) {
                                PTHREAD_WAITQ_CLEARACTIVE();
                                PTHREAD_WORKQ_REMOVE(pthread);
                                PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                PTHREAD_WAITQ_SETACTIVE();
                                /* One less thread in a spinblock state: */
                                _spinblock_count--;
                                /*
                                 * Since there is at least one runnable
                                 * thread, disable the wait.
                                 */
                                timeout_ms = 0;
                        }
                        break;

                /* File descriptor read wait: */
                case PS_FDR_WAIT:
                        /* Limit number of polled files to table size: */
                        if (nfds < _thread_dtablesize) {
                                _thread_pfd_table[nfds].events = POLLRDNORM;
                                _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
                                nfds++;
                        }
                        break;

                /* File descriptor write wait: */
                case PS_FDW_WAIT:
                        /* Limit number of polled files to table size: */
                        if (nfds < _thread_dtablesize) {
                                _thread_pfd_table[nfds].events = POLLWRNORM;
                                _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
                                nfds++;
                        }
                        break;

                /* File descriptor poll or select wait: */
                case PS_POLL_WAIT:
                case PS_SELECT_WAIT:
                        /* Limit number of polled files to table size: */
                        if (pthread->data.poll_data->nfds + nfds <
                            _thread_dtablesize) {
                                for (i = 0; i < pthread->data.poll_data->nfds; i++) {
                                        _thread_pfd_table[nfds + i].fd =
                                            pthread->data.poll_data->fds[i].fd;
                                        _thread_pfd_table[nfds + i].events =
                                            pthread->data.poll_data->fds[i].events;
                                }
                                nfds += pthread->data.poll_data->nfds;
                        }
                        break;

                /* Other states do not depend on file I/O. */
                default:
                        break;
                }
        }
        PTHREAD_WAITQ_CLEARACTIVE();

        /*
         * Wait for a file descriptor to be ready for read, write, or
         * an exception, or a timeout to occur:
         */
        count = __sys_poll(_thread_pfd_table, nfds, timeout_ms);

        if (kern_pipe_added != 0)
                /*
                 * Remove the pthread kernel pipe file descriptor
                 * from the pollfd table:
                 */
                nfds = 1;
        else
                nfds = 0;

        /*
         * Check if it is possible that there are bytes in the kernel
         * read pipe waiting to be read:
         */
        if (count < 0 || ((kern_pipe_added != 0) &&
            (_thread_pfd_table[0].revents & POLLRDNORM))) {
                /*
                 * If the kernel read pipe was included in the
                 * count:
                 */
                if (count > 0) {
                        /* Decrement the count of file descriptors: */
                        count--;
                }

                if (_sigq_check_reqd != 0) {
                        /* Reset flag before handling signals: */
                        _sigq_check_reqd = 0;

                        dequeue_signals();
                }
        }

        /*
         * Check if any file descriptors are ready:
         */
        if (count > 0) {
                /*
                 * Enter a loop to look for threads waiting on file
                 * descriptors that are flagged as available by the
                 * _poll syscall:
                 */
                PTHREAD_WAITQ_SETACTIVE();
                TAILQ_FOREACH(pthread, &_workq, qe) {
                        switch (pthread->state) {
                        case PS_SPINBLOCK:
                                /*
                                 * If the lock is available, let the thread run.
                                 */
                                if (pthread->data.spinlock->access_lock == 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();

                                        /*
                                         * One less thread in a spinblock state:
                                         */
                                        _spinblock_count--;
                                }
                                break;

                        /* File descriptor read wait: */
                        case PS_FDR_WAIT:
                                if ((nfds < _thread_dtablesize) &&
                                    (_thread_pfd_table[nfds].revents
                                       & (POLLRDNORM|POLLERR|POLLHUP|POLLNVAL))
                                      != 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();
                                }
                                nfds++;
                                break;

                        /* File descriptor write wait: */
                        case PS_FDW_WAIT:
                                if ((nfds < _thread_dtablesize) &&
                                    (_thread_pfd_table[nfds].revents
                                       & (POLLWRNORM|POLLERR|POLLHUP|POLLNVAL))
                                      != 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();
                                }
                                nfds++;
                                break;

                        /* File descriptor poll or select wait: */
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                if (pthread->data.poll_data->nfds + nfds <
                                    _thread_dtablesize) {
                                        /*
                                         * Enter a loop looking for I/O
                                         * readiness:
                                         */
                                        found = 0;
                                        for (i = 0; i < pthread->data.poll_data->nfds; i++) {
                                                if (_thread_pfd_table[nfds + i].revents != 0) {
                                                        pthread->data.poll_data->fds[i].revents =
                                                            _thread_pfd_table[nfds + i].revents;
                                                        found++;
                                                }
                                        }

                                        /* Increment before destroying: */
                                        nfds += pthread->data.poll_data->nfds;

                                        if (found != 0) {
                                                pthread->data.poll_data->nfds = found;
                                                PTHREAD_WAITQ_CLEARACTIVE();
                                                PTHREAD_WORKQ_REMOVE(pthread);
                                                PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                                PTHREAD_WAITQ_SETACTIVE();
                                        }
                                }
                                else
                                        nfds += pthread->data.poll_data->nfds;
                                break;

                        /* Other states do not depend on file I/O. */
                        default:
                                break;
                        }
                }
                PTHREAD_WAITQ_CLEARACTIVE();
        }
        else if (_spinblock_count != 0) {
                /*
                 * Enter a loop to look for threads waiting on a spinlock
                 * that is now available.
                 */
                PTHREAD_WAITQ_SETACTIVE();
                TAILQ_FOREACH(pthread, &_workq, qe) {
                        if (pthread->state == PS_SPINBLOCK) {
                                /*
                                 * If the lock is available, let the thread run.
                                 */
                                if (pthread->data.spinlock->access_lock == 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();

                                        /*
                                         * One less thread in a spinblock state:
                                         */
                                        _spinblock_count--;
                                }
                        }
                }
                PTHREAD_WAITQ_CLEARACTIVE();
        }

        /* Unprotect the scheduling queues: */
        _queue_signals = 0;

        while (_sigq_check_reqd != 0) {
                /* Handle queued signals: */
                _sigq_check_reqd = 0;

                /* Protect the scheduling queues: */
                _queue_signals = 1;

                dequeue_signals();

                /* Unprotect the scheduling queues: */
                _queue_signals = 0;
        }
}

void
_thread_kern_set_timeout(const struct timespec * timeout)
{
        struct pthread  *curthread = _get_curthread();
        struct timespec current_time;
        struct timeval  tv;

        /* Reset the timeout flag for the running thread: */
        curthread->timeout = 0;

        /* Check if the thread is to wait forever: */
        if (timeout == NULL) {
                /*
                 * Set the wakeup time to something that can be recognised as
                 * different to an actual time of day:
                 */
                curthread->wakeup_time.tv_sec = -1;
                curthread->wakeup_time.tv_nsec = -1;
        }
        /* Check if no waiting is required: */
        else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
                /* Set the wake up time to 'immediately': */
                curthread->wakeup_time.tv_sec = 0;
                curthread->wakeup_time.tv_nsec = 0;
        } else {
                /* Get the current time: */
                GET_CURRENT_TOD(tv);
                TIMEVAL_TO_TIMESPEC(&tv, &current_time);

                /* Calculate the time for the current thread to wake up: */
                curthread->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
                curthread->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;

                /* Check if the nanosecond field needs to wrap: */
                if (curthread->wakeup_time.tv_nsec >= 1000000000) {
                        /* Wrap the nanosecond field: */
                        curthread->wakeup_time.tv_sec += 1;
                        curthread->wakeup_time.tv_nsec -= 1000000000;
                }
        }
}
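
/*
 * Worked example for the wrap above: a current time of
 * {100, 900000000} plus a timeout of {0, 200000000} first yields
 * {100, 1100000000}, which is normalized to {101, 100000000}.
 */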

void
_thread_kern_sig_defer(void)
{
        struct pthread  *curthread = _get_curthread();

        /* Allow signal deferral to be recursive. */
        curthread->sig_defer_count++;
}

void
_thread_kern_sig_undefer(void)
{
        struct pthread  *curthread = _get_curthread();

        /*
         * Perform checks to yield only if we are about to undefer
         * signals.
         */
        if (curthread->sig_defer_count > 1) {
                /* Decrement the signal deferral count. */
                curthread->sig_defer_count--;
        }
        else if (curthread->sig_defer_count == 1) {
                /* Reenable signals: */
                curthread->sig_defer_count = 0;

                /*
                 * Check if there are queued signals:
                 */
                if (_sigq_check_reqd != 0)
                        _thread_kern_sched(NULL);

                /*
                 * Check for asynchronous cancellation before delivering any
                 * pending signals:
                 */
                if (((curthread->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
                    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
                        pthread_testcancel();

                /*
                 * If there are pending signals or this thread has
                 * to yield the CPU, call the kernel scheduler:
                 *
                 * XXX - Come back and revisit the pending signal problem
                 */
                if ((curthread->yield_on_sig_undefer != 0) ||
                    SIGNOTEMPTY(curthread->sigpend)) {
                        curthread->yield_on_sig_undefer = 0;
                        _thread_kern_sched(NULL);
                }
        }
}
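
/*
 * Illustrative use of the defer/undefer pair: library code that edits
 * the scheduling queues brackets the critical region so a scheduler
 * signal cannot run the scheduler halfway through:
 *
 *      _thread_kern_sig_defer();
 *      ...manipulate queues or shared thread state...
 *      _thread_kern_sig_undefer();
 *
 * Deferral nests; only the outermost undefer may deliver signals or
 * reschedule.
 */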

static void
dequeue_signals(void)
{
        char    bufr[128];
        int     num;

        /*
         * Enter a loop to clear the pthread kernel pipe:
         */
        while (((num = __sys_read(_thread_kern_pipe[0], bufr,
            sizeof(bufr))) > 0) || (num == -1 && errno == EINTR)) {
        }
        if ((num < 0) && (errno != EAGAIN)) {
                /*
                 * The only error we should expect is if there is
                 * no data to read.
                 */
                PANIC("Unable to read from thread kernel pipe");
        }
        /* Handle any pending signals: */
        _thread_sig_handle_pending();
}
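
/*
 * Note: the drain loop above relies on _thread_kern_pipe[0] being
 * non-blocking (it is configured that way at library initialization),
 * so an empty pipe returns -1 with errno set to EAGAIN instead of
 * blocking the whole process.
 */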

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
{
        pthread_t tid_out = thread_out;
        pthread_t tid_in = thread_in;

        if ((tid_out != NULL) &&
            (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
                tid_out = NULL;
        if ((tid_in != NULL) &&
            (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
                tid_in = NULL;

        if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
                /* Run the scheduler switch hook: */
                _sched_switch_hook(tid_out, tid_in);
        }
}
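
/*
 * A switch hook is installed through the non-portable API (sketch;
 * see pthread_switch_add_np(3)):
 *
 *      void hook(pthread_t out, pthread_t in) { ...trace or profile... }
 *      ...
 *      pthread_switch_add_np(hook);
 *
 * As filtered above, hooks see NULL in place of library-private
 * threads.
 */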

struct pthread *
_get_curthread(void)
{
        if (_thread_initial == NULL)
                _thread_init();

        return (_thread_run);
}

void
_set_curthread(struct pthread *newthread)
{
        _thread_run = newthread;
}