/*
 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */
#include <errno.h>
#include <poll.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <setjmp.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/syscall.h>
#include <fcntl.h>
#ifdef _THREAD_SAFE
#include <pthread.h>
#include "pthread_private.h"

/* Static function prototype definitions: */
static void
_thread_kern_poll(int wait_reqd);

static void
dequeue_signals(void);

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);

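/*
 * Overview: this is the core of the libc_r user-space scheduler.  All
 * threads of a process run within a single kernel process.  Context
 * switches are performed with setjmp()/longjmp(), or with sigreturn()
 * when a thread was interrupted by a signal; time slicing is driven by
 * a one-shot interval timer that delivers the scheduling signal; and
 * threads blocked on file descriptor I/O are multiplexed through the
 * single poll() call in _thread_kern_poll() below.
 */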
void
_thread_kern_sched(ucontext_t * scp)
{
#ifndef __alpha__
        char           *fdata;
#endif
        pthread_t       pthread, pthread_h = NULL;
        struct itimerval itimer;
        struct timespec ts, ts1;
        struct timeval  tv, tv1;
        int             set_timer = 0;

        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /* Check if this function was called from the signal handler: */
        if (scp != NULL) {
                /*
                 * Copy the signal context to the current thread's jump
                 * buffer:
                 */
                memcpy(&_thread_run->saved_sigcontext, scp, sizeof(_thread_run->saved_sigcontext));

#ifndef __alpha__
                /* Point to the floating point data in the running thread: */
                fdata = _thread_run->saved_fp;

                /* Save the floating point data: */
__asm__("fnsave %0": :"m"(*fdata));
#endif

                /* Flag the signal context as the last state saved: */
                _thread_run->sig_saved = 1;
        }
        /* Save the state of the current thread: */
        else if (setjmp(_thread_run->saved_jmp_buf) != 0) {
                /*
                 * This point is reached when a longjmp() is called to
                 * restore the state of a thread.
                 *
                 * This is the normal way out of the scheduler.
                 */
                _thread_kern_in_sched = 0;

                if (((_thread_run->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
                    ((_thread_run->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0)) {
                        /*
                         * Cancellations override signals.
                         *
                         * Stick a cancellation point at the start of
                         * each async-cancellable thread's resumption.
                         *
                         * We allow threads woken at cancel points to do their
                         * own checks.
                         */
                        pthread_testcancel();
                }

                /*
                 * Check for undispatched signals due to calls to
                 * pthread_kill().
                 */
                if (SIGNOTEMPTY(_thread_run->sigpend))
                        _dispatch_signals();

                if (_sched_switch_hook != NULL) {
                        /* Run the installed switch hook: */
                        thread_run_switch_hook(_last_user_thread, _thread_run);
                }

                return;
        } else
                /* Flag the jump buffer as the last state saved: */
                _thread_run->sig_saved = 0;

        /* If the currently running thread is a user thread, save it: */
        if ((_thread_run->flags & PTHREAD_FLAGS_PRIVATE) == 0)
                _last_user_thread = _thread_run;

        /*
         * Enter a scheduling loop that finds the next thread that is
         * ready to run. This loop completes when there are no more threads
         * in the global list or when a thread has its state restored by
         * either a sigreturn (if the state was saved as a sigcontext) or a
         * longjmp (if the state was saved by a setjmp).
         */
        while (!(TAILQ_EMPTY(&_thread_list))) {
                /* Get the current time of day: */
                gettimeofday(&tv, NULL);
                TIMEVAL_TO_TIMESPEC(&tv, &ts);

                /*
                 * Protect the scheduling queues from access by the signal
                 * handler.
                 */
                _queue_signals = 1;

                if (_thread_run != &_thread_kern_thread) {

                        /*
                         * This thread no longer needs to yield the CPU.
                         */
                        _thread_run->yield_on_sig_undefer = 0;

                        /*
                         * Save the current time as the time that the thread
                         * became inactive:
                         */
                        _thread_run->last_inactive.tv_sec = tv.tv_sec;
                        _thread_run->last_inactive.tv_usec = tv.tv_usec;

                        /*
                         * Place the currently running thread into the
                         * appropriate queue(s).
                         */
                        switch (_thread_run->state) {
                        case PS_DEAD:
                        case PS_STATE_MAX: /* to silence -Wall */
                                /*
                                 * Dead threads are not placed in any queue:
                                 */
                                break;

                        case PS_RUNNING:
                                /*
                                 * Runnable threads can't be placed in the
                                 * priority queue until after waiting threads
                                 * are polled (to preserve round-robin
                                 * scheduling).
                                 */
                                if ((_thread_run->slice_usec != -1) &&
                                    (_thread_run->attr.sched_policy != SCHED_FIFO)) {
                                        /*
                                         * Accumulate the number of microseconds that
                                         * this thread has run for:
                                         */
                                        _thread_run->slice_usec +=
                                            (_thread_run->last_inactive.tv_sec -
                                            _thread_run->last_active.tv_sec) * 1000000 +
                                            _thread_run->last_inactive.tv_usec -
                                            _thread_run->last_active.tv_usec;
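                                        /*
                                         * Example: last_active = 5.900000s
                                         * and last_inactive = 6.050000s add
                                         * (6 - 5) * 1000000 + 50000 - 900000
                                         * = 150000 microseconds to the slice.
                                         */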

                                        /* Check for time quantum exceeded: */
                                        if (_thread_run->slice_usec > TIMESLICE_USEC)
                                                _thread_run->slice_usec = -1;
                                }
                                break;

                        /*
                         * States which do not depend on file descriptor I/O
                         * operations or timeouts:
                         */
                        case PS_DEADLOCK:
                        case PS_FDLR_WAIT:
                        case PS_FDLW_WAIT:
                        case PS_FILE_WAIT:
                        case PS_JOIN:
                        case PS_MUTEX_WAIT:
                        case PS_SIGSUSPEND:
                        case PS_SIGTHREAD:
                        case PS_SIGWAIT:
                        case PS_SUSPENDED:
                        case PS_WAIT_WAIT:
                                /* No timeouts for these states: */
                                _thread_run->wakeup_time.tv_sec = -1;
                                _thread_run->wakeup_time.tv_nsec = -1;

                                /* Restart the time slice: */
                                _thread_run->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(_thread_run);
                                break;

                        /* States which can timeout: */
                        case PS_COND_WAIT:
                        case PS_SLEEP_WAIT:
                                /* Restart the time slice: */
                                _thread_run->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(_thread_run);
                                break;

                        /* States that require periodic work: */
                        case PS_SPINBLOCK:
                                /* No timeouts for this state: */
                                _thread_run->wakeup_time.tv_sec = -1;
                                _thread_run->wakeup_time.tv_nsec = -1;

                                /* Increment spinblock count: */
                                _spinblock_count++;

                                /* fall through */
                        case PS_FDR_WAIT:
                        case PS_FDW_WAIT:
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                /* Restart the time slice: */
                                _thread_run->slice_usec = -1;

                                /* Insert into the waiting queue: */
                                PTHREAD_WAITQ_INSERT(_thread_run);

                                /* Insert into the work queue: */
                                PTHREAD_WORKQ_INSERT(_thread_run);
                                break;
                        }
                }

                /* Unprotect the scheduling queues: */
                _queue_signals = 0;

                /*
                 * Poll file descriptors to update the state of threads
                 * waiting on file I/O where data may be available:
                 */
                _thread_kern_poll(0);

                /* Protect the scheduling queues: */
                _queue_signals = 1;

                /*
                 * Wake up threads that have timed out.  This has to be
                 * done after polling in case a thread does a poll or
                 * select with zero time.
                 */
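                /*
                 * A wakeup_time with tv_sec == -1 means "no timeout" and
                 * never matches here; a wakeup_time of (0, 0) means "wake
                 * immediately"; anything else is an absolute time of day
                 * compared against ts.
                 */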
                PTHREAD_WAITQ_SETACTIVE();
                while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
                    (pthread->wakeup_time.tv_sec != -1) &&
                    (((pthread->wakeup_time.tv_sec == 0) &&
                    (pthread->wakeup_time.tv_nsec == 0)) ||
                    (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
                    ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
                    (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
                        switch (pthread->state) {
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                /* Return zero file descriptors ready: */
                                pthread->data.poll_data->nfds = 0;
                                /* fall through */
                        default:
                                /*
                                 * Remove this thread from the waiting queue
                                 * (and work queue if necessary) and place it
                                 * in the ready queue.
                                 */
                                PTHREAD_WAITQ_CLEARACTIVE();
                                if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                PTHREAD_NEW_STATE(pthread, PS_RUNNING);
                                PTHREAD_WAITQ_SETACTIVE();
                                break;
                        }
                        /*
                         * Flag the timeout in the thread structure:
                         */
                        pthread->timeout = 1;
                }
                PTHREAD_WAITQ_CLEARACTIVE();

                /*
                 * Check if there is a current runnable thread that isn't
                 * already in the ready queue:
                 */
                if ((_thread_run != &_thread_kern_thread) &&
                    (_thread_run->state == PS_RUNNING) &&
                    ((_thread_run->flags & PTHREAD_FLAGS_IN_PRIOQ) == 0)) {
                        if (_thread_run->slice_usec == -1) {
                                /*
                                 * The thread exceeded its time
                                 * quantum or it yielded the CPU;
                                 * place it at the tail of the
                                 * queue for its priority.
                                 */
                                PTHREAD_PRIOQ_INSERT_TAIL(_thread_run);
                        } else {
                                /*
                                 * The thread hasn't exceeded its
                                 * interval.  Place it at the head
                                 * of the queue for its priority.
                                 */
                                PTHREAD_PRIOQ_INSERT_HEAD(_thread_run);
                        }
                }

                /*
                 * Get the highest priority thread in the ready queue.
                 */
                pthread_h = PTHREAD_PRIOQ_FIRST();

                /* Check if there are no threads ready to run: */
                if (pthread_h == NULL) {
                        /*
                         * Lock the pthread kernel by changing the pointer to
                         * the running thread to point to the global kernel
                         * thread structure:
                         */
                        _thread_run = &_thread_kern_thread;

                        /* Unprotect the scheduling queues: */
                        _queue_signals = 0;

                        /*
                         * There are no threads ready to run, so wait until
                         * something happens that changes this condition:
                         */
                        _thread_kern_poll(1);
                } else {
                        /* Remove the thread from the ready queue: */
                        PTHREAD_PRIOQ_REMOVE(pthread_h);

                        /* Get first thread on the waiting list: */
                        pthread = TAILQ_FIRST(&_waitingq);

                        /* Check to see if there is more than one thread: */
                        if (pthread_h != TAILQ_FIRST(&_thread_list) ||
                            TAILQ_NEXT(pthread_h, tle) != NULL)
                                set_timer = 1;
                        else
                                set_timer = 0;

                        /* Unprotect the scheduling queues: */
                        _queue_signals = 0;

                        /*
                         * Check for signals queued while the scheduling
                         * queues were protected:
                         */
                        while (_sigq_check_reqd != 0) {
                                /* Clear before handling queued signals: */
                                _sigq_check_reqd = 0;

                                /* Protect the scheduling queues again: */
                                _queue_signals = 1;

                                dequeue_signals();

                                /*
                                 * Check for a higher priority thread that
                                 * became runnable due to signal handling.
                                 */
                                if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
                                    (pthread->active_priority > pthread_h->active_priority)) {
                                        /*
                                         * Insert the lower priority thread
                                         * at the head of its priority list:
                                         */
                                        PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);

                                        /* Remove the thread from the ready queue: */
                                        PTHREAD_PRIOQ_REMOVE(pthread);

                                        /* There's a new thread in town: */
                                        pthread_h = pthread;
                                }

                                /* Get first thread on the waiting list: */
                                pthread = TAILQ_FIRST(&_waitingq);

                                /*
                                 * Check to see if there is more than one
                                 * thread:
                                 */
                                if (pthread_h != TAILQ_FIRST(&_thread_list) ||
                                    TAILQ_NEXT(pthread_h, tle) != NULL)
                                        set_timer = 1;
                                else
                                        set_timer = 0;

                                /* Unprotect the scheduling queues: */
                                _queue_signals = 0;
                        }

                        /* Make the selected thread the current thread: */
                        _thread_run = pthread_h;

                        /*
                         * Save the current time as the time that the thread
                         * became active:
                         */
                        _thread_run->last_active.tv_sec = tv.tv_sec;
                        _thread_run->last_active.tv_usec = tv.tv_usec;

                        /*
                         * Define the maximum time before a scheduling signal
                         * is required:
                         */
                        itimer.it_value.tv_sec = 0;
                        itimer.it_value.tv_usec = TIMESLICE_USEC;

                        /*
                         * The interval timer is not reloaded when it
                         * times out. The interval time needs to be
                         * calculated every time.
                         */
                        itimer.it_interval.tv_sec = 0;
                        itimer.it_interval.tv_usec = 0;

                        /* Get first thread on the waiting list: */
                        if ((pthread != NULL) &&
                            (pthread->wakeup_time.tv_sec != -1)) {
                                /*
                                 * Calculate the time until this thread
                                 * is ready, allowing for the clock
                                 * resolution:
                                 */
                                ts1.tv_sec = pthread->wakeup_time.tv_sec
                                    - ts.tv_sec;
                                ts1.tv_nsec = pthread->wakeup_time.tv_nsec
                                    - ts.tv_nsec + _clock_res_nsec;

                                /*
                                 * Check for underflow of the nanosecond field:
                                 */
                                while (ts1.tv_nsec < 0) {
                                        /*
                                         * Allow for the underflow of the
                                         * nanosecond field:
                                         */
                                        ts1.tv_sec--;
                                        ts1.tv_nsec += 1000000000;
                                }
                                /*
                                 * Check for overflow of the nanosecond field:
                                 */
                                while (ts1.tv_nsec >= 1000000000) {
                                        /*
                                         * Allow for the overflow of the
                                         * nanosecond field:
                                         */
                                        ts1.tv_sec++;
                                        ts1.tv_nsec -= 1000000000;
                                }
                                /*
                                 * Convert the timespec structure to a
                                 * timeval structure:
                                 */
                                TIMESPEC_TO_TIMEVAL(&tv1, &ts1);

                                /*
                                 * Check if the thread will be ready
                                 * sooner than the earliest ones found
                                 * so far:
                                 */
                                if (timercmp(&tv1, &itimer.it_value, <)) {
                                        /*
                                         * Update the time value:
                                         */
                                        itimer.it_value.tv_sec = tv1.tv_sec;
                                        itimer.it_value.tv_usec = tv1.tv_usec;
                                }
                        }
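                        /*
                         * Example: if the next waiter is due in 5ms and
                         * the time slice is longer than that, the timer
                         * is shortened to roughly 5ms plus the clock
                         * resolution, so the scheduler runs again in
                         * time to wake that thread.
                         */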

                        /*
                         * Check if this thread is running for the first time
                         * or running again after using its full time slice
                         * allocation:
                         */
                        if (_thread_run->slice_usec == -1) {
                                /* Reset the accumulated time slice period: */
                                _thread_run->slice_usec = 0;
                        }

                        /* Check if there is more than one thread: */
                        if (set_timer != 0) {
                                /*
                                 * Start the interval timer for the
                                 * calculated time interval:
                                 */
                                if (setitimer(_ITIMER_SCHED_TIMER, &itimer, NULL) != 0) {
                                        /*
                                         * Cannot initialise the timer, so
                                         * abort this process:
                                         */
                                        PANIC("Cannot set scheduling timer");
                                }
                        }

                        /*
                         * Check if this thread is being continued from a
                         * longjmp() out of a signal handler:
                         */
                        if ((_thread_run->jmpflags & JMPFLAGS_LONGJMP) != 0) {
                                _thread_run->jmpflags = 0;
                                __longjmp(_thread_run->nested_jmp.jmp,
                                    _thread_run->longjmp_val);
                        }
                        /*
                         * Check if this thread is being continued from a
                         * _longjmp() out of a signal handler:
                         */
                        else if ((_thread_run->jmpflags & JMPFLAGS__LONGJMP) !=
                            0) {
                                _thread_run->jmpflags = 0;
                                ___longjmp(_thread_run->nested_jmp.jmp,
                                    _thread_run->longjmp_val);
                        }
                        /*
                         * Check if this thread is being continued from a
                         * siglongjmp() out of a signal handler:
                         */
                        else if ((_thread_run->jmpflags & JMPFLAGS_SIGLONGJMP)
                            != 0) {
                                _thread_run->jmpflags = 0;
                                __siglongjmp(
                                    _thread_run->nested_jmp.sigjmp,
                                    _thread_run->longjmp_val);
                        }
                        /* Check if a signal context was saved: */
                        else if (_thread_run->sig_saved == 1) {
#ifndef __alpha__
                                /*
                                 * Point to the floating point data in the
                                 * running thread:
                                 */
                                fdata = _thread_run->saved_fp;

                                /* Restore the floating point state: */
                __asm__("frstor %0": :"m"(*fdata));
#endif
                                /*
                                 * Do a sigreturn to restart the thread that
                                 * was interrupted by a signal:
                                 */
                                _thread_kern_in_sched = 0;

                                /*
                                 * If we had a context switch, run any
                                 * installed switch hooks.
                                 */
                                if ((_sched_switch_hook != NULL) &&
                                    (_last_user_thread != _thread_run)) {
                                        thread_run_switch_hook(_last_user_thread,
                                            _thread_run);
                                }
                                _thread_sys_sigreturn(&_thread_run->saved_sigcontext);
                        } else {
                                /*
                                 * Do a longjmp to restart the thread that
                                 * was context switched out (by a longjmp to
                                 * a different thread):
                                 */
                                __longjmp(_thread_run->saved_jmp_buf, 1);
                        }

                        /* This point should not be reached. */
                        PANIC("Thread has returned from sigreturn or longjmp");
                }
        }

        /* There are no more threads, so exit this process: */
        exit(0);
}

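/*
 * Illustrative use of _thread_kern_sched_state() together with
 * _thread_kern_set_timeout() (defined below) by a blocking primitive;
 * this is a sketch, not code taken from this library:
 *
 *      struct timespec ts = { 0, 50000000 };   (50 ms)
 *      _thread_kern_set_timeout(&ts);
 *      _thread_kern_sched_state(PS_SLEEP_WAIT, __FILE__, __LINE__);
 *      if (_thread_run->timeout)
 *              (the wait timed out before the thread was woken)
 */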
void
_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
{
        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /*
         * Prevent the signal handler from fiddling with this thread
         * before its state is set and it is placed into the proper queue.
         */
        _queue_signals = 1;

        /* Change the state of the current thread: */
        _thread_run->state = state;
        _thread_run->fname = fname;
        _thread_run->lineno = lineno;

        /* Schedule the next thread that is ready: */
        _thread_kern_sched(NULL);
        return;
}

void
_thread_kern_sched_state_unlock(enum pthread_state state,
    spinlock_t *lock, char *fname, int lineno)
{
        /*
         * Flag the pthread kernel as executing scheduler code
         * to avoid a scheduler signal from interrupting this
         * execution and calling the scheduler again.
         */
        _thread_kern_in_sched = 1;

        /*
         * Prevent the signal handler from fiddling with this thread
         * before its state is set and it is placed into the proper
         * queue(s).
         */
        _queue_signals = 1;

        /* Change the state of the current thread: */
        _thread_run->state = state;
        _thread_run->fname = fname;
        _thread_run->lineno = lineno;

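        /*
         * Release the lock only now that the state change above is
         * complete; with _queue_signals still set, a signal arriving
         * here is queued rather than allowed to reschedule this thread
         * while it is between queues.
         */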
        _SPINUNLOCK(lock);

        /* Schedule the next thread that is ready: */
        _thread_kern_sched(NULL);
        return;
}

static void
_thread_kern_poll(int wait_reqd)
{
        int             count = 0;
        int             i, found;
        int             kern_pipe_added = 0;
        int             nfds = 0;
        int             timeout_ms = 0;
        struct pthread  *pthread;
        struct timespec ts;
        struct timeval  tv;

        /* Check if the caller wants to wait: */
        if (wait_reqd == 0) {
                timeout_ms = 0;
        }
        else {
                /* Get the current time of day: */
                gettimeofday(&tv, NULL);
                TIMEVAL_TO_TIMESPEC(&tv, &ts);

                _queue_signals = 1;
                pthread = TAILQ_FIRST(&_waitingq);
                _queue_signals = 0;

                if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
                        /*
                         * Either there are no threads in the waiting queue,
                         * or there are no threads that can time out.
                         */
                        timeout_ms = INFTIM;
                }
                else {
                        /*
                         * Calculate the time left for the next thread to
                         * time out, allowing for the clock resolution:
                         */
                        timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
                            1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec +
                            _clock_res_nsec) / 1000000);
                        /*
                         * Don't allow negative timeouts:
                         */
                        if (timeout_ms < 0)
                                timeout_ms = 0;
                }
        }

        /* Protect the scheduling queues: */
        _queue_signals = 1;

        /*
         * Check to see if the signal queue needs to be walked to look
         * for threads awoken by a signal while in the scheduler.
         */
        if (_sigq_check_reqd != 0) {
                /* Reset flag before handling queued signals: */
                _sigq_check_reqd = 0;

                dequeue_signals();
        }

        /*
         * Check for a thread that became runnable due to a signal:
         */
        if (PTHREAD_PRIOQ_FIRST() != NULL) {
                /*
                 * Since there is at least one runnable thread,
                 * disable the wait.
                 */
                timeout_ms = 0;
        }

        /*
         * Form the poll table:
         */
        nfds = 0;
        if (timeout_ms != 0) {
                /* Add the kernel pipe to the poll table: */
                _thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
                _thread_pfd_table[nfds].events = POLLRDNORM;
                _thread_pfd_table[nfds].revents = 0;
                nfds++;
                kern_pipe_added = 1;
        }

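        /*
         * Poll table layout: slot 0 is the kernel signal pipe (only when
         * a wait was requested), followed by one slot for each thread in
         * PS_FDR_WAIT or PS_FDW_WAIT and a run of slots for each poll or
         * select waiter, appended in work queue order.  The scan after
         * the poll() call below walks the work queue in the same order,
         * so its nfds index lines up with these slots.
         */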
        PTHREAD_WAITQ_SETACTIVE();
        TAILQ_FOREACH(pthread, &_workq, qe) {
                switch (pthread->state) {
                case PS_SPINBLOCK:
                        /*
                         * If the lock is available, let the thread run.
                         */
                        if (pthread->data.spinlock->access_lock == 0) {
                                PTHREAD_WAITQ_CLEARACTIVE();
                                PTHREAD_WORKQ_REMOVE(pthread);
                                PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                PTHREAD_WAITQ_SETACTIVE();
                                /* One less thread in a spinblock state: */
                                _spinblock_count--;
                                /*
                                 * Since there is at least one runnable
                                 * thread, disable the wait.
                                 */
                                timeout_ms = 0;
                        }
                        break;

                /* File descriptor read wait: */
                case PS_FDR_WAIT:
                        /* Limit number of polled files to table size: */
                        if (nfds < _thread_dtablesize) {
                                _thread_pfd_table[nfds].events = POLLRDNORM;
                                _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
                                nfds++;
                        }
                        break;

                /* File descriptor write wait: */
                case PS_FDW_WAIT:
                        /* Limit number of polled files to table size: */
                        if (nfds < _thread_dtablesize) {
                                _thread_pfd_table[nfds].events = POLLWRNORM;
                                _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
                                nfds++;
                        }
                        break;

                /* File descriptor poll or select wait: */
                case PS_POLL_WAIT:
                case PS_SELECT_WAIT:
                        /* Limit number of polled files to table size: */
                        if (pthread->data.poll_data->nfds + nfds <
                            _thread_dtablesize) {
                                for (i = 0; i < pthread->data.poll_data->nfds; i++) {
                                        _thread_pfd_table[nfds + i].fd =
                                            pthread->data.poll_data->fds[i].fd;
                                        _thread_pfd_table[nfds + i].events =
                                            pthread->data.poll_data->fds[i].events;
                                }
                                nfds += pthread->data.poll_data->nfds;
                        }
                        break;

                /* Other states do not depend on file I/O. */
                default:
                        break;
                }
        }
        PTHREAD_WAITQ_CLEARACTIVE();

        /*
         * Wait for a file descriptor to be ready for read, write, or
         * an exception, or a timeout to occur:
         */
        count = _thread_sys_poll(_thread_pfd_table, nfds, timeout_ms);

        if (kern_pipe_added != 0)
                /*
                 * Skip over the pthread kernel pipe file descriptor
                 * (slot 0) when rescanning the pollfd table:
                 */
                nfds = 1;
        else
                nfds = 0;

        /*
         * Check if it is possible that there are bytes in the kernel
         * read pipe waiting to be read:
         */
        if (count < 0 || ((kern_pipe_added != 0) &&
            (_thread_pfd_table[0].revents & POLLRDNORM))) {
                /*
                 * If the kernel read pipe was included in the
                 * count:
                 */
                if (count > 0) {
                        /* Decrement the count of file descriptors: */
                        count--;
                }

                if (_sigq_check_reqd != 0) {
                        /* Reset flag before handling signals: */
                        _sigq_check_reqd = 0;

                        dequeue_signals();
                }
        }

        /*
         * Check if any file descriptors are ready:
         */
        if (count > 0) {
                /*
                 * Enter a loop to look for threads waiting on file
                 * descriptors that are flagged as available by the
                 * _poll syscall:
                 */
                PTHREAD_WAITQ_SETACTIVE();
                TAILQ_FOREACH(pthread, &_workq, qe) {
                        switch (pthread->state) {
                        case PS_SPINBLOCK:
                                /*
                                 * If the lock is available, let the thread run.
                                 */
                                if (pthread->data.spinlock->access_lock == 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();

                                        /*
                                         * One less thread in a spinblock state:
                                         */
                                        _spinblock_count--;
                                }
                                break;

                        /* File descriptor read wait: */
                        case PS_FDR_WAIT:
                                if ((nfds < _thread_dtablesize) &&
                                    (_thread_pfd_table[nfds].revents & POLLRDNORM)) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();
                                }
                                nfds++;
                                break;

                        /* File descriptor write wait: */
                        case PS_FDW_WAIT:
                                if ((nfds < _thread_dtablesize) &&
                                    (_thread_pfd_table[nfds].revents & POLLWRNORM)) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();
                                }
                                nfds++;
                                break;

                        /* File descriptor poll or select wait: */
                        case PS_POLL_WAIT:
                        case PS_SELECT_WAIT:
                                if (pthread->data.poll_data->nfds + nfds <
                                    _thread_dtablesize) {
                                        /*
                                         * Enter a loop looking for I/O
                                         * readiness:
                                         */
                                        found = 0;
                                        for (i = 0; i < pthread->data.poll_data->nfds; i++) {
                                                if (_thread_pfd_table[nfds + i].revents != 0) {
                                                        pthread->data.poll_data->fds[i].revents =
                                                            _thread_pfd_table[nfds + i].revents;
                                                        found++;
                                                }
                                        }

                                        /*
                                         * Increment nfds before
                                         * poll_data->nfds is overwritten
                                         * below:
                                         */
                                        nfds += pthread->data.poll_data->nfds;

                                        if (found != 0) {
                                                pthread->data.poll_data->nfds = found;
                                                PTHREAD_WAITQ_CLEARACTIVE();
                                                PTHREAD_WORKQ_REMOVE(pthread);
                                                PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                                PTHREAD_WAITQ_SETACTIVE();
                                        }
                                }
                                else
                                        nfds += pthread->data.poll_data->nfds;
                                break;

                        /* Other states do not depend on file I/O. */
                        default:
                                break;
                        }
                }
                PTHREAD_WAITQ_CLEARACTIVE();
        }
        else if (_spinblock_count != 0) {
                /*
                 * Enter a loop to look for threads waiting on a spinlock
                 * that is now available.
                 */
                PTHREAD_WAITQ_SETACTIVE();
                TAILQ_FOREACH(pthread, &_workq, qe) {
                        if (pthread->state == PS_SPINBLOCK) {
                                /*
                                 * If the lock is available, let the thread run.
                                 */
                                if (pthread->data.spinlock->access_lock == 0) {
                                        PTHREAD_WAITQ_CLEARACTIVE();
                                        PTHREAD_WORKQ_REMOVE(pthread);
                                        PTHREAD_NEW_STATE(pthread,PS_RUNNING);
                                        PTHREAD_WAITQ_SETACTIVE();

                                        /*
                                         * One less thread in a spinblock state:
                                         */
                                        _spinblock_count--;
                                }
                        }
                }
                PTHREAD_WAITQ_CLEARACTIVE();
        }

        /* Unprotect the scheduling queues: */
        _queue_signals = 0;

        while (_sigq_check_reqd != 0) {
                /* Handle queued signals: */
                _sigq_check_reqd = 0;

                /* Protect the scheduling queues: */
                _queue_signals = 1;

                dequeue_signals();

                /* Unprotect the scheduling queues: */
                _queue_signals = 0;
        }

        /* Nothing to return. */
        return;
}

void
_thread_kern_set_timeout(struct timespec * timeout)
{
        struct timespec current_time;
        struct timeval  tv;

        /* Reset the timeout flag for the running thread: */
        _thread_run->timeout = 0;

        /* Check if the thread is to wait forever: */
        if (timeout == NULL) {
                /*
                 * Set the wakeup time to something that can be recognised as
                 * different to an actual time of day:
                 */
                _thread_run->wakeup_time.tv_sec = -1;
                _thread_run->wakeup_time.tv_nsec = -1;
        }
        /* Check if no waiting is required: */
        else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
                /* Set the wake up time to 'immediately': */
                _thread_run->wakeup_time.tv_sec = 0;
                _thread_run->wakeup_time.tv_nsec = 0;
        } else {
                /* Get the current time: */
                gettimeofday(&tv, NULL);
                TIMEVAL_TO_TIMESPEC(&tv, &current_time);

                /* Calculate the time for the current thread to wake up: */
                _thread_run->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
                _thread_run->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;

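                /*
                 * Example: a timeout of 1.5s at current_time = 100.7s
                 * gives tv_sec = 101 and tv_nsec = 1200000000, which
                 * the carry below normalizes to 102.2s.
                 */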
                /* Check if the nanosecond field needs to wrap: */
                if (_thread_run->wakeup_time.tv_nsec >= 1000000000) {
                        /* Wrap the nanosecond field: */
                        _thread_run->wakeup_time.tv_sec += 1;
                        _thread_run->wakeup_time.tv_nsec -= 1000000000;
                }
        }
        return;
}

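/*
 * Signal deferral: _thread_kern_sig_defer() and _thread_kern_sig_undefer()
 * bracket sections that must not be interrupted by a signal handler or
 * rescheduled part-way through.  The count allows such sections to nest;
 * only the outermost undefer processes queued signals and may yield.
 */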
void
_thread_kern_sig_defer(void)
{
        /* Allow signal deferral to be recursive. */
        _thread_run->sig_defer_count++;
}

void
_thread_kern_sig_undefer(void)
{
        pthread_t pthread;
        int need_resched = 0;

        /*
         * Perform checks to yield only if we are about to undefer
         * signals.
         */
        if (_thread_run->sig_defer_count > 1) {
                /* Decrement the signal deferral count. */
                _thread_run->sig_defer_count--;
        }
        else if (_thread_run->sig_defer_count == 1) {
                /* Reenable signals: */
                _thread_run->sig_defer_count = 0;

                /*
                 * Check if there are queued signals:
                 */
                while (_sigq_check_reqd != 0) {
                        /* Defer scheduling while we process queued signals: */
                        _thread_run->sig_defer_count = 1;

                        /* Clear the flag before checking the signal queue: */
                        _sigq_check_reqd = 0;

                        /* Dequeue and handle signals: */
                        dequeue_signals();

                        /*
                         * Unless a reschedule is already pending, check
                         * whether signal handling caused a higher priority
                         * thread to become ready.
                         */
                        if ((need_resched == 0) &&
                            (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
                            (pthread->active_priority > _thread_run->active_priority))) {
                                need_resched = 1;
                        }

                        /* Reenable signals: */
                        _thread_run->sig_defer_count = 0;
                }

                /* Yield the CPU if necessary: */
                if (need_resched || _thread_run->yield_on_sig_undefer != 0) {
                        _thread_run->yield_on_sig_undefer = 0;
                        _thread_kern_sched(NULL);
                }
        }
}

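/*
 * Drain the pthread kernel pipe.  While the scheduling queues are
 * protected, the signal handler queues each arriving signal by writing
 * its number as a single byte to the pipe; reading those bytes back
 * here replays the signals.  The pipe is non-blocking, so EAGAIN below
 * simply means it is empty.
 */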
static void
dequeue_signals(void)
{
        char    bufr[128];
        int     i, num;
        pthread_t pthread;

        /*
         * Enter a loop to read and handle queued signals from the
         * pthread kernel pipe:
         */
        while (((num = _thread_sys_read(_thread_kern_pipe[0], bufr,
            sizeof(bufr))) > 0) || (num == -1 && errno == EINTR)) {
                /*
                 * The buffer read contains one byte per signal and
                 * each byte is the signal number.
                 */
                for (i = 0; i < num; i++) {
                        if ((int) bufr[i] == _SCHED_SIGNAL) {
                                /*
                                 * Scheduling signals shouldn't ever be
                                 * queued; just ignore it for now.
                                 */
                        }
                        else {
                                /* Handle this signal: */
                                pthread = _thread_sig_handle((int) bufr[i],
                                    NULL);
                                if (pthread != NULL)
                                        _thread_sig_deliver(pthread,
                                            (int) bufr[i]);
                        }
                }
        }
        if ((num < 0) && (errno != EAGAIN)) {
                /*
                 * The only error we should expect is if there is
                 * no data to read.
                 */
                PANIC("Unable to read from thread kernel pipe");
        }
}

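/*
 * Run the user-installed switch hook, hiding the library's private
 * threads: a private (kernel) thread is passed to the hook as NULL,
 * and the hook is skipped when it would see the same thread on both
 * sides of the switch.
 */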
static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
{
        pthread_t tid_out = thread_out;
        pthread_t tid_in = thread_in;

        if ((tid_out != NULL) &&
            (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
                tid_out = NULL;
        if ((tid_in != NULL) &&
            (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
                tid_in = NULL;

        if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
                /* Run the scheduler switch hook: */
                _sched_switch_hook(tid_out, tid_in);
        }
}
#endif