lib/libkse/thread/thr_kern.c
1 /*
2  * Copyright (C) 2003 Daniel M. Eischen <deischen@freebsd.org>
3  * Copyright (C) 2002 Jonathon Mini <mini@freebsd.org>
4  * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed by John Birrell.
18  * 4. Neither the name of the author nor the names of any co-contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include <sys/types.h>
39 #include <sys/kse.h>
40 #include <sys/ptrace.h>
41 #include <sys/signalvar.h>
42 #include <sys/queue.h>
43 #include <machine/atomic.h>
44 #include <machine/sigframe.h>
45
46 #include <assert.h>
47 #include <errno.h>
48 #include <signal.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <time.h>
52 #include <ucontext.h>
53 #include <unistd.h>
54
55 #include "atomic_ops.h"
56 #include "thr_private.h"
57 #include "libc_private.h"
58 #ifdef NOTYET
59 #include "spinlock.h"
60 #endif
61
62 /* #define DEBUG_THREAD_KERN */
63 #ifdef DEBUG_THREAD_KERN
64 #define DBG_MSG         stdout_debug
65 #else
66 #define DBG_MSG(x...)
67 #endif
68
69 /*
70  * Define a high water mark for the maximum number of threads that
71  * will be cached.  Once this level is reached, any extra threads
72  * will be free()'d.
73  */
74 #define MAX_CACHED_THREADS      100
75 /*
76  * Define high water marks for the maximum number of KSEs and KSE groups
77  * that will be cached.  Because we support 1:1 threading, there could
78  * be as many KSEs and KSE groups as there are threads.  Once these
79  * levels are reached, any extra KSEs and KSE groups will be free()'d.
80  */
81 #define MAX_CACHED_KSES         ((_thread_scope_system <= 0) ? 50 : 100)
82 #define MAX_CACHED_KSEGS        ((_thread_scope_system <= 0) ? 50 : 100)
83
84 #define KSE_SET_MBOX(kse, thrd) \
85         (kse)->k_kcb->kcb_kmbx.km_curthread = &(thrd)->tcb->tcb_tmbx
86
87 #define KSE_SET_EXITED(kse)     (kse)->k_flags |= KF_EXITED
88
89 /*
90  * Macros for manipulating the run queues.  The priority queue
91  * routines use the thread's pqe link and also handle the setting
92  * and clearing of the thread's THR_FLAGS_IN_RUNQ flag.
93  */
94 #define KSE_RUNQ_INSERT_HEAD(kse, thrd)                 \
95         _pq_insert_head(&(kse)->k_schedq->sq_runq, thrd)
96 #define KSE_RUNQ_INSERT_TAIL(kse, thrd)                 \
97         _pq_insert_tail(&(kse)->k_schedq->sq_runq, thrd)
98 #define KSE_RUNQ_REMOVE(kse, thrd)                      \
99         _pq_remove(&(kse)->k_schedq->sq_runq, thrd)
100 #define KSE_RUNQ_FIRST(kse)                             \
101         ((_libkse_debug == 0) ?                         \
102          _pq_first(&(kse)->k_schedq->sq_runq) :         \
103          _pq_first_debug(&(kse)->k_schedq->sq_runq))
104
105 #define KSE_RUNQ_THREADS(kse)   ((kse)->k_schedq->sq_runq.pq_threads)
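/*
 * A minimal sketch of how the scheduler below uses these macros (see
 * kse_sched_multi()): pick the first runnable thread and take it off
 * the run queue before switching to it (td here is an illustrative
 * local struct pthread pointer):
 *
 *	if ((td = KSE_RUNQ_FIRST(curkse)) != NULL) {
 *		KSE_RUNQ_REMOVE(curkse, td);
 *		curkse->k_curthread = td;
 *	}
 */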
106
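/*
 * Cancellation tests.  THR_NEED_CANCEL() is true when a pending
 * cancellation request can be acted on, either because the thread is
 * at a cancellation point or because asynchronous cancellation is
 * enabled.  THR_NEED_ASYNC_CANCEL() covers only the asynchronous case,
 * where the thread is not at a cancellation point.
 */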
107 #define THR_NEED_CANCEL(thrd)                                           \
108          (((thrd)->cancelflags & THR_CANCELLING) != 0 &&                \
109           ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&        \
110           (((thrd)->cancelflags & THR_AT_CANCEL_POINT) != 0 ||          \
111            ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
112
113 #define THR_NEED_ASYNC_CANCEL(thrd)                                     \
114          (((thrd)->cancelflags & THR_CANCELLING) != 0 &&                \
115           ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&        \
116           (((thrd)->cancelflags & THR_AT_CANCEL_POINT) == 0 &&          \
117            ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
118
119 /*
120  * We've got to keep track of everything that is allocated, not only
121  * to have a speedy free list, but also so they can be deallocated
122  * after a fork().
123  */
124 static TAILQ_HEAD(, kse)        active_kseq;
125 static TAILQ_HEAD(, kse)        free_kseq;
126 static TAILQ_HEAD(, kse_group)  free_kse_groupq;
127 static TAILQ_HEAD(, kse_group)  active_kse_groupq;
128 static TAILQ_HEAD(, kse_group)  gc_ksegq;
129 static struct lock              kse_lock;       /* also used for kseg queue */
130 static int                      free_kse_count = 0;
131 static int                      free_kseg_count = 0;
132 static TAILQ_HEAD(, pthread)    free_threadq;
133 static struct lock              thread_lock;
134 static int                      free_thread_count = 0;
135 static int                      inited = 0;
136 static int                      active_kse_count = 0;
137 static int                      active_kseg_count = 0;
138 static u_int64_t                next_uniqueid = 1;
139
140 LIST_HEAD(thread_hash_head, pthread);
141 #define THREAD_HASH_QUEUES      127
142 static struct thread_hash_head  thr_hashtable[THREAD_HASH_QUEUES];
143 #define THREAD_HASH(thrd)       ((unsigned long)thrd % THREAD_HASH_QUEUES)
144
145 /* Lock for thread tcb constructor/destructor */
146 static pthread_mutex_t          _tcb_mutex;
147
148 #ifdef DEBUG_THREAD_KERN
149 static void     dump_queues(struct kse *curkse);
150 #endif
151 static void     kse_check_completed(struct kse *kse);
152 static void     kse_check_waitq(struct kse *kse);
153 static void     kse_fini(struct kse *curkse);
154 static void     kse_reinit(struct kse *kse, int sys_scope);
155 static void     kse_sched_multi(struct kse_mailbox *kmbx);
156 static void     kse_sched_single(struct kse_mailbox *kmbx);
157 static void     kse_switchout_thread(struct kse *kse, struct pthread *thread);
158 static void     kse_wait(struct kse *kse, struct pthread *td_wait, int sigseq);
159 static void     kse_free_unlocked(struct kse *kse);
160 static void     kse_destroy(struct kse *kse);
161 static void     kseg_free_unlocked(struct kse_group *kseg);
162 static void     kseg_init(struct kse_group *kseg);
163 static void     kseg_reinit(struct kse_group *kseg);
164 static void     kseg_destroy(struct kse_group *kseg);
165 static void     kse_waitq_insert(struct pthread *thread);
166 static void     kse_wakeup_multi(struct kse *curkse);
167 static struct kse_mailbox *kse_wakeup_one(struct pthread *thread);
168 static void     thr_cleanup(struct kse *kse, struct pthread *curthread);
169 static void     thr_link(struct pthread *thread);
170 static void     thr_resume_wrapper(int sig, siginfo_t *, ucontext_t *);
171 static void     thr_resume_check(struct pthread *curthread, ucontext_t *ucp);
172 static int      thr_timedout(struct pthread *thread, struct timespec *curtime);
173 static void     thr_unlink(struct pthread *thread);
174 static void     thr_destroy(struct pthread *curthread, struct pthread *thread);
175 static void     thread_gc(struct pthread *thread);
176 static void     kse_gc(struct pthread *thread);
177 static void     kseg_gc(struct pthread *thread);
178
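/*
 * Charge the user and system ticks recorded in the thread's mailbox
 * against its time slice.  SCHED_FIFO threads are not time sliced; for
 * the others, a slice_usec of -1 flags that the time quantum has been
 * exceeded (kse_sched_multi() resets it to 0 before running the thread
 * again).
 */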
179 static void __inline
180 thr_accounting(struct pthread *thread)
181 {
182         if ((thread->slice_usec != -1) &&
183             (thread->slice_usec <= TIMESLICE_USEC) &&
184             (thread->attr.sched_policy != SCHED_FIFO)) {
185                 thread->slice_usec += (thread->tcb->tcb_tmbx.tm_uticks
186                     + thread->tcb->tcb_tmbx.tm_sticks) * _clock_res_usec;
187                 /* Check for time quantum exceeded: */
188                 if (thread->slice_usec > TIMESLICE_USEC)
189                         thread->slice_usec = -1;
190         }
191         thread->tcb->tcb_tmbx.tm_uticks = 0;
192         thread->tcb->tcb_tmbx.tm_sticks = 0;
193 }
194
195 /*
196  * This is called after a fork().
197  * No locks need to be taken here since we are guaranteed to be
198  * single threaded.
199  * 
200  * XXX
201  * POSIX says that in a threaded process, fork() should be used
202  * only to run new programs, and the effects of calling functions
203  * that require certain resources between the call to fork() and
204  * the call to an exec function are undefined.
205  *
206  * It is not safe to free memory after fork(), because these data
207  * structures may be in an inconsistent state.
208  */
209 void
210 _kse_single_thread(struct pthread *curthread)
211 {
212 #ifdef NOTYET
213         struct kse *kse;
214         struct kse_group *kseg;
215         struct pthread *thread;
216
217         _thr_spinlock_init();
218         *__malloc_lock = (spinlock_t)_SPINLOCK_INITIALIZER;
219         if (__isthreaded) {
220                 _thr_rtld_fini();
221                 _thr_signal_deinit();
222         }
223         __isthreaded = 0;
224         /*
225          * Restore the signal mask early, so that any memory problems
226          * can dump core.
227          */ 
228         __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
229         _thread_active_threads = 1;
230
231         curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
232         curthread->attr.flags &= ~PTHREAD_SCOPE_PROCESS;
233         curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;
234
235         /*
236          * Enter a loop to remove and free all threads other than
237          * the running thread from the active thread list:
238          */
239         while ((thread = TAILQ_FIRST(&_thread_list)) != NULL) {
240                 THR_GCLIST_REMOVE(thread);
241                 /*
242                  * Remove this thread from the list (the current
243                  * thread will be removed but re-added by libpthread
244                  * initialization).
245                  */
246                 TAILQ_REMOVE(&_thread_list, thread, tle);
247                 /* Make sure this isn't the running thread: */
248                 if (thread != curthread) {
249                         _thr_stack_free(&thread->attr);
250                         if (thread->specific != NULL)
251                                 free(thread->specific);
252                         thr_destroy(curthread, thread);
253                 }
254         }
255
256         TAILQ_INIT(&curthread->mutexq);         /* initialize mutex queue */
257         curthread->joiner = NULL;               /* no joining threads yet */
258         curthread->refcount = 0;
259         SIGEMPTYSET(curthread->sigpend);        /* clear pending signals */
260
261         /* Don't free thread-specific data as the caller may require it */
262
263         /* Free the free KSEs: */
264         while ((kse = TAILQ_FIRST(&free_kseq)) != NULL) {
265                 TAILQ_REMOVE(&free_kseq, kse, k_qe);
266                 kse_destroy(kse);
267         }
268         free_kse_count = 0;
269
270         /* Free the active KSEs: */
271         while ((kse = TAILQ_FIRST(&active_kseq)) != NULL) {
272                 TAILQ_REMOVE(&active_kseq, kse, k_qe);
273                 kse_destroy(kse);
274         }
275         active_kse_count = 0;
276
277         /* Free the free KSEGs: */
278         while ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
279                 TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
280                 kseg_destroy(kseg);
281         }
282         free_kseg_count = 0;
283
284         /* Free the active KSEGs: */
285         while ((kseg = TAILQ_FIRST(&active_kse_groupq)) != NULL) {
286                 TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
287                 kseg_destroy(kseg);
288         }
289         active_kseg_count = 0;
290
291         /* Free the free threads. */
292         while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
293                 TAILQ_REMOVE(&free_threadq, thread, tle);
294                 thr_destroy(curthread, thread);
295         }
296         free_thread_count = 0;
297
298         /* Free the to-be-gc'd threads. */
299         while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
300                 TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
301                 thr_destroy(curthread, thread);
302         }
303         TAILQ_INIT(&gc_ksegq);
304         _gc_count = 0;
305
306         if (inited != 0) {
307                 /*
308                  * Destroy these locks; they'll be recreated to assure they
309                  * are in the unlocked state.
310                  */
311                 _lock_destroy(&kse_lock);
312                 _lock_destroy(&thread_lock);
313                 _lock_destroy(&_thread_list_lock);
314                 inited = 0;
315         }
316
317         /* We're no longer part of any lists */
318         curthread->tlflags = 0;
319
320         /*
321          * After a fork, we are still operating on the thread's original
322          * stack.  Don't clear the THR_FLAGS_USER from the thread's
323          * attribute flags.
324          */
325
326         /* Initialize the threads library. */
327         curthread->kse = NULL;
328         curthread->kseg = NULL;
329         _kse_initial = NULL;
330         _libpthread_init(curthread);
331 #else
332         int i;
333
334         /* Reset the current thread and KSE lock data. */
335         for (i = 0; i < curthread->locklevel; i++) {
336                 _lockuser_reinit(&curthread->lockusers[i], (void *)curthread);
337         }
338         curthread->locklevel = 0;
339         for (i = 0; i < curthread->kse->k_locklevel; i++) {
340                 _lockuser_reinit(&curthread->kse->k_lockusers[i],
341                     (void *)curthread->kse);
342                 _LCK_SET_PRIVATE2(&curthread->kse->k_lockusers[i], NULL);
343         }
344         curthread->kse->k_locklevel = 0;
345
346         /*
347          * Reinitialize the thread and signal locks so that
348          * sigaction() will work after a fork().
349          */
350         _lock_reinit(&curthread->lock, LCK_ADAPTIVE, _thr_lock_wait,
351             _thr_lock_wakeup);
352         _lock_reinit(&_thread_signal_lock, LCK_ADAPTIVE, _kse_lock_wait,
353             _kse_lock_wakeup);
354
355  
356         _thr_spinlock_init();
357         if (__isthreaded) {
358                 _thr_rtld_fini();
359                 _thr_signal_deinit();
360         }
361         __isthreaded = 0;
362         curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
363         curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;
364
365         /*
366          * After a fork, it is possible that an upcall occurs in
367          * the parent KSE that fork()'d before the child process
368          * is fully created and before its vm space is copied.
369          * During the upcall, the tcb is set to null or to another
370          * thread, and this is what gets copied in the child process
371          * when the vm space is cloned sometime after the upcall
372          * occurs.  Note that we shouldn't have to set the kcb, but
373          * we do it for completeness.
374          */
375         _kcb_set(curthread->kse->k_kcb);
376         _tcb_set(curthread->kse->k_kcb, curthread->tcb);
377  
378
379         /* After a fork(), the child should have no pending signals. */
380         sigemptyset(&curthread->sigpend);
381
382         /*
383          * Restore the signal mask early, so that any memory problems
384          * can dump core.
385          */ 
386         sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
387         _thread_active_threads = 1;
388 #endif
389 }
390
391 /*
392  * This is used to initialize housekeeping and to initialize the
393  * KSD for the KSE.
394  */
395 void
396 _kse_init(void)
397 {
398         if (inited == 0) {
399                 TAILQ_INIT(&active_kseq);
400                 TAILQ_INIT(&active_kse_groupq);
401                 TAILQ_INIT(&free_kseq);
402                 TAILQ_INIT(&free_kse_groupq);
403                 TAILQ_INIT(&free_threadq);
404                 TAILQ_INIT(&gc_ksegq);
405                 if (_lock_init(&kse_lock, LCK_ADAPTIVE,
406                     _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
407                         PANIC("Unable to initialize free KSE queue lock");
408                 if (_lock_init(&thread_lock, LCK_ADAPTIVE,
409                     _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
410                         PANIC("Unable to initialize free thread queue lock");
411                 if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
412                     _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
413                         PANIC("Unable to initialize thread list lock");
414                 _pthread_mutex_init(&_tcb_mutex, NULL);
415                 active_kse_count = 0;
416                 active_kseg_count = 0;
417                 _gc_count = 0;
418                 inited = 1;
419         }
420 }
421
422 /*
423  * This is called when the first thread (other than the initial
424  * thread) is created.
425  */
426 int
427 _kse_setthreaded(int threaded)
428 {
429         sigset_t sigset;
430
431         if ((threaded != 0) && (__isthreaded == 0)) {
432                 SIGFILLSET(sigset);
433                 __sys_sigprocmask(SIG_SETMASK, &sigset, &_thr_initial->sigmask);
434
435                 /*
436                  * Tell the kernel to create a KSE for the initial thread
437                  * and enable upcalls in it.
438                  */
439                 _kse_initial->k_flags |= KF_STARTED;
440
441                 if (_thread_scope_system <= 0) {
442                         _thr_initial->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
443                         _kse_initial->k_kseg->kg_flags &= ~KGF_SINGLE_THREAD;
444                         _kse_initial->k_kcb->kcb_kmbx.km_curthread = NULL;
445                 }
446                 else {
447                         /*
448                          * For a bound thread, the kernel reads the mailbox
449                          * pointer only once, so set it here before calling kse_create().
450                          */
451                         _tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
452                         KSE_SET_MBOX(_kse_initial, _thr_initial);
453                         _kse_initial->k_kcb->kcb_kmbx.km_flags |= KMF_BOUND;
454                 }
455
456                 /*
457                  * Locking functions in libc are required when there are
458                  * threads other than the initial thread.
459                  */
460                 _thr_rtld_init();
461
462                 __isthreaded = 1;
463                 if (kse_create(&_kse_initial->k_kcb->kcb_kmbx, 0) != 0) {
464                         _kse_initial->k_flags &= ~KF_STARTED;
465                         __isthreaded = 0;
466                         PANIC("kse_create() failed\n");
467                         return (-1);
468                 }
469                 _thr_initial->tcb->tcb_tmbx.tm_lwp = 
470                         _kse_initial->k_kcb->kcb_kmbx.km_lwp;
471                 _thread_activated = 1;
472
473 #ifndef SYSTEM_SCOPE_ONLY
474                 if (_thread_scope_system <= 0) {
475                         /* Set current thread to initial thread */
476                         _tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
477                         KSE_SET_MBOX(_kse_initial, _thr_initial);
478                         _thr_start_sig_daemon();
479                         _thr_setmaxconcurrency();
480                 }
481                 else
482 #endif
483                         __sys_sigprocmask(SIG_SETMASK, &_thr_initial->sigmask,
484                             NULL);
485         }
486         return (0);
487 }
488
489 /*
490  * Lock wait and wakeup handlers for KSE locks.  These are only used by
491  * KSEs, and should never be used by threads.  KSE locks include the
492  * KSE group lock (used for locking the scheduling queue) and the
493  * kse_lock defined above.
494  *
495  * When a KSE lock attempt blocks, the entire KSE blocks, allowing another
496  * KSE to run.  For the most part, it doesn't make much sense to try to
497  * schedule another thread, because you need to lock the scheduling queue
498  * in order to do that.  And since the KSE lock is used to lock the
499  * scheduling queue, you would just end up blocking again.
500  */
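/*
 * A minimal sketch of how these handlers are wired up (see _kse_init()
 * below):
 *
 *	_lock_init(&kse_lock, LCK_ADAPTIVE,
 *	    _kse_lock_wait, _kse_lock_wakeup, calloc);
 */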
501 void
502 _kse_lock_wait(struct lock *lock, struct lockuser *lu)
503 {
504         struct kse *curkse = (struct kse *)_LCK_GET_PRIVATE(lu);
505         struct timespec ts;
506         int saved_flags;
507
508         if (curkse->k_kcb->kcb_kmbx.km_curthread != NULL)
509                 PANIC("kse_lock_wait does not disable upcall.\n");
510         /*
511          * Enter a loop to wait until we get the lock.
512          */
513         ts.tv_sec = 0;
514         ts.tv_nsec = 1000000;  /* 1 ms */
515         while (!_LCK_GRANTED(lu)) {
516                 /*
517                  * Yield the kse and wait to be notified when the lock
518                  * is granted.
519                  */
520                 saved_flags = curkse->k_kcb->kcb_kmbx.km_flags;
521                 curkse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL |
522                     KMF_NOCOMPLETED;
523                 kse_release(&ts);
524                 curkse->k_kcb->kcb_kmbx.km_flags = saved_flags;
525         }
526 }
527
528 void
529 _kse_lock_wakeup(struct lock *lock, struct lockuser *lu)
530 {
531         struct kse *curkse;
532         struct kse *kse;
533         struct kse_mailbox *mbx;
534
535         curkse = _get_curkse();
536         kse = (struct kse *)_LCK_GET_PRIVATE(lu);
537
538         if (kse == curkse)
539                 PANIC("KSE trying to wake itself up in lock");
540         else {
541                 mbx = &kse->k_kcb->kcb_kmbx;
542                 _lock_grant(lock, lu);
543                 /*
544                  * Notify the owning kse that it has the lock.
545                  * It is safe to pass an invalid address to kse_wakeup(),
546                  * even if the mailbox is not in the kernel at all,
547                  * and waking up the wrong kse is also harmless.
548                  */
549                 kse_wakeup(mbx);
550         }
551 }
552
553 /*
554  * Thread wait and wakeup handlers for thread locks.  These are only used
555  * by threads, never by KSEs.  Thread locks include the per-thread lock
556  * (defined in its structure), and condition variable and mutex locks.
557  */
558 void
559 _thr_lock_wait(struct lock *lock, struct lockuser *lu)
560 {
561         struct pthread *curthread = (struct pthread *)lu->lu_private;
562
563         do {
564                 THR_LOCK_SWITCH(curthread);
565                 THR_SET_STATE(curthread, PS_LOCKWAIT);
566                 _thr_sched_switch_unlocked(curthread);
567         } while (!_LCK_GRANTED(lu));
568 }
569
570 void
571 _thr_lock_wakeup(struct lock *lock, struct lockuser *lu)
572 {
573         struct pthread *thread;
574         struct pthread *curthread;
575         struct kse_mailbox *kmbx;
576
577         curthread = _get_curthread();
578         thread = (struct pthread *)_LCK_GET_PRIVATE(lu);
579
580         THR_SCHED_LOCK(curthread, thread);
581         _lock_grant(lock, lu);
582         kmbx = _thr_setrunnable_unlocked(thread);
583         THR_SCHED_UNLOCK(curthread, thread);
584         if (kmbx != NULL)
585                 kse_wakeup(kmbx);
586 }
587
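/*
 * KSE critical regions.  While a KSE is in a critical region the kernel
 * treats it as running in the UTS and will not interrupt it with an
 * upcall; _kse_critical_leave() also gives the current thread (if any)
 * a chance to yield.
 */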
588 kse_critical_t
589 _kse_critical_enter(void)
590 {
591         kse_critical_t crit;
592
593         crit = (kse_critical_t)_kcb_critical_enter();
594         return (crit);
595 }
596
597 void
598 _kse_critical_leave(kse_critical_t crit)
599 {
600         struct pthread *curthread;
601
602         _kcb_critical_leave((struct kse_thr_mailbox *)crit);
603         if ((crit != NULL) && ((curthread = _get_curthread()) != NULL))
604                 THR_YIELD_CHECK(curthread);
605 }
606
607 int
608 _kse_in_critical(void)
609 {
610         return (_kcb_in_critical());
611 }
612
613 void
614 _thr_critical_enter(struct pthread *thread)
615 {
616         thread->critical_count++;
617 }
618
619 void
620 _thr_critical_leave(struct pthread *thread)
621 {
622         thread->critical_count--;
623         THR_YIELD_CHECK(thread);
624 }
625
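/*
 * Voluntarily switch out the current thread: enter a KSE critical
 * region, lock the scheduling queue, and hand off to the unlocked
 * variant below.
 */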
626 void
627 _thr_sched_switch(struct pthread *curthread)
628 {
629         struct kse *curkse;
630
631         (void)_kse_critical_enter();
632         curkse = _get_curkse();
633         KSE_SCHED_LOCK(curkse, curkse->k_kseg);
634         _thr_sched_switch_unlocked(curthread);
635 }
636
637 /*
638  * XXX - We may need to take the scheduling lock before calling
639  *       this, or perhaps take the lock within here before
640  *       doing anything else.
641  */
642 void
643 _thr_sched_switch_unlocked(struct pthread *curthread)
644 {
645         struct kse *curkse;
646         volatile int resume_once = 0;
647         ucontext_t *uc;
648
649         /* We're in the scheduler, 5 by 5: */
650         curkse = curthread->kse;
651
652         curthread->need_switchout = 1;  /* The thread yielded on its own. */
653         curthread->critical_yield = 0;  /* No need to yield anymore. */
654
655         /* Thread can unlock the scheduler lock. */
656         curthread->lock_switch = 1;
657
658         if (curthread->attr.flags & PTHREAD_SCOPE_SYSTEM)
659                 kse_sched_single(&curkse->k_kcb->kcb_kmbx);
660         else {
661                 if (__predict_false(_libkse_debug != 0)) {
662                         /*
663                          * Because the debugger saves the single-step status
664                          * in the thread mailbox's tm_dflags, we can safely
665                          * clear the single-step status here.  It will be
666                          * restored by kse_switchin() when the thread is
667                          * switched in again.  This also lets the UTS run
668                          * at full speed.
669                          */
670                          ptrace(PT_CLEARSTEP, curkse->k_kcb->kcb_kmbx.km_lwp,
671                                 (caddr_t) 1, 0);
672                 }
673
674                 KSE_SET_SWITCH(curkse);
675                 _thread_enter_uts(curthread->tcb, curkse->k_kcb);
676         }
677         
678         /*
679          * Unlock the scheduling queue and leave the
680          * critical region.
681          */
682         /* Don't trust this after a switch! */
683         curkse = curthread->kse;
684
685         curthread->lock_switch = 0;
686         KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
687         _kse_critical_leave(&curthread->tcb->tcb_tmbx);
688
689         /*
690          * This thread is being resumed; check for cancellations.
691          */
692         if (THR_NEED_ASYNC_CANCEL(curthread) && !THR_IN_CRITICAL(curthread)) {
693                 uc = alloca(sizeof(ucontext_t));
694                 resume_once = 0;
695                 THR_GETCONTEXT(uc);
696                 if (resume_once == 0) {
697                         resume_once = 1;
698                         curthread->check_pending = 0;
699                         thr_resume_check(curthread, uc);
700                 }
701         }
702         THR_ACTIVATE_LAST_LOCK(curthread);
703 }
704
705 /*
706  * This is the scheduler for a KSE which runs a scope system thread.
707  * The multi-thread KSE scheduler should also work for a single threaded
708  * KSE, but we use a separate scheduler so that it can be fine-tuned
709  * to be more efficient (and perhaps not need a separate stack for
710  * the KSE, allowing it to use the thread's stack).
711  */
712
713 static void
714 kse_sched_single(struct kse_mailbox *kmbx)
715 {
716         struct kse *curkse;
717         struct pthread *curthread;
718         struct timespec ts;
719         sigset_t sigmask;
720         int i, sigseqno, level, first = 0;
721
722         curkse = (struct kse *)kmbx->km_udata;
723         curthread = curkse->k_curthread;
724
725         if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
726                 /* Set up this KSE's specific data. */
727                 _kcb_set(curkse->k_kcb);
728                 _tcb_set(curkse->k_kcb, curthread->tcb);
729                 curkse->k_flags |= KF_INITIALIZED;
730                 first = 1;
731                 curthread->active = 1;
732
733                 /* Setup kernel signal masks for new thread. */
734                 __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
735                 /*
736                  * Enter a critical region.  This is meaningless for a bound
737                  * thread, but it keeps other code working; that code expects
738                  * the mailbox to be cleared.
739                  */
740                 (void)_kse_critical_enter();
741         } else {
742                 /*
743                  * A bound thread always has its tcb set; this prevents
744                  * some (possibly buggy?) code from blindly setting the
745                  * bound thread's tcb to NULL.
746                  */
747                 _tcb_set(curkse->k_kcb, curthread->tcb);
748         }
749
750         curthread->critical_yield = 0;
751         curthread->need_switchout = 0;
752
753         /*
754          * Lock the scheduling queue.
755          *
756          * There is no scheduling queue for single threaded KSEs,
757          * but we need a lock for protection regardless.
758          */
759         if (curthread->lock_switch == 0)
760                 KSE_SCHED_LOCK(curkse, curkse->k_kseg);
761
762         /*
763          * This has to do the job of kse_switchout_thread(), only
764          * for a single threaded KSE/KSEG.
765          */
766
767         switch (curthread->state) {
768         case PS_MUTEX_WAIT:
769         case PS_COND_WAIT:
770                 if (THR_NEED_CANCEL(curthread)) {
771                         curthread->interrupted = 1;
772                         curthread->continuation = _thr_finish_cancellation;
773                         THR_SET_STATE(curthread, PS_RUNNING);
774                 }
775                 break;
776
777         case PS_LOCKWAIT:
778                 /*
779                  * This state doesn't time out.
780                  */
781                 curthread->wakeup_time.tv_sec = -1;
782                 curthread->wakeup_time.tv_nsec = -1;
783                 level = curthread->locklevel - 1;
784                 if (_LCK_GRANTED(&curthread->lockusers[level]))
785                         THR_SET_STATE(curthread, PS_RUNNING);
786                 break;
787
788         case PS_DEAD:
789                 /* Unlock the scheduling queue and exit the KSE and thread. */
790                 thr_cleanup(curkse, curthread);
791                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
792                 PANIC("bound thread shouldn't get here\n");
793                 break;
794
795         case PS_JOIN:
796                 if (THR_NEED_CANCEL(curthread)) {
797                         curthread->join_status.thread = NULL;
798                         THR_SET_STATE(curthread, PS_RUNNING);
799                 } else {
800                         /*
801                          * This state doesn't time out.
802                          */
803                         curthread->wakeup_time.tv_sec = -1;
804                         curthread->wakeup_time.tv_nsec = -1;
805                 }
806                 break;
807
808         case PS_SUSPENDED:
809                 if (THR_NEED_CANCEL(curthread)) {
810                         curthread->interrupted = 1;
811                         THR_SET_STATE(curthread, PS_RUNNING);
812                 } else {
813                         /*
814                          * These states don't time out.
815                          */
816                         curthread->wakeup_time.tv_sec = -1;
817                         curthread->wakeup_time.tv_nsec = -1;
818                 }
819                 break;
820
821         case PS_RUNNING:
822                 if ((curthread->flags & THR_FLAGS_SUSPENDED) != 0 &&
823                     !THR_NEED_CANCEL(curthread)) {
824                         THR_SET_STATE(curthread, PS_SUSPENDED);
825                         /*
826                          * These states don't time out.
827                          */
828                         curthread->wakeup_time.tv_sec = -1;
829                         curthread->wakeup_time.tv_nsec = -1;
830                 }
831                 break;
832
833         case PS_SIGWAIT:
834                 PANIC("bound thread does not have SIGWAIT state\n");
835
836         case PS_SLEEP_WAIT:
837                 PANIC("bound thread does not have SLEEP_WAIT state\n");
838
839         case PS_SIGSUSPEND:
840                 PANIC("bound thread does not have SIGSUSPEND state\n");
841         
842         case PS_DEADLOCK:
843                 /*
844                  * These states don't time out and don't need
845                  * to be in the waiting queue.
846                  */
847                 curthread->wakeup_time.tv_sec = -1;
848                 curthread->wakeup_time.tv_nsec = -1;
849                 break;
850
851         default:
852                 PANIC("Unknown state\n");
853                 break;
854         }
855
856         while (curthread->state != PS_RUNNING) {
857                 sigseqno = curkse->k_sigseqno;
858                 if (curthread->check_pending != 0) {
859                         /*
860                          * Install pending signals into the frame; this may
861                          * cause a mutex or condvar backout.
862                          */
863                         curthread->check_pending = 0;
864                         SIGFILLSET(sigmask);
865
866                         /*
867                          * Lock out kernel signal code when we are processing
868                          * signals, and get a fresh copy of the signal mask.
869                          */
870                         __sys_sigprocmask(SIG_SETMASK, &sigmask,
871                                           &curthread->sigmask);
872                         for (i = 1; i <= _SIG_MAXSIG; i++) {
873                                 if (SIGISMEMBER(curthread->sigmask, i))
874                                         continue;
875                                 if (SIGISMEMBER(curthread->sigpend, i))
876                                         (void)_thr_sig_add(curthread, i, 
877                                             &curthread->siginfo[i-1]);
878                         }
879                         __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask,
880                                 NULL);
881                         /* The above code might make the thread runnable. */
882                         if (curthread->state == PS_RUNNING)
883                                 break;
884                 }
885                 THR_DEACTIVATE_LAST_LOCK(curthread);
886                 kse_wait(curkse, curthread, sigseqno);
887                 THR_ACTIVATE_LAST_LOCK(curthread);
888                 if (curthread->wakeup_time.tv_sec >= 0) {
889                         KSE_GET_TOD(curkse, &ts);
890                         if (thr_timedout(curthread, &ts)) {
891                                 /* Indicate that the thread timed out: */
892                                 curthread->timeout = 1;
893                                 /* Make the thread runnable. */
894                                 THR_SET_STATE(curthread, PS_RUNNING);
895                         }
896                 }
897         }
898
899         if (curthread->lock_switch == 0) {
900                 /* Unlock the scheduling queue. */
901                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
902         }
903
904         DBG_MSG("Continuing bound thread %p\n", curthread);
905         if (first) {
906                 _kse_critical_leave(&curthread->tcb->tcb_tmbx);
907                 pthread_exit(curthread->start_routine(curthread->arg));
908         }
909 }
910
911 #ifdef DEBUG_THREAD_KERN
912 static void
913 dump_queues(struct kse *curkse)
914 {
915         struct pthread *thread;
916
917         DBG_MSG("Threads in waiting queue:\n");
918         TAILQ_FOREACH(thread, &curkse->k_kseg->kg_schedq.sq_waitq, pqe) {
919                 DBG_MSG("  thread %p, state %d, blocked %d\n",
920                     thread, thread->state, thread->blocked);
921         }
922 }
923 #endif
924
925 /*
926  * This is the scheduler for a KSE which runs multiple threads.
927  */
928 static void
929 kse_sched_multi(struct kse_mailbox *kmbx)
930 {
931         struct kse *curkse;
932         struct pthread *curthread, *td_wait;
933         int ret;
934
935         curkse = (struct kse *)kmbx->km_udata;
936         THR_ASSERT(curkse->k_kcb->kcb_kmbx.km_curthread == NULL,
937             "Mailbox not null in kse_sched_multi");
938
939         /* Check for first time initialization: */
940         if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
941                 /* Set up this KSE's specific data. */
942                 _kcb_set(curkse->k_kcb);
943
944                 /* Set this before grabbing the context. */
945                 curkse->k_flags |= KF_INITIALIZED;
946         }
947
948         /*
949          * There is no current thread any more; calling _get_curthread()
950          * in the UTS should dump core.
951          */
952         _tcb_set(curkse->k_kcb, NULL);
953
954         /* If this is an upcall, take the scheduler lock. */
955         if (!KSE_IS_SWITCH(curkse))
956                 KSE_SCHED_LOCK(curkse, curkse->k_kseg);
957         else
958                 KSE_CLEAR_SWITCH(curkse);
959
960         if (KSE_IS_IDLE(curkse)) {
961                 KSE_CLEAR_IDLE(curkse);
962                 curkse->k_kseg->kg_idle_kses--;
963         }
964
965         /*
966          * Now that the scheduler lock is held, get the current
967          * thread.  The KSE's current thread cannot be safely
968          * examined without the lock because it could have returned
969          * as completed on another KSE.  See kse_check_completed().
970          */
971         curthread = curkse->k_curthread;
972
973         /*
974          * If the current thread was completed in another KSE, then
975          * it will be in the run queue.  Don't mark it as being blocked.
976          */
977         if ((curthread != NULL) &&
978             ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) &&
979             (curthread->need_switchout == 0)) {
980                 /*
981                  * Assume the current thread is blocked; when the
982                  * completed threads are checked and if the current
983                  * thread is among the completed, the blocked flag
984                  * will be cleared.
985                  */
986                 curthread->blocked = 1;
987                 DBG_MSG("Running thread %p is now blocked in kernel.\n",
988                     curthread);
989         }
990
991         /* Check for any unblocked threads in the kernel. */
992         kse_check_completed(curkse);
993
994         /*
995          * Check for threads that have timed-out.
996          */
997         kse_check_waitq(curkse);
998
999         /*
1000          * Switch out the current thread, if necessary, as the last step
1001          * so that it is inserted into the run queue (if it's runnable)
1002          * _after_ any other threads that were added to it above.
1003          */
1004         if (curthread == NULL)
1005                 ;  /* Nothing to do here. */
1006         else if ((curthread->need_switchout == 0) && DBG_CAN_RUN(curthread) &&
1007             (curthread->blocked == 0) && (THR_IN_CRITICAL(curthread))) {
1008                 /*
1009                  * Resume the thread and tell it to yield when
1010                  * it leaves the critical region.
1011                  */
1012                 curthread->critical_yield = 1;
1013                 curthread->active = 1;
1014                 if ((curthread->flags & THR_FLAGS_IN_RUNQ) != 0)
1015                         KSE_RUNQ_REMOVE(curkse, curthread);
1016                 curkse->k_curthread = curthread;
1017                 curthread->kse = curkse;
1018                 DBG_MSG("Continuing thread %p in critical region\n",
1019                     curthread);
1020                 kse_wakeup_multi(curkse);
1021                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1022                 ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1023                 if (ret != 0)
1024                         PANIC("Can't resume thread in critical region\n");
1025         }
1026         else if ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) {
1027                 curthread->tcb->tcb_tmbx.tm_lwp = 0;
1028                 kse_switchout_thread(curkse, curthread);
1029         }
1030         curkse->k_curthread = NULL;
1031
1032 #ifdef DEBUG_THREAD_KERN
1033         dump_queues(curkse);
1034 #endif
1035
1036         /* Check if there are no threads ready to run: */
1037         while (((curthread = KSE_RUNQ_FIRST(curkse)) == NULL) &&
1038             (curkse->k_kseg->kg_threadcount != 0) &&
1039             ((curkse->k_flags & KF_TERMINATED) == 0)) {
1040                 /*
1041                  * Wait for a thread to become active or until there are
1042                  * no more threads.
1043                  */
1044                 td_wait = KSE_WAITQ_FIRST(curkse);
1045                 kse_wait(curkse, td_wait, 0);
1046                 kse_check_completed(curkse);
1047                 kse_check_waitq(curkse);
1048         }
1049
1050         /* Check for no more threads: */
1051         if ((curkse->k_kseg->kg_threadcount == 0) ||
1052             ((curkse->k_flags & KF_TERMINATED) != 0)) {
1053                 /*
1054                  * Normally this shouldn't return, but it will if there
1055                  * are other KSEs running that create new threads that
1056                  * are assigned to this KSE[G].  For instance, if a scope
1057                  * system thread were to create a scope process thread
1058                  * and this kse[g] is the initial kse[g], then that newly
1059                  * created thread would be assigned to us (the initial
1060                  * kse[g]).
1061                  */
1062                 kse_wakeup_multi(curkse);
1063                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1064                 kse_fini(curkse);
1065                 /* never returns */
1066         }
1067
1068         THR_ASSERT(curthread != NULL,
1069             "Return from kse_wait/fini without thread.");
1070         THR_ASSERT(curthread->state != PS_DEAD,
1071             "Trying to resume dead thread!");
1072         KSE_RUNQ_REMOVE(curkse, curthread);
1073
1074         /*
1075          * Make the selected thread the current thread.
1076          */
1077         curkse->k_curthread = curthread;
1078
1079         /*
1080          * Make sure the current thread's kse points to this kse.
1081          */
1082         curthread->kse = curkse;
1083
1084         /*
1085          * Reset the time slice if this thread is running for the first
1086          * time or running again after using its full time slice allocation.
1087          */
1088         if (curthread->slice_usec == -1)
1089                 curthread->slice_usec = 0;
1090
1091         /* Mark the thread active. */
1092         curthread->active = 1;
1093
1094         /*
1095          * The thread's current signal frame will only be NULL if it
1096          * is being resumed after being blocked in the kernel.  In
1097          * this case, and if the thread needs to run down pending
1098          * signals or needs a cancellation check, we need to add a
1099          * signal frame to the thread's context.
1100          */
1101         if (curthread->lock_switch == 0 && curthread->state == PS_RUNNING &&
1102             (curthread->check_pending != 0 ||
1103              THR_NEED_ASYNC_CANCEL(curthread)) &&
1104             !THR_IN_CRITICAL(curthread)) {
1105                 curthread->check_pending = 0;
1106                 signalcontext(&curthread->tcb->tcb_tmbx.tm_context, 0,
1107                     (__sighandler_t *)thr_resume_wrapper);
1108         }
1109         kse_wakeup_multi(curkse);
1110         /*
1111          * Continue the thread at its current frame:
1112          */
1113         if (curthread->lock_switch != 0) {
1114                 /*
1115                  * This thread came from a scheduler switch; it will
1116                  * unlock the scheduler lock and set the mailbox.
1117                  */
1118                 ret = _thread_switch(curkse->k_kcb, curthread->tcb, 0);
1119         } else {
1120                 /* This thread won't unlock the scheduler lock. */
1121                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1122                 ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1123         }
1124         if (ret != 0)
1125                 PANIC("Thread has returned from _thread_switch");
1126
1127         /* This point should not be reached. */
1128         PANIC("Thread has returned from _thread_switch");
1129 }
1130
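/*
 * Signal frame wrapper installed by kse_sched_multi() via
 * signalcontext(): run down pending signals and cancellation checks,
 * then switch back to the interrupted context.
 */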
1131 static void
1132 thr_resume_wrapper(int sig, siginfo_t *siginfo, ucontext_t *ucp)
1133 {
1134         struct pthread *curthread = _get_curthread();
1135         struct kse *curkse;
1136         int ret, err_save = errno;
1137
1138         DBG_MSG(">>> sig wrapper\n");
1139         if (curthread->lock_switch)
1140                 PANIC("thr_resume_wrapper, lock_switch != 0\n");
1141         thr_resume_check(curthread, ucp);
1142         errno = err_save;
1143         _kse_critical_enter();
1144         curkse = curthread->kse;
1145         curthread->tcb->tcb_tmbx.tm_context = *ucp;
1146         ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1147         if (ret != 0)
1148                 PANIC("thr_resume_wrapper: thread has returned "
1149                       "from _thread_switch");
1150         /* THR_SETCONTEXT(ucp); */ /* doesn't work, why? */
1151 }
1152
1153 static void
1154 thr_resume_check(struct pthread *curthread, ucontext_t *ucp)
1155 {
1156         _thr_sig_rundown(curthread, ucp);
1157
1158         if (THR_NEED_ASYNC_CANCEL(curthread))
1159                 pthread_testcancel();
1160 }
1161
1162 /*
1163  * Clean up a thread.  This must be called with the thread's KSE
1164  * scheduling lock held.  The thread must be a thread from the
1165  * KSE's group.
1166  */
1167 static void
1168 thr_cleanup(struct kse *curkse, struct pthread *thread)
1169 {
1170         struct pthread *joiner;
1171         struct kse_mailbox *kmbx = NULL;
1172         int sys_scope;
1173
1174         thread->active = 0;
1175         thread->need_switchout = 0;
1176         thread->lock_switch = 0;
1177         thread->check_pending = 0;
1178
1179         if ((joiner = thread->joiner) != NULL) {
1180                 /* Joinee scheduler lock held; joiner won't leave. */
1181                 if (joiner->kseg == curkse->k_kseg) {
1182                         if (joiner->join_status.thread == thread) {
1183                                 joiner->join_status.thread = NULL;
1184                                 joiner->join_status.ret = thread->ret;
1185                                 (void)_thr_setrunnable_unlocked(joiner);
1186                         }
1187                 } else {
1188                         KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1189                         /* The joiner may have removed itself and exited. */
1190                         if (_thr_ref_add(thread, joiner, 0) == 0) {
1191                                 KSE_SCHED_LOCK(curkse, joiner->kseg);
1192                                 if (joiner->join_status.thread == thread) {
1193                                         joiner->join_status.thread = NULL;
1194                                         joiner->join_status.ret = thread->ret;
1195                                         kmbx = _thr_setrunnable_unlocked(joiner);
1196                                 }
1197                                 KSE_SCHED_UNLOCK(curkse, joiner->kseg);
1198                                 _thr_ref_delete(thread, joiner);
1199                                 if (kmbx != NULL)
1200                                         kse_wakeup(kmbx);
1201                         }
1202                         KSE_SCHED_LOCK(curkse, curkse->k_kseg);
1203                 }
1204                 thread->attr.flags |= PTHREAD_DETACHED;
1205         }
1206
1207         if (!(sys_scope = (thread->attr.flags & PTHREAD_SCOPE_SYSTEM))) {
1208                 /*
1209                  * Remove the thread from the KSEG's list of threads.
1210                  */
1211                 KSEG_THRQ_REMOVE(thread->kseg, thread);
1212                 /*
1213                  * Migrate the thread to the main KSE so that this
1214                  * KSE and KSEG can be cleaned when their last thread
1215                  * exits.
1216                  */
1217                 thread->kseg = _kse_initial->k_kseg;
1218                 thread->kse = _kse_initial;
1219         }
1220
1221         /*
1222          * We can't hold the thread list lock while holding the
1223          * scheduler lock.
1224          */
1225         KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1226         DBG_MSG("Adding thread %p to GC list\n", thread);
1227         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
1228         thread->tlflags |= TLFLAGS_GC_SAFE;
1229         THR_GCLIST_ADD(thread);
1230         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
1231         if (sys_scope) {
1232                 /*
1233                  * A system scope thread is in its own single-thread group;
1234                  * when the thread exits, its kse and ksegrp should be
1235                  * recycled as well.  The kse upcall stack belongs to the
1236                  * thread, so clear it here.
1237                  */
1238                 curkse->k_stack.ss_sp = 0;
1239                 curkse->k_stack.ss_size = 0;
1240                 kse_exit();
1241                 PANIC("kse_exit() failed for system scope thread");
1242         }
1243         KSE_SCHED_LOCK(curkse, curkse->k_kseg);
1244 }
1245
1246 void
1247 _thr_gc(struct pthread *curthread)
1248 {
1249         thread_gc(curthread);
1250         kse_gc(curthread);
1251         kseg_gc(curthread);
1252 }
1253
1254 static void
1255 thread_gc(struct pthread *curthread)
1256 {
1257         struct pthread *td, *td_next;
1258         kse_critical_t crit;
1259         TAILQ_HEAD(, pthread) worklist;
1260
1261         TAILQ_INIT(&worklist);
1262         crit = _kse_critical_enter();
1263         KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);
1264
1265         /* Check the threads waiting for GC. */
1266         for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = td_next) {
1267                 td_next = TAILQ_NEXT(td, gcle);
1268                 if ((td->tlflags & TLFLAGS_GC_SAFE) == 0)
1269                         continue;
1270                 else if (((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) &&
1271                     ((td->kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
1272                         /*
1273                          * The thread and KSE are operating on the same
1274                          * stack.  Wait for the KSE to exit before freeing
1275                          * the thread's stack as well as everything else.
1276                          */
1277                         continue;
1278                 }
1279                 /*
1280                  * Remove the thread from the GC list.  If the thread
1281                  * isn't yet detached, it will get added back to the
1282                  * GC list at a later time.
1283                  */
1284                 THR_GCLIST_REMOVE(td);
1285                 DBG_MSG("Freeing thread %p stack\n", td);
1286                 /*
1287                  * We can free the thread stack since it's no longer
1288                  * in use.
1289                  */
1290                 _thr_stack_free(&td->attr);
1291                 if (((td->attr.flags & PTHREAD_DETACHED) != 0) &&
1292                     (td->refcount == 0)) {
1293                         /*
1294                          * The thread has detached and is no longer
1295                          * referenced.  It is safe to remove all
1296                          * remnants of the thread.
1297                          */
1298                         THR_LIST_REMOVE(td);
1299                         TAILQ_INSERT_HEAD(&worklist, td, gcle);
1300                 }
1301         }
1302         KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
1303         _kse_critical_leave(crit);
1304
1305         while ((td = TAILQ_FIRST(&worklist)) != NULL) {
1306                 TAILQ_REMOVE(&worklist, td, gcle);
1307                 /*
1308                  * XXX we don't free the initial thread and its kse
1309                  * (if the thread is a bound thread), because there might
1310                  * be code still referencing the initial thread and kse.
1311                  */
1312                 if (td == _thr_initial) {
1313                         DBG_MSG("Initial thread won't be freed\n");
1314                         continue;
1315                 }
1316
1317                 if ((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1318                         crit = _kse_critical_enter();
1319                         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1320                         kse_free_unlocked(td->kse);
1321                         kseg_free_unlocked(td->kseg);
1322                         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1323                         _kse_critical_leave(crit);
1324                 }
1325                 DBG_MSG("Freeing thread %p\n", td);
1326                 _thr_free(curthread, td);
1327         }
1328 }
1329
1330 static void
1331 kse_gc(struct pthread *curthread)
1332 {
1333         kse_critical_t crit;
1334         TAILQ_HEAD(, kse) worklist;
1335         struct kse *kse;
1336
1337         if (free_kse_count <= MAX_CACHED_KSES)
1338                 return;
1339         TAILQ_INIT(&worklist);
1340         crit = _kse_critical_enter();
1341         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1342         while (free_kse_count > MAX_CACHED_KSES) {
1343                 kse = TAILQ_FIRST(&free_kseq);
1344                 TAILQ_REMOVE(&free_kseq, kse, k_qe);
1345                 TAILQ_INSERT_HEAD(&worklist, kse, k_qe);
1346                 free_kse_count--;
1347         }
1348         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1349         _kse_critical_leave(crit);
1350
1351         while ((kse = TAILQ_FIRST(&worklist))) {
1352                 TAILQ_REMOVE(&worklist, kse, k_qe);
1353                 kse_destroy(kse);
1354         }
1355 }
1356
1357 static void
1358 kseg_gc(struct pthread *curthread)
1359 {
1360         kse_critical_t crit;
1361         TAILQ_HEAD(, kse_group) worklist;
1362         struct kse_group *kseg;
1363
1364         if (free_kseg_count <= MAX_CACHED_KSEGS)
1365                 return; 
1366         TAILQ_INIT(&worklist);
1367         crit = _kse_critical_enter();
1368         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1369         while (free_kseg_count > MAX_CACHED_KSEGS) {
1370                 kseg = TAILQ_FIRST(&free_kse_groupq);
1371                 TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
1372                 free_kseg_count--;
1373                 TAILQ_INSERT_HEAD(&worklist, kseg, kg_qe);
1374         }
1375         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1376         _kse_critical_leave(crit);
1377
1378         while ((kseg = TAILQ_FIRST(&worklist))) {
1379                 TAILQ_REMOVE(&worklist, kseg, kg_qe);
1380                 kseg_destroy(kseg);
1381         }
1382 }
1383
1384 /*
1385  * Only new threads that are running or suspended may be scheduled.
1386  */
1387 int
1388 _thr_schedule_add(struct pthread *curthread, struct pthread *newthread)
1389 {
1390         kse_critical_t crit;
1391         int ret;
1392
1393         /* Add the new thread. */
1394         thr_link(newthread);
1395
1396         /*
1397          * A system scope (1:1) thread is bound to its own KSE, so
1398          * set up that KSE's mailbox and start it directly.
1399          */
1400         if ((newthread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1401                 /* We use the thread's stack as the KSE's stack. */
1402                 newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_sp =
1403                     newthread->attr.stackaddr_attr;
1404                 newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_size =
1405                     newthread->attr.stacksize_attr;
1406
1407                 /*
1408                  * No need to lock the scheduling queue since the
1409                  * KSE/KSEG pair have not yet been started.
1410                  */
1411                 KSEG_THRQ_ADD(newthread->kseg, newthread);
1412                 /* This thread never gives up its KSE. */
1413                 newthread->active = 1;
1414                 newthread->kse->k_curthread = newthread;
1415                 newthread->kse->k_kcb->kcb_kmbx.km_flags = KMF_BOUND;
1416                 newthread->kse->k_kcb->kcb_kmbx.km_func =
1417                     (kse_func_t *)kse_sched_single;
1418                 newthread->kse->k_kcb->kcb_kmbx.km_quantum = 0;
1419                 KSE_SET_MBOX(newthread->kse, newthread);
1420                 /*
1421                  * This thread needs a new KSE and KSEG.
1422                  */
1423                 newthread->kse->k_flags &= ~KF_INITIALIZED;
1424                 newthread->kse->k_flags |= KF_STARTED;
1425                 /* Fire up! */
1426                 ret = kse_create(&newthread->kse->k_kcb->kcb_kmbx, 1);
1427                 if (ret != 0)
1428                         ret = errno;
1429         }
1430         else {
1431                 /*
1432                  * Lock the KSE and add the new thread to its list of
1433                  * assigned threads.  If the new thread is runnable, also
1434                  * add it to the KSE's run queue.
1435                  */
1436                 crit = _kse_critical_enter();
1437                 KSE_SCHED_LOCK(curthread->kse, newthread->kseg);
1438                 KSEG_THRQ_ADD(newthread->kseg, newthread);
1439                 if (newthread->state == PS_RUNNING)
1440                         THR_RUNQ_INSERT_TAIL(newthread);
1441                 if ((newthread->kse->k_flags & KF_STARTED) == 0) {
1442                         /*
1443                          * This KSE hasn't been started yet.  Start it
1444                          * outside of holding the lock.
1445                          */
1446                         newthread->kse->k_flags |= KF_STARTED;
1447                         newthread->kse->k_kcb->kcb_kmbx.km_func =
1448                             (kse_func_t *)kse_sched_multi;
1449                         newthread->kse->k_kcb->kcb_kmbx.km_flags = 0;
1450                         kse_create(&newthread->kse->k_kcb->kcb_kmbx, 0);
1451                 } else if ((newthread->state == PS_RUNNING) &&
1452                      KSE_IS_IDLE(newthread->kse)) {
1453                         /*
1454                          * The thread is being scheduled on another KSEG.
1455                          */
1456                         kse_wakeup_one(newthread);
1457                 }
1458                 KSE_SCHED_UNLOCK(curthread->kse, newthread->kseg);
1459                 _kse_critical_leave(crit);
1460                 ret = 0;
1461         }
1462         if (ret != 0)
1463                 thr_unlink(newthread);
1464
1465         return (ret);
1466 }
1467
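     /*
      * Insert a thread into its KSE's wait queue.  Threads with no
      * wakeup time (tv_sec == -1) go to the tail; all others are kept
      * in order of increasing wakeup time.
      */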
1468 void
1469 kse_waitq_insert(struct pthread *thread)
1470 {
1471         struct pthread *td;
1472
1473         if (thread->wakeup_time.tv_sec == -1)
1474                 TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, thread,
1475                     pqe);
1476         else {
1477                 td = TAILQ_FIRST(&thread->kse->k_schedq->sq_waitq);
1478                 while ((td != NULL) && (td->wakeup_time.tv_sec != -1) &&
1479                     ((td->wakeup_time.tv_sec < thread->wakeup_time.tv_sec) ||
1480                     ((td->wakeup_time.tv_sec == thread->wakeup_time.tv_sec) &&
1481                     (td->wakeup_time.tv_nsec <= thread->wakeup_time.tv_nsec))))
1482                         td = TAILQ_NEXT(td, pqe);
1483                 if (td == NULL)
1484                         TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq,
1485                             thread, pqe);
1486                 else
1487                         TAILQ_INSERT_BEFORE(td, thread, pqe);
1488         }
1489         thread->flags |= THR_FLAGS_IN_WAITQ;
1490 }
1491
1492 /*
1493  * This must be called with the scheduling lock held.
1494  */
1495 static void
1496 kse_check_completed(struct kse *kse)
1497 {
1498         struct pthread *thread;
1499         struct kse_thr_mailbox *completed;
1500         int sig;
1501
1502         if ((completed = kse->k_kcb->kcb_kmbx.km_completed) != NULL) {
1503                 kse->k_kcb->kcb_kmbx.km_completed = NULL;
1504                 while (completed != NULL) {
1505                         thread = completed->tm_udata;
1506                         DBG_MSG("Found completed thread %p, name %s\n",
1507                             thread,
1508                             (thread->name == NULL) ? "none" : thread->name);
1509                         thread->blocked = 0;
1510                         if (thread != kse->k_curthread) {
1511                                 thr_accounting(thread);
1512                                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
1513                                         THR_SET_STATE(thread, PS_SUSPENDED);
1514                                 else
1515                                         KSE_RUNQ_INSERT_TAIL(kse, thread);
1516                                 if ((thread->kse != kse) &&
1517                                     (thread->kse->k_curthread == thread)) {
1518                                         /*
1519                                          * Remove this thread from its
1520                                          * previous KSE so that it (the KSE)
1521                                          * doesn't think it is still active.
1522                                          */
1523                                         thread->kse->k_curthread = NULL;
1524                                         thread->active = 0;
1525                                 }
1526                         }
1527                         if ((sig = thread->tcb->tcb_tmbx.tm_syncsig.si_signo)
1528                             != 0) {
1529                                 if (SIGISMEMBER(thread->sigmask, sig))
1530                                         SIGADDSET(thread->sigpend, sig);
1531                                 else if (THR_IN_CRITICAL(thread))
1532                                         kse_thr_interrupt(NULL, KSE_INTR_SIGEXIT, sig);
1533                                 else
1534                                         (void)_thr_sig_add(thread, sig,
1535                                             &thread->tcb->tcb_tmbx.tm_syncsig);
1536                                 thread->tcb->tcb_tmbx.tm_syncsig.si_signo = 0;
1537                         }
1538                         completed = completed->tm_next;
1539                 }
1540         }
1541 }
1542
1543 /*
1544  * This must be called with the scheduling lock held.
1545  */
1546 static void
1547 kse_check_waitq(struct kse *kse)
1548 {
1549         struct pthread  *pthread;
1550         struct timespec ts;
1551
1552         KSE_GET_TOD(kse, &ts);
1553
1554         /*
1555          * Wake up threads that have timed out.  This has to be
1556          * done before adding the current thread to the run queue
1557          * so that a CPU-intensive thread doesn't get preference
1558          * over waiting threads.
1559          */
1560         while (((pthread = KSE_WAITQ_FIRST(kse)) != NULL) &&
1561             thr_timedout(pthread, &ts)) {
1562                 /* Remove the thread from the wait queue: */
1563                 KSE_WAITQ_REMOVE(kse, pthread);
1564                 DBG_MSG("Found timedout thread %p in waitq\n", pthread);
1565
1566                 /* Indicate that the thread timed out: */
1567                 pthread->timeout = 1;
1568
1569                 /* Add the thread to the priority queue: */
1570                 if ((pthread->flags & THR_FLAGS_SUSPENDED) != 0)
1571                         THR_SET_STATE(pthread, PS_SUSPENDED);
1572                 else {
1573                         THR_SET_STATE(pthread, PS_RUNNING);
1574                         KSE_RUNQ_INSERT_TAIL(kse, pthread);
1575                 }
1576         }
1577 }
1578
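     /*
      * Return non-zero if the thread's wakeup time has already passed
      * relative to curtime.  A negative tv_sec means the thread has no
      * timeout and never expires.
      */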
1579 static int
1580 thr_timedout(struct pthread *thread, struct timespec *curtime)
1581 {
1582         if (thread->wakeup_time.tv_sec < 0)
1583                 return (0);
1584         else if (thread->wakeup_time.tv_sec > curtime->tv_sec)
1585                 return (0);
1586         else if ((thread->wakeup_time.tv_sec == curtime->tv_sec) &&
1587             (thread->wakeup_time.tv_nsec > curtime->tv_nsec))
1588                 return (0);
1589         else
1590                 return (1);
1591 }
1592
1593 /*
1594  * This must be called with the scheduling lock held.
1595  *
1596  * Each thread has a time slice, a wakeup time (used when it wants
1597  * to wait for a specified amount of time), a run state, and an
1598  * active flag.
1599  *
1600  * When a thread gets run by the scheduler, the active flag is
1601  * set to non-zero (1).  When a thread performs an explicit yield
1602  * or schedules a state change, it enters the scheduler and the
1603  * active flag is cleared.  When the active flag is still seen
1604  * set in the scheduler, that means that the thread is blocked in
1605  * the kernel (because it is cleared before entering the scheduler
1606  * in all other instances).
1607  *
1608  * The wakeup time is only set for those states that can time out.
1609  * It is set to (-1, -1) for all other instances.
1610  *
1611  * The thread's run state, aside from being useful when debugging,
1612  * is used to place the thread in an appropriate queue.  There
1613  * are 2 basic queues:
1614  *
1615  *   o run queue - queue ordered by priority for all threads
1616  *                 that are runnable
1617  *   o waiting queue - queue sorted by wakeup time for all threads
1618  *                     that are not otherwise runnable (not blocked
1619  *                     in kernel, not waiting for locks)
1620  *
1621  * The thread's time slice is used for round-robin scheduling
1622  * (the default scheduling policy).  While a SCHED_RR thread
1623  * is runnable its time slice accumulates.  When it reaches
1624  * the time slice interval, it gets reset and the thread is added
1625  * to the end of the queue of threads at its priority.  When a
1626  * thread is no longer runnable (blocks in the kernel, waits,
1627  * etc.), its time slice is reset.
1628  *
1629  * The job of kse_switchout_thread() is to handle all of the above.
1630  */
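     /*
      * In outline, the switch below handles the states as follows:
      *
      *   blocked in the kernel      -> left alone; interrupted only if
      *                                 it is being cancelled or has a
      *                                 pending unblocked signal
      *   waits that can time out    -> wait queue, ordered by wakeup time
      *   waits that cannot time out -> wait queue, wakeup time (-1, -1)
      *   PS_RUNNING                 -> run queue (tail if the slice was
      *                                 used up or the thread yielded,
      *                                 head otherwise)
      *   PS_DEAD                    -> garbage collected via thr_cleanup()
      */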
1631 static void
1632 kse_switchout_thread(struct kse *kse, struct pthread *thread)
1633 {
1634         int level;
1635         int i;
1636         int restart;
1637         siginfo_t siginfo;
1638
1639         /*
1640          * Place the currently running thread into the
1641          * appropriate queue(s).
1642          */
1643         DBG_MSG("Switching out thread %p, state %d\n", thread, thread->state);
1644
1645         THR_DEACTIVATE_LAST_LOCK(thread);
1646         if (thread->blocked != 0) {
1647                 thread->active = 0;
1648                 thread->need_switchout = 0;
1649                 /* This thread must have blocked in the kernel. */
1650                 /*
1651                  * Check for pending signals and cancellation for
1652                  * this thread to see if we need to interrupt it
1653                  * in the kernel.
1654                  */
1655                 if (THR_NEED_CANCEL(thread)) {
1656                         kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1657                                           KSE_INTR_INTERRUPT, 0);
1658                 } else if (thread->check_pending != 0) {
1659                         for (i = 1; i <= _SIG_MAXSIG; ++i) {
1660                                 if (SIGISMEMBER(thread->sigpend, i) &&
1661                                     !SIGISMEMBER(thread->sigmask, i)) {
1662                                         restart = _thread_sigact[i - 1].sa_flags & SA_RESTART;
1663                                         kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1664                                             restart ? KSE_INTR_RESTART : KSE_INTR_INTERRUPT, 0);
1665                                         break;
1666                                 }
1667                         }
1668                 }
1669         }
1670         else {
1671                 switch (thread->state) {
1672                 case PS_MUTEX_WAIT:
1673                 case PS_COND_WAIT:
1674                         if (THR_NEED_CANCEL(thread)) {
1675                                 thread->interrupted = 1;
1676                                 thread->continuation = _thr_finish_cancellation;
1677                                 THR_SET_STATE(thread, PS_RUNNING);
1678                         } else {
1679                                 /* Insert into the waiting queue: */
1680                                 KSE_WAITQ_INSERT(kse, thread);
1681                         }
1682                         break;
1683
1684                 case PS_LOCKWAIT:
1685                         /*
1686                          * This state doesn't time out.
1687                          */
1688                         thread->wakeup_time.tv_sec = -1;
1689                         thread->wakeup_time.tv_nsec = -1;
1690                         level = thread->locklevel - 1;
1691                         if (!_LCK_GRANTED(&thread->lockusers[level]))
1692                                 KSE_WAITQ_INSERT(kse, thread);
1693                         else
1694                                 THR_SET_STATE(thread, PS_RUNNING);
1695                         break;
1696
1697                 case PS_SLEEP_WAIT:
1698                 case PS_SIGWAIT:
1699                         if (THR_NEED_CANCEL(thread)) {
1700                                 thread->interrupted = 1;
1701                                 THR_SET_STATE(thread, PS_RUNNING);
1702                         } else {
1703                                 KSE_WAITQ_INSERT(kse, thread);
1704                         }
1705                         break;
1706
1707                 case PS_JOIN:
1708                         if (THR_NEED_CANCEL(thread)) {
1709                                 thread->join_status.thread = NULL;
1710                                 THR_SET_STATE(thread, PS_RUNNING);
1711                         } else {
1712                                 /*
1713                                  * This state doesn't time out.
1714                                  */
1715                                 thread->wakeup_time.tv_sec = -1;
1716                                 thread->wakeup_time.tv_nsec = -1;
1717
1718                                 /* Insert into the waiting queue: */
1719                                 KSE_WAITQ_INSERT(kse, thread);
1720                         }
1721                         break;
1722
1723                 case PS_SIGSUSPEND:
1724                 case PS_SUSPENDED:
1725                         if (THR_NEED_CANCEL(thread)) {
1726                                 thread->interrupted = 1;
1727                                 THR_SET_STATE(thread, PS_RUNNING);
1728                         } else {
1729                                 /*
1730                                  * These states don't time out.
1731                                  */
1732                                 thread->wakeup_time.tv_sec = -1;
1733                                 thread->wakeup_time.tv_nsec = -1;
1734
1735                                 /* Insert into the waiting queue: */
1736                                 KSE_WAITQ_INSERT(kse, thread);
1737                         }
1738                         break;
1739
1740                 case PS_DEAD:
1741                         /*
1742                          * The scheduler is operating on a different
1743                          * stack.  It is safe to do garbage collecting
1744                          * here.
1745                          */
1746                         thr_cleanup(kse, thread);
1747                         return;
1748                         break;
1749
1750                 case PS_RUNNING:
1751                         if ((thread->flags & THR_FLAGS_SUSPENDED) != 0 &&
1752                             !THR_NEED_CANCEL(thread))
1753                                 THR_SET_STATE(thread, PS_SUSPENDED);
1754                         break;
1755
1756                 case PS_DEADLOCK:
1757                         /*
1758                          * These states don't time out.
1759                          */
1760                         thread->wakeup_time.tv_sec = -1;
1761                         thread->wakeup_time.tv_nsec = -1;
1762
1763                         /* Insert into the waiting queue: */
1764                         KSE_WAITQ_INSERT(kse, thread);
1765                         break;
1766
1767                 default:
1768                         PANIC("Unknown state\n");
1769                         break;
1770                 }
1771
1772                 thr_accounting(thread);
1773                 if (thread->state == PS_RUNNING) {
1774                         if (thread->slice_usec == -1) {
1775                                 /*
1776                                  * The thread exceeded its time quantum or
1777                                  * it yielded the CPU; place it at the tail
1778                                  * of the queue for its priority.
1779                                  */
1780                                 KSE_RUNQ_INSERT_TAIL(kse, thread);
1781                         } else {
1782                                 /*
1783                  * The thread hasn't exceeded its interval.
1784                                  * Place it at the head of the queue for its
1785                                  * priority.
1786                                  */
1787                                 KSE_RUNQ_INSERT_HEAD(kse, thread);
1788                         }
1789                 }
1790         }
1791         thread->active = 0;
1792         thread->need_switchout = 0;
1793         if (thread->check_pending != 0) {
1794                 /* Install pending signals into the frame. */
1795                 thread->check_pending = 0;
1796                 KSE_LOCK_ACQUIRE(kse, &_thread_signal_lock);
1797                 for (i = 1; i <= _SIG_MAXSIG; i++) {
1798                         if (SIGISMEMBER(thread->sigmask, i))
1799                                 continue;
1800                         if (SIGISMEMBER(thread->sigpend, i))
1801                                 (void)_thr_sig_add(thread, i,
1802                                     &thread->siginfo[i-1]);
1803                         else if (SIGISMEMBER(_thr_proc_sigpending, i) &&
1804                                 _thr_getprocsig_unlocked(i, &siginfo)) {
1805                                 (void)_thr_sig_add(thread, i, &siginfo);
1806                         }
1807                 }
1808                 KSE_LOCK_RELEASE(kse, &_thread_signal_lock);
1809         }
1810 }
1811
1812 /*
1813  * This function waits for the smallest timeout value of any waiting
1814  * thread, or until it receives a message from another KSE.
1815  *
1816  * This must be called with the scheduling lock held.
1817  */
1818 static void
1819 kse_wait(struct kse *kse, struct pthread *td_wait, int sigseqno)
1820 {
1821         struct timespec ts, ts_sleep;
1822         int saved_flags;
1823
1824         if ((td_wait == NULL) || (td_wait->wakeup_time.tv_sec < 0)) {
1825                 /* Limit sleep to no more than 1 minute. */
1826                 ts_sleep.tv_sec = 60;
1827                 ts_sleep.tv_nsec = 0;
1828         } else {
1829                 KSE_GET_TOD(kse, &ts);
1830                 TIMESPEC_SUB(&ts_sleep, &td_wait->wakeup_time, &ts);
1831                 if (ts_sleep.tv_sec > 60) {
1832                         ts_sleep.tv_sec = 60;
1833                         ts_sleep.tv_nsec = 0;
1834                 }
1835         }
1836         /* Don't sleep for negative times. */
1837         if ((ts_sleep.tv_sec >= 0) && (ts_sleep.tv_nsec >= 0)) {
1838                 KSE_SET_IDLE(kse);
1839                 kse->k_kseg->kg_idle_kses++;
1840                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1841                 if ((kse->k_kseg->kg_flags & KGF_SINGLE_THREAD) &&
1842                     (kse->k_sigseqno != sigseqno))
1843                         ; /* don't sleep */
1844                 else {
1845                         saved_flags = kse->k_kcb->kcb_kmbx.km_flags;
1846                         kse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL;
1847                         kse_release(&ts_sleep);
1848                         kse->k_kcb->kcb_kmbx.km_flags = saved_flags;
1849                 }
1850                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1851                 if (KSE_IS_IDLE(kse)) {
1852                         KSE_CLEAR_IDLE(kse);
1853                         kse->k_kseg->kg_idle_kses--;
1854                 }
1855         }
1856 }
1857
1858 /*
1859  * Avoid calling this kse_exit() so as not to confuse it with the
1860  * system call of the same name.
1861  */
1862 static void
1863 kse_fini(struct kse *kse)
1864 {
1865         /* struct kse_group *free_kseg = NULL; */
1866         struct timespec ts;
1867         struct pthread *td;
1868
1869         /*
1870          * Check to see if this is one of the main kses.
1871          */
1872         if (kse->k_kseg != _kse_initial->k_kseg) {
1873                 PANIC("shouldn't get here");
1874                 /* This is for supporting thread groups. */
1875 #ifdef NOT_YET
1876                 /* Remove this KSE from the KSEG's list of KSEs. */
1877                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1878                 TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1879                 kse->k_kseg->kg_ksecount--;
1880                 if (TAILQ_EMPTY(&kse->k_kseg->kg_kseq))
1881                         free_kseg = kse->k_kseg;
1882                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1883
1884                 /*
1885                  * Add this KSE to the list of free KSEs along with
1886                  * the KSEG if it is now orphaned.
1887                  */
1888                 KSE_LOCK_ACQUIRE(kse, &kse_lock);
1889                 if (free_kseg != NULL)
1890                         kseg_free_unlocked(free_kseg);
1891                 kse_free_unlocked(kse);
1892                 KSE_LOCK_RELEASE(kse, &kse_lock);
1893                 kse_exit();
1894                 /* Never returns. */
1895                 PANIC("kse_exit()");
1896 #endif
1897         } else {
1898                 /*
1899                  * We allow the program to kill KSEs in the initial
1900                  * group (by lowering the concurrency).
1901                  */
1902                 if ((kse != _kse_initial) &&
1903                     ((kse->k_flags & KF_TERMINATED) != 0)) {
1904                         KSE_SCHED_LOCK(kse, kse->k_kseg);
1905                         TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1906                         kse->k_kseg->kg_ksecount--;
1907                         /*
1908                          * Migrate threads to _kse_initial if the last
1909                          * KSE they ran on is this KSE.
1910                          */
1911                         td = TAILQ_FIRST(&kse->k_kseg->kg_threadq);
1912                         while (td != NULL) {
1913                                 if (td->kse == kse)
1914                                         td->kse = _kse_initial;
1915                                 td = TAILQ_NEXT(td, kle);
1916                         }
1917                         KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1918                         KSE_LOCK_ACQUIRE(kse, &kse_lock);
1919                         kse_free_unlocked(kse);
1920                         KSE_LOCK_RELEASE(kse, &kse_lock);
1921                         /* Make sure there is always at least one KSE awake. */
1922                         KSE_WAKEUP(_kse_initial);
1923                         kse_exit();
1924                         /* Never returns. */
1925                         PANIC("kse_exit() failed for initial kseg");
1926                 }
1927                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1928                 KSE_SET_IDLE(kse);
1929                 kse->k_kseg->kg_idle_kses++;
1930                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1931                 ts.tv_sec = 120;
1932                 ts.tv_nsec = 0;
1933                 kse->k_kcb->kcb_kmbx.km_flags = 0;
1934                 kse_release(&ts);
1935                 /* Never returns. */
1936         }
1937 }
1938
1939 void
1940 _thr_set_timeout(const struct timespec *timeout)
1941 {
1942         struct pthread  *curthread = _get_curthread();
1943         struct timespec ts;
1944
1945         /* Reset the timeout flag for the running thread: */
1946         curthread->timeout = 0;
1947
1948         /* Check if the thread is to wait forever: */
1949         if (timeout == NULL) {
1950                 /*
1951                  * Set the wakeup time to something that can be recognised as
1952                  * different to an actual time of day:
1953                  */
1954                 curthread->wakeup_time.tv_sec = -1;
1955                 curthread->wakeup_time.tv_nsec = -1;
1956         }
1957         /* Check if no waiting is required: */
1958         else if ((timeout->tv_sec == 0) && (timeout->tv_nsec == 0)) {
1959                 /* Set the wake up time to 'immediately': */
1960                 curthread->wakeup_time.tv_sec = 0;
1961                 curthread->wakeup_time.tv_nsec = 0;
1962         } else {
1963                 /* Calculate the time for the current thread to wakeup: */
1964                 KSE_GET_TOD(curthread->kse, &ts);
1965                 TIMESPEC_ADD(&curthread->wakeup_time, &ts, timeout);
1966         }
1967 }
1968
1969 void
1970 _thr_panic_exit(char *file, int line, char *msg)
1971 {
1972         char buf[256];
1973
1974         snprintf(buf, sizeof(buf), "(%s:%d) %s\n", file, line, msg);
1975         __sys_write(2, buf, strlen(buf));
1976         abort();
1977 }
1978
1979 void
1980 _thr_setrunnable(struct pthread *curthread, struct pthread *thread)
1981 {
1982         kse_critical_t crit;
1983         struct kse_mailbox *kmbx;
1984
1985         crit = _kse_critical_enter();
1986         KSE_SCHED_LOCK(curthread->kse, thread->kseg);
1987         kmbx = _thr_setrunnable_unlocked(thread);
1988         KSE_SCHED_UNLOCK(curthread->kse, thread->kseg);
1989         _kse_critical_leave(crit);
1990         if ((kmbx != NULL) && (__isthreaded != 0))
1991                 kse_wakeup(kmbx);
1992 }
1993
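     /*
      * Make a thread runnable.  The caller is expected to hold the
      * thread's scheduling lock; the mailbox of the KSE that should be
      * woken to run the thread is returned, or NULL if no wakeup is
      * needed.
      */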
1994 struct kse_mailbox *
1995 _thr_setrunnable_unlocked(struct pthread *thread)
1996 {
1997         struct kse_mailbox *kmbx = NULL;
1998
1999         if ((thread->kseg->kg_flags & KGF_SINGLE_THREAD) != 0) {
2000                 /* No silly queues for these threads. */
2001                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
2002                         THR_SET_STATE(thread, PS_SUSPENDED);
2003                 else {
2004                         THR_SET_STATE(thread, PS_RUNNING);
2005                         kmbx = kse_wakeup_one(thread);
2006                 }
2007
2008         } else if (thread->state != PS_RUNNING) {
2009                 if ((thread->flags & THR_FLAGS_IN_WAITQ) != 0)
2010                         KSE_WAITQ_REMOVE(thread->kse, thread);
2011                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
2012                         THR_SET_STATE(thread, PS_SUSPENDED);
2013                 else {
2014                         THR_SET_STATE(thread, PS_RUNNING);
2015                         if ((thread->blocked == 0) && (thread->active == 0) &&
2016                             (thread->flags & THR_FLAGS_IN_RUNQ) == 0)
2017                                 THR_RUNQ_INSERT_TAIL(thread);
2018                         /*
2019                          * XXX - Threads are not yet assigned to specific
2020                          *       KSEs; they are assigned to the KSEG.  So
2021                          *       the fact that a thread's KSE is waiting
2022                          *       doesn't necessarily mean that it will be
2023                          *       the KSE that runs the thread after the
2024                          *       lock is granted.  But we don't know if the
2025                          *       other KSEs within the same KSEG are also
2026                          *       in a waiting state or not so we err on the
2027                          *       side of caution and wake up the thread's
2028                          *       last known KSE.  We ensure that the
2029                          *       thread's KSE doesn't change while its
2030                          *       scheduling lock is held so it is safe to
2031                          *       reference it (the KSE).  If the KSE wakes
2032                          *       up and doesn't find any more work it will
2033                          *       again go back to waiting so no harm is
2034                          *       done.
2035                          */
2036                         kmbx = kse_wakeup_one(thread);
2037                 }
2038         }
2039         return (kmbx);
2040 }
2041
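     /*
      * Find an idle KSE to run the thread, preferring the thread's last
      * KSE and falling back to any idle KSE in its group.  The chosen
      * KSE's idle flag is cleared and its mailbox returned; NULL is
      * returned if no KSE in the group is idle.
      */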
2042 static struct kse_mailbox *
2043 kse_wakeup_one(struct pthread *thread)
2044 {
2045         struct kse *ke;
2046
2047         if (KSE_IS_IDLE(thread->kse)) {
2048                 KSE_CLEAR_IDLE(thread->kse);
2049                 thread->kseg->kg_idle_kses--;
2050                 return (&thread->kse->k_kcb->kcb_kmbx);
2051         } else {
2052                 TAILQ_FOREACH(ke, &thread->kseg->kg_kseq, k_kgqe) {
2053                         if (KSE_IS_IDLE(ke)) {
2054                                 KSE_CLEAR_IDLE(ke);
2055                                 ke->k_kseg->kg_idle_kses--;
2056                                 return (&ke->k_kcb->kcb_kmbx);
2057                         }
2058                 }
2059         }
2060         return (NULL);
2061 }
2062
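     /*
      * Wake up as many idle KSEs in the current group as there are
      * threads on the current KSE's run queue.
      */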
2063 static void
2064 kse_wakeup_multi(struct kse *curkse)
2065 {
2066         struct kse *ke;
2067         int tmp;
2068
2069         if ((tmp = KSE_RUNQ_THREADS(curkse)) && curkse->k_kseg->kg_idle_kses) {
2070                 TAILQ_FOREACH(ke, &curkse->k_kseg->kg_kseq, k_kgqe) {
2071                         if (KSE_IS_IDLE(ke)) {
2072                                 KSE_CLEAR_IDLE(ke);
2073                                 ke->k_kseg->kg_idle_kses--;
2074                                 KSE_WAKEUP(ke);
2075                                 if (--tmp == 0)
2076                                         break;
2077                         }
2078                 }
2079         }
2080 }
2081
2082 /*
2083  * Allocate a new KSEG.
2084  *
2085  * We allow the current thread to be NULL in the case that this
2086  * is the first time a KSEG is being created (library initialization).
2087  * In this case, we don't need to (and can't) take any locks.
2088  */
2089 struct kse_group *
2090 _kseg_alloc(struct pthread *curthread)
2091 {
2092         struct kse_group *kseg = NULL;
2093         kse_critical_t crit;
2094
2095         if ((curthread != NULL) && (free_kseg_count > 0)) {
2096                 /* Use the kse lock for the kseg queue. */
2097                 crit = _kse_critical_enter();
2098                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2099                 if ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
2100                         TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
2101                         free_kseg_count--;
2102                         active_kseg_count++;
2103                         TAILQ_INSERT_TAIL(&active_kse_groupq, kseg, kg_qe);
2104                 }
2105                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2106                 _kse_critical_leave(crit);
2107                 if (kseg)
2108                         kseg_reinit(kseg);
2109         }
2110
2111         /*
2112          * If a cached KSE group wasn't found in the free list,
2113          * attempt to allocate a new one along with its run queue,
2114          * and initialize it.
2115          */
2116         if ((kseg == NULL) &&
2117             ((kseg = (struct kse_group *)malloc(sizeof(*kseg))) != NULL)) {
2118                 if (_pq_alloc(&kseg->kg_schedq.sq_runq,
2119                     THR_MIN_PRIORITY, THR_LAST_PRIORITY) != 0) {
2120                         free(kseg);
2121                         kseg = NULL;
2122                 } else {
2123                         kseg_init(kseg);
2124                         /* Add the KSEG to the list of active KSEGs. */
2125                         if (curthread != NULL) {
2126                                 crit = _kse_critical_enter();
2127                                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2128                                 active_kseg_count++;
2129                                 TAILQ_INSERT_TAIL(&active_kse_groupq,
2130                                     kseg, kg_qe);
2131                                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2132                                 _kse_critical_leave(crit);
2133                         } else {
2134                                 active_kseg_count++;
2135                                 TAILQ_INSERT_TAIL(&active_kse_groupq,
2136                                     kseg, kg_qe);
2137                         }
2138                 }
2139         }
2140         return (kseg);
2141 }
2142
2143 static void
2144 kseg_init(struct kse_group *kseg)
2145 {
2146         kseg_reinit(kseg);
2147         _lock_init(&kseg->kg_lock, LCK_ADAPTIVE, _kse_lock_wait,
2148             _kse_lock_wakeup, calloc);
2149 }
2150
2151 static void
2152 kseg_reinit(struct kse_group *kseg)
2153 {
2154         TAILQ_INIT(&kseg->kg_kseq);
2155         TAILQ_INIT(&kseg->kg_threadq);
2156         TAILQ_INIT(&kseg->kg_schedq.sq_waitq);
2157         kseg->kg_threadcount = 0;
2158         kseg->kg_ksecount = 0;
2159         kseg->kg_idle_kses = 0;
2160         kseg->kg_flags = 0;
2161 }
2162
2163 /*
2164  * This must be called with the kse lock held and when there are
2165  * no more threads that reference it.
2166  */
2167 static void
2168 kseg_free_unlocked(struct kse_group *kseg)
2169 {
2170         TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
2171         TAILQ_INSERT_HEAD(&free_kse_groupq, kseg, kg_qe);
2172         free_kseg_count++;
2173         active_kseg_count--;
2174 }
2175
2176 void
2177 _kseg_free(struct kse_group *kseg)
2178 {
2179         struct kse *curkse;
2180         kse_critical_t crit;
2181
2182         crit = _kse_critical_enter();
2183         curkse = _get_curkse();
2184         KSE_LOCK_ACQUIRE(curkse, &kse_lock);
2185         kseg_free_unlocked(kseg);
2186         KSE_LOCK_RELEASE(curkse, &kse_lock);
2187         _kse_critical_leave(crit);
2188 }
2189
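     /*
      * Free a cached KSE group: its lock, its run queue, and the
      * structure itself.
      */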
2190 static void
2191 kseg_destroy(struct kse_group *kseg)
2192 {
2193         _lock_destroy(&kseg->kg_lock);
2194         _pq_free(&kseg->kg_schedq.sq_runq);
2195         free(kseg);
2196 }
2197
2198 /*
2199  * Allocate a new KSE.
2200  *
2201  * We allow the current thread to be NULL in the case that this
2202  * is the first time a KSE is being created (library initialization).
2203  * In this case, we don't need to (and can't) take any locks.
2204  */
2205 struct kse *
2206 _kse_alloc(struct pthread *curthread, int sys_scope)
2207 {
2208         struct kse *kse = NULL;
2209         char *stack;
2210         kse_critical_t crit;
2211         int i;
2212
2213         if ((curthread != NULL) && (free_kse_count > 0)) {
2214                 crit = _kse_critical_enter();
2215                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2216                 /* Search for a finished KSE. */
2217                 kse = TAILQ_FIRST(&free_kseq);
2218                 while ((kse != NULL) &&
2219                     ((kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
2220                         kse = TAILQ_NEXT(kse, k_qe);
2221                 }
2222                 if (kse != NULL) {
2223                         DBG_MSG("found an unused kse.\n");
2224                         TAILQ_REMOVE(&free_kseq, kse, k_qe);
2225                         free_kse_count--;
2226                         TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2227                         active_kse_count++;
2228                 }
2229                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2230                 _kse_critical_leave(crit);
2231                 if (kse != NULL)
2232                         kse_reinit(kse, sys_scope);
2233         }
2234         if ((kse == NULL) &&
2235             ((kse = (struct kse *)malloc(sizeof(*kse))) != NULL)) {
2236                 if (sys_scope != 0)
2237                         stack = NULL;
2238                 else if ((stack = malloc(KSE_STACKSIZE)) == NULL) {
2239                         free(kse);
2240                         return (NULL);
2241                 }
2242                 bzero(kse, sizeof(*kse));
2243
2244                 /* Initialize KCB without the lock. */
2245                 if ((kse->k_kcb = _kcb_ctor(kse)) == NULL) {
2246                         if (stack != NULL)
2247                                 free(stack);
2248                         free(kse);
2249                         return (NULL);
2250                 }
2251
2252                 /* Initialize the lockusers. */
2253                 for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
2254                         _lockuser_init(&kse->k_lockusers[i], (void *)kse);
2255                         _LCK_SET_PRIVATE2(&kse->k_lockusers[i], NULL);
2256                 }
2257                 /* _lock_init(kse->k_lock, ...) */
2258
2259                 if (curthread != NULL) {
2260                         crit = _kse_critical_enter();
2261                         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2262                 }
2263                 kse->k_flags = 0;
2264                 TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2265                 active_kse_count++;
2266                 if (curthread != NULL) {
2267                         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2268                         _kse_critical_leave(crit);
2269                 }
2270                 /*
2271                  * Create the KSE context.
2272                  * Scope system threads (one thread per KSE) are not required
2273                  * to have a stack for an unneeded kse upcall.
2274                  */
2275                 if (!sys_scope) {
2276                         kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2277                         kse->k_stack.ss_sp = stack;
2278                         kse->k_stack.ss_size = KSE_STACKSIZE;
2279                 } else {
2280                         kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2281                         kse->k_stack.ss_sp = NULL;
2282                         kse->k_stack.ss_size = 0;
2283                 }
2284                 kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2285                 kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2286                 /*
2287                  * We need to keep a copy of the stack in case it
2288                  * doesn't get used; a KSE running a scope system
2289                  * thread will use that thread's stack.
2290                  */
2291                 kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2292         }
2293         return (kse);
2294 }
2295
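     /*
      * Reset a cached KSE for reuse.  Process scope KSEs get (or keep)
      * an upcall stack and run kse_sched_multi; system scope KSEs run
      * kse_sched_single, need no upcall stack, and use a zero quantum.
      */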
2296 static void
2297 kse_reinit(struct kse *kse, int sys_scope)
2298 {
2299         if (!sys_scope) {
2300                 kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2301                 if (kse->k_stack.ss_sp == NULL) {
2302                         /* XXX check allocation failure */
2303                         kse->k_stack.ss_sp = (char *) malloc(KSE_STACKSIZE);
2304                         kse->k_stack.ss_size = KSE_STACKSIZE;
2305                 }
2306                 kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2307         } else {
2308                 kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2309                 if (kse->k_stack.ss_sp)
2310                         free(kse->k_stack.ss_sp);
2311                 kse->k_stack.ss_sp = NULL;
2312                 kse->k_stack.ss_size = 0;
2313                 kse->k_kcb->kcb_kmbx.km_quantum = 0;
2314         }
2315         kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2316         kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2317         kse->k_kcb->kcb_kmbx.km_curthread = NULL;
2318         kse->k_kcb->kcb_kmbx.km_flags = 0;
2319         kse->k_curthread = NULL;
2320         kse->k_kseg = 0;
2321         kse->k_schedq = 0;
2322         kse->k_locklevel = 0;
2323         kse->k_flags = 0;
2324         kse->k_error = 0;
2325         kse->k_cpu = 0;
2326         kse->k_sigseqno = 0;
2327 }
2328
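     /*
      * Move a KSE from the active list to the free KSE cache.  The
      * caller is expected to hold the kse lock, except during library
      * initialization when there is no current thread.
      */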
2329 void
2330 kse_free_unlocked(struct kse *kse)
2331 {
2332         TAILQ_REMOVE(&active_kseq, kse, k_qe);
2333         active_kse_count--;
2334         kse->k_kseg = NULL;
2335         kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2336         kse->k_flags = 0;
2337         TAILQ_INSERT_HEAD(&free_kseq, kse, k_qe);
2338         free_kse_count++;
2339 }
2340
2341 void
2342 _kse_free(struct pthread *curthread, struct kse *kse)
2343 {
2344         kse_critical_t crit;
2345
2346         if (curthread == NULL)
2347                 kse_free_unlocked(kse);
2348         else {
2349                 crit = _kse_critical_enter();
2350                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2351                 kse_free_unlocked(kse);
2352                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2353                 _kse_critical_leave(crit);
2354         }
2355 }
2356
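     /*
      * Free all resources held by a KSE: its upcall stack (if any),
      * its KCB, its lockusers, its lock, and the structure itself.
      */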
2357 static void
2358 kse_destroy(struct kse *kse)
2359 {
2360         int i;
2361
2362         if (kse->k_stack.ss_sp != NULL)
2363                 free(kse->k_stack.ss_sp);
2364         _kcb_dtor(kse->k_kcb);
2365         for (i = 0; i < MAX_KSE_LOCKLEVEL; ++i)
2366                 _lockuser_destroy(&kse->k_lockusers[i]);
2367         _lock_destroy(&kse->k_lock);
2368         free(kse);
2369 }
2370
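     /*
      * Allocate a thread structure, reusing one from the free-thread
      * cache when possible.  The garbage collector is run first if
      * enough dead threads have accumulated.  A newly malloc'd thread
      * also gets its siginfo array, TCB, lock, and lockusers set up
      * here.
      */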
2371 struct pthread *
2372 _thr_alloc(struct pthread *curthread)
2373 {
2374         kse_critical_t  crit;
2375         struct pthread  *thread = NULL;
2376         int i;
2377
2378         if (curthread != NULL) {
2379                 if (GC_NEEDED())
2380                         _thr_gc(curthread);
2381                 if (free_thread_count > 0) {
2382                         crit = _kse_critical_enter();
2383                         KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2384                         if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
2385                                 TAILQ_REMOVE(&free_threadq, thread, tle);
2386                                 free_thread_count--;
2387                         }
2388                         KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2389                         _kse_critical_leave(crit);
2390                 }
2391         }
2392         if ((thread == NULL) &&
2393             ((thread = malloc(sizeof(struct pthread))) != NULL)) {
2394                 bzero(thread, sizeof(struct pthread));
2395                 thread->siginfo = calloc(_SIG_MAXSIG, sizeof(siginfo_t));
2396                 if (thread->siginfo == NULL) {
2397                         free(thread);
2398                         return (NULL);
2399                 }
2400                 if (curthread) {
2401                         _pthread_mutex_lock(&_tcb_mutex);
2402                         thread->tcb = _tcb_ctor(thread, 0 /* not initial tls */);
2403                         _pthread_mutex_unlock(&_tcb_mutex);
2404                 } else {
2405                         thread->tcb = _tcb_ctor(thread, 1 /* initial tls */);
2406                 }
2407                 if (thread->tcb == NULL) {
2408                         free(thread->siginfo);
2409                         free(thread);
2410                         return (NULL);
2411                 }
2412                 /*
2413                  * Initialize thread locking.
2414                  * Lock initialization needs malloc, so don't
2415                  * enter a critical region before doing this!
2416                  */
2417                 if (_lock_init(&thread->lock, LCK_ADAPTIVE,
2418                     _thr_lock_wait, _thr_lock_wakeup, calloc) != 0)
2419                         PANIC("Cannot initialize thread lock");
2420                 for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
2421                         _lockuser_init(&thread->lockusers[i], (void *)thread);
2422                         _LCK_SET_PRIVATE2(&thread->lockusers[i],
2423                             (void *)thread);
2424                 }
2425         }
2426         return (thread);
2427 }
2428
2429 void
2430 _thr_free(struct pthread *curthread, struct pthread *thread)
2431 {
2432         kse_critical_t crit;
2433
2434         DBG_MSG("Freeing thread %p\n", thread);
2435         if (thread->name) {
2436                 free(thread->name);
2437                 thread->name = NULL;
2438         }
2439         if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) {
2440                 thr_destroy(curthread, thread);
2441         } else {
2442                 /* Add the thread to the free thread list. */
2443                 crit = _kse_critical_enter();
2444                 KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2445                 TAILQ_INSERT_TAIL(&free_threadq, thread, tle);
2446                 free_thread_count++;
2447                 KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2448                 _kse_critical_leave(crit);
2449         }
2450 }
2451
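     /*
      * Free all resources held by a thread: its lockusers, its lock,
      * its TCB, its siginfo array, and the structure itself.
      */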
2452 static void
2453 thr_destroy(struct pthread *curthread, struct pthread *thread)
2454 {
2455         int i;
2456
2457         for (i = 0; i < MAX_THR_LOCKLEVEL; i++)
2458                 _lockuser_destroy(&thread->lockusers[i]);
2459         _lock_destroy(&thread->lock);
2460         if (curthread) {
2461                 _pthread_mutex_lock(&_tcb_mutex);
2462                 _tcb_dtor(thread->tcb);
2463                 _pthread_mutex_unlock(&_tcb_mutex);
2464         } else {
2465                 _tcb_dtor(thread->tcb);
2466         }
2467         free(thread->siginfo);
2468         free(thread);
2469 }
2470
2471 /*
2472  * Add an active thread:
2473  *
2474  *   o Assign the thread a unique id (which GDB uses to track
2475  *     threads).
2476  *   o Add the thread to the list of all threads and increment
2477  *     the number of active threads.
2478  */
2479 static void
2480 thr_link(struct pthread *thread)
2481 {
2482         kse_critical_t crit;
2483         struct kse *curkse;
2484
2485         crit = _kse_critical_enter();
2486         curkse = _get_curkse();
2487         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2488         /*
2489          * Initialize the unique id (which GDB uses to track
2490          * threads), add the thread to the list of all threads,
2491          * and increment the number of active threads.
2492          */
2493         thread->uniqueid = next_uniqueid++;
2494         THR_LIST_ADD(thread);
2495         _thread_active_threads++;
2496         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2497         _kse_critical_leave(crit);
2498 }
2499
2500 /*
2501  * Remove an active thread.
2502  */
2503 static void
2504 thr_unlink(struct pthread *thread)
2505 {
2506         kse_critical_t crit;
2507         struct kse *curkse;
2508
2509         crit = _kse_critical_enter();
2510         curkse = _get_curkse();
2511         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2512         THR_LIST_REMOVE(thread);
2513         _thread_active_threads--;
2514         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2515         _kse_critical_leave(crit);
2516 }
2517
2518 void
2519 _thr_hash_add(struct pthread *thread)
2520 {
2521         struct thread_hash_head *head;
2522
2523         head = &thr_hashtable[THREAD_HASH(thread)];
2524         LIST_INSERT_HEAD(head, thread, hle);
2525 }
2526
2527 void
2528 _thr_hash_remove(struct pthread *thread)
2529 {
2530         LIST_REMOVE(thread, hle);
2531 }
2532
2533 struct pthread *
2534 _thr_hash_find(struct pthread *thread)
2535 {
2536         struct pthread *td;
2537         struct thread_hash_head *head;
2538
2539         head = &thr_hashtable[THREAD_HASH(thread)];
2540         LIST_FOREACH(td, head, hle) {
2541                 if (td == thread)
2542                         return (thread);
2543         }
2544         return (NULL);
2545 }
2546
2547 void
2548 _thr_debug_check_yield(struct pthread *curthread)
2549 {
2550         /*
2551          * Note that TMDF_SUSPEND is set after the process is
2552          * suspended.  While we are being debugged, every suspension
2553          * of the process causes all KSEs to schedule an upcall in
2554          * the kernel, unless the KSE is in a critical region.
2555          * If this function is being called, the KSE is no longer in
2556          * a critical region.  If the debugger sets TMDF_SUSPEND
2557          * before the KSE leaves its critical region, we catch it
2558          * here; if the flag changes while we are testing it, that is
2559          * not a problem either, because the change only occurs after
2560          * a process suspension event.  A suspension event always
2561          * causes the KSE to schedule an upcall; since we are not in
2562          * a critical region the upcall is scheduled successfully,
2563          * the flag is checked again in kse_sched_multi, and we do
2564          * not come back until the debugger clears the flag, which
2565          * happens at the next suspension event.
2566          */
2567         if (!DBG_CAN_RUN(curthread)) {
2568                 if ((curthread->attr.flags & PTHREAD_SCOPE_SYSTEM) == 0)
2569                         _thr_sched_switch(curthread);
2570                 else
2571                         kse_thr_interrupt(&curthread->tcb->tcb_tmbx,
2572                                 KSE_INTR_DBSUSPEND, 0);
2573         }
2574 }