/*
 * Copyright (C) 2003 Daniel M. Eischen <deischen@freebsd.org>
 * Copyright (C) 2002 Jonathon Mini <mini@freebsd.org>
 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/kse.h>
#include <sys/ptrace.h>
#include <sys/signalvar.h>
#include <sys/queue.h>
#include <machine/atomic.h>
#include <machine/sigframe.h>

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ucontext.h>
#include <unistd.h>

#include "atomic_ops.h"
#include "thr_private.h"
#include "libc_private.h"
#ifdef NOTYET
#include "spinlock.h"
#endif

/* #define DEBUG_THREAD_KERN */
#ifdef DEBUG_THREAD_KERN
#define DBG_MSG         stdout_debug
#else
#define DBG_MSG(x...)
#endif

/*
 * Define a high water mark for the maximum number of threads that
 * will be cached.  Once this level is reached, any extra threads
 * will be free()'d.
 */
#define MAX_CACHED_THREADS      100
/*
 * Define high water marks for the maximum number of KSEs and KSE groups
 * that will be cached.  Because we support 1:1 threading, there could be
 * as many KSEs and KSE groups as threads.  Once these levels are
 * reached, any extra KSEs and KSE groups will be free()'d.
 */
#define MAX_CACHED_KSES         ((_thread_scope_system <= 0) ? 50 : 100)
#define MAX_CACHED_KSEGS        ((_thread_scope_system <= 0) ? 50 : 100)

#define KSE_SET_MBOX(kse, thrd) \
        (kse)->k_kcb->kcb_kmbx.km_curthread = &(thrd)->tcb->tcb_tmbx

#define KSE_SET_EXITED(kse)     (kse)->k_flags |= KF_EXITED

/*
 * Macros for manipulating the run queues.  The priority queue
 * routines use the thread's pqe link and also handle the setting
 * and clearing of the thread's THR_FLAGS_IN_RUNQ flag.
 */
#define KSE_RUNQ_INSERT_HEAD(kse, thrd)                 \
        _pq_insert_head(&(kse)->k_schedq->sq_runq, thrd)
#define KSE_RUNQ_INSERT_TAIL(kse, thrd)                 \
        _pq_insert_tail(&(kse)->k_schedq->sq_runq, thrd)
#define KSE_RUNQ_REMOVE(kse, thrd)                      \
        _pq_remove(&(kse)->k_schedq->sq_runq, thrd)
#define KSE_RUNQ_FIRST(kse)                             \
        ((_libkse_debug == 0) ?                         \
         _pq_first(&(kse)->k_schedq->sq_runq) :         \
         _pq_first_debug(&(kse)->k_schedq->sq_runq))

#define KSE_RUNQ_THREADS(kse)   ((kse)->k_schedq->sq_runq.pq_threads)

#define THR_NEED_CANCEL(thrd)                                           \
         (((thrd)->cancelflags & THR_CANCELLING) != 0 &&                \
          ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&        \
          (((thrd)->cancelflags & THR_AT_CANCEL_POINT) != 0 ||          \
           ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))

#define THR_NEED_ASYNC_CANCEL(thrd)                                     \
         (((thrd)->cancelflags & THR_CANCELLING) != 0 &&                \
          ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&        \
          (((thrd)->cancelflags & THR_AT_CANCEL_POINT) == 0 &&          \
           ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
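
/*
 * Usage sketch (illustrative, not part of the original source):
 * THR_NEED_CANCEL() holds at cancellation points or when
 * asynchronous cancellation is enabled; THR_NEED_ASYNC_CANCEL()
 * holds only away from cancellation points with asynchronous
 * cancellation enabled.  The schedulers below use the first form
 * like this:
 *
 *      if (THR_NEED_CANCEL(curthread)) {
 *              curthread->interrupted = 1;
 *              curthread->continuation = _thr_finish_cancellation;
 *              THR_SET_STATE(curthread, PS_RUNNING);
 *      }
 *
 * while the async form gates the signal-frame delivery in
 * kse_sched_multi() and _thr_sched_switch_unlocked().
 */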

/*
 * We've got to keep track of everything that is allocated, not only
 * to have a speedy free list, but also so they can be deallocated
 * after a fork().
 */
static TAILQ_HEAD(, kse)        active_kseq;
static TAILQ_HEAD(, kse)        free_kseq;
static TAILQ_HEAD(, kse_group)  free_kse_groupq;
static TAILQ_HEAD(, kse_group)  active_kse_groupq;
static TAILQ_HEAD(, kse_group)  gc_ksegq;
static struct lock              kse_lock;       /* also used for kseg queue */
static int                      free_kse_count = 0;
static int                      free_kseg_count = 0;
static TAILQ_HEAD(, pthread)    free_threadq;
static struct lock              thread_lock;
static int                      free_thread_count = 0;
static int                      inited = 0;
static int                      active_kse_count = 0;
static int                      active_kseg_count = 0;
static u_int64_t                next_uniqueid = 1;

LIST_HEAD(thread_hash_head, pthread);
#define THREAD_HASH_QUEUES      127
static struct thread_hash_head  thr_hashtable[THREAD_HASH_QUEUES];
#define THREAD_HASH(thrd)       ((unsigned long)thrd % THREAD_HASH_QUEUES)
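
/*
 * Lookup sketch (illustrative, not part of the original source;
 * the hash-link field name `hle' is an assumption made for this
 * example):
 *
 *      struct thread_hash_head *head;
 *      struct pthread *td;
 *
 *      head = &thr_hashtable[THREAD_HASH(thrd)];
 *      LIST_FOREACH(td, head, hle)
 *              if (td == thrd)
 *                      break;
 */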

/* Lock for thread tcb constructor/destructor */
static pthread_mutex_t          _tcb_mutex;

#ifdef DEBUG_THREAD_KERN
static void     dump_queues(struct kse *curkse);
#endif
static void     kse_check_completed(struct kse *kse);
static void     kse_check_waitq(struct kse *kse);
static void     kse_fini(struct kse *curkse);
static void     kse_reinit(struct kse *kse, int sys_scope);
static void     kse_sched_multi(struct kse_mailbox *kmbx);
static void     kse_sched_single(struct kse_mailbox *kmbx);
static void     kse_switchout_thread(struct kse *kse, struct pthread *thread);
static void     kse_wait(struct kse *kse, struct pthread *td_wait, int sigseq);
static void     kse_free_unlocked(struct kse *kse);
static void     kse_destroy(struct kse *kse);
static void     kseg_free_unlocked(struct kse_group *kseg);
static void     kseg_init(struct kse_group *kseg);
static void     kseg_reinit(struct kse_group *kseg);
static void     kseg_destroy(struct kse_group *kseg);
static void     kse_waitq_insert(struct pthread *thread);
static void     kse_wakeup_multi(struct kse *curkse);
static struct kse_mailbox *kse_wakeup_one(struct pthread *thread);
static void     thr_cleanup(struct kse *kse, struct pthread *curthread);
static void     thr_link(struct pthread *thread);
static void     thr_resume_wrapper(int sig, siginfo_t *, ucontext_t *);
static void     thr_resume_check(struct pthread *curthread, ucontext_t *ucp);
static int      thr_timedout(struct pthread *thread, struct timespec *curtime);
static void     thr_unlink(struct pthread *thread);
static void     thr_destroy(struct pthread *curthread, struct pthread *thread);
static void     thread_gc(struct pthread *thread);
static void     kse_gc(struct pthread *thread);
static void     kseg_gc(struct pthread *thread);

static __inline void
thr_accounting(struct pthread *thread)
{
        if ((thread->slice_usec != -1) &&
            (thread->slice_usec <= TIMESLICE_USEC) &&
            (thread->attr.sched_policy != SCHED_FIFO)) {
                thread->slice_usec += (thread->tcb->tcb_tmbx.tm_uticks
                    + thread->tcb->tcb_tmbx.tm_sticks) * _clock_res_usec;
                /* Check for time quantum exceeded: */
                if (thread->slice_usec > TIMESLICE_USEC)
                        thread->slice_usec = -1;
        }
        thread->tcb->tcb_tmbx.tm_uticks = 0;
        thread->tcb->tcb_tmbx.tm_sticks = 0;
}
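
/*
 * Worked example for thr_accounting() above (illustrative, not part
 * of the original source; the constants are assumed values, not the
 * real definitions): with _clock_res_usec == 10000 (a 10 ms clock)
 * and TIMESLICE_USEC == 100000, a thread that accumulated 7 user
 * ticks and 4 system ticks gains (7 + 4) * 10000 == 110000 usec.
 * That exceeds the quantum, so slice_usec becomes -1 and
 * kse_sched_multi() grants a fresh slice the next time it selects
 * the thread.
 */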

/*
 * This is called after a fork().
 * No locks need to be taken here since we are guaranteed to be
 * single threaded.
 *
 * XXX
 * POSIX says that in a threaded process, fork() is to be used
 * only to run new programs, and the effects of calling functions
 * that require certain resources between the call to fork() and
 * the call to an exec function are undefined.
 *
 * It is not safe to free memory after fork(), because these data
 * structures may be in an inconsistent state.
 */
void
_kse_single_thread(struct pthread *curthread)
{
#ifdef NOTYET
        struct kse *kse;
        struct kse_group *kseg;
        struct pthread *thread;

        _thr_spinlock_init();
        *__malloc_lock = (spinlock_t)_SPINLOCK_INITIALIZER;
        if (__isthreaded) {
                _thr_rtld_fini();
                _thr_signal_deinit();
        }
        __isthreaded = 0;
        /*
         * Restore the signal mask early, so that any memory problems
         * can dump core.
         */
        __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
        _thread_active_threads = 1;

        curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
        curthread->attr.flags &= ~PTHREAD_SCOPE_PROCESS;
        curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;

        /*
         * Enter a loop to remove and free all threads other than
         * the running thread from the active thread list:
         */
        while ((thread = TAILQ_FIRST(&_thread_list)) != NULL) {
                THR_GCLIST_REMOVE(thread);
                /*
                 * Remove this thread from the list (the current
                 * thread will be removed but re-added by libpthread
                 * initialization).
                 */
                TAILQ_REMOVE(&_thread_list, thread, tle);
                /* Make sure this isn't the running thread: */
                if (thread != curthread) {
                        _thr_stack_free(&thread->attr);
                        if (thread->specific != NULL)
                                free(thread->specific);
                        thr_destroy(curthread, thread);
                }
        }

        TAILQ_INIT(&curthread->mutexq);         /* initialize mutex queue */
        curthread->joiner = NULL;               /* no joining threads yet */
        curthread->refcount = 0;
        SIGEMPTYSET(curthread->sigpend);        /* clear pending signals */

        /* Don't free thread-specific data as the caller may require it */

        /* Free the free KSEs: */
        while ((kse = TAILQ_FIRST(&free_kseq)) != NULL) {
                TAILQ_REMOVE(&free_kseq, kse, k_qe);
                kse_destroy(kse);
        }
        free_kse_count = 0;

        /* Free the active KSEs: */
        while ((kse = TAILQ_FIRST(&active_kseq)) != NULL) {
                TAILQ_REMOVE(&active_kseq, kse, k_qe);
                kse_destroy(kse);
        }
        active_kse_count = 0;

        /* Free the free KSEGs: */
        while ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
                TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
                kseg_destroy(kseg);
        }
        free_kseg_count = 0;

        /* Free the active KSEGs: */
        while ((kseg = TAILQ_FIRST(&active_kse_groupq)) != NULL) {
                TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
                kseg_destroy(kseg);
        }
        active_kseg_count = 0;

        /* Free the free threads. */
        while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
                TAILQ_REMOVE(&free_threadq, thread, tle);
                thr_destroy(curthread, thread);
        }
        free_thread_count = 0;

        /* Free the to-be-gc'd threads. */
        while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
                TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
                thr_destroy(curthread, thread);
        }
        TAILQ_INIT(&gc_ksegq);
        _gc_count = 0;

        if (inited != 0) {
                /*
                 * Destroy these locks; they'll be recreated to ensure
                 * they are in the unlocked state.
                 */
                _lock_destroy(&kse_lock);
                _lock_destroy(&thread_lock);
                _lock_destroy(&_thread_list_lock);
                inited = 0;
        }

        /* We're no longer part of any lists */
        curthread->tlflags = 0;

        /*
         * After a fork, we are still operating on the thread's original
         * stack.  Don't clear THR_FLAGS_USER from the thread's
         * attribute flags.
         */

        /* Initialize the threads library. */
        curthread->kse = NULL;
        curthread->kseg = NULL;
        _kse_initial = NULL;
        _libpthread_init(curthread);
#else
        int i;

        /* Reset the current thread and KSE lock data. */
        for (i = 0; i < curthread->locklevel; i++) {
                _lockuser_reinit(&curthread->lockusers[i], (void *)curthread);
        }
        curthread->locklevel = 0;
        for (i = 0; i < curthread->kse->k_locklevel; i++) {
                _lockuser_reinit(&curthread->kse->k_lockusers[i],
                    (void *)curthread->kse);
                _LCK_SET_PRIVATE2(&curthread->kse->k_lockusers[i], NULL);
        }
        curthread->kse->k_locklevel = 0;

        /*
         * Reinitialize the thread and signal locks so that
         * sigaction() will work after a fork().
         */
        _lock_reinit(&curthread->lock, LCK_ADAPTIVE, _thr_lock_wait,
            _thr_lock_wakeup);
        _lock_reinit(&_thread_signal_lock, LCK_ADAPTIVE, _kse_lock_wait,
            _kse_lock_wakeup);

        _thr_spinlock_init();
        if (__isthreaded) {
                _thr_rtld_fini();
                _thr_signal_deinit();
        }
        __isthreaded = 0;
        curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
        curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;

        /*
         * After a fork, it is possible that an upcall occurred in
         * the parent KSE that fork()'d before the child process
         * was fully created and before its vm space was copied.
         * During the upcall, the tcb is set to NULL or to another
         * thread, and this is what gets copied in the child process
         * when the vm space is cloned sometime after the upcall
         * occurs.  Note that we shouldn't have to set the kcb, but
         * we do it for completeness.
         */
        _kcb_set(curthread->kse->k_kcb);
        _tcb_set(curthread->kse->k_kcb, curthread->tcb);

        /* After a fork(), the child should have no pending signals. */
        sigemptyset(&curthread->sigpend);

        /*
         * Restore the signal mask early, so that any memory problems
         * can dump core.
         */
        sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
        _thread_active_threads = 1;
#endif
}

/*
 * This is used to initialize housekeeping and to initialize the
 * KSD for the KSE.
 */
void
_kse_init(void)
{
        if (inited == 0) {
                TAILQ_INIT(&active_kseq);
                TAILQ_INIT(&active_kse_groupq);
                TAILQ_INIT(&free_kseq);
                TAILQ_INIT(&free_kse_groupq);
                TAILQ_INIT(&free_threadq);
                TAILQ_INIT(&gc_ksegq);
                if (_lock_init(&kse_lock, LCK_ADAPTIVE,
                    _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
                        PANIC("Unable to initialize free KSE queue lock");
                if (_lock_init(&thread_lock, LCK_ADAPTIVE,
                    _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
                        PANIC("Unable to initialize free thread queue lock");
                if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
                    _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
                        PANIC("Unable to initialize thread list lock");
                _pthread_mutex_init(&_tcb_mutex, NULL);
                active_kse_count = 0;
                active_kseg_count = 0;
                _gc_count = 0;
                inited = 1;
        }
}
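
/*
 * Usage sketch (illustrative, not part of the original source):
 * the locks initialized above are always taken from within a KSE
 * critical region, as the GC routines below do:
 *
 *      kse_critical_t crit;
 *
 *      crit = _kse_critical_enter();
 *      KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
 *      (trim the free_kseq / free_kse_groupq caches)
 *      KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
 *      _kse_critical_leave(crit);
 */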

/*
 * This is called when the first thread (other than the initial
 * thread) is created.
 */
int
_kse_setthreaded(int threaded)
{
        sigset_t sigset;

        if ((threaded != 0) && (__isthreaded == 0)) {
                SIGFILLSET(sigset);
                __sys_sigprocmask(SIG_SETMASK, &sigset, &_thr_initial->sigmask);

                /*
                 * Tell the kernel to create a KSE for the initial thread
                 * and enable upcalls in it.
                 */
                _kse_initial->k_flags |= KF_STARTED;

                if (_thread_scope_system <= 0) {
                        _thr_initial->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
                        _kse_initial->k_kseg->kg_flags &= ~KGF_SINGLE_THREAD;
                        _kse_initial->k_kcb->kcb_kmbx.km_curthread = NULL;
                }
                else {
                        /*
                         * For a bound thread, the kernel reads the mailbox
                         * pointer only once, so we set it here before
                         * calling kse_create().
                         */
                        _tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
                        KSE_SET_MBOX(_kse_initial, _thr_initial);
                        _kse_initial->k_kcb->kcb_kmbx.km_flags |= KMF_BOUND;
                }

                /*
                 * Locking functions in libc are required when there are
                 * threads other than the initial thread.
                 */
                _thr_rtld_init();

                __isthreaded = 1;
                if (kse_create(&_kse_initial->k_kcb->kcb_kmbx, 0) != 0) {
                        _kse_initial->k_flags &= ~KF_STARTED;
                        __isthreaded = 0;
                        PANIC("kse_create() failed\n");
                        return (-1);
                }
                _thr_initial->tcb->tcb_tmbx.tm_lwp =
                        _kse_initial->k_kcb->kcb_kmbx.km_lwp;
                _thread_activated = 1;

#ifndef SYSTEM_SCOPE_ONLY
                if (_thread_scope_system <= 0) {
                        /* Set current thread to initial thread */
                        _tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
                        KSE_SET_MBOX(_kse_initial, _thr_initial);
                        _thr_start_sig_daemon();
                        _thr_setmaxconcurrency();
                }
                else
#endif
                        __sys_sigprocmask(SIG_SETMASK, &_thr_initial->sigmask,
                            NULL);
        }
        return (0);
}

/*
 * Lock wait and wakeup handlers for KSE locks.  These are only used by
 * KSEs, and should never be used by threads.  KSE locks include the
 * KSE group lock (used for locking the scheduling queue) and the
 * kse_lock defined above.
 *
 * When a KSE lock attempt blocks, the entire KSE blocks allowing another
 * KSE to run.  For the most part, it doesn't make much sense to try and
 * schedule another thread because you need to lock the scheduling queue
 * in order to do that.  And since the KSE lock is used to lock the scheduling
 * queue, you would just end up blocking again.
 */
void
_kse_lock_wait(struct lock *lock __unused, struct lockuser *lu)
{
        struct kse *curkse = (struct kse *)_LCK_GET_PRIVATE(lu);
        struct timespec ts;
        int saved_flags;

        if (curkse->k_kcb->kcb_kmbx.km_curthread != NULL)
                PANIC("kse_lock_wait does not disable upcall.\n");
        /*
         * Enter a loop to wait until we get the lock.
         */
        ts.tv_sec = 0;
        ts.tv_nsec = 1000000;  /* 1 msec */
        while (!_LCK_GRANTED(lu)) {
                /*
                 * Yield the kse and wait to be notified when the lock
                 * is granted.
                 */
                saved_flags = curkse->k_kcb->kcb_kmbx.km_flags;
                curkse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL |
                    KMF_NOCOMPLETED;
                kse_release(&ts);
                curkse->k_kcb->kcb_kmbx.km_flags = saved_flags;
        }
}
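
/*
 * Protocol sketch (illustrative, not part of the original source):
 * _kse_lock_wait() and _kse_lock_wakeup() pair up through the
 * lockuser.  The waiter polls _LCK_GRANTED(), napping in
 * kse_release() with upcalls suppressed; the granter hands over
 * the lock and kicks the waiter's mailbox:
 *
 *      waiter                          granter
 *      ------                          -------
 *      while (!_LCK_GRANTED(lu)) {     _lock_grant(lock, lu);
 *              kse_release(&ts);       kse_wakeup(&kse->k_kcb->kcb_kmbx);
 *      }
 */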

void
_kse_lock_wakeup(struct lock *lock, struct lockuser *lu)
{
        struct kse *curkse;
        struct kse *kse;
        struct kse_mailbox *mbx;

        curkse = _get_curkse();
        kse = (struct kse *)_LCK_GET_PRIVATE(lu);

        if (kse == curkse)
                PANIC("KSE trying to wake itself up in lock");
        else {
                mbx = &kse->k_kcb->kcb_kmbx;
                _lock_grant(lock, lu);
                /*
                 * Notify the owning kse that it has the lock.
                 * It is safe to pass an invalid address to kse_wakeup()
                 * even if the mailbox is not in the kernel at all;
                 * waking up the wrong kse is also harmless.
                 */
                kse_wakeup(mbx);
        }
}

/*
 * Thread wait and wakeup handlers for thread locks.  These are only used
 * by threads, never by KSEs.  Thread locks include the per-thread lock
 * (defined in its structure), and condition variable and mutex locks.
 */
void
_thr_lock_wait(struct lock *lock __unused, struct lockuser *lu)
{
        struct pthread *curthread = (struct pthread *)lu->lu_private;

        do {
                THR_LOCK_SWITCH(curthread);
                THR_SET_STATE(curthread, PS_LOCKWAIT);
                _thr_sched_switch_unlocked(curthread);
        } while (!_LCK_GRANTED(lu));
}

void
_thr_lock_wakeup(struct lock *lock __unused, struct lockuser *lu)
{
        struct pthread *thread;
        struct pthread *curthread;
        struct kse_mailbox *kmbx;

        curthread = _get_curthread();
        thread = (struct pthread *)_LCK_GET_PRIVATE(lu);

        THR_SCHED_LOCK(curthread, thread);
        _lock_grant(lock, lu);
        kmbx = _thr_setrunnable_unlocked(thread);
        THR_SCHED_UNLOCK(curthread, thread);
        if (kmbx != NULL)
                kse_wakeup(kmbx);
}

kse_critical_t
_kse_critical_enter(void)
{
        kse_critical_t crit;

        crit = (kse_critical_t)_kcb_critical_enter();
        return (crit);
}

void
_kse_critical_leave(kse_critical_t crit)
{
        struct pthread *curthread;

        _kcb_critical_leave((struct kse_thr_mailbox *)crit);
        if ((crit != NULL) && ((curthread = _get_curthread()) != NULL))
                THR_YIELD_CHECK(curthread);
}

int
_kse_in_critical(void)
{
        return (_kcb_in_critical());
}

void
_thr_critical_enter(struct pthread *thread)
{
        thread->critical_count++;
}

void
_thr_critical_leave(struct pthread *thread)
{
        thread->critical_count--;
        THR_YIELD_CHECK(thread);
}
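
/*
 * Usage sketch (illustrative, not part of the original source):
 * thread critical regions nest via critical_count, and the yield
 * check only runs when the region is left:
 *
 *      _thr_critical_enter(curthread);
 *      (modify state that must not be interrupted by a switchout)
 *      _thr_critical_leave(curthread);  (may THR_YIELD_CHECK())
 */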

void
_thr_sched_switch(struct pthread *curthread)
{
        struct kse *curkse;

        (void)_kse_critical_enter();
        curkse = _get_curkse();
        KSE_SCHED_LOCK(curkse, curkse->k_kseg);
        _thr_sched_switch_unlocked(curthread);
}

/*
 * XXX - We may need to take the scheduling lock before calling
 *       this, or perhaps take the lock within here before
 *       doing anything else.
 */
void
_thr_sched_switch_unlocked(struct pthread *curthread)
{
        struct kse *curkse;
        volatile int resume_once = 0;
        ucontext_t *uc;

        /* We're in the scheduler, 5 by 5: */
        curkse = curthread->kse;

        curthread->need_switchout = 1;  /* The thread yielded on its own. */
        curthread->critical_yield = 0;  /* No need to yield anymore. */

        /* Thread can unlock the scheduler lock. */
        curthread->lock_switch = 1;

        if (curthread->attr.flags & PTHREAD_SCOPE_SYSTEM)
                kse_sched_single(&curkse->k_kcb->kcb_kmbx);
        else {
                if (__predict_false(_libkse_debug != 0)) {
                        /*
                         * Because the debugger saves the single-step status
                         * in the thread mailbox's tm_dflags, we can safely
                         * clear the single-step status here.  The status
                         * will be restored by kse_switchin() when the thread
                         * is switched in again.  This also lets the UTS run
                         * at full speed.
                         */
                         ptrace(PT_CLEARSTEP, curkse->k_kcb->kcb_kmbx.km_lwp,
                                (caddr_t) 1, 0);
                }

                KSE_SET_SWITCH(curkse);
                _thread_enter_uts(curthread->tcb, curkse->k_kcb);
        }

        /*
         * Unlock the scheduling queue and leave the
         * critical region.
         */
        /* Don't trust this after a switch! */
        curkse = curthread->kse;

        curthread->lock_switch = 0;
        KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
        _kse_critical_leave(&curthread->tcb->tcb_tmbx);

        /*
         * This thread is being resumed; check for cancellations.
         */
        if (THR_NEED_ASYNC_CANCEL(curthread) && !THR_IN_CRITICAL(curthread)) {
                uc = alloca(sizeof(ucontext_t));
                resume_once = 0;
                THR_GETCONTEXT(uc);
                if (resume_once == 0) {
                        resume_once = 1;
                        curthread->check_pending = 0;
                        thr_resume_check(curthread, uc);
                }
        }
        THR_ACTIVATE_LAST_LOCK(curthread);
}

/*
 * This is the scheduler for a KSE which runs a scope system thread.
 * The multi-thread KSE scheduler should also work for a single threaded
 * KSE, but we use a separate scheduler so that it can be fine-tuned
 * to be more efficient (and perhaps not need a separate stack for
 * the KSE, allowing it to use the thread's stack).
 */

static void
kse_sched_single(struct kse_mailbox *kmbx)
{
        struct kse *curkse;
        struct pthread *curthread;
        struct timespec ts;
        sigset_t sigmask;
        int i, sigseqno, level, first = 0;

        curkse = (struct kse *)kmbx->km_udata;
        curthread = curkse->k_curthread;

        if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
                /* Set up this KSE's specific data. */
                _kcb_set(curkse->k_kcb);
                _tcb_set(curkse->k_kcb, curthread->tcb);
                curkse->k_flags |= KF_INITIALIZED;
                first = 1;
                curthread->active = 1;

                /* Set up the kernel signal mask for the new thread. */
                __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
                /*
                 * Enter a critical region.  This is meaningless for a
                 * bound thread, but it keeps code that expects the
                 * mailbox to be cleared working.
                 */
                (void)_kse_critical_enter();
        } else {
                /*
                 * A bound thread always has its tcb set; this prevents
                 * some (buggy?) code from blindly setting a bound
                 * thread's tcb to NULL.
                 */
                _tcb_set(curkse->k_kcb, curthread->tcb);
        }

        curthread->critical_yield = 0;
        curthread->need_switchout = 0;

        /*
         * Lock the scheduling queue.
         *
         * There is no scheduling queue for single threaded KSEs,
         * but we need a lock for protection regardless.
         */
        if (curthread->lock_switch == 0)
                KSE_SCHED_LOCK(curkse, curkse->k_kseg);

        /*
         * This has to do the job of kse_switchout_thread(), only
         * for a single threaded KSE/KSEG.
         */

        switch (curthread->state) {
        case PS_MUTEX_WAIT:
        case PS_COND_WAIT:
                if (THR_NEED_CANCEL(curthread)) {
                        curthread->interrupted = 1;
                        curthread->continuation = _thr_finish_cancellation;
                        THR_SET_STATE(curthread, PS_RUNNING);
                }
                break;

        case PS_LOCKWAIT:
                /*
                 * This state doesn't time out.
                 */
                curthread->wakeup_time.tv_sec = -1;
                curthread->wakeup_time.tv_nsec = -1;
                level = curthread->locklevel - 1;
                if (_LCK_GRANTED(&curthread->lockusers[level]))
                        THR_SET_STATE(curthread, PS_RUNNING);
                break;

        case PS_DEAD:
                /* Unlock the scheduling queue and exit the KSE and thread. */
                thr_cleanup(curkse, curthread);
                KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
                PANIC("bound thread shouldn't get here\n");
                break;

        case PS_JOIN:
                if (THR_NEED_CANCEL(curthread)) {
                        curthread->join_status.thread = NULL;
                        THR_SET_STATE(curthread, PS_RUNNING);
                } else {
                        /*
                         * This state doesn't time out.
                         */
                        curthread->wakeup_time.tv_sec = -1;
                        curthread->wakeup_time.tv_nsec = -1;
                }
                break;

        case PS_SUSPENDED:
                if (THR_NEED_CANCEL(curthread)) {
                        curthread->interrupted = 1;
                        THR_SET_STATE(curthread, PS_RUNNING);
                } else {
                        /*
                         * These states don't time out.
                         */
                        curthread->wakeup_time.tv_sec = -1;
                        curthread->wakeup_time.tv_nsec = -1;
                }
                break;

        case PS_RUNNING:
                if ((curthread->flags & THR_FLAGS_SUSPENDED) != 0 &&
                    !THR_NEED_CANCEL(curthread)) {
                        THR_SET_STATE(curthread, PS_SUSPENDED);
                        /*
                         * These states don't time out.
                         */
                        curthread->wakeup_time.tv_sec = -1;
                        curthread->wakeup_time.tv_nsec = -1;
                }
                break;

        case PS_SIGWAIT:
                PANIC("bound thread does not have SIGWAIT state\n");

        case PS_SLEEP_WAIT:
                PANIC("bound thread does not have SLEEP_WAIT state\n");

        case PS_SIGSUSPEND:
                PANIC("bound thread does not have SIGSUSPEND state\n");

        case PS_DEADLOCK:
                /*
                 * These states don't time out and don't need
                 * to be in the waiting queue.
                 */
                curthread->wakeup_time.tv_sec = -1;
                curthread->wakeup_time.tv_nsec = -1;
                break;

        default:
                PANIC("Unknown state\n");
                break;
        }

        while (curthread->state != PS_RUNNING) {
                sigseqno = curkse->k_sigseqno;
                if (curthread->check_pending != 0) {
                        /*
                         * Install pending signals into the frame; this
                         * may cause a mutex or condvar backout.
                         */
                        curthread->check_pending = 0;
                        SIGFILLSET(sigmask);

                        /*
                         * Lock out kernel signal code when we are processing
                         * signals, and get a fresh copy of the signal mask.
                         */
                        __sys_sigprocmask(SIG_SETMASK, &sigmask,
                                          &curthread->sigmask);
                        for (i = 1; i <= _SIG_MAXSIG; i++) {
                                if (SIGISMEMBER(curthread->sigmask, i))
                                        continue;
                                if (SIGISMEMBER(curthread->sigpend, i))
                                        (void)_thr_sig_add(curthread, i,
                                            &curthread->siginfo[i-1]);
                        }
                        __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask,
                                NULL);
                        /* The above code might make the thread runnable. */
                        if (curthread->state == PS_RUNNING)
                                break;
                }
                THR_DEACTIVATE_LAST_LOCK(curthread);
                kse_wait(curkse, curthread, sigseqno);
                THR_ACTIVATE_LAST_LOCK(curthread);
                if (curthread->wakeup_time.tv_sec >= 0) {
                        KSE_GET_TOD(curkse, &ts);
                        if (thr_timedout(curthread, &ts)) {
                                /* Indicate the thread timed out: */
                                curthread->timeout = 1;
                                /* Make the thread runnable. */
                                THR_SET_STATE(curthread, PS_RUNNING);
                        }
                }
        }

        if (curthread->lock_switch == 0) {
                /* Unlock the scheduling queue. */
                KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
        }

        DBG_MSG("Continuing bound thread %p\n", curthread);
        if (first) {
                _kse_critical_leave(&curthread->tcb->tcb_tmbx);
                pthread_exit(curthread->start_routine(curthread->arg));
        }
}

#ifdef DEBUG_THREAD_KERN
static void
dump_queues(struct kse *curkse)
{
        struct pthread *thread;

        DBG_MSG("Threads in waiting queue:\n");
        TAILQ_FOREACH(thread, &curkse->k_kseg->kg_schedq.sq_waitq, pqe) {
                DBG_MSG("  thread %p, state %d, blocked %d\n",
                    thread, thread->state, thread->blocked);
        }
}
#endif

/*
 * This is the scheduler for a KSE which runs multiple threads.
 */
static void
kse_sched_multi(struct kse_mailbox *kmbx)
{
        struct kse *curkse;
        struct pthread *curthread, *td_wait;
        int ret;

        curkse = (struct kse *)kmbx->km_udata;
        THR_ASSERT(curkse->k_kcb->kcb_kmbx.km_curthread == NULL,
            "Mailbox not null in kse_sched_multi");

        /* Check for first time initialization: */
        if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
                /* Set up this KSE's specific data. */
                _kcb_set(curkse->k_kcb);

                /* Set this before grabbing the context. */
                curkse->k_flags |= KF_INITIALIZED;
        }

        /*
         * There is no current thread anymore; calling _get_curthread()
         * in the UTS should dump core.
         */
        _tcb_set(curkse->k_kcb, NULL);

        /* If this is an upcall, take the scheduler lock. */
        if (!KSE_IS_SWITCH(curkse))
                KSE_SCHED_LOCK(curkse, curkse->k_kseg);
        else
                KSE_CLEAR_SWITCH(curkse);

        if (KSE_IS_IDLE(curkse)) {
                KSE_CLEAR_IDLE(curkse);
                curkse->k_kseg->kg_idle_kses--;
        }

        /*
         * Now that the scheduler lock is held, get the current
         * thread.  The KSE's current thread cannot be safely
         * examined without the lock because it could have returned
         * as completed on another KSE.  See kse_check_completed().
         */
        curthread = curkse->k_curthread;

        /*
         * If the current thread was completed in another KSE, then
         * it will be in the run queue.  Don't mark it as being blocked.
         */
        if ((curthread != NULL) &&
            ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) &&
            (curthread->need_switchout == 0)) {
                /*
                 * Assume the current thread is blocked; when the
                 * completed threads are checked and if the current
                 * thread is among the completed, the blocked flag
                 * will be cleared.
                 */
                curthread->blocked = 1;
                DBG_MSG("Running thread %p is now blocked in kernel.\n",
                    curthread);
        }

        /* Check for any unblocked threads in the kernel. */
        kse_check_completed(curkse);

        /*
         * Check for threads that have timed out.
         */
        kse_check_waitq(curkse);

        /*
         * Switch out the current thread, if necessary, as the last step
         * so that it is inserted into the run queue (if it's runnable)
         * _after_ any other threads that were added to it above.
         */
        if (curthread == NULL)
                ;  /* Nothing to do here. */
        else if ((curthread->need_switchout == 0) && DBG_CAN_RUN(curthread) &&
            (curthread->blocked == 0) && (THR_IN_CRITICAL(curthread))) {
                /*
                 * Resume the thread and tell it to yield when
                 * it leaves the critical region.
                 */
                curthread->critical_yield = 1;
                curthread->active = 1;
                if ((curthread->flags & THR_FLAGS_IN_RUNQ) != 0)
                        KSE_RUNQ_REMOVE(curkse, curthread);
                curkse->k_curthread = curthread;
                curthread->kse = curkse;
                DBG_MSG("Continuing thread %p in critical region\n",
                    curthread);
                kse_wakeup_multi(curkse);
                KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
                ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
                if (ret != 0)
                        PANIC("Can't resume thread in critical region\n");
        }
        else if ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) {
                curthread->tcb->tcb_tmbx.tm_lwp = 0;
                kse_switchout_thread(curkse, curthread);
        }
        curkse->k_curthread = NULL;

#ifdef DEBUG_THREAD_KERN
        dump_queues(curkse);
#endif

        /* Check if there are no threads ready to run: */
        while (((curthread = KSE_RUNQ_FIRST(curkse)) == NULL) &&
            (curkse->k_kseg->kg_threadcount != 0) &&
            ((curkse->k_flags & KF_TERMINATED) == 0)) {
                /*
                 * Wait for a thread to become active or until there are
                 * no more threads.
                 */
                td_wait = KSE_WAITQ_FIRST(curkse);
                kse_wait(curkse, td_wait, 0);
                kse_check_completed(curkse);
                kse_check_waitq(curkse);
        }

        /* Check for no more threads: */
        if ((curkse->k_kseg->kg_threadcount == 0) ||
            ((curkse->k_flags & KF_TERMINATED) != 0)) {
                /*
                 * Normally this shouldn't return, but it will if there
                 * are other KSEs running that create new threads that
                 * are assigned to this KSE[G].  For instance, if a scope
                 * system thread were to create a scope process thread
                 * and this kse[g] is the initial kse[g], then that newly
                 * created thread would be assigned to us (the initial
                 * kse[g]).
                 */
                kse_wakeup_multi(curkse);
                KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
                kse_fini(curkse);
                /* never returns */
        }

        THR_ASSERT(curthread != NULL,
            "Return from kse_wait/fini without thread.");
        THR_ASSERT(curthread->state != PS_DEAD,
            "Trying to resume dead thread!");
        KSE_RUNQ_REMOVE(curkse, curthread);

        /*
         * Make the selected thread the current thread.
         */
        curkse->k_curthread = curthread;

        /*
         * Make sure the current thread's kse points to this kse.
         */
        curthread->kse = curkse;

        /*
         * Reset the time slice if this thread is running for the first
         * time or running again after using its full time slice allocation.
         */
        if (curthread->slice_usec == -1)
                curthread->slice_usec = 0;

        /* Mark the thread active. */
        curthread->active = 1;

        /*
         * The thread's current signal frame will only be NULL if it
         * is being resumed after being blocked in the kernel.  In
         * this case, and if the thread needs to run down pending
         * signals or needs a cancellation check, we need to add a
         * signal frame to the thread's context.
         */
        if (curthread->lock_switch == 0 && curthread->state == PS_RUNNING &&
            (curthread->check_pending != 0 ||
             THR_NEED_ASYNC_CANCEL(curthread)) &&
            !THR_IN_CRITICAL(curthread)) {
                curthread->check_pending = 0;
                signalcontext(&curthread->tcb->tcb_tmbx.tm_context, 0,
                    (__sighandler_t *)thr_resume_wrapper);
        }
        kse_wakeup_multi(curkse);
        /*
         * Continue the thread at its current frame:
         */
        if (curthread->lock_switch != 0) {
                /*
                 * This thread came from a scheduler switch; it will
                 * unlock the scheduler lock and set the mailbox.
                 */
                ret = _thread_switch(curkse->k_kcb, curthread->tcb, 0);
        } else {
                /* This thread won't unlock the scheduler lock. */
                KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
                ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
        }
        if (ret != 0)
                PANIC("Thread has returned from _thread_switch");

        /* This point should not be reached. */
        PANIC("Thread has returned from _thread_switch");
}
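
/*
 * Flow summary for kse_sched_multi() above (illustrative, not part
 * of the original source):
 *
 *      1. Take the KSEG scheduling lock (unless resuming a switch).
 *      2. Fold completed and timed-out threads back into the run
 *         queue (kse_check_completed(), kse_check_waitq()).
 *      3. Switch out the previous thread (kse_switchout_thread()).
 *      4. Pick KSE_RUNQ_FIRST(), or kse_wait()/kse_fini() when the
 *         run queue stays empty.
 *      5. Resume the chosen thread via _thread_switch(); on success
 *         control never returns here.
 */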

static void
thr_resume_wrapper(int sig __unused, siginfo_t *siginfo __unused,
    ucontext_t *ucp)
{
        struct pthread *curthread = _get_curthread();
        struct kse *curkse;
        int ret, err_save = errno;

        DBG_MSG(">>> sig wrapper\n");
        if (curthread->lock_switch)
                PANIC("thr_resume_wrapper, lock_switch != 0\n");
        thr_resume_check(curthread, ucp);
        errno = err_save;
        _kse_critical_enter();
        curkse = curthread->kse;
        curthread->tcb->tcb_tmbx.tm_context = *ucp;
        ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
        if (ret != 0)
                PANIC("thr_resume_wrapper: thread has returned "
                      "from _thread_switch");
        /* THR_SETCONTEXT(ucp); */  /* doesn't work; why? */
}

static void
thr_resume_check(struct pthread *curthread, ucontext_t *ucp)
{
        _thr_sig_rundown(curthread, ucp);

        if (THR_NEED_ASYNC_CANCEL(curthread))
                pthread_testcancel();
}

/*
 * Clean up a thread.  This must be called with the thread's KSE
 * scheduling lock held.  The thread must be a thread from the
 * KSE's group.
 */
static void
thr_cleanup(struct kse *curkse, struct pthread *thread)
{
        struct pthread *joiner;
        struct kse_mailbox *kmbx = NULL;
        int sys_scope;

        thread->active = 0;
        thread->need_switchout = 0;
        thread->lock_switch = 0;
        thread->check_pending = 0;

        if ((joiner = thread->joiner) != NULL) {
                /* Joinee scheduler lock held; joiner won't leave. */
                if (joiner->kseg == curkse->k_kseg) {
                        if (joiner->join_status.thread == thread) {
                                joiner->join_status.thread = NULL;
                                joiner->join_status.ret = thread->ret;
                                (void)_thr_setrunnable_unlocked(joiner);
                        }
                } else {
                        KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
                        /* The joiner may have removed itself and exited. */
                        if (_thr_ref_add(thread, joiner, 0) == 0) {
                                KSE_SCHED_LOCK(curkse, joiner->kseg);
                                if (joiner->join_status.thread == thread) {
                                        joiner->join_status.thread = NULL;
                                        joiner->join_status.ret = thread->ret;
                                        kmbx = _thr_setrunnable_unlocked(joiner);
                                }
                                KSE_SCHED_UNLOCK(curkse, joiner->kseg);
                                _thr_ref_delete(thread, joiner);
                                if (kmbx != NULL)
                                        kse_wakeup(kmbx);
                        }
                        KSE_SCHED_LOCK(curkse, curkse->k_kseg);
                }
                thread->attr.flags |= PTHREAD_DETACHED;
        }

        if (!(sys_scope = (thread->attr.flags & PTHREAD_SCOPE_SYSTEM))) {
                /*
                 * Remove the thread from the KSEG's list of threads.
                 */
                KSEG_THRQ_REMOVE(thread->kseg, thread);
                /*
                 * Migrate the thread to the main KSE so that this
                 * KSE and KSEG can be cleaned when their last thread
                 * exits.
                 */
                thread->kseg = _kse_initial->k_kseg;
                thread->kse = _kse_initial;
        }

        /*
         * We can't hold the thread list lock while holding the
         * scheduler lock.
         */
        KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
        DBG_MSG("Adding thread %p to GC list\n", thread);
        KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
        thread->tlflags |= TLFLAGS_GC_SAFE;
        THR_GCLIST_ADD(thread);
        KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
        if (sys_scope) {
                /*
                 * A system scope thread is in a group by itself; when
                 * the thread exits, its KSE and KSE group should be
                 * recycled as well.  The KSE upcall stack belongs to
                 * the thread, so clear it here.
                 */
                curkse->k_stack.ss_sp = 0;
                curkse->k_stack.ss_size = 0;
                kse_exit();
                PANIC("kse_exit() failed for system scope thread");
        }
        KSE_SCHED_LOCK(curkse, curkse->k_kseg);
}

void
_thr_gc(struct pthread *curthread)
{
        thread_gc(curthread);
        kse_gc(curthread);
        kseg_gc(curthread);
}

static void
thread_gc(struct pthread *curthread)
{
        struct pthread *td, *td_next;
        kse_critical_t crit;
        TAILQ_HEAD(, pthread) worklist;

        TAILQ_INIT(&worklist);
        crit = _kse_critical_enter();
        KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);

        /* Check the threads waiting for GC. */
        for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = td_next) {
                td_next = TAILQ_NEXT(td, gcle);
                if ((td->tlflags & TLFLAGS_GC_SAFE) == 0)
                        continue;
                else if (((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) &&
                    ((td->kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
                        /*
                         * The thread and KSE are operating on the same
                         * stack.  Wait for the KSE to exit before freeing
                         * the thread's stack as well as everything else.
                         */
                        continue;
                }
                /*
                 * Remove the thread from the GC list.  If the thread
                 * isn't yet detached, it will get added back to the
                 * GC list at a later time.
                 */
                THR_GCLIST_REMOVE(td);
                DBG_MSG("Freeing thread %p stack\n", td);
                /*
                 * We can free the thread stack since it's no longer
                 * in use.
                 */
                _thr_stack_free(&td->attr);
                if (((td->attr.flags & PTHREAD_DETACHED) != 0) &&
                    (td->refcount == 0)) {
                        /*
                         * The thread has detached and is no longer
                         * referenced.  It is safe to remove all
                         * remnants of the thread.
                         */
                        THR_LIST_REMOVE(td);
                        TAILQ_INSERT_HEAD(&worklist, td, gcle);
                }
        }
        KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
        _kse_critical_leave(crit);

        while ((td = TAILQ_FIRST(&worklist)) != NULL) {
                TAILQ_REMOVE(&worklist, td, gcle);
                /*
                 * XXX we don't free the initial thread or its KSE
                 * (if the thread is a bound thread), because some
                 * code might still reference the initial thread
                 * and KSE.
                 */
                if (td == _thr_initial) {
                        DBG_MSG("Initial thread won't be freed\n");
                        continue;
                }

                if ((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
                        crit = _kse_critical_enter();
                        KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
                        kse_free_unlocked(td->kse);
                        kseg_free_unlocked(td->kseg);
                        KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
                        _kse_critical_leave(crit);
                }
                DBG_MSG("Freeing thread %p\n", td);
                _thr_free(curthread, td);
        }
}
1328
1329 static void
1330 kse_gc(struct pthread *curthread)
1331 {
1332         kse_critical_t crit;
1333         TAILQ_HEAD(, kse) worklist;
1334         struct kse *kse;
1335
1336         if (free_kse_count <= MAX_CACHED_KSES)
1337                 return;
1338         TAILQ_INIT(&worklist);
1339         crit = _kse_critical_enter();
1340         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1341         while (free_kse_count > MAX_CACHED_KSES) {
1342                 kse = TAILQ_FIRST(&free_kseq);
1343                 TAILQ_REMOVE(&free_kseq, kse, k_qe);
1344                 TAILQ_INSERT_HEAD(&worklist, kse, k_qe);
1345                 free_kse_count--;
1346         }
1347         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1348         _kse_critical_leave(crit);
1349
1350         while ((kse = TAILQ_FIRST(&worklist))) {
1351                 TAILQ_REMOVE(&worklist, kse, k_qe);
1352                 kse_destroy(kse);
1353         }
1354 }
1355
1356 static void
1357 kseg_gc(struct pthread *curthread)
1358 {
1359         kse_critical_t crit;
1360         TAILQ_HEAD(, kse_group) worklist;
1361         struct kse_group *kseg;
1362
1363         if (free_kseg_count <= MAX_CACHED_KSEGS)
1364                 return; 
1365         TAILQ_INIT(&worklist);
1366         crit = _kse_critical_enter();
1367         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1368         while (free_kseg_count > MAX_CACHED_KSEGS) {
1369                 kseg = TAILQ_FIRST(&free_kse_groupq);
1370                 TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
1371                 free_kseg_count--;
1372                 TAILQ_INSERT_HEAD(&worklist, kseg, kg_qe);
1373         }
1374         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1375         _kse_critical_leave(crit);
1376
1377         while ((kseg = TAILQ_FIRST(&worklist)) != NULL) {
1378                 TAILQ_REMOVE(&worklist, kseg, kg_qe);
1379                 kseg_destroy(kseg);
1380         }
1381 }
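
/*
 * Both collectors above follow the same pattern: excess entries are
 * moved from the shared free list onto a private worklist while
 * kse_lock is held, and are only destroyed after the lock has been
 * released, so destructor work never runs inside a critical region.
 * A minimal sketch of the pattern (names are illustrative only):
 *
 *      crit = _kse_critical_enter();
 *      KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
 *      while (free_count > high_water_mark)
 *              move one entry from the free list to the worklist;
 *      KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
 *      _kse_critical_leave(crit);
 *      destroy every entry on the worklist;
 */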
1382
1383 /*
1384  * Only new threads that are running or suspended may be scheduled.
1385  */
1386 int
1387 _thr_schedule_add(struct pthread *curthread, struct pthread *newthread)
1388 {
1389         kse_critical_t crit;
1390         int ret;
1391
1392         /* Add the new thread. */
1393         thr_link(newthread);
1394
1395         /*
1396          * A system scope (1:1) thread is bound to its own KSE,
1397          * so set up the KSE's mailbox before firing it up.
1398          */
1399         if ((newthread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1400                 /* We use the thread's stack as the KSE's stack. */
1401                 newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_sp =
1402                     newthread->attr.stackaddr_attr;
1403                 newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_size =
1404                     newthread->attr.stacksize_attr;
1405
1406                 /*
1407                  * No need to lock the scheduling queue since the
1408                  * KSE/KSEG pair have not yet been started.
1409                  */
1410                 KSEG_THRQ_ADD(newthread->kseg, newthread);
1411                 /* This thread never gives up its KSE. */
1412                 newthread->active = 1;
1413                 newthread->kse->k_curthread = newthread;
1414                 newthread->kse->k_kcb->kcb_kmbx.km_flags = KMF_BOUND;
1415                 newthread->kse->k_kcb->kcb_kmbx.km_func =
1416                     (kse_func_t *)kse_sched_single;
1417                 newthread->kse->k_kcb->kcb_kmbx.km_quantum = 0;
1418                 KSE_SET_MBOX(newthread->kse, newthread);
1419                 /*
1420                  * This thread needs a new KSE and KSEG.
1421                  */
1422                 newthread->kse->k_flags &= ~KF_INITIALIZED;
1423                 newthread->kse->k_flags |= KF_STARTED;
1424                 /* Fire up! */
1425                 ret = kse_create(&newthread->kse->k_kcb->kcb_kmbx, 1);
1426                 if (ret != 0)
1427                         ret = errno;
1428         }
1429         else {
1430                 /*
1431                  * Lock the KSE and add the new thread to its list of
1432                  * assigned threads.  If the new thread is runnable, also
1433                  * add it to the KSE's run queue.
1434                  */
1435                 crit = _kse_critical_enter();
1436                 KSE_SCHED_LOCK(curthread->kse, newthread->kseg);
1437                 KSEG_THRQ_ADD(newthread->kseg, newthread);
1438                 if (newthread->state == PS_RUNNING)
1439                         THR_RUNQ_INSERT_TAIL(newthread);
1440                 if ((newthread->kse->k_flags & KF_STARTED) == 0) {
1441                         /*
1442                          * This KSE hasn't been started yet; mark it
1443                          * started and fire it up.
1444                          */
1445                         newthread->kse->k_flags |= KF_STARTED;
1446                         newthread->kse->k_kcb->kcb_kmbx.km_func =
1447                             (kse_func_t *)kse_sched_multi;
1448                         newthread->kse->k_kcb->kcb_kmbx.km_flags = 0;
1449                         kse_create(&newthread->kse->k_kcb->kcb_kmbx, 0);
1450                 } else if ((newthread->state == PS_RUNNING) &&
1451                      KSE_IS_IDLE(newthread->kse)) {
1452                         /*
1453                          * The thread is being scheduled on another KSEG;
1454                          * wake an idle KSE there to run it.
1454                          */
1455                         kse_wakeup_one(newthread);
1456                 }
1457                 KSE_SCHED_UNLOCK(curthread->kse, newthread->kseg);
1458                 _kse_critical_leave(crit);
1459                 ret = 0;
1460         }
1461         if (ret != 0)
1462                 thr_unlink(newthread);
1463
1464         return (ret);
1465 }
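
/*
 * Example of the two paths above: for a PTHREAD_SCOPE_SYSTEM thread,
 * _thr_schedule_add() returns errno if kse_create() fails, and the
 * new thread is unlinked again before returning; for a process-scope
 * thread it always returns 0, since the thread is merely queued on an
 * existing KSEG.
 */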
1466
1467 void
1468 kse_waitq_insert(struct pthread *thread)
1469 {
1470         struct pthread *td;
1471
1472         if (thread->wakeup_time.tv_sec == -1)
1473                 TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, thread,
1474                     pqe);
1475         else {
1476                 td = TAILQ_FIRST(&thread->kse->k_schedq->sq_waitq);
1477                 while ((td != NULL) && (td->wakeup_time.tv_sec != -1) &&
1478                     ((td->wakeup_time.tv_sec < thread->wakeup_time.tv_sec) ||
1479                     ((td->wakeup_time.tv_sec == thread->wakeup_time.tv_sec) &&
1480                     (td->wakeup_time.tv_nsec <= thread->wakeup_time.tv_nsec))))
1481                         td = TAILQ_NEXT(td, pqe);
1482                 if (td == NULL)
1483                         TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq,
1484                             thread, pqe);
1485                 else
1486                         TAILQ_INSERT_BEFORE(td, thread, pqe);
1487         }
1488         thread->flags |= THR_FLAGS_IN_WAITQ;
1489 }
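
/*
 * Example of the ordering kse_waitq_insert() maintains, assuming
 * waiters with wakeup times {2,0} and {5,0} are already queued:
 * inserting a thread with wakeup_time {3,0} places it between them,
 * while inserting a thread with tv_sec == -1 (no timeout) appends it
 * at the tail, after all timed waiters.
 */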
1490
1491 /*
1492  * This must be called with the scheduling lock held.
1493  */
1494 static void
1495 kse_check_completed(struct kse *kse)
1496 {
1497         struct pthread *thread;
1498         struct kse_thr_mailbox *completed;
1499         int sig;
1500
1501         if ((completed = kse->k_kcb->kcb_kmbx.km_completed) != NULL) {
1502                 kse->k_kcb->kcb_kmbx.km_completed = NULL;
1503                 while (completed != NULL) {
1504                         thread = completed->tm_udata;
1505                         DBG_MSG("Found completed thread %p, name %s\n",
1506                             thread,
1507                             (thread->name == NULL) ? "none" : thread->name);
1508                         thread->blocked = 0;
1509                         if (thread != kse->k_curthread) {
1510                                 thr_accounting(thread);
1511                                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
1512                                         THR_SET_STATE(thread, PS_SUSPENDED);
1513                                 else
1514                                         KSE_RUNQ_INSERT_TAIL(kse, thread);
1515                                 if ((thread->kse != kse) &&
1516                                     (thread->kse->k_curthread == thread)) {
1517                                         /*
1518                                          * Remove this thread from its
1519                                          * previous KSE so that it (the KSE)
1520                                          * doesn't think it is still active.
1521                                          */
1522                                         thread->kse->k_curthread = NULL;
1523                                         thread->active = 0;
1524                                 }
1525                         }
1526                         if ((sig = thread->tcb->tcb_tmbx.tm_syncsig.si_signo)
1527                             != 0) {
1528                                 if (SIGISMEMBER(thread->sigmask, sig))
1529                                         SIGADDSET(thread->sigpend, sig);
1530                                 else if (THR_IN_CRITICAL(thread))
1531                                         kse_thr_interrupt(NULL, KSE_INTR_SIGEXIT, sig);
1532                                 else
1533                                         (void)_thr_sig_add(thread, sig,
1534                                             &thread->tcb->tcb_tmbx.tm_syncsig);
1535                                 thread->tcb->tcb_tmbx.tm_syncsig.si_signo = 0;
1536                         }
1537                         completed = completed->tm_next;
1538                 }
1539         }
1540 }
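
/*
 * Example of the synchronous-signal triage above, assuming a completed
 * thread reports SIGBUS in tm_syncsig: the signal is left pending in
 * the thread's sigpend set if the thread has it masked; if the thread
 * is inside a critical region it is escalated with KSE_INTR_SIGEXIT,
 * since it cannot be delivered safely there; otherwise it is delivered
 * immediately via _thr_sig_add().
 */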
1541
1542 /*
1543  * This must be called with the scheduling lock held.
1544  */
1545 static void
1546 kse_check_waitq(struct kse *kse)
1547 {
1548         struct pthread  *pthread;
1549         struct timespec ts;
1550
1551         KSE_GET_TOD(kse, &ts);
1552
1553         /*
1554          * Wake up threads that have timed out.  This has to be
1555          * done before adding the current thread to the run queue
1556          * so that a CPU-intensive thread doesn't get preference
1557          * over waiting threads.
1558          */
1559         while (((pthread = KSE_WAITQ_FIRST(kse)) != NULL) &&
1560             thr_timedout(pthread, &ts)) {
1561                 /* Remove the thread from the wait queue: */
1562                 KSE_WAITQ_REMOVE(kse, pthread);
1563                 DBG_MSG("Found timed-out thread %p in waitq\n", pthread);
1564
1565                 /* Indicate that the thread timed out: */
1566                 pthread->timeout = 1;
1567
1568                 /* Add the thread to the priority queue: */
1569                 if ((pthread->flags & THR_FLAGS_SUSPENDED) != 0)
1570                         THR_SET_STATE(pthread, PS_SUSPENDED);
1571                 else {
1572                         THR_SET_STATE(pthread, PS_RUNNING);
1573                         KSE_RUNQ_INSERT_TAIL(kse, pthread);
1574                 }
1575         }
1576 }
1577
1578 static int
1579 thr_timedout(struct pthread *thread, struct timespec *curtime)
1580 {
1581         if (thread->wakeup_time.tv_sec < 0)
1582                 return (0);
1583         else if (thread->wakeup_time.tv_sec > curtime->tv_sec)
1584                 return (0);
1585         else if ((thread->wakeup_time.tv_sec == curtime->tv_sec) &&
1586             (thread->wakeup_time.tv_nsec > curtime->tv_nsec))
1587                 return (0);
1588         else
1589                 return (1);
1590 }
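
/*
 * Example: with curtime = {10, 500000000}, thr_timedout() returns 1
 * for a wakeup_time of {10, 400000000} (already passed), and 0 for
 * {10, 600000000}, {11, 0}, or a tv_sec of -1 (no timeout).
 */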
1591
1592 /*
1593  * This must be called with the scheduling lock held.
1594  *
1595  * Each thread has a time slice, a wakeup time (used when it wants
1596  * to wait for a specified amount of time), a run state, and an
1597  * active flag.
1598  *
1599  * When a thread gets run by the scheduler, the active flag is
1600  * set to non-zero (1).  When a thread performs an explicit yield
1601  * or schedules a state change, it enters the scheduler and the
1602  * active flag is cleared.  When the active flag is still seen
1603  * set in the scheduler, that means that the thread is blocked in
1604  * the kernel (because it is cleared before entering the scheduler
1605  * in all other instances).
1606  *
1607  * The wakeup time is only set for those states that can time out.
1608  * It is set to (-1, -1) for all other instances.
1609  *
1610  * The thread's run state, aside from being useful when debugging,
1611  * is used to place the thread in an appropriate queue.  There
1612  * are two basic queues:
1613  *
1614  *   o run queue - queue ordered by priority for all threads
1615  *                 that are runnable
1616  *   o waiting queue - queue sorted by wakeup time for all threads
1617  *                     that are not otherwise runnable (not blocked
1618  *                     in kernel, not waiting for locks)
1619  *
1620  * The thread's time slice is used for round-robin scheduling
1621  * (the default scheduling policy).  While a SCHED_RR thread
1622  * is runnable, its time slice accumulates.  When it reaches
1623  * the time slice interval, it gets reset and the thread is
1624  * added to the end of the queue of threads at its priority.
1625  * When a thread is no longer runnable (blocks in the kernel,
1626  * waits, etc.), its time slice is reset.
1627  *
1628  * The job of kse_switchout_thread() is to handle all of the above.
1629  */
1630 static void
1631 kse_switchout_thread(struct kse *kse, struct pthread *thread)
1632 {
1633         int level;
1634         int i;
1635         int restart;
1636         siginfo_t siginfo;
1637
1638         /*
1639          * Place the currently running thread into the
1640          * appropriate queue(s).
1641          */
1642         DBG_MSG("Switching out thread %p, state %d\n", thread, thread->state);
1643
1644         THR_DEACTIVATE_LAST_LOCK(thread);
1645         if (thread->blocked != 0) {
1646                 thread->active = 0;
1647                 thread->need_switchout = 0;
1648                 /* This thread must have blocked in the kernel. */
1649                 /*
1650                  * Check for pending signals and cancellation for
1651                  * this thread to see if we need to interrupt it
1652                  * in the kernel.
1653                  */
1654                 if (THR_NEED_CANCEL(thread)) {
1655                         kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1656                                           KSE_INTR_INTERRUPT, 0);
1657                 } else if (thread->check_pending != 0) {
1658                         for (i = 1; i <= _SIG_MAXSIG; ++i) {
1659                                 if (SIGISMEMBER(thread->sigpend, i) &&
1660                                     !SIGISMEMBER(thread->sigmask, i)) {
1661                                         restart = _thread_sigact[i - 1].sa_flags & SA_RESTART;
1662                                         kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1663                                             restart ? KSE_INTR_RESTART : KSE_INTR_INTERRUPT, 0);
1664                                         break;
1665                                 }
1666                         }
1667                 }
1668         }
1669         else {
1670                 switch (thread->state) {
1671                 case PS_MUTEX_WAIT:
1672                 case PS_COND_WAIT:
1673                         if (THR_NEED_CANCEL(thread)) {
1674                                 thread->interrupted = 1;
1675                                 thread->continuation = _thr_finish_cancellation;
1676                                 THR_SET_STATE(thread, PS_RUNNING);
1677                         } else {
1678                                 /* Insert into the waiting queue: */
1679                                 KSE_WAITQ_INSERT(kse, thread);
1680                         }
1681                         break;
1682
1683                 case PS_LOCKWAIT:
1684                         /*
1685                          * This state doesn't time out.
1686                          */
1687                         thread->wakeup_time.tv_sec = -1;
1688                         thread->wakeup_time.tv_nsec = -1;
1689                         level = thread->locklevel - 1;
1690                         if (!_LCK_GRANTED(&thread->lockusers[level]))
1691                                 KSE_WAITQ_INSERT(kse, thread);
1692                         else
1693                                 THR_SET_STATE(thread, PS_RUNNING);
1694                         break;
1695
1696                 case PS_SLEEP_WAIT:
1697                 case PS_SIGWAIT:
1698                         if (THR_NEED_CANCEL(thread)) {
1699                                 thread->interrupted = 1;
1700                                 THR_SET_STATE(thread, PS_RUNNING);
1701                         } else {
1702                                 KSE_WAITQ_INSERT(kse, thread);
1703                         }
1704                         break;
1705
1706                 case PS_JOIN:
1707                         if (THR_NEED_CANCEL(thread)) {
1708                                 thread->join_status.thread = NULL;
1709                                 THR_SET_STATE(thread, PS_RUNNING);
1710                         } else {
1711                                 /*
1712                                  * This state doesn't time out.
1713                                  */
1714                                 thread->wakeup_time.tv_sec = -1;
1715                                 thread->wakeup_time.tv_nsec = -1;
1716
1717                                 /* Insert into the waiting queue: */
1718                                 KSE_WAITQ_INSERT(kse, thread);
1719                         }
1720                         break;
1721
1722                 case PS_SIGSUSPEND:
1723                 case PS_SUSPENDED:
1724                         if (THR_NEED_CANCEL(thread)) {
1725                                 thread->interrupted = 1;
1726                                 THR_SET_STATE(thread, PS_RUNNING);
1727                         } else {
1728                                 /*
1729                                  * These states don't time out.
1730                                  */
1731                                 thread->wakeup_time.tv_sec = -1;
1732                                 thread->wakeup_time.tv_nsec = -1;
1733
1734                                 /* Insert into the waiting queue: */
1735                                 KSE_WAITQ_INSERT(kse, thread);
1736                         }
1737                         break;
1738
1739                 case PS_DEAD:
1740                         /*
1741                          * The scheduler is operating on a different
1742                          * stack.  It is safe to do garbage collecting
1743                          * here.
1744                          */
1745                         thr_cleanup(kse, thread);
1746                         return;
1747                         break;
1748
1749                 case PS_RUNNING:
1750                         if ((thread->flags & THR_FLAGS_SUSPENDED) != 0 &&
1751                             !THR_NEED_CANCEL(thread))
1752                                 THR_SET_STATE(thread, PS_SUSPENDED);
1753                         break;
1754
1755                 case PS_DEADLOCK:
1756                         /*
1757                          * These states don't time out.
1758                          */
1759                         thread->wakeup_time.tv_sec = -1;
1760                         thread->wakeup_time.tv_nsec = -1;
1761
1762                         /* Insert into the waiting queue: */
1763                         KSE_WAITQ_INSERT(kse, thread);
1764                         break;
1765
1766                 default:
1767                         PANIC("Unknown state\n");
1768                         break;
1769                 }
1770
1771                 thr_accounting(thread);
1772                 if (thread->state == PS_RUNNING) {
1773                         if (thread->slice_usec == -1) {
1774                                 /*
1775                                  * The thread exceeded its time quantum or
1776                                  * it yielded the CPU; place it at the tail
1777                                  * of the queue for its priority.
1778                                  */
1779                                 KSE_RUNQ_INSERT_TAIL(kse, thread);
1780                         } else {
1781                                 /*
1782                                  * The thread hasn't exceeded its interval.
1783                                  * Place it at the head of the queue for its
1784                                  * priority.
1785                                  */
1786                                 KSE_RUNQ_INSERT_HEAD(kse, thread);
1787                         }
1788                 }
1789         }
1790         thread->active = 0;
1791         thread->need_switchout = 0;
1792         if (thread->check_pending != 0) {
1793                 /* Install pending signals into the frame. */
1794                 thread->check_pending = 0;
1795                 KSE_LOCK_ACQUIRE(kse, &_thread_signal_lock);
1796                 for (i = 1; i <= _SIG_MAXSIG; i++) {
1797                         if (SIGISMEMBER(thread->sigmask, i))
1798                                 continue;
1799                         if (SIGISMEMBER(thread->sigpend, i))
1800                                 (void)_thr_sig_add(thread, i,
1801                                     &thread->siginfo[i-1]);
1802                         else if (SIGISMEMBER(_thr_proc_sigpending, i) &&
1803                                 _thr_getprocsig_unlocked(i, &siginfo)) {
1804                                 (void)_thr_sig_add(thread, i, &siginfo);
1805                         }
1806                 }
1807                 KSE_LOCK_RELEASE(kse, &_thread_signal_lock);
1808         }
1809 }
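
/*
 * Example of the time-slice handling above: a SCHED_RR thread whose
 * accumulated slice exceeded TIMESLICE_USEC has slice_usec set to -1
 * by thr_accounting() (an explicit yield sets the same marker), so it
 * goes to the tail of its priority queue; a thread switched out with
 * part of its quantum remaining returns to the head and runs first.
 */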
1810
1811 /*
1812  * This function waits for the smallest timeout value of any waiting
1813  * thread, or until it receives a message from another KSE.
1814  *
1815  * This must be called with the scheduling lock held.
1816  */
1817 static void
1818 kse_wait(struct kse *kse, struct pthread *td_wait, int sigseqno)
1819 {
1820         struct timespec ts, ts_sleep;
1821         int saved_flags;
1822
1823         if ((td_wait == NULL) || (td_wait->wakeup_time.tv_sec < 0)) {
1824                 /* Limit sleep to no more than 1 minute. */
1825                 ts_sleep.tv_sec = 60;
1826                 ts_sleep.tv_nsec = 0;
1827         } else {
1828                 KSE_GET_TOD(kse, &ts);
1829                 TIMESPEC_SUB(&ts_sleep, &td_wait->wakeup_time, &ts);
1830                 if (ts_sleep.tv_sec > 60) {
1831                         ts_sleep.tv_sec = 60;
1832                         ts_sleep.tv_nsec = 0;
1833                 }
1834         }
1835         /* Don't sleep for negative times. */
1836         if ((ts_sleep.tv_sec >= 0) && (ts_sleep.tv_nsec >= 0)) {
1837                 KSE_SET_IDLE(kse);
1838                 kse->k_kseg->kg_idle_kses++;
1839                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1840                 if ((kse->k_kseg->kg_flags & KGF_SINGLE_THREAD) &&
1841                     (kse->k_sigseqno != sigseqno))
1842                         ; /* don't sleep */
1843                 else {
1844                         saved_flags = kse->k_kcb->kcb_kmbx.km_flags;
1845                         kse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL;
1846                         kse_release(&ts_sleep);
1847                         kse->k_kcb->kcb_kmbx.km_flags = saved_flags;
1848                 }
1849                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1850                 if (KSE_IS_IDLE(kse)) {
1851                         KSE_CLEAR_IDLE(kse);
1852                         kse->k_kseg->kg_idle_kses--;
1853                 }
1854         }
1855 }
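
/*
 * Example: if the earliest waiter wakes at now + 90 seconds,
 * TIMESPEC_SUB() yields ts_sleep = {90, 0}, which the code above
 * clamps to {60, 0}; an idle KSE therefore sleeps at most one minute
 * in kse_release() before rechecking its queues.
 */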
1856
1857 /*
1858  * Avoid calling this kse_exit() so as not to confuse it with the
1859  * system call of the same name.
1860  */
1861 static void
1862 kse_fini(struct kse *kse)
1863 {
1864         /* struct kse_group *free_kseg = NULL; */
1865         struct timespec ts;
1866         struct pthread *td;
1867
1868         /*
1869          * Check to see if this is one of the main kses.
1870          */
1871         if (kse->k_kseg != _kse_initial->k_kseg) {
1872                 PANIC("shouldn't get here");
1873                 /* This is for supporting thread groups. */
1874 #ifdef NOT_YET
1875                 /* Remove this KSE from the KSEG's list of KSEs. */
1876                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1877                 TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1878                 kse->k_kseg->kg_ksecount--;
1879                 if (TAILQ_EMPTY(&kse->k_kseg->kg_kseq))
1880                         free_kseg = kse->k_kseg;
1881                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1882
1883                 /*
1884                  * Add this KSE to the list of free KSEs along with
1885                  * the KSEG if it is now orphaned.
1886                  */
1887                 KSE_LOCK_ACQUIRE(kse, &kse_lock);
1888                 if (free_kseg != NULL)
1889                         kseg_free_unlocked(free_kseg);
1890                 kse_free_unlocked(kse);
1891                 KSE_LOCK_RELEASE(kse, &kse_lock);
1892                 kse_exit();
1893                 /* Never returns. */
1894                 PANIC("kse_exit()");
1895 #endif
1896         } else {
1897                 /*
1898                  * We allow the program to kill KSEs in the initial
1899                  * group (by lowering the concurrency level).
1900                  */
1901                 if ((kse != _kse_initial) &&
1902                     ((kse->k_flags & KF_TERMINATED) != 0)) {
1903                         KSE_SCHED_LOCK(kse, kse->k_kseg);
1904                         TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1905                         kse->k_kseg->kg_ksecount--;
1906                         /*
1907                          * Migrate threads to _kse_initial if the last
1908                          * KSE they ran on is this one.
1909                          */
1910                         td = TAILQ_FIRST(&kse->k_kseg->kg_threadq);
1911                         while (td != NULL) {
1912                                 if (td->kse == kse)
1913                                         td->kse = _kse_initial;
1914                                 td = TAILQ_NEXT(td, kle);
1915                         }
1916                         KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1917                         KSE_LOCK_ACQUIRE(kse, &kse_lock);
1918                         kse_free_unlocked(kse);
1919                         KSE_LOCK_RELEASE(kse, &kse_lock);
1920                         /* Make sure at least one KSE is always awake. */
1921                         KSE_WAKEUP(_kse_initial);
1922                         kse_exit();
1923                         /* Never returns. */
1924                         PANIC("kse_exit() failed for initial kseg");
1925                 }
1926                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1927                 KSE_SET_IDLE(kse);
1928                 kse->k_kseg->kg_idle_kses++;
1929                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1930                 ts.tv_sec = 120;
1931                 ts.tv_nsec = 0;
1932                 kse->k_kcb->kcb_kmbx.km_flags = 0;
1933                 kse_release(&ts);
1934                 /* Never reached. */
1935         }
1936 }
1937
1938 void
1939 _thr_set_timeout(const struct timespec *timeout)
1940 {
1941         struct pthread  *curthread = _get_curthread();
1942         struct timespec ts;
1943
1944         /* Reset the timeout flag for the running thread: */
1945         curthread->timeout = 0;
1946
1947         /* Check if the thread is to wait forever: */
1948         if (timeout == NULL) {
1949                 /*
1950                  * Set the wakeup time to something that can be recognized
1951                  * as different from an actual time of day:
1952                  */
1953                 curthread->wakeup_time.tv_sec = -1;
1954                 curthread->wakeup_time.tv_nsec = -1;
1955         }
1956         /* Check if no waiting is required: */
1957         else if ((timeout->tv_sec == 0) && (timeout->tv_nsec == 0)) {
1958                 /* Set the wake up time to 'immediately': */
1959                 curthread->wakeup_time.tv_sec = 0;
1960                 curthread->wakeup_time.tv_nsec = 0;
1961         } else {
1962                 /* Calculate the time for the current thread to wakeup: */
1963                 KSE_GET_TOD(curthread->kse, &ts);
1964                 TIMESPEC_ADD(&curthread->wakeup_time, &ts, timeout);
1965         }
1966 }
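
/*
 * Example: a caller waiting for a relative 2.5 seconds passes
 * timeout = {2, 500000000}; if KSE_GET_TOD() reports {100, 0}, the
 * thread's wakeup_time becomes the absolute {102, 500000000}.
 * Passing NULL yields {-1, -1} (wait forever), and {0, 0} means
 * wake immediately.
 */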
1967
1968 void
1969 _thr_panic_exit(char *file, int line, char *msg)
1970 {
1971         char buf[256];
1972
1973         snprintf(buf, sizeof(buf), "(%s:%d) %s\n", file, line, msg);
1974         __sys_write(2, buf, strlen(buf));
1975         abort();
1976 }
1977
1978 void
1979 _thr_setrunnable(struct pthread *curthread, struct pthread *thread)
1980 {
1981         kse_critical_t crit;
1982         struct kse_mailbox *kmbx;
1983
1984         crit = _kse_critical_enter();
1985         KSE_SCHED_LOCK(curthread->kse, thread->kseg);
1986         kmbx = _thr_setrunnable_unlocked(thread);
1987         KSE_SCHED_UNLOCK(curthread->kse, thread->kseg);
1988         _kse_critical_leave(crit);
1989         if ((kmbx != NULL) && (__isthreaded != 0))
1990                 kse_wakeup(kmbx);
1991 }
1992
1993 struct kse_mailbox *
1994 _thr_setrunnable_unlocked(struct pthread *thread)
1995 {
1996         struct kse_mailbox *kmbx = NULL;
1997
1998         if ((thread->kseg->kg_flags & KGF_SINGLE_THREAD) != 0) {
1999                 /* No silly queues for these threads. */
2000                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
2001                         THR_SET_STATE(thread, PS_SUSPENDED);
2002                 else {
2003                         THR_SET_STATE(thread, PS_RUNNING);
2004                         kmbx = kse_wakeup_one(thread);
2005                 }
2006
2007         } else if (thread->state != PS_RUNNING) {
2008                 if ((thread->flags & THR_FLAGS_IN_WAITQ) != 0)
2009                         KSE_WAITQ_REMOVE(thread->kse, thread);
2010                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
2011                         THR_SET_STATE(thread, PS_SUSPENDED);
2012                 else {
2013                         THR_SET_STATE(thread, PS_RUNNING);
2014                         if ((thread->blocked == 0) && (thread->active == 0) &&
2015                             (thread->flags & THR_FLAGS_IN_RUNQ) == 0)
2016                                 THR_RUNQ_INSERT_TAIL(thread);
2017                         /*
2018                          * XXX - Threads are not yet assigned to specific
2019                          *       KSEs; they are assigned to the KSEG.  So
2020                          *       the fact that a thread's KSE is waiting
2021                          *       doesn't necessarily mean that it will be
2022                          *       the KSE that runs the thread after the
2023                          *       lock is granted.  But we don't know if the
2024                          *       other KSEs within the same KSEG are also
2025                          *       in a waiting state or not, so we err on the
2026                          *       side of caution and wake up the thread's
2027                          *       last known KSE.  We ensure that the
2028                          *       thread's KSE doesn't change while its
2029                          *       scheduling lock is held, so it is safe to
2030                          *       reference it (the KSE).  If the KSE wakes
2031                          *       up and doesn't find any more work, it will
2032                          *       go back to waiting again, so no harm is
2033                          *       done.
2034                          */
2035                         kmbx = kse_wakeup_one(thread);
2036                 }
2037         }
2038         return (kmbx);
2039 }
2040
2041 static struct kse_mailbox *
2042 kse_wakeup_one(struct pthread *thread)
2043 {
2044         struct kse *ke;
2045
2046         if (KSE_IS_IDLE(thread->kse)) {
2047                 KSE_CLEAR_IDLE(thread->kse);
2048                 thread->kseg->kg_idle_kses--;
2049                 return (&thread->kse->k_kcb->kcb_kmbx);
2050         } else {
2051                 TAILQ_FOREACH(ke, &thread->kseg->kg_kseq, k_kgqe) {
2052                         if (KSE_IS_IDLE(ke)) {
2053                                 KSE_CLEAR_IDLE(ke);
2054                                 ke->k_kseg->kg_idle_kses--;
2055                                 return (&ke->k_kcb->kcb_kmbx);
2056                         }
2057                 }
2058         }
2059         return (NULL);
2060 }
2061
2062 static void
2063 kse_wakeup_multi(struct kse *curkse)
2064 {
2065         struct kse *ke;
2066         int tmp;
2067
2068         if ((tmp = KSE_RUNQ_THREADS(curkse)) && curkse->k_kseg->kg_idle_kses) {
2069                 TAILQ_FOREACH(ke, &curkse->k_kseg->kg_kseq, k_kgqe) {
2070                         if (KSE_IS_IDLE(ke)) {
2071                                 KSE_CLEAR_IDLE(ke);
2072                                 ke->k_kseg->kg_idle_kses--;
2073                                 KSE_WAKEUP(ke);
2074                                 if (--tmp == 0)
2075                                         break;
2076                         }
2077                 }
2078         }
2079 }
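
/*
 * Example: with two threads on curkse's run queue and three idle KSEs
 * in the group, kse_wakeup_multi() wakes exactly two KSEs and stops
 * (tmp reaches 0); with three runnable threads but only two idle
 * KSEs, it wakes both and the loop ends when the KSE list is
 * exhausted.
 */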
2080
2081 /*
2082  * Allocate a new KSEG.
2083  *
2084  * We allow the current thread to be NULL in the case that this
2085  * is the first time a KSEG is being created (library initialization).
2086  * In this case, we don't need to (and can't) take any locks.
2087  */
2088 struct kse_group *
2089 _kseg_alloc(struct pthread *curthread)
2090 {
2091         struct kse_group *kseg = NULL;
2092         kse_critical_t crit;
2093
2094         if ((curthread != NULL) && (free_kseg_count > 0)) {
2095                 /* Use the kse lock for the kseg queue. */
2096                 crit = _kse_critical_enter();
2097                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2098                 if ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
2099                         TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
2100                         free_kseg_count--;
2101                         active_kseg_count++;
2102                         TAILQ_INSERT_TAIL(&active_kse_groupq, kseg, kg_qe);
2103                 }
2104                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2105                 _kse_critical_leave(crit);
2106                 if (kseg)
2107                         kseg_reinit(kseg);
2108         }
2109
2110         /*
2111          * If a KSE group wasn't found in the free list, attempt to
2112          * allocate a new one.
2114          */
2115         if ((kseg == NULL) &&
2116             ((kseg = (struct kse_group *)malloc(sizeof(*kseg))) != NULL)) {
2117                 if (_pq_alloc(&kseg->kg_schedq.sq_runq,
2118                     THR_MIN_PRIORITY, THR_LAST_PRIORITY) != 0) {
2119                         free(kseg);
2120                         kseg = NULL;
2121                 } else {
2122                         kseg_init(kseg);
2123                         /* Add the KSEG to the list of active KSEGs. */
2124                         if (curthread != NULL) {
2125                                 crit = _kse_critical_enter();
2126                                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2127                                 active_kseg_count++;
2128                                 TAILQ_INSERT_TAIL(&active_kse_groupq,
2129                                     kseg, kg_qe);
2130                                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2131                                 _kse_critical_leave(crit);
2132                         } else {
2133                                 active_kseg_count++;
2134                                 TAILQ_INSERT_TAIL(&active_kse_groupq,
2135                                     kseg, kg_qe);
2136                         }
2137                 }
2138         }
2139         return (kseg);
2140 }
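
/*
 * Example: the first call, _kseg_alloc(NULL) during library
 * initialization, skips the free-list lookup entirely (no locks can
 * be taken yet) and links the new KSEG without locking; subsequent
 * calls with a valid curthread recycle cached KSEGs via
 * kseg_reinit() before falling back to malloc().
 */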
2141
2142 static void
2143 kseg_init(struct kse_group *kseg)
2144 {
2145         kseg_reinit(kseg);
2146         _lock_init(&kseg->kg_lock, LCK_ADAPTIVE, _kse_lock_wait,
2147             _kse_lock_wakeup, calloc);
2148 }
2149
2150 static void
2151 kseg_reinit(struct kse_group *kseg)
2152 {
2153         TAILQ_INIT(&kseg->kg_kseq);
2154         TAILQ_INIT(&kseg->kg_threadq);
2155         TAILQ_INIT(&kseg->kg_schedq.sq_waitq);
2156         kseg->kg_threadcount = 0;
2157         kseg->kg_ksecount = 0;
2158         kseg->kg_idle_kses = 0;
2159         kseg->kg_flags = 0;
2160 }
2161
2162 /*
2163  * This must be called with the kse lock held and when there are
2164  * no more threads that reference it.
2165  */
2166 static void
2167 kseg_free_unlocked(struct kse_group *kseg)
2168 {
2169         TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
2170         TAILQ_INSERT_HEAD(&free_kse_groupq, kseg, kg_qe);
2171         free_kseg_count++;
2172         active_kseg_count--;
2173 }
2174
2175 void
2176 _kseg_free(struct kse_group *kseg)
2177 {
2178         struct kse *curkse;
2179         kse_critical_t crit;
2180
2181         crit = _kse_critical_enter();
2182         curkse = _get_curkse();
2183         KSE_LOCK_ACQUIRE(curkse, &kse_lock);
2184         kseg_free_unlocked(kseg);
2185         KSE_LOCK_RELEASE(curkse, &kse_lock);
2186         _kse_critical_leave(crit);
2187 }
2188
2189 static void
2190 kseg_destroy(struct kse_group *kseg)
2191 {
2192         _lock_destroy(&kseg->kg_lock);
2193         _pq_free(&kseg->kg_schedq.sq_runq);
2194         free(kseg);
2195 }
2196
2197 /*
2198  * Allocate a new KSE.
2199  *
2200  * We allow the current thread to be NULL in the case that this
2201  * is the first time a KSE is being created (library initialization).
2202  * In this case, we don't need to (and can't) take any locks.
2203  */
2204 struct kse *
2205 _kse_alloc(struct pthread *curthread, int sys_scope)
2206 {
2207         struct kse *kse = NULL;
2208         char *stack;
2209         kse_critical_t crit;
2210         int i;
2211
2212         if ((curthread != NULL) && (free_kse_count > 0)) {
2213                 crit = _kse_critical_enter();
2214                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2215                 /* Search for a finished KSE. */
2216                 kse = TAILQ_FIRST(&free_kseq);
2217                 while ((kse != NULL) &&
2218                     ((kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
2219                         kse = TAILQ_NEXT(kse, k_qe);
2220                 }
2221                 if (kse != NULL) {
2222                         DBG_MSG("found an unused kse.\n");
2223                         TAILQ_REMOVE(&free_kseq, kse, k_qe);
2224                         free_kse_count--;
2225                         TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2226                         active_kse_count++;
2227                 }
2228                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2229                 _kse_critical_leave(crit);
2230                 if (kse != NULL)
2231                         kse_reinit(kse, sys_scope);
2232         }
2233         if ((kse == NULL) &&
2234             ((kse = (struct kse *)malloc(sizeof(*kse))) != NULL)) {
2235                 if (sys_scope != 0)
2236                         stack = NULL;
2237                 else if ((stack = malloc(KSE_STACKSIZE)) == NULL) {
2238                         free(kse);
2239                         return (NULL);
2240                 }
2241                 bzero(kse, sizeof(*kse));
2242
2243                 /* Initialize KCB without the lock. */
2244                 if ((kse->k_kcb = _kcb_ctor(kse)) == NULL) {
2245                         if (stack != NULL)
2246                                 free(stack);
2247                         free(kse);
2248                         return (NULL);
2249                 }
2250
2251                 /* Initialize the lockusers. */
2252                 for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
2253                         _lockuser_init(&kse->k_lockusers[i], (void *)kse);
2254                         _LCK_SET_PRIVATE2(&kse->k_lockusers[i], NULL);
2255                 }
2256                 /* _lock_init(kse->k_lock, ...) */
2257
2258                 if (curthread != NULL) {
2259                         crit = _kse_critical_enter();
2260                         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2261                 }
2262                 kse->k_flags = 0;
2263                 TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2264                 active_kse_count++;
2265                 if (curthread != NULL) {
2266                         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2267                         _kse_critical_leave(crit);
2268                 }
2269                 /*
2270                  * Create the KSE context.
2271                  * Scope system threads (one thread per KSE) do not need
2272                  * a stack for KSE upcalls, since none are made.
2273                  */
2274                 if (!sys_scope) {
2275                         kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2276                         kse->k_stack.ss_sp = stack;
2277                         kse->k_stack.ss_size = KSE_STACKSIZE;
2278                 } else {
2279                         kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2280                         kse->k_stack.ss_sp = NULL;
2281                         kse->k_stack.ss_size = 0;
2282                 }
2283                 kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2284                 kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2285                 /*
2286                  * We need to keep a copy of the stack in case it
2287                  * doesn't get used; a KSE running a scope system
2288                  * thread will use that thread's stack.
2289                  */
2290                 kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2291         }
2292         return (kse);
2293 }
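
/*
 * Illustrative call patterns, following the NULL checks above: during
 * library initialization _kse_alloc(NULL, sys_scope) must take the
 * malloc() path directly, since no locks can be taken yet, while
 * later calls with a valid curthread first try to recycle a cached
 * KSE whose mailbox has KMF_DONE set, i.e. one the kernel is
 * finished with.
 */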
2294
2295 static void
2296 kse_reinit(struct kse *kse, int sys_scope)
2297 {
2298         if (!sys_scope) {
2299                 kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2300                 if (kse->k_stack.ss_sp == NULL) {
2301                         /* XXX allocation failure is not reported upward */
2302                         kse->k_stack.ss_sp = (char *)malloc(KSE_STACKSIZE);
2303                         kse->k_stack.ss_size = (kse->k_stack.ss_sp != NULL) ? KSE_STACKSIZE : 0;
2304                 }
2305                 kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2306         } else {
2307                 kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2308                 if (kse->k_stack.ss_sp)
2309                         free(kse->k_stack.ss_sp);
2310                 kse->k_stack.ss_sp = NULL;
2311                 kse->k_stack.ss_size = 0;
2312                 kse->k_kcb->kcb_kmbx.km_quantum = 0;
2313         }
2314         kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2315         kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2316         kse->k_kcb->kcb_kmbx.km_curthread = NULL;
2317         kse->k_kcb->kcb_kmbx.km_flags = 0;
2318         kse->k_curthread = NULL;
2319         kse->k_kseg = NULL;
2320         kse->k_schedq = NULL;
2321         kse->k_locklevel = 0;
2322         kse->k_flags = 0;
2323         kse->k_error = 0;
2324         kse->k_cpu = 0;
2325         kse->k_sigseqno = 0;
2326 }
2327
2328 void
2329 kse_free_unlocked(struct kse *kse)
2330 {
2331         TAILQ_REMOVE(&active_kseq, kse, k_qe);
2332         active_kse_count--;
2333         kse->k_kseg = NULL;
2334         kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2335         kse->k_flags = 0;
2336         TAILQ_INSERT_HEAD(&free_kseq, kse, k_qe);
2337         free_kse_count++;
2338 }
2339
2340 void
2341 _kse_free(struct pthread *curthread, struct kse *kse)
2342 {
2343         kse_critical_t crit;
2344
2345         if (curthread == NULL)
2346                 kse_free_unlocked(kse);
2347         else {
2348                 crit = _kse_critical_enter();
2349                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2350                 kse_free_unlocked(kse);
2351                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2352                 _kse_critical_leave(crit);
2353         }
2354 }
2355
2356 static void
2357 kse_destroy(struct kse *kse)
2358 {
2359         int i;
2360
2361         if (kse->k_stack.ss_sp != NULL)
2362                 free(kse->k_stack.ss_sp);
2363         _kcb_dtor(kse->k_kcb);
2364         for (i = 0; i < MAX_KSE_LOCKLEVEL; ++i)
2365                 _lockuser_destroy(&kse->k_lockusers[i]);
2366         _lock_destroy(&kse->k_lock);
2367         free(kse);
2368 }
2369
2370 struct pthread *
2371 _thr_alloc(struct pthread *curthread)
2372 {
2373         kse_critical_t  crit;
2374         struct pthread  *thread = NULL;
2375         int i;
2376
2377         if (curthread != NULL) {
2378                 if (GC_NEEDED())
2379                         _thr_gc(curthread);
2380                 if (free_thread_count > 0) {
2381                         crit = _kse_critical_enter();
2382                         KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2383                         if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
2384                                 TAILQ_REMOVE(&free_threadq, thread, tle);
2385                                 free_thread_count--;
2386                         }
2387                         KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2388                         _kse_critical_leave(crit);
2389                 }
2390         }
2391         if ((thread == NULL) &&
2392             ((thread = malloc(sizeof(struct pthread))) != NULL)) {
2393                 bzero(thread, sizeof(struct pthread));
2394                 thread->siginfo = calloc(_SIG_MAXSIG, sizeof(siginfo_t));
2395                 if (thread->siginfo == NULL) {
2396                         free(thread);
2397                         return (NULL);
2398                 }
2399                 if (curthread) {
2400                         _pthread_mutex_lock(&_tcb_mutex);
2401                         thread->tcb = _tcb_ctor(thread, 0 /* not initial tls */);
2402                         _pthread_mutex_unlock(&_tcb_mutex);
2403                 } else {
2404                         thread->tcb = _tcb_ctor(thread, 1 /* initial tls */);
2405                 }
2406                 if (thread->tcb == NULL) {
2407                         free(thread->siginfo);
2408                         free(thread);
2409                         return (NULL);
2410                 }
2411                 /*
2412                  * Initialize thread locking.
2413                  * Lock initializing needs malloc, so don't
2414                  * enter critical region before doing this!
2415                  */
2416                 if (_lock_init(&thread->lock, LCK_ADAPTIVE,
2417                     _thr_lock_wait, _thr_lock_wakeup, calloc) != 0)
2418                         PANIC("Cannot initialize thread lock");
2419                 for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
2420                         _lockuser_init(&thread->lockusers[i], (void *)thread);
2421                         _LCK_SET_PRIVATE2(&thread->lockusers[i],
2422                             (void *)thread);
2423                 }
2424         }
2425         return (thread);
2426 }
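
/*
 * Example of the allocation order above: _thr_alloc() first triggers
 * _thr_gc() when GC_NEEDED(), then tries the free thread cache, and
 * only then malloc()s a fresh thread, whose tcb is constructed under
 * _tcb_mutex when called from an existing thread (curthread != NULL).
 */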
2427
2428 void
2429 _thr_free(struct pthread *curthread, struct pthread *thread)
2430 {
2431         kse_critical_t crit;
2432
2433         DBG_MSG("Freeing thread %p\n", thread);
2434         if (thread->name) {
2435                 free(thread->name);
2436                 thread->name = NULL;
2437         }
2438         if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) {
2439                 thr_destroy(curthread, thread);
2440         } else {
2441                 /* Add the thread to the free thread list. */
2442                 crit = _kse_critical_enter();
2443                 KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2444                 TAILQ_INSERT_TAIL(&free_threadq, thread, tle);
2445                 free_thread_count++;
2446                 KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2447                 _kse_critical_leave(crit);
2448         }
2449 }
2450
2451 static void
2452 thr_destroy(struct pthread *curthread, struct pthread *thread)
2453 {
2454         int i;
2455
2456         for (i = 0; i < MAX_THR_LOCKLEVEL; i++)
2457                 _lockuser_destroy(&thread->lockusers[i]);
2458         _lock_destroy(&thread->lock);
2459         if (curthread) {
2460                 _pthread_mutex_lock(&_tcb_mutex);
2461                 _tcb_dtor(thread->tcb);
2462                 _pthread_mutex_unlock(&_tcb_mutex);
2463         } else {
2464                 _tcb_dtor(thread->tcb);
2465         }
2466         free(thread->siginfo);
2467         free(thread);
2468 }
2469
2470 /*
2471  * Add an active thread:
2472  *
2473  *   o Assign the thread a unique id (which GDB uses to track
2474  *     threads).
2475  *   o Add the thread to the list of all threads and increment
2476  *     number of active threads.
2477  */
2478 static void
2479 thr_link(struct pthread *thread)
2480 {
2481         kse_critical_t crit;
2482         struct kse *curkse;
2483
2484         crit = _kse_critical_enter();
2485         curkse = _get_curkse();
2486         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2487         /*
2488          * Initialize the unique id (which GDB uses to track
2489          * threads), add the thread to the list of all threads,
2490          * and increment the count of active threads.
2491          */
2492         thread->uniqueid = next_uniqueid++;
2493         THR_LIST_ADD(thread);
2494         _thread_active_threads++;
2495         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2496         _kse_critical_leave(crit);
2497 }
2498
2499 /*
2500  * Remove an active thread.
2501  */
2502 static void
2503 thr_unlink(struct pthread *thread)
2504 {
2505         kse_critical_t crit;
2506         struct kse *curkse;
2507
2508         crit = _kse_critical_enter();
2509         curkse = _get_curkse();
2510         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2511         THR_LIST_REMOVE(thread);
2512         _thread_active_threads--;
2513         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2514         _kse_critical_leave(crit);
2515 }
2516
2517 void
2518 _thr_hash_add(struct pthread *thread)
2519 {
2520         struct thread_hash_head *head;
2521
2522         head = &thr_hashtable[THREAD_HASH(thread)];
2523         LIST_INSERT_HEAD(head, thread, hle);
2524 }
2525
2526 void
2527 _thr_hash_remove(struct pthread *thread)
2528 {
2529         LIST_REMOVE(thread, hle);
2530 }
2531
2532 struct pthread *
2533 _thr_hash_find(struct pthread *thread)
2534 {
2535         struct pthread *td;
2536         struct thread_hash_head *head;
2537
2538         head = &thr_hashtable[THREAD_HASH(thread)];
2539         LIST_FOREACH(td, head, hle) {
2540                 if (td == thread)
2541                         return (thread);
2542         }
2543         return (NULL);
2544 }
2545
2546 void
2547 _thr_debug_check_yield(struct pthread *curthread)
2548 {
2549         /*
2550          * Note that TMDF_SUSPEND is set after the process is suspended.
2551          * While the process is being debugged, every suspension causes
2552          * all KSEs to schedule an upcall in the kernel, unless a KSE
2553          * is in a critical region.  If this function is being called,
2554          * the KSE is no longer in a critical region: if the debugger
2555          * set TMDF_SUSPEND before the KSE left its critical region, we
2556          * catch it here.  If the flag changes while we are testing it,
2557          * that is not a problem either, because the change can only
2558          * occur after a process suspension event, and a suspension
2559          * event always makes the KSE schedule an upcall.  Because we
2560          * are not in a critical region, that upcall will be scheduled
2561          * successfully and the flag will be checked again in
2562          * kse_sched_multi(); we will not come back until the debugger
2563          * clears the flag, which it does at the next suspension
2564          * event.
2565          */
2566         if (!DBG_CAN_RUN(curthread)) {
2567                 if ((curthread->attr.flags & PTHREAD_SCOPE_SYSTEM) == 0)
2568                         _thr_sched_switch(curthread);
2569                 else
2570                         kse_thr_interrupt(&curthread->tcb->tcb_tmbx,
2571                                 KSE_INTR_DBSUSPEND, 0);
2572         }
2573 }