1 /*
2  * Copyright (C) 2003 Daniel M. Eischen <deischen@freebsd.org>
3  * Copyright (C) 2002 Jonathon Mini <mini@freebsd.org>
4  * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed by John Birrell.
18  * 4. Neither the name of the author nor the names of any co-contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include <sys/types.h>
39 #include <sys/kse.h>
40 #include <sys/ptrace.h>
41 #include <sys/signalvar.h>
42 #include <sys/queue.h>
43 #include <machine/atomic.h>
44 #include <machine/sigframe.h>
45
46 #include <assert.h>
47 #include <errno.h>
48 #include <signal.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <time.h>
52 #include <ucontext.h>
53 #include <unistd.h>
54
55 #include "atomic_ops.h"
56 #include "thr_private.h"
57 #include "libc_private.h"
58 #ifdef NOTYET
59 #include "spinlock.h"
60 #endif
61
62 /* #define DEBUG_THREAD_KERN */
63 #ifdef DEBUG_THREAD_KERN
64 #define DBG_MSG         stdout_debug
65 #else
66 #define DBG_MSG(x...)
67 #endif
68
69 /*
70  * Define a high water mark for the maximum number of threads that
71  * will be cached.  Once this level is reached, any extra threads
72  * will be free()'d.
73  */
74 #define MAX_CACHED_THREADS      100
75 /*
76  * Define high water marks for the maximum number of KSEs and KSE groups
77  * that will be cached.  Because we support 1:1 threading, there can be
78  * as many KSEs and KSE groups as there are threads.  Once these levels
79  * are reached, any extra KSEs and KSE groups will be free()'d.
80  */
81 #define MAX_CACHED_KSES         ((_thread_scope_system <= 0) ? 50 : 100)
82 #define MAX_CACHED_KSEGS        ((_thread_scope_system <= 0) ? 50 : 100)
83
84 #define KSE_SET_MBOX(kse, thrd) \
85         (kse)->k_kcb->kcb_kmbx.km_curthread = &(thrd)->tcb->tcb_tmbx
86
87 #define KSE_SET_EXITED(kse)     (kse)->k_flags |= KF_EXITED
88
89 /*
90  * Macros for manipulating the run queues.  The priority queue
91  * routines use the thread's pqe link and also handle the setting
92  * and clearing of the thread's THR_FLAGS_IN_RUNQ flag.
93  */
94 #define KSE_RUNQ_INSERT_HEAD(kse, thrd)                 \
95         _pq_insert_head(&(kse)->k_schedq->sq_runq, thrd)
96 #define KSE_RUNQ_INSERT_TAIL(kse, thrd)                 \
97         _pq_insert_tail(&(kse)->k_schedq->sq_runq, thrd)
98 #define KSE_RUNQ_REMOVE(kse, thrd)                      \
99         _pq_remove(&(kse)->k_schedq->sq_runq, thrd)
100 #define KSE_RUNQ_FIRST(kse)                             \
101         ((_libkse_debug == 0) ?                         \
102          _pq_first(&(kse)->k_schedq->sq_runq) :         \
103          _pq_first_debug(&(kse)->k_schedq->sq_runq))
104
105 #define KSE_RUNQ_THREADS(kse)   ((kse)->k_schedq->sq_runq.pq_threads)
106
107 #define THR_NEED_CANCEL(thrd)                                           \
108          (((thrd)->cancelflags & THR_CANCELLING) != 0 &&                \
109           ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&        \
110           (((thrd)->cancelflags & THR_AT_CANCEL_POINT) != 0 ||          \
111            ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
112
113 #define THR_NEED_ASYNC_CANCEL(thrd)                                     \
114          (((thrd)->cancelflags & THR_CANCELLING) != 0 &&                \
115           ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&        \
116           (((thrd)->cancelflags & THR_AT_CANCEL_POINT) == 0 &&          \
117            ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
118
119 /*
120  * We've got to keep track of everything that is allocated, not only
121  * to have a speedy free list, but also so they can be deallocated
122  * after a fork().
123  */
124 static TAILQ_HEAD(, kse)        active_kseq;
125 static TAILQ_HEAD(, kse)        free_kseq;
126 static TAILQ_HEAD(, kse_group)  free_kse_groupq;
127 static TAILQ_HEAD(, kse_group)  active_kse_groupq;
128 static TAILQ_HEAD(, kse_group)  gc_ksegq;
129 static struct lock              kse_lock;       /* also used for kseg queue */
130 static int                      free_kse_count = 0;
131 static int                      free_kseg_count = 0;
132 static TAILQ_HEAD(, pthread)    free_threadq;
133 static struct lock              thread_lock;
134 static int                      free_thread_count = 0;
135 static int                      inited = 0;
136 static int                      active_kse_count = 0;
137 static int                      active_kseg_count = 0;
138 static u_int64_t                next_uniqueid = 1;
139
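/*
 * Hash table of threads, keyed by the thread pointer (see THREAD_HASH
 * below); presumably maintained by thr_link()/thr_unlink() so that a
 * pthread_t handle can be validated quickly.
 */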
140 LIST_HEAD(thread_hash_head, pthread);
141 #define THREAD_HASH_QUEUES      127
142 static struct thread_hash_head  thr_hashtable[THREAD_HASH_QUEUES];
143 #define THREAD_HASH(thrd)       ((unsigned long)thrd % THREAD_HASH_QUEUES)
144
145 /* Lock for thread tcb constructor/destructor */
146 static pthread_mutex_t          _tcb_mutex;
147
148 #ifdef DEBUG_THREAD_KERN
149 static void     dump_queues(struct kse *curkse);
150 #endif
151 static void     kse_check_completed(struct kse *kse);
152 static void     kse_check_waitq(struct kse *kse);
153 static void     kse_fini(struct kse *curkse);
154 static void     kse_reinit(struct kse *kse, int sys_scope);
155 static void     kse_sched_multi(struct kse_mailbox *kmbx);
156 static void     kse_sched_single(struct kse_mailbox *kmbx);
157 static void     kse_switchout_thread(struct kse *kse, struct pthread *thread);
158 static void     kse_wait(struct kse *kse, struct pthread *td_wait, int sigseq);
159 static void     kse_free_unlocked(struct kse *kse);
160 static void     kse_destroy(struct kse *kse);
161 static void     kseg_free_unlocked(struct kse_group *kseg);
162 static void     kseg_init(struct kse_group *kseg);
163 static void     kseg_reinit(struct kse_group *kseg);
164 static void     kseg_destroy(struct kse_group *kseg);
165 static void     kse_waitq_insert(struct pthread *thread);
166 static void     kse_wakeup_multi(struct kse *curkse);
167 static struct kse_mailbox *kse_wakeup_one(struct pthread *thread);
168 static void     thr_cleanup(struct kse *kse, struct pthread *curthread);
169 static void     thr_link(struct pthread *thread);
170 static void     thr_resume_wrapper(int sig, siginfo_t *, ucontext_t *);
171 static void     thr_resume_check(struct pthread *curthread, ucontext_t *ucp);
172 static int      thr_timedout(struct pthread *thread, struct timespec *curtime);
173 static void     thr_unlink(struct pthread *thread);
174 static void     thr_destroy(struct pthread *curthread, struct pthread *thread);
175 static void     thread_gc(struct pthread *thread);
176 static void     kse_gc(struct pthread *thread);
177 static void     kseg_gc(struct pthread *thread);
178
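/*
 * Charge the thread's current time slice with the user and system ticks
 * that the kernel accumulated in its mailbox.  A slice_usec of -1 marks
 * the quantum as expired; SCHED_FIFO threads are never expired this way.
 */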
179 static void __inline
180 thr_accounting(struct pthread *thread)
181 {
182         if ((thread->slice_usec != -1) &&
183             (thread->slice_usec <= TIMESLICE_USEC) &&
184             (thread->attr.sched_policy != SCHED_FIFO)) {
185                 thread->slice_usec += (thread->tcb->tcb_tmbx.tm_uticks
186                     + thread->tcb->tcb_tmbx.tm_sticks) * _clock_res_usec;
187                 /* Check for time quantum exceeded: */
188                 if (thread->slice_usec > TIMESLICE_USEC)
189                         thread->slice_usec = -1;
190         }
191         thread->tcb->tcb_tmbx.tm_uticks = 0;
192         thread->tcb->tcb_tmbx.tm_sticks = 0;
193 }
194
195 /*
196  * This is called after a fork().
197  * No locks need to be taken here since we are guaranteed to be
198  * single threaded.
199  * 
200  * XXX
201  * POSIX says that in a threaded process, fork() is to be used
202  * only to run new programs, and the effects of calling functions
203  * that require certain resources between the call to fork() and
204  * the call to an exec function are undefined.
205  *
206  * It is not safe to free memory after fork(), because these data
207  * structures may be in an inconsistent state.
208  */
209 void
210 _kse_single_thread(struct pthread *curthread)
211 {
212 #ifdef NOTYET
213         struct kse *kse;
214         struct kse_group *kseg;
215         struct pthread *thread;
216
217         _thr_spinlock_init();
218         *__malloc_lock = (spinlock_t)_SPINLOCK_INITIALIZER;
219         if (__isthreaded) {
220                 _thr_rtld_fini();
221                 _thr_signal_deinit();
222         }
223         __isthreaded = 0;
224         /*
225          * Restore signal mask early, so any memory problems could
226          * dump core.
227          */ 
228         __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
229         _thread_active_threads = 1;
230
231         /*
232          * Enter a loop to remove and free all threads other than
233          * the running thread from the active thread list:
234          */
235         while ((thread = TAILQ_FIRST(&_thread_list)) != NULL) {
236                 THR_GCLIST_REMOVE(thread);
237                 /*
238                  * Remove this thread from the list (the current
239                  * thread will be removed but re-added by libpthread
240                  * initialization).
241                  */
242                 TAILQ_REMOVE(&_thread_list, thread, tle);
243                 /* Make sure this isn't the running thread: */
244                 if (thread != curthread) {
245                         _thr_stack_free(&thread->attr);
246                         if (thread->specific != NULL)
247                                 free(thread->specific);
248                         thr_destroy(curthread, thread);
249                 }
250         }
251
252         TAILQ_INIT(&curthread->mutexq);         /* initialize mutex queue */
253         curthread->joiner = NULL;               /* no joining threads yet */
254         curthread->refcount = 0;
255         SIGEMPTYSET(curthread->sigpend);        /* clear pending signals */
256
257         /* Don't free thread-specific data as the caller may require it */
258
259         /* Free the free KSEs: */
260         while ((kse = TAILQ_FIRST(&free_kseq)) != NULL) {
261                 TAILQ_REMOVE(&free_kseq, kse, k_qe);
262                 kse_destroy(kse);
263         }
264         free_kse_count = 0;
265
266         /* Free the active KSEs: */
267         while ((kse = TAILQ_FIRST(&active_kseq)) != NULL) {
268                 TAILQ_REMOVE(&active_kseq, kse, k_qe);
269                 kse_destroy(kse);
270         }
271         active_kse_count = 0;
272
273         /* Free the free KSEGs: */
274         while ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
275                 TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
276                 kseg_destroy(kseg);
277         }
278         free_kseg_count = 0;
279
280         /* Free the active KSEGs: */
281         while ((kseg = TAILQ_FIRST(&active_kse_groupq)) != NULL) {
282                 TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
283                 kseg_destroy(kseg);
284         }
285         active_kseg_count = 0;
286
287         /* Free the free threads. */
288         while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
289                 TAILQ_REMOVE(&free_threadq, thread, tle);
290                 thr_destroy(curthread, thread);
291         }
292         free_thread_count = 0;
293
294         /* Free the to-be-gc'd threads. */
295         while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
296                 TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
297                 thr_destroy(curthread, thread);
298         }
299         TAILQ_INIT(&gc_ksegq);
300         _gc_count = 0;
301
302         if (inited != 0) {
303                 /*
304                  * Destroy these locks; they'll be recreated to assure they
305                  * are in the unlocked state.
306                  */
307                 _lock_destroy(&kse_lock);
308                 _lock_destroy(&thread_lock);
309                 _lock_destroy(&_thread_list_lock);
310                 inited = 0;
311         }
312
313         /*
314          * After a fork(), the leftover thread goes back to being
315          * a process scope thread.
316          */
317         curthread->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
318         curthread->attr.flags |= PTHREAD_SCOPE_PROCESS;
319
320         /* We're no longer part of any lists */
321         curthread->tlflags = 0;
322
323         /*
324          * After a fork, we are still operating on the thread's original
325          * stack.  Don't clear the THR_FLAGS_USER from the thread's
326          * attribute flags.
327          */
328
329         /* Initialize the threads library. */
330         curthread->kse = NULL;
331         curthread->kseg = NULL;
332         _kse_initial = NULL;
333         _libpthread_init(curthread);
334 #else
335         int i;
336
337         /* Reset the current thread and KSE lock data. */
338         for (i = 0; i < curthread->locklevel; i++) {
339                 _lockuser_reinit(&curthread->lockusers[i], (void *)curthread);
340         }
341         curthread->locklevel = 0;
342         for (i = 0; i < curthread->kse->k_locklevel; i++) {
343                 _lockuser_reinit(&curthread->kse->k_lockusers[i],
344                     (void *)curthread->kse);
345                 _LCK_SET_PRIVATE2(&curthread->kse->k_lockusers[i], NULL);
346         }
347         curthread->kse->k_locklevel = 0;
348
349         /*
350          * Reinitialize the thread and signal locks so that
351          * sigaction() will work after a fork().
352          */
353         _lock_reinit(&curthread->lock, LCK_ADAPTIVE, _thr_lock_wait,
354             _thr_lock_wakeup);
355         _lock_reinit(&_thread_signal_lock, LCK_ADAPTIVE, _kse_lock_wait,
356             _kse_lock_wakeup);
357
358  
359         _thr_spinlock_init();
360         if (__isthreaded) {
361                 _thr_rtld_fini();
362                 _thr_signal_deinit();
363         }
364         __isthreaded = 0;
365         curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
366         curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;
367
368         /*
369          * After a fork, it is possible that an upcall occurs in
370          * the parent KSE that fork()'d before the child process
371          * is fully created and before its vm space is copied.
372          * During the upcall, the tcb is set to null or to another
373          * thread, and this is what gets copied in the child process
374          * when the vm space is cloned sometime after the upcall
375          * occurs.  Note that we shouldn't have to set the kcb, but
376          * we do it for completeness.
377          */
378         _kcb_set(curthread->kse->k_kcb);
379         _tcb_set(curthread->kse->k_kcb, curthread->tcb);
380  
381
382         /* After a fork(), the child should have no pending signals. */
383         sigemptyset(&curthread->sigpend);
384
385         /*
386          * Restore signal mask early, so any memory problems could
387          * dump core.
388          */ 
389         sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
390         _thread_active_threads = 1;
391 #endif
392 }
393
394 /*
395  * This is used to initialize housekeeping and to initialize the
396  * KSD for the KSE.
397  */
398 void
399 _kse_init(void)
400 {
401         if (inited == 0) {
402                 TAILQ_INIT(&active_kseq);
403                 TAILQ_INIT(&active_kse_groupq);
404                 TAILQ_INIT(&free_kseq);
405                 TAILQ_INIT(&free_kse_groupq);
406                 TAILQ_INIT(&free_threadq);
407                 TAILQ_INIT(&gc_ksegq);
408                 if (_lock_init(&kse_lock, LCK_ADAPTIVE,
409                     _kse_lock_wait, _kse_lock_wakeup) != 0)
410                         PANIC("Unable to initialize free KSE queue lock");
411                 if (_lock_init(&thread_lock, LCK_ADAPTIVE,
412                     _kse_lock_wait, _kse_lock_wakeup) != 0)
413                         PANIC("Unable to initialize free thread queue lock");
414                 if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
415                     _kse_lock_wait, _kse_lock_wakeup) != 0)
416                         PANIC("Unable to initialize thread list lock");
417                 _pthread_mutex_init(&_tcb_mutex, NULL);
418                 active_kse_count = 0;
419                 active_kseg_count = 0;
420                 _gc_count = 0;
421                 inited = 1;
422         }
423 }
424
425 /*
426  * This is called when the first thread (other than the initial
427  * thread) is created.
428  */
429 int
430 _kse_setthreaded(int threaded)
431 {
432         sigset_t sigset;
433
434         if ((threaded != 0) && (__isthreaded == 0)) {
435                 SIGFILLSET(sigset);
436                 __sys_sigprocmask(SIG_SETMASK, &sigset, &_thr_initial->sigmask);
437
438                 /*
439                  * Tell the kernel to create a KSE for the initial thread
440                  * and enable upcalls in it.
441                  */
442                 _kse_initial->k_flags |= KF_STARTED;
443
444                 if (_thread_scope_system <= 0) {
445                         _thr_initial->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
446                         _kse_initial->k_kseg->kg_flags &= ~KGF_SINGLE_THREAD;
447                         _kse_initial->k_kcb->kcb_kmbx.km_curthread = NULL;
448                 }
449                 else {
450                         /*
451                          * For a bound thread, the kernel reads the mailbox
452                          * pointer only once, so we set it here before calling kse_create.
453                          */
454                         _tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
455                         KSE_SET_MBOX(_kse_initial, _thr_initial);
456                         _kse_initial->k_kcb->kcb_kmbx.km_flags |= KMF_BOUND;
457                 }
458
459                 /*
460                  * Locking functions in libc are required when there are
461                  * threads other than the initial thread.
462                  */
463                 _thr_rtld_init();
464
465                 __isthreaded = 1;
466                 if (kse_create(&_kse_initial->k_kcb->kcb_kmbx, 0) != 0) {
467                         _kse_initial->k_flags &= ~KF_STARTED;
468                         __isthreaded = 0;
469                         PANIC("kse_create() failed\n");
470                         return (-1);
471                 }
472                 _thr_initial->tcb->tcb_tmbx.tm_lwp = 
473                         _kse_initial->k_kcb->kcb_kmbx.km_lwp;
474                 _thread_activated = 1;
475
476 #ifndef SYSTEM_SCOPE_ONLY
477                 if (_thread_scope_system <= 0) {
478                         /* Set current thread to initial thread */
479                         _tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
480                         KSE_SET_MBOX(_kse_initial, _thr_initial);
481                         _thr_start_sig_daemon();
482                         _thr_setmaxconcurrency();
483                 }
484                 else
485 #endif
486                         __sys_sigprocmask(SIG_SETMASK, &_thr_initial->sigmask,
487                             NULL);
488         }
489         return (0);
490 }
491
492 /*
493  * Lock wait and wakeup handlers for KSE locks.  These are only used by
494  * KSEs, and should never be used by threads.  KSE locks include the
495  * KSE group lock (used for locking the scheduling queue) and the
496  * kse_lock defined above.
497  *
498  * When a KSE lock attempt blocks, the entire KSE blocks allowing another
499  * KSE to run.  For the most part, it doesn't make much sense to try and
500  * schedule another thread because you need to lock the scheduling queue
501  * in order to do that.  And since the KSE lock is used to lock the scheduling
502  * queue, you would just end up blocking again.
503  */
504 void
505 _kse_lock_wait(struct lock *lock, struct lockuser *lu)
506 {
507         struct kse *curkse = (struct kse *)_LCK_GET_PRIVATE(lu);
508         struct timespec ts;
509         int saved_flags;
510
511         if (curkse->k_kcb->kcb_kmbx.km_curthread != NULL)
512                 PANIC("kse_lock_wait does not disable upcall.\n");
513         /*
514          * Enter a loop to wait until we get the lock.
515          */
516         ts.tv_sec = 0;
517         ts.tv_nsec = 1000000;  /* 1 ms */
518         while (!_LCK_GRANTED(lu)) {
519                 /*
520                  * Yield the kse and wait to be notified when the lock
521                  * is granted.
522                  */
523                 saved_flags = curkse->k_kcb->kcb_kmbx.km_flags;
524                 curkse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL |
525                     KMF_NOCOMPLETED;
526                 kse_release(&ts);
527                 curkse->k_kcb->kcb_kmbx.km_flags = saved_flags;
528         }
529 }
530
531 void
532 _kse_lock_wakeup(struct lock *lock, struct lockuser *lu)
533 {
534         struct kse *curkse;
535         struct kse *kse;
536         struct kse_mailbox *mbx;
537
538         curkse = _get_curkse();
539         kse = (struct kse *)_LCK_GET_PRIVATE(lu);
540
541         if (kse == curkse)
542                 PANIC("KSE trying to wake itself up in lock");
543         else {
544                 mbx = &kse->k_kcb->kcb_kmbx;
545                 _lock_grant(lock, lu);
546                 /*
547                  * Notify the owning kse that it has the lock.
548                  * It is safe to pass an invalid address to kse_wakeup
549                  * even if the mailbox is not in the kernel at all,
550                  * and waking up the wrong kse is also harmless.
551                  */
552                 kse_wakeup(mbx);
553         }
554 }
555
556 /*
557  * Thread wait and wakeup handlers for thread locks.  These are only used
558  * by threads, never by KSEs.  Thread locks include the per-thread lock
559  * (defined in its structure), and condition variable and mutex locks.
560  */
561 void
562 _thr_lock_wait(struct lock *lock, struct lockuser *lu)
563 {
564         struct pthread *curthread = (struct pthread *)lu->lu_private;
565
566         do {
567                 THR_LOCK_SWITCH(curthread);
568                 THR_SET_STATE(curthread, PS_LOCKWAIT);
569                 _thr_sched_switch_unlocked(curthread);
570         } while (!_LCK_GRANTED(lu));
571 }
572
573 void
574 _thr_lock_wakeup(struct lock *lock, struct lockuser *lu)
575 {
576         struct pthread *thread;
577         struct pthread *curthread;
578         struct kse_mailbox *kmbx;
579
580         curthread = _get_curthread();
581         thread = (struct pthread *)_LCK_GET_PRIVATE(lu);
582
583         THR_SCHED_LOCK(curthread, thread);
584         _lock_grant(lock, lu);
585         kmbx = _thr_setrunnable_unlocked(thread);
586         THR_SCHED_UNLOCK(curthread, thread);
587         if (kmbx != NULL)
588                 kse_wakeup(kmbx);
589 }
590
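/*
 * KSE critical regions.  Entering a critical region saves and clears the
 * KSE mailbox's km_curthread pointer (the saved value is the kse_critical_t
 * returned to the caller), which keeps the kernel from interrupting the
 * KSE with an upcall; see the PANIC check in _kse_lock_wait() above.
 */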
591 kse_critical_t
592 _kse_critical_enter(void)
593 {
594         kse_critical_t crit;
595
596         crit = (kse_critical_t)_kcb_critical_enter();
597         return (crit);
598 }
599
600 void
601 _kse_critical_leave(kse_critical_t crit)
602 {
603         struct pthread *curthread;
604
605         _kcb_critical_leave((struct kse_thr_mailbox *)crit);
606         if ((crit != NULL) && ((curthread = _get_curthread()) != NULL))
607                 THR_YIELD_CHECK(curthread);
608 }
609
610 int
611 _kse_in_critical(void)
612 {
613         return (_kcb_in_critical());
614 }
615
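/*
 * Thread-level critical regions are just a per-thread nesting count;
 * THR_YIELD_CHECK() only acts on a requested yield once the thread has
 * left its critical regions (see the critical_yield handling in
 * kse_sched_multi() below).
 */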
616 void
617 _thr_critical_enter(struct pthread *thread)
618 {
619         thread->critical_count++;
620 }
621
622 void
623 _thr_critical_leave(struct pthread *thread)
624 {
625         thread->critical_count--;
626         THR_YIELD_CHECK(thread);
627 }
628
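/*
 * Voluntarily switch out the current thread: enter a KSE critical region,
 * take the scheduler lock for this KSE's group, and hand off to
 * _thr_sched_switch_unlocked() below.
 */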
629 void
630 _thr_sched_switch(struct pthread *curthread)
631 {
632         struct kse *curkse;
633
634         (void)_kse_critical_enter();
635         curkse = _get_curkse();
636         KSE_SCHED_LOCK(curkse, curkse->k_kseg);
637         _thr_sched_switch_unlocked(curthread);
638 }
639
640 /*
641  * XXX - We may need to take the scheduling lock before calling
642  *       this, or perhaps take the lock within here before
643  *       doing anything else.
644  */
645 void
646 _thr_sched_switch_unlocked(struct pthread *curthread)
647 {
648         struct kse *curkse;
649         volatile int resume_once = 0;
650         ucontext_t *uc;
651
652         /* We're in the scheduler, 5 by 5: */
653         curkse = curthread->kse;
654
655         curthread->need_switchout = 1;  /* The thread yielded on its own. */
656         curthread->critical_yield = 0;  /* No need to yield anymore. */
657
658         /* Thread can unlock the scheduler lock. */
659         curthread->lock_switch = 1;
660
661         if (curthread->attr.flags & PTHREAD_SCOPE_SYSTEM)
662                 kse_sched_single(&curkse->k_kcb->kcb_kmbx);
663         else {
664                 if (__predict_false(_libkse_debug != 0)) {
665                         /*
666                          * Because the debugger saves the single-step status
667                          * in the thread mailbox's tm_dflags, we can safely
668                          * clear the single-step status here.  It will be
669                          * restored by kse_switchin when the thread is
670                          * switched in again.  This also lets the UTS run
671                          * at full speed.
672                          */
673                          ptrace(PT_CLEARSTEP, curkse->k_kcb->kcb_kmbx.km_lwp,
674                                 (caddr_t) 1, 0);
675                 }
676
677                 KSE_SET_SWITCH(curkse);
678                 _thread_enter_uts(curthread->tcb, curkse->k_kcb);
679         }
680         
681         /*
682          * Unlock the scheduling queue and leave the
683          * critical region.
684          */
685         /* Don't trust this after a switch! */
686         curkse = curthread->kse;
687
688         curthread->lock_switch = 0;
689         KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
690         _kse_critical_leave(&curthread->tcb->tcb_tmbx);
691
692         /*
693          * This thread is being resumed; check for cancellations.
694          */
695         if (THR_NEED_ASYNC_CANCEL(curthread) && !THR_IN_CRITICAL(curthread)) {
696                 uc = alloca(sizeof(ucontext_t));
697                 resume_once = 0;
698                 THR_GETCONTEXT(uc);
699                 if (resume_once == 0) {
700                         resume_once = 1;
701                         curthread->check_pending = 0;
702                         thr_resume_check(curthread, uc);
703                 }
704         }
705         THR_ACTIVATE_LAST_LOCK(curthread);
706 }
707
708 /*
709  * This is the scheduler for a KSE which runs a scope system thread.
710  * The multi-thread KSE scheduler should also work for a single threaded
711  * KSE, but we use a separate scheduler so that it can be fine-tuned
712  * to be more efficient (and perhaps not need a separate stack for
713  * the KSE, allowing it to use the thread's stack).
714  */
715
716 static void
717 kse_sched_single(struct kse_mailbox *kmbx)
718 {
719         struct kse *curkse;
720         struct pthread *curthread;
721         struct timespec ts;
722         sigset_t sigmask;
723         int i, sigseqno, level, first = 0;
724
725         curkse = (struct kse *)kmbx->km_udata;
726         curthread = curkse->k_curthread;
727
728         if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
729         /* Set up this KSE's specific data. */
730                 _kcb_set(curkse->k_kcb);
731                 _tcb_set(curkse->k_kcb, curthread->tcb);
732                 curkse->k_flags |= KF_INITIALIZED;
733                 first = 1;
734                 curthread->active = 1;
735
736                 /* Setup kernel signal masks for new thread. */
737                 __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
738                 /*
739                  * Enter a critical region.  This is meaningless for a
740                  * bound thread, but it keeps other code that expects the
741                  * mailbox to be cleared working correctly.
742                  */
743                 (void)_kse_critical_enter();
744         } else {
745                 /*
746                  * A bound thread always has its tcb set; this prevents
747                  * some (possibly buggy) code from blindly setting the
748                  * bound thread's tcb to NULL.
749                  */
750                 _tcb_set(curkse->k_kcb, curthread->tcb);
751         }
752
753         curthread->critical_yield = 0;
754         curthread->need_switchout = 0;
755
756         /*
757          * Lock the scheduling queue.
758          *
759          * There is no scheduling queue for single threaded KSEs,
760          * but we need a lock for protection regardless.
761          */
762         if (curthread->lock_switch == 0)
763                 KSE_SCHED_LOCK(curkse, curkse->k_kseg);
764
765         /*
766          * This has to do the job of kse_switchout_thread(), only
767          * for a single threaded KSE/KSEG.
768          */
769
770         switch (curthread->state) {
771         case PS_MUTEX_WAIT:
772         case PS_COND_WAIT:
773                 if (THR_NEED_CANCEL(curthread)) {
774                         curthread->interrupted = 1;
775                         curthread->continuation = _thr_finish_cancellation;
776                         THR_SET_STATE(curthread, PS_RUNNING);
777                 }
778                 break;
779
780         case PS_LOCKWAIT:
781                 /*
782                  * This state doesn't timeout.
783                  */
784                 curthread->wakeup_time.tv_sec = -1;
785                 curthread->wakeup_time.tv_nsec = -1;
786                 level = curthread->locklevel - 1;
787                 if (_LCK_GRANTED(&curthread->lockusers[level]))
788                         THR_SET_STATE(curthread, PS_RUNNING);
789                 break;
790
791         case PS_DEAD:
792                 curthread->check_pending = 0;
793                 /* Unlock the scheduling queue and exit the KSE and thread. */
794                 thr_cleanup(curkse, curthread);
795                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
796                 PANIC("bound thread shouldn't get here\n");
797                 break;
798
799         case PS_JOIN:
800                 if (THR_NEED_CANCEL(curthread)) {
801                         curthread->join_status.thread = NULL;
802                         THR_SET_STATE(curthread, PS_RUNNING);
803                 } else {
804                         /*
805                          * This state doesn't timeout.
806                          */
807                         curthread->wakeup_time.tv_sec = -1;
808                         curthread->wakeup_time.tv_nsec = -1;
809                 }
810                 break;
811
812         case PS_SUSPENDED:
813                 if (THR_NEED_CANCEL(curthread)) {
814                         curthread->interrupted = 1;
815                         THR_SET_STATE(curthread, PS_RUNNING);
816                 } else {
817                         /*
818                          * These states don't timeout.
819                          */
820                         curthread->wakeup_time.tv_sec = -1;
821                         curthread->wakeup_time.tv_nsec = -1;
822                 }
823                 break;
824
825         case PS_RUNNING:
826                 if ((curthread->flags & THR_FLAGS_SUSPENDED) != 0 &&
827                     !THR_NEED_CANCEL(curthread)) {
828                         THR_SET_STATE(curthread, PS_SUSPENDED);
829                         /*
830                          * These states don't timeout.
831                          */
832                         curthread->wakeup_time.tv_sec = -1;
833                         curthread->wakeup_time.tv_nsec = -1;
834                 }
835                 break;
836
837         case PS_SIGWAIT:
838                 PANIC("bound thread does not have SIGWAIT state\n");
839
840         case PS_SLEEP_WAIT:
841                 PANIC("bound thread does not have SLEEP_WAIT state\n");
842
843         case PS_SIGSUSPEND:
844                 PANIC("bound thread does not have SIGSUSPEND state\n");
845         
846         case PS_DEADLOCK:
847                 /*
848                  * These states don't timeout and don't need
849                  * to be in the waiting queue.
850                  */
851                 curthread->wakeup_time.tv_sec = -1;
852                 curthread->wakeup_time.tv_nsec = -1;
853                 break;
854
855         default:
856                 PANIC("Unknown state\n");
857                 break;
858         }
859
860         while (curthread->state != PS_RUNNING) {
861                 sigseqno = curkse->k_sigseqno;
862                 if (curthread->check_pending != 0) {
863                         /*
864                          * Install pending signals into the frame; this may
865                          * cause a mutex or condvar backout.
866                          */
867                         curthread->check_pending = 0;
868                         SIGFILLSET(sigmask);
869
870                         /*
871                          * Lock out kernel signal code when we are processing
872                          * signals, and get a fresh copy of the signal mask.
873                          */
874                         __sys_sigprocmask(SIG_SETMASK, &sigmask,
875                                           &curthread->sigmask);
876                         for (i = 1; i <= _SIG_MAXSIG; i++) {
877                                 if (SIGISMEMBER(curthread->sigmask, i))
878                                         continue;
879                                 if (SIGISMEMBER(curthread->sigpend, i))
880                                         (void)_thr_sig_add(curthread, i, 
881                                             &curthread->siginfo[i-1]);
882                         }
883                         __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask,
884                                 NULL);
885                         /* The above code might make the thread runnable */
886                         if (curthread->state == PS_RUNNING)
887                                 break;
888                 }
889                 THR_DEACTIVATE_LAST_LOCK(curthread);
890                 kse_wait(curkse, curthread, sigseqno);
891                 THR_ACTIVATE_LAST_LOCK(curthread);
892                 if (curthread->wakeup_time.tv_sec >= 0) {
893                         KSE_GET_TOD(curkse, &ts);
894                         if (thr_timedout(curthread, &ts)) {
895                                 /* Indicate that the thread timed out: */
896                                 curthread->timeout = 1;
897                                 /* Make the thread runnable. */
898                                 THR_SET_STATE(curthread, PS_RUNNING);
899                         }
900                 }
901         }
902
903         if (curthread->lock_switch == 0) {
904                 /* Unlock the scheduling queue. */
905                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
906         }
907
908         DBG_MSG("Continuing bound thread %p\n", curthread);
909         if (first) {
910                 _kse_critical_leave(&curthread->tcb->tcb_tmbx);
911                 pthread_exit(curthread->start_routine(curthread->arg));
912         }
913 }
914
915 #ifdef DEBUG_THREAD_KERN
916 static void
917 dump_queues(struct kse *curkse)
918 {
919         struct pthread *thread;
920
921         DBG_MSG("Threads in waiting queue:\n");
922         TAILQ_FOREACH(thread, &curkse->k_kseg->kg_schedq.sq_waitq, pqe) {
923                 DBG_MSG("  thread %p, state %d, blocked %d\n",
924                     thread, thread->state, thread->blocked);
925         }
926 }
927 #endif
928
929 /*
930  * This is the scheduler for a KSE which runs multiple threads.
931  */
932 static void
933 kse_sched_multi(struct kse_mailbox *kmbx)
934 {
935         struct kse *curkse;
936         struct pthread *curthread, *td_wait;
937         int ret;
938
939         curkse = (struct kse *)kmbx->km_udata;
940         THR_ASSERT(curkse->k_kcb->kcb_kmbx.km_curthread == NULL,
941             "Mailbox not null in kse_sched_multi");
942
943         /* Check for first time initialization: */
944         if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
945                 /* Set up this KSE's specific data. */
946                 _kcb_set(curkse->k_kcb);
947
948                 /* Set this before grabbing the context. */
949                 curkse->k_flags |= KF_INITIALIZED;
950         }
951
952         /*
953          * There is no current thread anymore; calling _get_curthread
954          * in the UTS should dump core.
955          */
956         _tcb_set(curkse->k_kcb, NULL);
957
958         /* If this is an upcall; take the scheduler lock. */
959         if (!KSE_IS_SWITCH(curkse))
960                 KSE_SCHED_LOCK(curkse, curkse->k_kseg);
961         else
962                 KSE_CLEAR_SWITCH(curkse);
963
964         if (KSE_IS_IDLE(curkse)) {
965                 KSE_CLEAR_IDLE(curkse);
966                 curkse->k_kseg->kg_idle_kses--;
967         }
968
969         /*
970          * Now that the scheduler lock is held, get the current
971          * thread.  The KSE's current thread cannot be safely
972          * examined without the lock because it could have returned
973          * as completed on another KSE.  See kse_check_completed().
974          */
975         curthread = curkse->k_curthread;
976
977         /*
978          * If the current thread was completed in another KSE, then
979          * it will be in the run queue.  Don't mark it as being blocked.
980          */
981         if ((curthread != NULL) &&
982             ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) &&
983             (curthread->need_switchout == 0)) {
984                 /*
985                  * Assume the current thread is blocked; when the
986                  * completed threads are checked and if the current
987                  * thread is among the completed, the blocked flag
988                  * will be cleared.
989                  */
990                 curthread->blocked = 1;
991                 DBG_MSG("Running thread %p is now blocked in kernel.\n",
992                     curthread);
993         }
994
995         /* Check for any unblocked threads in the kernel. */
996         kse_check_completed(curkse);
997
998         /*
999          * Check for threads that have timed-out.
1000          */
1001         kse_check_waitq(curkse);
1002
1003         /*
1004          * Switchout the current thread, if necessary, as the last step
1005          * so that it is inserted into the run queue (if it's runnable)
1006          * _after_ any other threads that were added to it above.
1007          */
1008         if (curthread == NULL)
1009                 ;  /* Nothing to do here. */
1010         else if ((curthread->need_switchout == 0) && DBG_CAN_RUN(curthread) &&
1011             (curthread->blocked == 0) && (THR_IN_CRITICAL(curthread))) {
1012                 /*
1013                  * Resume the thread and tell it to yield when
1014                  * it leaves the critical region.
1015                  */
1016                 curthread->critical_yield = 1;
1017                 curthread->active = 1;
1018                 if ((curthread->flags & THR_FLAGS_IN_RUNQ) != 0)
1019                         KSE_RUNQ_REMOVE(curkse, curthread);
1020                 curkse->k_curthread = curthread;
1021                 curthread->kse = curkse;
1022                 DBG_MSG("Continuing thread %p in critical region\n",
1023                     curthread);
1024                 kse_wakeup_multi(curkse);
1025                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1026                 ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1027                 if (ret != 0)
1028                         PANIC("Can't resume thread in critical region\n");
1029         }
1030         else if ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) {
1031                 curthread->tcb->tcb_tmbx.tm_lwp = 0;
1032                 kse_switchout_thread(curkse, curthread);
1033         }
1034         curkse->k_curthread = NULL;
1035
1036 #ifdef DEBUG_THREAD_KERN
1037         dump_queues(curkse);
1038 #endif
1039
1040         /* Check if there are no threads ready to run: */
1041         while (((curthread = KSE_RUNQ_FIRST(curkse)) == NULL) &&
1042             (curkse->k_kseg->kg_threadcount != 0) &&
1043             ((curkse->k_flags & KF_TERMINATED) == 0)) {
1044                 /*
1045                  * Wait for a thread to become active or until there are
1046                  * no more threads.
1047                  */
1048                 td_wait = KSE_WAITQ_FIRST(curkse);
1049                 kse_wait(curkse, td_wait, 0);
1050                 kse_check_completed(curkse);
1051                 kse_check_waitq(curkse);
1052         }
1053
1054         /* Check for no more threads: */
1055         if ((curkse->k_kseg->kg_threadcount == 0) ||
1056             ((curkse->k_flags & KF_TERMINATED) != 0)) {
1057                 /*
1058                  * Normally this shouldn't return, but it will if there
1059                  * are other KSEs running that create new threads that
1060                  * are assigned to this KSE[G].  For instance, if a scope
1061                  * system thread were to create a scope process thread
1062                  * and this kse[g] is the initial kse[g], then that newly
1063                  * created thread would be assigned to us (the initial
1064                  * kse[g]).
1065                  */
1066                 kse_wakeup_multi(curkse);
1067                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1068                 kse_fini(curkse);
1069                 /* never returns */
1070         }
1071
1072         THR_ASSERT(curthread != NULL,
1073             "Return from kse_wait/fini without thread.");
1074         THR_ASSERT(curthread->state != PS_DEAD,
1075             "Trying to resume dead thread!");
1076         KSE_RUNQ_REMOVE(curkse, curthread);
1077
1078         /*
1079          * Make the selected thread the current thread.
1080          */
1081         curkse->k_curthread = curthread;
1082
1083         /*
1084          * Make sure the current thread's kse points to this kse.
1085          */
1086         curthread->kse = curkse;
1087
1088         /*
1089          * Reset the time slice if this thread is running for the first
1090          * time or running again after using its full time slice allocation.
1091          */
1092         if (curthread->slice_usec == -1)
1093                 curthread->slice_usec = 0;
1094
1095         /* Mark the thread active. */
1096         curthread->active = 1;
1097
1098         /*
1099          * The thread's current signal frame will only be NULL if it
1100          * is being resumed after being blocked in the kernel.  In
1101          * this case, and if the thread needs to run down pending
1102          * signals or needs a cancellation check, we need to add a
1103          * signal frame to the thread's context.
1104          */
1105         if (curthread->lock_switch == 0 && curthread->state == PS_RUNNING &&
1106             (curthread->check_pending != 0 ||
1107              THR_NEED_ASYNC_CANCEL(curthread)) &&
1108             !THR_IN_CRITICAL(curthread)) {
1109                 curthread->check_pending = 0;
1110                 signalcontext(&curthread->tcb->tcb_tmbx.tm_context, 0,
1111                     (__sighandler_t *)thr_resume_wrapper);
1112         }
1113         kse_wakeup_multi(curkse);
1114         /*
1115          * Continue the thread at its current frame:
1116          */
1117         if (curthread->lock_switch != 0) {
1118                 /*
1119                  * This thread came from a scheduler switch; it will
1120                  * unlock the scheduler lock and set the mailbox.
1121                  */
1122                 ret = _thread_switch(curkse->k_kcb, curthread->tcb, 0);
1123         } else {
1124                 /* This thread won't unlock the scheduler lock. */
1125                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1126                 ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1127         }
1128         if (ret != 0)
1129                 PANIC("Thread has returned from _thread_switch");
1130
1131         /* This point should not be reached. */
1132         PANIC("Thread has returned from _thread_switch");
1133 }
1134
1135 static void
1136 thr_resume_wrapper(int sig, siginfo_t *siginfo, ucontext_t *ucp)
1137 {
1138         struct pthread *curthread = _get_curthread();
1139         struct kse *curkse;
1140         int ret, err_save = errno;
1141
1142         DBG_MSG(">>> sig wrapper\n");
1143         if (curthread->lock_switch)
1144                 PANIC("thr_resume_wrapper, lock_switch != 0\n");
1145         thr_resume_check(curthread, ucp);
1146         errno = err_save;
1147         _kse_critical_enter();
1148         curkse = curthread->kse;
1149         curthread->tcb->tcb_tmbx.tm_context = *ucp;
1150         ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1151         if (ret != 0)
1152                 PANIC("thr_resume_wrapper: thread has returned "
1153                       "from _thread_switch");
1154         /* THR_SETCONTEXT(ucp); */  /* doesn't work; why? */
1155 }
1156
1157 static void
1158 thr_resume_check(struct pthread *curthread, ucontext_t *ucp)
1159 {
1160         _thr_sig_rundown(curthread, ucp);
1161
1162         if (THR_NEED_ASYNC_CANCEL(curthread))
1163                 pthread_testcancel();
1164 }
1165
1166 /*
1167  * Clean up a thread.  This must be called with the thread's KSE
1168  * scheduling lock held.  The thread must be a thread from the
1169  * KSE's group.
1170  */
1171 static void
1172 thr_cleanup(struct kse *curkse, struct pthread *thread)
1173 {
1174         struct pthread *joiner;
1175         struct kse_mailbox *kmbx = NULL;
1176         int sys_scope;
1177
1178         if ((joiner = thread->joiner) != NULL) {
1179                 /* Joinee scheduler lock held; joiner won't leave. */
1180                 if (joiner->kseg == curkse->k_kseg) {
1181                         if (joiner->join_status.thread == thread) {
1182                                 joiner->join_status.thread = NULL;
1183                                 joiner->join_status.ret = thread->ret;
1184                                 (void)_thr_setrunnable_unlocked(joiner);
1185                         }
1186                 } else {
1187                         KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1188                         /* The joiner may have removed itself and exited. */
1189                         if (_thr_ref_add(thread, joiner, 0) == 0) {
1190                                 KSE_SCHED_LOCK(curkse, joiner->kseg);
1191                                 if (joiner->join_status.thread == thread) {
1192                                         joiner->join_status.thread = NULL;
1193                                         joiner->join_status.ret = thread->ret;
1194                                         kmbx = _thr_setrunnable_unlocked(joiner);
1195                                 }
1196                                 KSE_SCHED_UNLOCK(curkse, joiner->kseg);
1197                                 _thr_ref_delete(thread, joiner);
1198                                 if (kmbx != NULL)
1199                                         kse_wakeup(kmbx);
1200                         }
1201                         KSE_SCHED_LOCK(curkse, curkse->k_kseg);
1202                 }
1203                 thread->attr.flags |= PTHREAD_DETACHED;
1204         }
1205
1206         if (!(sys_scope = (thread->attr.flags & PTHREAD_SCOPE_SYSTEM))) {
1207                 /*
1208                  * Remove the thread from the KSEG's list of threads.
1209                  */
1210                 KSEG_THRQ_REMOVE(thread->kseg, thread);
1211                 /*
1212                  * Migrate the thread to the main KSE so that this
1213                  * KSE and KSEG can be cleaned when their last thread
1214                  * exits.
1215                  */
1216                 thread->kseg = _kse_initial->k_kseg;
1217                 thread->kse = _kse_initial;
1218         }
1219
1220         /*
1221          * We can't hold the thread list lock while holding the
1222          * scheduler lock.
1223          */
1224         KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1225         DBG_MSG("Adding thread %p to GC list\n", thread);
1226         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
1227         thread->tlflags |= TLFLAGS_GC_SAFE;
1228         THR_GCLIST_ADD(thread);
1229         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
1230         if (sys_scope) {
1231                 /*
1232                  * A system scope thread is its own thread group;
1233                  * when the thread exits, its kse and ksegrp should
1234                  * be recycled as well.
1235                  * The kse upcall stack belongs to the thread, so clear it here.
1236                  */
1237                 curkse->k_stack.ss_sp = 0;
1238                 curkse->k_stack.ss_size = 0;
1239                 kse_exit();
1240                 PANIC("kse_exit() failed for system scope thread");
1241         }
1242         KSE_SCHED_LOCK(curkse, curkse->k_kseg);
1243 }
1244
1245 void
1246 _thr_gc(struct pthread *curthread)
1247 {
1248         thread_gc(curthread);
1249         kse_gc(curthread);
1250         kseg_gc(curthread);
1251 }
1252
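/*
 * Walk the GC list and free the stacks of exited threads; threads that
 * are detached and no longer referenced are unlinked and freed entirely
 * (along with their KSE/KSEG for system scope threads).
 */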
1253 static void
1254 thread_gc(struct pthread *curthread)
1255 {
1256         struct pthread *td, *td_next;
1257         kse_critical_t crit;
1258         TAILQ_HEAD(, pthread) worklist;
1259
1260         TAILQ_INIT(&worklist);
1261         crit = _kse_critical_enter();
1262         KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);
1263
1264         /* Check the threads waiting for GC. */
1265         for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = td_next) {
1266                 td_next = TAILQ_NEXT(td, gcle);
1267                 if ((td->tlflags & TLFLAGS_GC_SAFE) == 0)
1268                         continue;
1269                 else if (((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) &&
1270                     ((td->kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
1271                         /*
1272                          * The thread and KSE are operating on the same
1273                          * stack.  Wait for the KSE to exit before freeing
1274                          * the thread's stack as well as everything else.
1275                          */
1276                         continue;
1277                 }
1278                 /*
1279                  * Remove the thread from the GC list.  If the thread
1280                  * isn't yet detached, it will get added back to the
1281                  * GC list at a later time.
1282                  */
1283                 THR_GCLIST_REMOVE(td);
1284                 DBG_MSG("Freeing thread %p stack\n", td);
1285                 /*
1286                  * We can free the thread stack since it's no longer
1287                  * in use.
1288                  */
1289                 _thr_stack_free(&td->attr);
1290                 if (((td->attr.flags & PTHREAD_DETACHED) != 0) &&
1291                     (td->refcount == 0)) {
1292                         /*
1293                          * The thread has detached and is no longer
1294                          * referenced.  It is safe to remove all
1295                          * remnants of the thread.
1296                          */
1297                         THR_LIST_REMOVE(td);
1298                         TAILQ_INSERT_HEAD(&worklist, td, gcle);
1299                 }
1300         }
1301         KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
1302         _kse_critical_leave(crit);
1303
1304         while ((td = TAILQ_FIRST(&worklist)) != NULL) {
1305                 TAILQ_REMOVE(&worklist, td, gcle);
1306                 /*
1307                  * XXX we don't free the initial thread and its kse
1308                  * (if the thread is a bound thread), because there might
1309                  * still be code referencing the initial thread and kse.
1310                  */
1311                 if (td == _thr_initial) {
1312                         DBG_MSG("Initial thread won't be freed\n");
1313                         continue;
1314                 }
1315
1316                 if ((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1317                         crit = _kse_critical_enter();
1318                         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1319                         kse_free_unlocked(td->kse);
1320                         kseg_free_unlocked(td->kseg);
1321                         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1322                         _kse_critical_leave(crit);
1323                 }
1324                 DBG_MSG("Freeing thread %p\n", td);
1325                 _thr_free(curthread, td);
1326         }
1327 }
1328
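/*
 * Trim the free KSE cache back down to MAX_CACHED_KSES, destroying the
 * excess KSEs outside of the kse_lock.
 */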
1329 static void
1330 kse_gc(struct pthread *curthread)
1331 {
1332         kse_critical_t crit;
1333         TAILQ_HEAD(, kse) worklist;
1334         struct kse *kse;
1335
1336         if (free_kse_count <= MAX_CACHED_KSES)
1337                 return;
1338         TAILQ_INIT(&worklist);
1339         crit = _kse_critical_enter();
1340         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1341         while (free_kse_count > MAX_CACHED_KSES) {
1342                 kse = TAILQ_FIRST(&free_kseq);
1343                 TAILQ_REMOVE(&free_kseq, kse, k_qe);
1344                 TAILQ_INSERT_HEAD(&worklist, kse, k_qe);
1345                 free_kse_count--;
1346         }
1347         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1348         _kse_critical_leave(crit);
1349
1350         while ((kse = TAILQ_FIRST(&worklist))) {
1351                 TAILQ_REMOVE(&worklist, kse, k_qe);
1352                 kse_destroy(kse);
1353         }
1354 }
1355
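/*
 * Trim the free KSE group cache back down to MAX_CACHED_KSEGS, destroying
 * the excess groups outside of the kse_lock.
 */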
1356 static void
1357 kseg_gc(struct pthread *curthread)
1358 {
1359         kse_critical_t crit;
1360         TAILQ_HEAD(, kse_group) worklist;
1361         struct kse_group *kseg;
1362
1363         if (free_kseg_count <= MAX_CACHED_KSEGS)
1364                 return; 
1365         TAILQ_INIT(&worklist);
1366         crit = _kse_critical_enter();
1367         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1368         while (free_kseg_count > MAX_CACHED_KSEGS) {
1369                 kseg = TAILQ_FIRST(&free_kse_groupq);
1370                 TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
1371                 free_kseg_count--;
1372                 TAILQ_INSERT_HEAD(&worklist, kseg, kg_qe);
1373         }
1374         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1375         _kse_critical_leave(crit);
1376
1377         while ((kseg = TAILQ_FIRST(&worklist))) {
1378                 TAILQ_REMOVE(&worklist, kseg, kg_qe);
1379                 kseg_destroy(kseg);
1380         }
1381 }
1382
1383 /*
1384  * Only new threads that are running or suspended may be scheduled.
1385  */
1386 int
1387 _thr_schedule_add(struct pthread *curthread, struct pthread *newthread)
1388 {
1389         kse_critical_t crit;
1390         int ret;
1391
1392         /* Add the new thread. */
1393         thr_link(newthread);
1394
1395         /*
1396          * If this is the first time creating a thread, make sure
1397          * the mailbox is set for the current thread.
1398          */
1399         if ((newthread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1400                 /* We use the thread's stack as the KSE's stack. */
1401                 newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_sp =
1402                     newthread->attr.stackaddr_attr;
1403                 newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_size =
1404                     newthread->attr.stacksize_attr;
1405
1406                 /*
1407                  * No need to lock the scheduling queue since the
1408                  * KSE/KSEG pair have not yet been started.
1409                  */
1410                 KSEG_THRQ_ADD(newthread->kseg, newthread);
1411                 /* this thread never gives up kse */
1412                 newthread->active = 1;
1413                 newthread->kse->k_curthread = newthread;
1414                 newthread->kse->k_kcb->kcb_kmbx.km_flags = KMF_BOUND;
1415                 newthread->kse->k_kcb->kcb_kmbx.km_func =
1416                     (kse_func_t *)kse_sched_single;
1417                 newthread->kse->k_kcb->kcb_kmbx.km_quantum = 0;
1418                 KSE_SET_MBOX(newthread->kse, newthread);
1419                 /*
1420                  * This thread needs a new KSE and KSEG.
1421                  */
1422                 newthread->kse->k_flags &= ~KF_INITIALIZED;
1423                 newthread->kse->k_flags |= KF_STARTED;
1424                 /* Fire up! */
1425                 ret = kse_create(&newthread->kse->k_kcb->kcb_kmbx, 1);
1426                 if (ret != 0)
1427                         ret = errno;
1428         }
1429         else {
1430                 /*
1431                  * Lock the KSE and add the new thread to its list of
1432                  * assigned threads.  If the new thread is runnable, also
1433                  * add it to the KSE's run queue.
1434                  */
1435                 crit = _kse_critical_enter();
1436                 KSE_SCHED_LOCK(curthread->kse, newthread->kseg);
1437                 KSEG_THRQ_ADD(newthread->kseg, newthread);
1438                 if (newthread->state == PS_RUNNING)
1439                         THR_RUNQ_INSERT_TAIL(newthread);
1440                 if ((newthread->kse->k_flags & KF_STARTED) == 0) {
1441                         /*
1442                          * This KSE hasn't been started yet.  Start it
1443                          * outside of holding the lock.
1444                          */
1445                         newthread->kse->k_flags |= KF_STARTED;
1446                         newthread->kse->k_kcb->kcb_kmbx.km_func =
1447                             (kse_func_t *)kse_sched_multi;
1448                         newthread->kse->k_kcb->kcb_kmbx.km_flags = 0;
1449                         kse_create(&newthread->kse->k_kcb->kcb_kmbx, 0);
1450                 } else if ((newthread->state == PS_RUNNING) &&
1451                      KSE_IS_IDLE(newthread->kse)) {
1452                         /*
1453                          * The thread is being scheduled on another KSEG.
1454                          */
1455                         kse_wakeup_one(newthread);
1456                 }
1457                 KSE_SCHED_UNLOCK(curthread->kse, newthread->kseg);
1458                 _kse_critical_leave(crit);
1459                 ret = 0;
1460         }
1461         if (ret != 0)
1462                 thr_unlink(newthread);
1463
1464         return (ret);
1465 }
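
/*
 * Editor's note: a hedged sketch of the caller side of _thr_schedule_add().
 * The helper name, its error handling, and the point at which the state is
 * set are illustrative assumptions, not code from this library; the sketch
 * only shows that the new thread must already be PS_RUNNING or PS_SUSPENDED
 * and that a non-zero return means the thread was unlinked again.
 */
#if 0   /* illustrative sketch -- not part of the build */
static int
schedule_new_thread(struct pthread *curthread, struct pthread *new_thread)
{
        int err;

        THR_SET_STATE(new_thread, PS_RUNNING);
        err = _thr_schedule_add(curthread, new_thread);
        if (err != 0) {
                /* The thread was never scheduled; release its storage. */
                _thr_free(curthread, new_thread);
        }
        return (err);
}
#endif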
1466
1467 void
1468 kse_waitq_insert(struct pthread *thread)
1469 {
1470         struct pthread *td;
1471
1472         if (thread->wakeup_time.tv_sec == -1)
1473                 TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, thread,
1474                     pqe);
1475         else {
1476                 td = TAILQ_FIRST(&thread->kse->k_schedq->sq_waitq);
1477                 while ((td != NULL) && (td->wakeup_time.tv_sec != -1) &&
1478                     ((td->wakeup_time.tv_sec < thread->wakeup_time.tv_sec) ||
1479                     ((td->wakeup_time.tv_sec == thread->wakeup_time.tv_sec) &&
1480                     (td->wakeup_time.tv_nsec <= thread->wakeup_time.tv_nsec))))
1481                         td = TAILQ_NEXT(td, pqe);
1482                 if (td == NULL)
1483                         TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq,
1484                             thread, pqe);
1485                 else
1486                         TAILQ_INSERT_BEFORE(td, thread, pqe);
1487         }
1488         thread->flags |= THR_FLAGS_IN_WAITQ;
1489 }
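
/*
 * Editor's note: a hedged example of the ordering kse_waitq_insert()
 * maintains.  Threads with a real wakeup time stay sorted by that time;
 * threads with wakeup_time.tv_sec == -1 (no timeout) collect at the tail.
 * The "late" and "forever" thread pointers below are made up for
 * illustration.
 */
#if 0   /* illustrative sketch -- not part of the build */
        /* waitq before: [1.0s] -> [2.5s] -> [no timeout] */
        late->wakeup_time.tv_sec = 2;
        late->wakeup_time.tv_nsec = 0;
        kse_waitq_insert(late);
        /* waitq now:    [1.0s] -> [2.0s] -> [2.5s] -> [no timeout] */

        forever->wakeup_time.tv_sec = -1;
        forever->wakeup_time.tv_nsec = -1;
        kse_waitq_insert(forever);
        /* "forever" is appended at the very tail, after the existing
           no-timeout entry. */
#endif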
1490
1491 /*
1492  * This must be called with the scheduling lock held.
1493  */
1494 static void
1495 kse_check_completed(struct kse *kse)
1496 {
1497         struct pthread *thread;
1498         struct kse_thr_mailbox *completed;
1499         int sig;
1500
1501         if ((completed = kse->k_kcb->kcb_kmbx.km_completed) != NULL) {
1502                 kse->k_kcb->kcb_kmbx.km_completed = NULL;
1503                 while (completed != NULL) {
1504                         thread = completed->tm_udata;
1505                         DBG_MSG("Found completed thread %p, name %s\n",
1506                             thread,
1507                             (thread->name == NULL) ? "none" : thread->name);
1508                         thread->blocked = 0;
1509                         if (thread != kse->k_curthread) {
1510                                 thr_accounting(thread);
1511                                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
1512                                         THR_SET_STATE(thread, PS_SUSPENDED);
1513                                 else
1514                                         KSE_RUNQ_INSERT_TAIL(kse, thread);
1515                                 if ((thread->kse != kse) &&
1516                                     (thread->kse->k_curthread == thread)) {
1517                                         /*
1518                                          * Remove this thread from its
1519                                          * previous KSE so that it (the KSE)
1520                                          * doesn't think it is still active.
1521                                          */
1522                                         thread->kse->k_curthread = NULL;
1523                                         thread->active = 0;
1524                                 }
1525                         }
1526                         if ((sig = thread->tcb->tcb_tmbx.tm_syncsig.si_signo)
1527                             != 0) {
1528                                 if (SIGISMEMBER(thread->sigmask, sig))
1529                                         SIGADDSET(thread->sigpend, sig);
1530                                 else if (THR_IN_CRITICAL(thread))
1531                                         kse_thr_interrupt(NULL, KSE_INTR_SIGEXIT, sig);
1532                                 else
1533                                         (void)_thr_sig_add(thread, sig,
1534                                             &thread->tcb->tcb_tmbx.tm_syncsig);
1535                                 thread->tcb->tcb_tmbx.tm_syncsig.si_signo = 0;
1536                         }
1537                         completed = completed->tm_next;
1538                 }
1539         }
1540 }
1541
1542 /*
1543  * This must be called with the scheduling lock held.
1544  */
1545 static void
1546 kse_check_waitq(struct kse *kse)
1547 {
1548         struct pthread  *pthread;
1549         struct timespec ts;
1550
1551         KSE_GET_TOD(kse, &ts);
1552
1553         /*
1554          * Wake up threads that have timed out.  This has to be
1555          * done before adding the current thread to the run queue
1556          * so that a CPU-intensive thread doesn't get preference
1557          * over waiting threads.
1558          */
1559         while (((pthread = KSE_WAITQ_FIRST(kse)) != NULL) &&
1560             thr_timedout(pthread, &ts)) {
1561                 /* Remove the thread from the wait queue: */
1562                 KSE_WAITQ_REMOVE(kse, pthread);
1563                 DBG_MSG("Found timedout thread %p in waitq\n", pthread);
1564
1565                 /* Indicate that the thread timed out: */
1566                 pthread->timeout = 1;
1567
1568                 /* Add the thread to the priority queue: */
1569                 if ((pthread->flags & THR_FLAGS_SUSPENDED) != 0)
1570                         THR_SET_STATE(pthread, PS_SUSPENDED);
1571                 else {
1572                         THR_SET_STATE(pthread, PS_RUNNING);
1573                         KSE_RUNQ_INSERT_TAIL(kse, pthread);
1574                 }
1575         }
1576 }
1577
1578 static int
1579 thr_timedout(struct pthread *thread, struct timespec *curtime)
1580 {
1581         if (thread->wakeup_time.tv_sec < 0)
1582                 return (0);
1583         else if (thread->wakeup_time.tv_sec > curtime->tv_sec)
1584                 return (0);
1585         else if ((thread->wakeup_time.tv_sec == curtime->tv_sec) &&
1586             (thread->wakeup_time.tv_nsec > curtime->tv_nsec))
1587                 return (0);
1588         else
1589                 return (1);
1590 }
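
/*
 * Editor's note: hedged examples of thr_timedout() with made-up times.
 */
#if 0   /* illustrative sketch -- not part of the build */
        struct timespec now = { 5, 300000000 };         /* 5.3 s */
        struct pthread t;

        t.wakeup_time.tv_sec = -1;      /* no timeout set */
        t.wakeup_time.tv_nsec = -1;
        assert(thr_timedout(&t, &now) == 0);

        t.wakeup_time.tv_sec = 5;       /* 5.25 s: already in the past */
        t.wakeup_time.tv_nsec = 250000000;
        assert(thr_timedout(&t, &now) == 1);

        t.wakeup_time.tv_sec = 6;       /* 6.0 s: still in the future */
        t.wakeup_time.tv_nsec = 0;
        assert(thr_timedout(&t, &now) == 0);
#endif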
1591
1592 /*
1593  * This must be called with the scheduling lock held.
1594  *
1595  * Each thread has a time slice, a wakeup time (used when it wants
1596  * to wait for a specified amount of time), a run state, and an
1597  * active flag.
1598  *
1599  * When a thread gets run by the scheduler, the active flag is
1600  * set to non-zero (1).  When a thread performs an explicit yield
1601  * or schedules a state change, it enters the scheduler and the
1602  * active flag is cleared.  When the active flag is still seen
1603  * set in the scheduler, that means that the thread is blocked in
1604  * the kernel (because it is cleared before entering the scheduler
1605  * in all other instances).
1606  *
1607  * The wakeup time is only set for those states that can timeout.
1608  * It is set to (-1, -1) for all other instances.
1609  *
1610  * The thread's run state, aside from being useful when debugging,
1611  * is used to place the thread in an appropriate queue.  There
1612  * are 2 basic queues:
1613  *
1614  *   o run queue - queue ordered by priority for all threads
1615  *                 that are runnable
1616  *   o waiting queue - queue sorted by wakeup time for all threads
1617  *                     that are not otherwise runnable (not blocked
1618  *                     in kernel, not waiting for locks)
1619  *
1620  * The thread's time slice is used for round-robin scheduling
1621  * (the default scheduling policy).  While a SCHED_RR thread
1622  * is runnable its time slice accumulates.  When it reaches
1623  * the time slice interval, it gets reset and added to the end
1624  * of the queue of threads at its priority.  When a thread is no
1625  * longer runnable (blocks in the kernel, waits, etc.), its
1626  * time slice is reset.
1627  *
1628  * The job of kse_switchout_thread() is to handle all of the above.
1629  */
1630 static void
1631 kse_switchout_thread(struct kse *kse, struct pthread *thread)
1632 {
1633         int level;
1634         int i;
1635         int restart;
1636         siginfo_t siginfo;
1637
1638         /*
1639          * Place the currently running thread into the
1640          * appropriate queue(s).
1641          */
1642         DBG_MSG("Switching out thread %p, state %d\n", thread, thread->state);
1643
1644         THR_DEACTIVATE_LAST_LOCK(thread);
1645         if (thread->blocked != 0) {
1646                 thread->active = 0;
1647                 thread->need_switchout = 0;
1648                 /* This thread must have blocked in the kernel. */
1649                 /*
1650                  * Check for pending signals and cancellation for
1651                  * this thread to see if we need to interrupt it
1652                  * in the kernel.
1653                  */
1654                 if (THR_NEED_CANCEL(thread)) {
1655                         kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1656                                           KSE_INTR_INTERRUPT, 0);
1657                 } else if (thread->check_pending != 0) {
1658                         for (i = 1; i <= _SIG_MAXSIG; ++i) {
1659                                 if (SIGISMEMBER(thread->sigpend, i) &&
1660                                     !SIGISMEMBER(thread->sigmask, i)) {
1661                                         restart = _thread_sigact[i - 1].sa_flags & SA_RESTART;
1662                                         kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1663                                             restart ? KSE_INTR_RESTART : KSE_INTR_INTERRUPT, 0);
1664                                         break;
1665                                 }
1666                         }
1667                 }
1668         }
1669         else {
1670                 switch (thread->state) {
1671                 case PS_MUTEX_WAIT:
1672                 case PS_COND_WAIT:
1673                         if (THR_NEED_CANCEL(thread)) {
1674                                 thread->interrupted = 1;
1675                                 thread->continuation = _thr_finish_cancellation;
1676                                 THR_SET_STATE(thread, PS_RUNNING);
1677                         } else {
1678                                 /* Insert into the waiting queue: */
1679                                 KSE_WAITQ_INSERT(kse, thread);
1680                         }
1681                         break;
1682
1683                 case PS_LOCKWAIT:
1684                         /*
1685                          * This state doesn't time out.
1686                          */
1687                         thread->wakeup_time.tv_sec = -1;
1688                         thread->wakeup_time.tv_nsec = -1;
1689                         level = thread->locklevel - 1;
1690                         if (!_LCK_GRANTED(&thread->lockusers[level]))
1691                                 KSE_WAITQ_INSERT(kse, thread);
1692                         else
1693                                 THR_SET_STATE(thread, PS_RUNNING);
1694                         break;
1695
1696                 case PS_SLEEP_WAIT:
1697                 case PS_SIGWAIT:
1698                         if (THR_NEED_CANCEL(thread)) {
1699                                 thread->interrupted = 1;
1700                                 THR_SET_STATE(thread, PS_RUNNING);
1701                         } else {
1702                                 KSE_WAITQ_INSERT(kse, thread);
1703                         }
1704                         break;
1705
1706                 case PS_JOIN:
1707                         if (THR_NEED_CANCEL(thread)) {
1708                                 thread->join_status.thread = NULL;
1709                                 THR_SET_STATE(thread, PS_RUNNING);
1710                         } else {
1711                                 /*
1712                                  * This state doesn't time out.
1713                                  */
1714                                 thread->wakeup_time.tv_sec = -1;
1715                                 thread->wakeup_time.tv_nsec = -1;
1716
1717                                 /* Insert into the waiting queue: */
1718                                 KSE_WAITQ_INSERT(kse, thread);
1719                         }
1720                         break;
1721
1722                 case PS_SIGSUSPEND:
1723                 case PS_SUSPENDED:
1724                         if (THR_NEED_CANCEL(thread)) {
1725                                 thread->interrupted = 1;
1726                                 THR_SET_STATE(thread, PS_RUNNING);
1727                         } else {
1728                                 /*
1729                                  * These states don't time out.
1730                                  */
1731                                 thread->wakeup_time.tv_sec = -1;
1732                                 thread->wakeup_time.tv_nsec = -1;
1733
1734                                 /* Insert into the waiting queue: */
1735                                 KSE_WAITQ_INSERT(kse, thread);
1736                         }
1737                         break;
1738
1739                 case PS_DEAD:
1740                         /*
1741                          * The scheduler is operating on a different
1742                          * stack.  It is safe to do garbage collecting
1743                          * here.
1744                          */
1745                         thread->active = 0;
1746                         thread->need_switchout = 0;
1747                         thread->lock_switch = 0;
1748                         thr_cleanup(kse, thread);
1749                         return;
1750                         break;
1751
1752                 case PS_RUNNING:
1753                         if ((thread->flags & THR_FLAGS_SUSPENDED) != 0 &&
1754                             !THR_NEED_CANCEL(thread))
1755                                 THR_SET_STATE(thread, PS_SUSPENDED);
1756                         break;
1757
1758                 case PS_DEADLOCK:
1759                         /*
1760                          * These states don't time out.
1761                          */
1762                         thread->wakeup_time.tv_sec = -1;
1763                         thread->wakeup_time.tv_nsec = -1;
1764
1765                         /* Insert into the waiting queue: */
1766                         KSE_WAITQ_INSERT(kse, thread);
1767                         break;
1768
1769                 default:
1770                         PANIC("Unknown state\n");
1771                         break;
1772                 }
1773
1774                 thr_accounting(thread);
1775                 if (thread->state == PS_RUNNING) {
1776                         if (thread->slice_usec == -1) {
1777                                 /*
1778                                  * The thread exceeded its time quantum or
1779                                  * it yielded the CPU; place it at the tail
1780                                  * of the queue for its priority.
1781                                  */
1782                                 KSE_RUNQ_INSERT_TAIL(kse, thread);
1783                         } else {
1784                                 /*
1785                                  * The thread hasn't exceeded its interval.
1786                                  * Place it at the head of the queue for its
1787                                  * priority.
1788                                  */
1789                                 KSE_RUNQ_INSERT_HEAD(kse, thread);
1790                         }
1791                 }
1792         }
1793         thread->active = 0;
1794         thread->need_switchout = 0;
1795         if (thread->check_pending != 0) {
1796                 /* Install pending signals into the frame. */
1797                 thread->check_pending = 0;
1798                 KSE_LOCK_ACQUIRE(kse, &_thread_signal_lock);
1799                 for (i = 1; i <= _SIG_MAXSIG; i++) {
1800                         if (SIGISMEMBER(thread->sigmask, i))
1801                                 continue;
1802                         if (SIGISMEMBER(thread->sigpend, i))
1803                                 (void)_thr_sig_add(thread, i,
1804                                     &thread->siginfo[i-1]);
1805                         else if (SIGISMEMBER(_thr_proc_sigpending, i) &&
1806                                 _thr_getprocsig_unlocked(i, &siginfo)) {
1807                                 (void)_thr_sig_add(thread, i, &siginfo);
1808                         }
1809                 }
1810                 KSE_LOCK_RELEASE(kse, &_thread_signal_lock);
1811         }
1812 }
1813
1814 /*
1815  * This function waits for the smallest timeout value of any waiting
1816  * thread, or until it receives a message from another KSE.
1817  *
1818  * This must be called with the scheduling lock held.
1819  */
1820 static void
1821 kse_wait(struct kse *kse, struct pthread *td_wait, int sigseqno)
1822 {
1823         struct timespec ts, ts_sleep;
1824         int saved_flags;
1825
1826         if ((td_wait == NULL) || (td_wait->wakeup_time.tv_sec < 0)) {
1827                 /* Limit sleep to no more than 1 minute. */
1828                 ts_sleep.tv_sec = 60;
1829                 ts_sleep.tv_nsec = 0;
1830         } else {
1831                 KSE_GET_TOD(kse, &ts);
1832                 TIMESPEC_SUB(&ts_sleep, &td_wait->wakeup_time, &ts);
1833                 if (ts_sleep.tv_sec > 60) {
1834                         ts_sleep.tv_sec = 60;
1835                         ts_sleep.tv_nsec = 0;
1836                 }
1837         }
1838         /* Don't sleep for negative times. */
1839         if ((ts_sleep.tv_sec >= 0) && (ts_sleep.tv_nsec >= 0)) {
1840                 KSE_SET_IDLE(kse);
1841                 kse->k_kseg->kg_idle_kses++;
1842                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1843                 if ((kse->k_kseg->kg_flags & KGF_SINGLE_THREAD) &&
1844                     (kse->k_sigseqno != sigseqno))
1845                         ; /* don't sleep */
1846                 else {
1847                         saved_flags = kse->k_kcb->kcb_kmbx.km_flags;
1848                         kse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL;
1849                         kse_release(&ts_sleep);
1850                         kse->k_kcb->kcb_kmbx.km_flags = saved_flags;
1851                 }
1852                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1853                 if (KSE_IS_IDLE(kse)) {
1854                         KSE_CLEAR_IDLE(kse);
1855                         kse->k_kseg->kg_idle_kses--;
1856                 }
1857         }
1858 }
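
/*
 * Editor's note: a hedged example of the sleep-time computation used by
 * kse_wait() above, with made-up times.  TIMESPEC_SUB is the same helper
 * macro the function uses.
 */
#if 0   /* illustrative sketch -- not part of the build */
        struct timespec now = { 100, 0 };
        struct timespec wakeup = { 190, 0 };    /* earliest waiter: now + 90 s */
        struct timespec sleep_for;

        TIMESPEC_SUB(&sleep_for, &wakeup, &now);
        if (sleep_for.tv_sec > 60) {
                /* Same clamp as above: never block for more than a minute. */
                sleep_for.tv_sec = 60;
                sleep_for.tv_nsec = 0;
        }
        /*
         * sleep_for is now { 60, 0 }.  If the earliest wakeup had already
         * passed, sleep_for would be negative and kse_wait() would skip
         * kse_release() entirely.
         */
#endif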
1859
1860 /*
1861  * Avoid calling this kse_exit() so as not to confuse it with the
1862  * system call of the same name.
1863  */
1864 static void
1865 kse_fini(struct kse *kse)
1866 {
1867         /* struct kse_group *free_kseg = NULL; */
1868         struct timespec ts;
1869         struct pthread *td;
1870
1871         /*
1872          * Check to see if this is one of the main kses.
1873          */
1874         if (kse->k_kseg != _kse_initial->k_kseg) {
1875                 PANIC("shouldn't get here");
1876                 /* This is for supporting thread groups. */
1877 #ifdef NOT_YET
1878                 /* Remove this KSE from the KSEG's list of KSEs. */
1879                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1880                 TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1881                 kse->k_kseg->kg_ksecount--;
1882                 if (TAILQ_EMPTY(&kse->k_kseg->kg_kseq))
1883                         free_kseg = kse->k_kseg;
1884                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1885
1886                 /*
1887                  * Add this KSE to the list of free KSEs along with
1888                  * the KSEG if it is now orphaned.
1889                  */
1890                 KSE_LOCK_ACQUIRE(kse, &kse_lock);
1891                 if (free_kseg != NULL)
1892                         kseg_free_unlocked(free_kseg);
1893                 kse_free_unlocked(kse);
1894                 KSE_LOCK_RELEASE(kse, &kse_lock);
1895                 kse_exit();
1896                 /* Never returns. */
1897                 PANIC("kse_exit()");
1898 #endif
1899         } else {
1900                 /*
1901                  * We allow the program to kill a kse in the initial group (by
1902                  * lowering the concurrency).
1903                  */
1904                 if ((kse != _kse_initial) &&
1905                     ((kse->k_flags & KF_TERMINATED) != 0)) {
1906                         KSE_SCHED_LOCK(kse, kse->k_kseg);
1907                         TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1908                         kse->k_kseg->kg_ksecount--;
1909                         /*
1910                          * Migrate threads to _kse_initial if the last
1911                          * kse they ran on is this kse.
1912                          */
1913                         td = TAILQ_FIRST(&kse->k_kseg->kg_threadq);
1914                         while (td != NULL) {
1915                                 if (td->kse == kse)
1916                                         td->kse = _kse_initial;
1917                                 td = TAILQ_NEXT(td, kle);
1918                         }
1919                         KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1920                         KSE_LOCK_ACQUIRE(kse, &kse_lock);
1921                         kse_free_unlocked(kse);
1922                         KSE_LOCK_RELEASE(kse, &kse_lock);
1923                         /* Make sure there is always at least one kse awake. */
1924                         KSE_WAKEUP(_kse_initial);
1925                         kse_exit();
1926                         /* Never returns. */
1927                         PANIC("kse_exit() failed for initial kseg");
1928                 }
1929                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1930                 KSE_SET_IDLE(kse);
1931                 kse->k_kseg->kg_idle_kses++;
1932                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1933                 ts.tv_sec = 120;
1934                 ts.tv_nsec = 0;
1935                 kse->k_kcb->kcb_kmbx.km_flags = 0;
1936                 kse_release(&ts);
1937                 /* Never reached. */
1938         }
1939 }
1940
1941 void
1942 _thr_set_timeout(const struct timespec *timeout)
1943 {
1944         struct pthread  *curthread = _get_curthread();
1945         struct timespec ts;
1946
1947         /* Reset the timeout flag for the running thread: */
1948         curthread->timeout = 0;
1949
1950         /* Check if the thread is to wait forever: */
1951         if (timeout == NULL) {
1952                 /*
1953                  * Set the wakeup time to something that can be recognized as
1954                  * different from an actual time of day:
1955                  */
1956                 curthread->wakeup_time.tv_sec = -1;
1957                 curthread->wakeup_time.tv_nsec = -1;
1958         }
1959         /* Check if no waiting is required: */
1960         else if ((timeout->tv_sec == 0) && (timeout->tv_nsec == 0)) {
1961                 /* Set the wake up time to 'immediately': */
1962                 curthread->wakeup_time.tv_sec = 0;
1963                 curthread->wakeup_time.tv_nsec = 0;
1964         } else {
1965                 /* Calculate the time for the current thread to wakeup: */
1966                 KSE_GET_TOD(curthread->kse, &ts);
1967                 TIMESPEC_ADD(&curthread->wakeup_time, &ts, timeout);
1968         }
1969 }
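
/*
 * Editor's note: hedged usage examples for _thr_set_timeout().  A timed
 * wait converts its relative timeout into an absolute wakeup time before
 * the thread is queued; NULL means "wait forever" and a zero timeout
 * means "don't wait at all".  The 50 ms value is made up.
 */
#if 0   /* illustrative sketch -- not part of the build */
        struct timespec rel = { 0, 50000000 };  /* 50 ms relative timeout */
        struct timespec zero = { 0, 0 };

        _thr_set_timeout(&rel);         /* wakeup_time = now + 50 ms */
        _thr_set_timeout(NULL);         /* wakeup_time = { -1, -1 }: forever */
        _thr_set_timeout(&zero);        /* wakeup_time = { 0, 0 }: immediate */
#endif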
1970
1971 void
1972 _thr_panic_exit(char *file, int line, char *msg)
1973 {
1974         char buf[256];
1975
1976         snprintf(buf, sizeof(buf), "(%s:%d) %s\n", file, line, msg);
1977         __sys_write(2, buf, strlen(buf));
1978         abort();
1979 }
1980
1981 void
1982 _thr_setrunnable(struct pthread *curthread, struct pthread *thread)
1983 {
1984         kse_critical_t crit;
1985         struct kse_mailbox *kmbx;
1986
1987         crit = _kse_critical_enter();
1988         KSE_SCHED_LOCK(curthread->kse, thread->kseg);
1989         kmbx = _thr_setrunnable_unlocked(thread);
1990         KSE_SCHED_UNLOCK(curthread->kse, thread->kseg);
1991         _kse_critical_leave(crit);
1992         if ((kmbx != NULL) && (__isthreaded != 0))
1993                 kse_wakeup(kmbx);
1994 }
1995
1996 struct kse_mailbox *
1997 _thr_setrunnable_unlocked(struct pthread *thread)
1998 {
1999         struct kse_mailbox *kmbx = NULL;
2000
2001         if ((thread->kseg->kg_flags & KGF_SINGLE_THREAD) != 0) {
2002                 /* No silly queues for these threads. */
2003                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
2004                         THR_SET_STATE(thread, PS_SUSPENDED);
2005                 else {
2006                         THR_SET_STATE(thread, PS_RUNNING);
2007                         kmbx = kse_wakeup_one(thread);
2008                 }
2009
2010         } else if (thread->state != PS_RUNNING) {
2011                 if ((thread->flags & THR_FLAGS_IN_WAITQ) != 0)
2012                         KSE_WAITQ_REMOVE(thread->kse, thread);
2013                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
2014                         THR_SET_STATE(thread, PS_SUSPENDED);
2015                 else {
2016                         THR_SET_STATE(thread, PS_RUNNING);
2017                         if ((thread->blocked == 0) && (thread->active == 0) &&
2018                             (thread->flags & THR_FLAGS_IN_RUNQ) == 0)
2019                                 THR_RUNQ_INSERT_TAIL(thread);
2020                         /*
2021                          * XXX - Threads are not yet assigned to specific
2022                          *       KSEs; they are assigned to the KSEG.  So
2023                          *       the fact that a thread's KSE is waiting
2024                          *       doesn't necessarily mean that it will be
2025                          *       the KSE that runs the thread after the
2026                          *       lock is granted.  But we don't know if the
2027                          *       other KSEs within the same KSEG are also
2028                          *       in a waiting state or not so we err on the
2029                          *       side of caution and wakeup the thread's
2030                          *       last known KSE.  We ensure that the
2031                          *       thread's KSE doesn't change while its
2032                          *       scheduling lock is held so it is safe to
2033                          *       reference it (the KSE).  If the KSE wakes
2034                          *       up and doesn't find any more work it will
2035                          *       again go back to waiting so no harm is
2036                          *       done.
2037                          */
2038                         kmbx = kse_wakeup_one(thread);
2039                 }
2040         }
2041         return (kmbx);
2042 }
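
/*
 * Editor's note: a hedged sketch of how a waiter might be handed back to
 * the scheduler.  The "waiter" variable is made up; real callers either
 * use _thr_setrunnable() (which takes the scheduling lock itself, as
 * above) or call _thr_setrunnable_unlocked() while already holding that
 * lock and then issue the kse_wakeup() after dropping it.
 */
#if 0   /* illustrative sketch -- not part of the build */
        struct pthread *waiter;         /* assume: a thread blocked in PS_COND_WAIT */
        struct kse_mailbox *kmbx;
        kse_critical_t crit;

        crit = _kse_critical_enter();
        KSE_SCHED_LOCK(curthread->kse, waiter->kseg);
        kmbx = _thr_setrunnable_unlocked(waiter);
        KSE_SCHED_UNLOCK(curthread->kse, waiter->kseg);
        _kse_critical_leave(crit);
        if (kmbx != NULL)
                kse_wakeup(kmbx);       /* wake the chosen idle KSE outside the lock */
#endif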
2043
2044 static struct kse_mailbox *
2045 kse_wakeup_one(struct pthread *thread)
2046 {
2047         struct kse *ke;
2048
2049         if (KSE_IS_IDLE(thread->kse)) {
2050                 KSE_CLEAR_IDLE(thread->kse);
2051                 thread->kseg->kg_idle_kses--;
2052                 return (&thread->kse->k_kcb->kcb_kmbx);
2053         } else {
2054                 TAILQ_FOREACH(ke, &thread->kseg->kg_kseq, k_kgqe) {
2055                         if (KSE_IS_IDLE(ke)) {
2056                                 KSE_CLEAR_IDLE(ke);
2057                                 ke->k_kseg->kg_idle_kses--;
2058                                 return (&ke->k_kcb->kcb_kmbx);
2059                         }
2060                 }
2061         }
2062         return (NULL);
2063 }
2064
2065 static void
2066 kse_wakeup_multi(struct kse *curkse)
2067 {
2068         struct kse *ke;
2069         int tmp;
2070
2071         if ((tmp = KSE_RUNQ_THREADS(curkse)) && curkse->k_kseg->kg_idle_kses) {
2072                 TAILQ_FOREACH(ke, &curkse->k_kseg->kg_kseq, k_kgqe) {
2073                         if (KSE_IS_IDLE(ke)) {
2074                                 KSE_CLEAR_IDLE(ke);
2075                                 ke->k_kseg->kg_idle_kses--;
2076                                 KSE_WAKEUP(ke);
2077                                 if (--tmp == 0)
2078                                         break;
2079                         }
2080                 }
2081         }
2082 }
2083
2084 /*
2085  * Allocate a new KSEG.
2086  *
2087  * We allow the current thread to be NULL in the case that this
2088  * is the first time a KSEG is being created (library initialization).
2089  * In this case, we don't need to (and can't) take any locks.
2090  */
2091 struct kse_group *
2092 _kseg_alloc(struct pthread *curthread)
2093 {
2094         struct kse_group *kseg = NULL;
2095         kse_critical_t crit;
2096
2097         if ((curthread != NULL) && (free_kseg_count > 0)) {
2098                 /* Use the kse lock for the kseg queue. */
2099                 crit = _kse_critical_enter();
2100                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2101                 if ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
2102                         TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
2103                         free_kseg_count--;
2104                         active_kseg_count++;
2105                         TAILQ_INSERT_TAIL(&active_kse_groupq, kseg, kg_qe);
2106                 }
2107                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2108                 _kse_critical_leave(crit);
2109                 if (kseg)
2110                         kseg_reinit(kseg);
2111         }
2112
2113         /*
2114          * Allocate a new KSE group if one couldn't be taken from
2115          * the free list.
2117          */
2118         if ((kseg == NULL) &&
2119             ((kseg = (struct kse_group *)malloc(sizeof(*kseg))) != NULL)) {
2120                 if (_pq_alloc(&kseg->kg_schedq.sq_runq,
2121                     THR_MIN_PRIORITY, THR_LAST_PRIORITY) != 0) {
2122                         free(kseg);
2123                         kseg = NULL;
2124                 } else {
2125                         kseg_init(kseg);
2126                         /* Add the KSEG to the list of active KSEGs. */
2127                         if (curthread != NULL) {
2128                                 crit = _kse_critical_enter();
2129                                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2130                                 active_kseg_count++;
2131                                 TAILQ_INSERT_TAIL(&active_kse_groupq,
2132                                     kseg, kg_qe);
2133                                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2134                                 _kse_critical_leave(crit);
2135                         } else {
2136                                 active_kseg_count++;
2137                                 TAILQ_INSERT_TAIL(&active_kse_groupq,
2138                                     kseg, kg_qe);
2139                         }
2140                 }
2141         }
2142         return (kseg);
2143 }
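
/*
 * Editor's note: a hedged sketch of obtaining a KSE/KSEG pair for a new
 * scope-system thread and unwinding on failure.  Where exactly the pair
 * is attached to the thread is simplified, and the error value is an
 * assumption; only _kseg_alloc(), _kse_alloc(), _kseg_free() and the
 * struct fields shown are taken from this file.
 */
#if 0   /* illustrative sketch -- not part of the build */
        struct kse_group *kseg;
        struct kse *kse;

        kseg = _kseg_alloc(curthread);
        if (kseg == NULL)
                return (EAGAIN);
        kse = _kse_alloc(curthread, 1 /* sys_scope */);
        if (kse == NULL) {
                _kseg_free(kseg);
                return (EAGAIN);
        }
        /* Join the KSE to its group. */
        TAILQ_INSERT_TAIL(&kseg->kg_kseq, kse, k_kgqe);
        kseg->kg_ksecount++;
        kse->k_kseg = kseg;
        kse->k_schedq = &kseg->kg_schedq;
#endif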
2144
2145 static void
2146 kseg_init(struct kse_group *kseg)
2147 {
2148         kseg_reinit(kseg);
2149         _lock_init(&kseg->kg_lock, LCK_ADAPTIVE, _kse_lock_wait,
2150             _kse_lock_wakeup);
2151 }
2152
2153 static void
2154 kseg_reinit(struct kse_group *kseg)
2155 {
2156         TAILQ_INIT(&kseg->kg_kseq);
2157         TAILQ_INIT(&kseg->kg_threadq);
2158         TAILQ_INIT(&kseg->kg_schedq.sq_waitq);
2159         kseg->kg_threadcount = 0;
2160         kseg->kg_ksecount = 0;
2161         kseg->kg_idle_kses = 0;
2162         kseg->kg_flags = 0;
2163 }
2164
2165 /*
2166  * This must be called with the kse lock held and when there are
2167  * no more threads that reference it.
2168  */
2169 static void
2170 kseg_free_unlocked(struct kse_group *kseg)
2171 {
2172         TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
2173         TAILQ_INSERT_HEAD(&free_kse_groupq, kseg, kg_qe);
2174         free_kseg_count++;
2175         active_kseg_count--;
2176 }
2177
2178 void
2179 _kseg_free(struct kse_group *kseg)
2180 {
2181         struct kse *curkse;
2182         kse_critical_t crit;
2183
2184         crit = _kse_critical_enter();
2185         curkse = _get_curkse();
2186         KSE_LOCK_ACQUIRE(curkse, &kse_lock);
2187         kseg_free_unlocked(kseg);
2188         KSE_LOCK_RELEASE(curkse, &kse_lock);
2189         _kse_critical_leave(crit);
2190 }
2191
2192 static void
2193 kseg_destroy(struct kse_group *kseg)
2194 {
2195         _lock_destroy(&kseg->kg_lock);
2196         _pq_free(&kseg->kg_schedq.sq_runq);
2197         free(kseg);
2198 }
2199
2200 /*
2201  * Allocate a new KSE.
2202  *
2203  * We allow the current thread to be NULL in the case that this
2204  * is the first time a KSE is being created (library initialization).
2205  * In this case, we don't need to (and can't) take any locks.
2206  */
2207 struct kse *
2208 _kse_alloc(struct pthread *curthread, int sys_scope)
2209 {
2210         struct kse *kse = NULL;
2211         char *stack;
2212         kse_critical_t crit;
2213         int i;
2214
2215         if ((curthread != NULL) && (free_kse_count > 0)) {
2216                 crit = _kse_critical_enter();
2217                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2218                 /* Search for a finished KSE. */
2219                 kse = TAILQ_FIRST(&free_kseq);
2220                 while ((kse != NULL) &&
2221                     ((kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
2222                         kse = TAILQ_NEXT(kse, k_qe);
2223                 }
2224                 if (kse != NULL) {
2225                         DBG_MSG("found an unused kse.\n");
2226                         TAILQ_REMOVE(&free_kseq, kse, k_qe);
2227                         free_kse_count--;
2228                         TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2229                         active_kse_count++;
2230                 }
2231                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2232                 _kse_critical_leave(crit);
2233                 if (kse != NULL)
2234                         kse_reinit(kse, sys_scope);
2235         }
2236         if ((kse == NULL) &&
2237             ((kse = (struct kse *)malloc(sizeof(*kse))) != NULL)) {
2238                 if (sys_scope != 0)
2239                         stack = NULL;
2240                 else if ((stack = malloc(KSE_STACKSIZE)) == NULL) {
2241                         free(kse);
2242                         return (NULL);
2243                 }
2244                 bzero(kse, sizeof(*kse));
2245
2246                 /* Initialize KCB without the lock. */
2247                 if ((kse->k_kcb = _kcb_ctor(kse)) == NULL) {
2248                         if (stack != NULL)
2249                                 free(stack);
2250                         free(kse);
2251                         return (NULL);
2252                 }
2253
2254                 /* Initialize the lockusers. */
2255                 for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
2256                         _lockuser_init(&kse->k_lockusers[i], (void *)kse);
2257                         _LCK_SET_PRIVATE2(&kse->k_lockusers[i], NULL);
2258                 }
2259                 /* _lock_init(kse->k_lock, ...) */
2260
2261                 if (curthread != NULL) {
2262                         crit = _kse_critical_enter();
2263                         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2264                 }
2265                 kse->k_flags = 0;
2266                 TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2267                 active_kse_count++;
2268                 if (curthread != NULL) {
2269                         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2270                         _kse_critical_leave(crit);
2271                 }
2272                 /*
2273                  * Create the KSE context.
2274                  * Scope system threads (one thread per KSE) are not required
2275                  * to have a stack for an unneeded kse upcall.
2276                  */
2277                 if (!sys_scope) {
2278                         kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2279                         kse->k_stack.ss_sp = stack;
2280                         kse->k_stack.ss_size = KSE_STACKSIZE;
2281                 } else {
2282                         kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2283                         kse->k_stack.ss_sp = NULL;
2284                         kse->k_stack.ss_size = 0;
2285                 }
2286                 kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2287                 kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2288                 /*
2289                  * We need to keep a copy of the stack in case it
2290                  * doesn't get used; a KSE running a scope system
2291                  * thread will use that thread's stack.
2292                  */
2293                 kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2294         }
2295         return (kse);
2296 }
2297
2298 static void
2299 kse_reinit(struct kse *kse, int sys_scope)
2300 {
2301         if (!sys_scope) {
2302                 kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2303                 if (kse->k_stack.ss_sp == NULL) {
2304                         /* XXX check allocation failure */
2305                         kse->k_stack.ss_sp = (char *) malloc(KSE_STACKSIZE);
2306                         kse->k_stack.ss_size = KSE_STACKSIZE;
2307                 }
2308                 kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2309         } else {
2310                 kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2311                 if (kse->k_stack.ss_sp)
2312                         free(kse->k_stack.ss_sp);
2313                 kse->k_stack.ss_sp = NULL;
2314                 kse->k_stack.ss_size = 0;
2315                 kse->k_kcb->kcb_kmbx.km_quantum = 0;
2316         }
2317         kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2318         kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2319         kse->k_kcb->kcb_kmbx.km_curthread = NULL;
2320         kse->k_kcb->kcb_kmbx.km_flags = 0;
2321         kse->k_curthread = NULL;
2322         kse->k_kseg = 0;
2323         kse->k_schedq = 0;
2324         kse->k_locklevel = 0;
2325         kse->k_flags = 0;
2326         kse->k_error = 0;
2327         kse->k_cpu = 0;
2328         kse->k_sigseqno = 0;
2329 }
2330
2331 void
2332 kse_free_unlocked(struct kse *kse)
2333 {
2334         TAILQ_REMOVE(&active_kseq, kse, k_qe);
2335         active_kse_count--;
2336         kse->k_kseg = NULL;
2337         kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2338         kse->k_flags = 0;
2339         TAILQ_INSERT_HEAD(&free_kseq, kse, k_qe);
2340         free_kse_count++;
2341 }
2342
2343 void
2344 _kse_free(struct pthread *curthread, struct kse *kse)
2345 {
2346         kse_critical_t crit;
2347
2348         if (curthread == NULL)
2349                 kse_free_unlocked(kse);
2350         else {
2351                 crit = _kse_critical_enter();
2352                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2353                 kse_free_unlocked(kse);
2354                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2355                 _kse_critical_leave(crit);
2356         }
2357 }
2358
2359 static void
2360 kse_destroy(struct kse *kse)
2361 {
2362         int i;
2363
2364         if (kse->k_stack.ss_sp != NULL)
2365                 free(kse->k_stack.ss_sp);
2366         _kcb_dtor(kse->k_kcb);
2367         for (i = 0; i < MAX_KSE_LOCKLEVEL; ++i)
2368                 _lockuser_destroy(&kse->k_lockusers[i]);
2369         _lock_destroy(&kse->k_lock);
2370         free(kse);
2371 }
2372
2373 struct pthread *
2374 _thr_alloc(struct pthread *curthread)
2375 {
2376         kse_critical_t  crit;
2377         struct pthread  *thread = NULL;
2378         int i;
2379
2380         if (curthread != NULL) {
2381                 if (GC_NEEDED())
2382                         _thr_gc(curthread);
2383                 if (free_thread_count > 0) {
2384                         crit = _kse_critical_enter();
2385                         KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2386                         if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
2387                                 TAILQ_REMOVE(&free_threadq, thread, tle);
2388                                 free_thread_count--;
2389                         }
2390                         KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2391                         _kse_critical_leave(crit);
2392                 }
2393         }
2394         if ((thread == NULL) &&
2395             ((thread = malloc(sizeof(struct pthread))) != NULL)) {
2396                 bzero(thread, sizeof(struct pthread));
2397                 thread->siginfo = calloc(_SIG_MAXSIG, sizeof(siginfo_t));
2398                 if (thread->siginfo == NULL) {
2399                         free(thread);
2400                         return (NULL);
2401                 }
2402                 if (curthread) {
2403                         _pthread_mutex_lock(&_tcb_mutex);
2404                         thread->tcb = _tcb_ctor(thread, 0 /* not initial tls */);
2405                         _pthread_mutex_unlock(&_tcb_mutex);
2406                 } else {
2407                         thread->tcb = _tcb_ctor(thread, 1 /* initial tls */);
2408                 }
2409                 if (thread->tcb == NULL) {
2410                         free(thread->siginfo);
2411                         free(thread);
2412                         return (NULL);
2413                 }
2414                 /*
2415                  * Initialize thread locking.
2416                  * Lock initialization needs malloc, so don't
2417                  * enter a critical region before doing this!
2418                  */
2419                 if (_lock_init(&thread->lock, LCK_ADAPTIVE,
2420                     _thr_lock_wait, _thr_lock_wakeup) != 0)
2421                         PANIC("Cannot initialize thread lock");
2422                 for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
2423                         _lockuser_init(&thread->lockusers[i], (void *)thread);
2424                         _LCK_SET_PRIVATE2(&thread->lockusers[i],
2425                             (void *)thread);
2426                 }
2427         }
2428         return (thread);
2429 }
2430
2431 void
2432 _thr_free(struct pthread *curthread, struct pthread *thread)
2433 {
2434         kse_critical_t crit;
2435
2436         DBG_MSG("Freeing thread %p\n", thread);
2437         if (thread->name) {
2438                 free(thread->name);
2439                 thread->name = NULL;
2440         }
2441         if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) {
2442                 thr_destroy(curthread, thread);
2443         } else {
2444                 /* Add the thread to the free thread list. */
2445                 crit = _kse_critical_enter();
2446                 KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2447                 TAILQ_INSERT_TAIL(&free_threadq, thread, tle);
2448                 free_thread_count++;
2449                 KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2450                 _kse_critical_leave(crit);
2451         }
2452 }
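
/*
 * Editor's note: _thr_alloc() and _thr_free() act as a small cache.
 * Freed thread structures are parked on free_threadq until
 * MAX_CACHED_THREADS of them accumulate; beyond that, thr_destroy()
 * releases them for real.  A hedged usage sketch:
 */
#if 0   /* illustrative sketch -- not part of the build */
        struct pthread *t;

        t = _thr_alloc(curthread);      /* reuses a cached thread if possible */
        if (t == NULL)
                return (EAGAIN);
        /* ... set up attributes, stack and entry point here ... */
        _thr_free(curthread, t);        /* cached again, or destroyed if the cache is full */
#endif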
2453
2454 static void
2455 thr_destroy(struct pthread *curthread, struct pthread *thread)
2456 {
2457         int i;
2458
2459         for (i = 0; i < MAX_THR_LOCKLEVEL; i++)
2460                 _lockuser_destroy(&thread->lockusers[i]);
2461         _lock_destroy(&thread->lock);
2462         if (curthread) {
2463                 _pthread_mutex_lock(&_tcb_mutex);
2464                 _tcb_dtor(thread->tcb);
2465                 _pthread_mutex_unlock(&_tcb_mutex);
2466         } else {
2467                 _tcb_dtor(thread->tcb);
2468         }
2469         free(thread->siginfo);
2470         free(thread);
2471 }
2472
2473 /*
2474  * Add an active thread:
2475  *
2476  *   o Assign the thread a unique id (which GDB uses to track
2477  *     threads).
2478  *   o Add the thread to the list of all threads and increment
2479  *     number of active threads.
2480  */
2481 static void
2482 thr_link(struct pthread *thread)
2483 {
2484         kse_critical_t crit;
2485         struct kse *curkse;
2486
2487         crit = _kse_critical_enter();
2488         curkse = _get_curkse();
2489         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2490         /*
2491          * Initialize the unique id (which GDB uses to track
2492          * threads), add the thread to the list of all threads,
2493          * and increment the number of active threads.
2494          */
2495         thread->uniqueid = next_uniqueid++;
2496         THR_LIST_ADD(thread);
2497         _thread_active_threads++;
2498         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2499         _kse_critical_leave(crit);
2500 }
2501
2502 /*
2503  * Remove an active thread.
2504  */
2505 static void
2506 thr_unlink(struct pthread *thread)
2507 {
2508         kse_critical_t crit;
2509         struct kse *curkse;
2510
2511         crit = _kse_critical_enter();
2512         curkse = _get_curkse();
2513         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2514         THR_LIST_REMOVE(thread);
2515         _thread_active_threads--;
2516         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2517         _kse_critical_leave(crit);
2518 }
2519
2520 void
2521 _thr_hash_add(struct pthread *thread)
2522 {
2523         struct thread_hash_head *head;
2524
2525         head = &thr_hashtable[THREAD_HASH(thread)];
2526         LIST_INSERT_HEAD(head, thread, hle);
2527 }
2528
2529 void
2530 _thr_hash_remove(struct pthread *thread)
2531 {
2532         LIST_REMOVE(thread, hle);
2533 }
2534
2535 struct pthread *
2536 _thr_hash_find(struct pthread *thread)
2537 {
2538         struct pthread *td;
2539         struct thread_hash_head *head;
2540
2541         head = &thr_hashtable[THREAD_HASH(thread)];
2542         LIST_FOREACH(td, head, hle) {
2543                 if (td == thread)
2544                         return (thread);
2545         }
2546         return (NULL);
2547 }
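
/*
 * Editor's note: the hash table above is how the library checks whether a
 * pthread_t supplied by the application still refers to a live thread.  A
 * hedged sketch of that lookup; the variable names are made up, and the
 * use of _thread_list_lock mirrors the other list traversals in this file.
 */
#if 0   /* illustrative sketch -- not part of the build */
        struct pthread *td;
        kse_critical_t crit;

        crit = _kse_critical_enter();
        KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);
        td = _thr_hash_find(candidate);         /* candidate: caller-supplied pthread_t */
        KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
        _kse_critical_leave(crit);
        if (td == NULL)
                return (ESRCH);                 /* no such thread */
#endif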
2548
2549 void
2550 _thr_debug_check_yield(struct pthread *curthread)
2551 {
2552         /*
2553          * Note that TMDF_SUSPEND is set after the process is suspended.
2554          * When we are being debugged, every process suspension causes
2555          * all KSEs to schedule an upcall in the kernel, unless the KSE
2556          * is in a critical region.
2557          * If this function is being called, the KSE is no longer in a
2558          * critical region.  If the debugger set TMDF_SUSPEND before the
2559          * KSE left its critical region, we catch it here.  If the flag
2560          * changes while we are testing it, that is not a problem either,
2561          * because the change can only happen after a process suspension
2562          * event.  A suspension event always causes the KSE to schedule
2563          * an upcall; in that case, because we are not in a critical
2564          * region, the upcall is scheduled successfully and the flag is
2565          * checked again in kse_sched_multi.  We do not come back until
2566          * the debugger clears the flag, which happens at the next
2567          * suspension event.
2568          */
2569         if (!DBG_CAN_RUN(curthread)) {
2570                 if ((curthread->attr.flags & PTHREAD_SCOPE_SYSTEM) == 0)
2571                         _thr_sched_switch(curthread);
2572                 else
2573                         kse_thr_interrupt(&curthread->tcb->tcb_tmbx,
2574                                 KSE_INTR_DBSUSPEND, 0);
2575         }
2576 }