1 /*
2  * Copyright (C) 2003 Daniel M. Eischen <deischen@freebsd.org>
3  * Copyright (C) 2002 Jonathon Mini <mini@freebsd.org>
4  * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed by John Birrell.
18  * 4. Neither the name of the author nor the names of any co-contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include <sys/types.h>
39 #include <sys/kse.h>
40 #include <sys/ptrace.h>
41 #include <sys/signalvar.h>
42 #include <sys/queue.h>
43 #include <machine/atomic.h>
44 #include <machine/sigframe.h>
45
46 #include <assert.h>
47 #include <errno.h>
48 #include <signal.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <time.h>
52 #include <ucontext.h>
53 #include <unistd.h>
54
55 #include "atomic_ops.h"
56 #include "thr_private.h"
57 #include "libc_private.h"
58 #ifdef NOTYET
59 #include "spinlock.h"
60 #endif
61
62 /* #define DEBUG_THREAD_KERN */
63 #ifdef DEBUG_THREAD_KERN
64 #define DBG_MSG         stdout_debug
65 #else
66 #define DBG_MSG(x...)
67 #endif
68
69 /*
70  * Define a high water mark for the maximum number of threads that
71  * will be cached.  Once this level is reached, any extra threads
72  * will be free()'d.
73  */
74 #define MAX_CACHED_THREADS      100
75 /*
76  * Define high water marks for the maximum number of KSEs and KSE groups
77  * that will be cached. Because we support 1:1 threading, there can be as
78  * many KSEs and KSE groups as there are threads. Once these levels are
79  * reached, any extra KSEs and KSE groups will be free()'d.
80  */
81 #define MAX_CACHED_KSES         ((_thread_scope_system <= 0) ? 50 : 100)
82 #define MAX_CACHED_KSEGS        ((_thread_scope_system <= 0) ? 50 : 100)
83
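/*
 * Point the KSE's kernel mailbox at the thread's mailbox so the kernel
 * knows which thread is currently running on this KSE.
 */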
84 #define KSE_SET_MBOX(kse, thrd) \
85         (kse)->k_kcb->kcb_kmbx.km_curthread = &(thrd)->tcb->tcb_tmbx
86
87 #define KSE_SET_EXITED(kse)     (kse)->k_flags |= KF_EXITED
88
89 /*
90  * Macros for manipulating the run queues.  The priority queue
91  * routines use the thread's pqe link and also handle the setting
92  * and clearing of the thread's THR_FLAGS_IN_RUNQ flag.
93  */
94 #define KSE_RUNQ_INSERT_HEAD(kse, thrd)                 \
95         _pq_insert_head(&(kse)->k_schedq->sq_runq, thrd)
96 #define KSE_RUNQ_INSERT_TAIL(kse, thrd)                 \
97         _pq_insert_tail(&(kse)->k_schedq->sq_runq, thrd)
98 #define KSE_RUNQ_REMOVE(kse, thrd)                      \
99         _pq_remove(&(kse)->k_schedq->sq_runq, thrd)
100 #define KSE_RUNQ_FIRST(kse)                             \
101         ((_libkse_debug == 0) ?                         \
102          _pq_first(&(kse)->k_schedq->sq_runq) :         \
103          _pq_first_debug(&(kse)->k_schedq->sq_runq))
104
105 #define KSE_RUNQ_THREADS(kse)   ((kse)->k_schedq->sq_runq.pq_threads)
106
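/*
 * A thread needs to be cancelled when cancellation is pending, cancellation
 * is not disabled, and the thread is either at a cancellation point or has
 * asynchronous cancellation enabled.  The async variant additionally
 * requires that the thread not be at a cancellation point.
 */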
107 #define THR_NEED_CANCEL(thrd)                                           \
108          (((thrd)->cancelflags & THR_CANCELLING) != 0 &&                \
109           ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&        \
110           (((thrd)->cancelflags & THR_AT_CANCEL_POINT) != 0 ||          \
111            ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
112
113 #define THR_NEED_ASYNC_CANCEL(thrd)                                     \
114          (((thrd)->cancelflags & THR_CANCELLING) != 0 &&                \
115           ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&        \
116           (((thrd)->cancelflags & THR_AT_CANCEL_POINT) == 0 &&          \
117            ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
118
119 /*
120  * We've got to keep track of everything that is allocated, not only
121  * to have a speedy free list, but also so they can be deallocated
122  * after a fork().
123  */
124 static TAILQ_HEAD(, kse)        active_kseq;
125 static TAILQ_HEAD(, kse)        free_kseq;
126 static TAILQ_HEAD(, kse_group)  free_kse_groupq;
127 static TAILQ_HEAD(, kse_group)  active_kse_groupq;
128 static TAILQ_HEAD(, kse_group)  gc_ksegq;
129 static struct lock              kse_lock;       /* also used for kseg queue */
130 static int                      free_kse_count = 0;
131 static int                      free_kseg_count = 0;
132 static TAILQ_HEAD(, pthread)    free_threadq;
133 static struct lock              thread_lock;
134 static int                      free_thread_count = 0;
135 static int                      inited = 0;
136 static int                      active_kse_count = 0;
137 static int                      active_kseg_count = 0;
138 static u_int64_t                next_uniqueid = 1;
139
140 LIST_HEAD(thread_hash_head, pthread);
141 #define THREAD_HASH_QUEUES      127
142 static struct thread_hash_head  thr_hashtable[THREAD_HASH_QUEUES];
143 #define THREAD_HASH(thrd)       ((unsigned long)thrd % THREAD_HASH_QUEUES)
144
145 /* Lock for thread tcb constructor/destructor */
146 static pthread_mutex_t          _tcb_mutex;
147
148 #ifdef DEBUG_THREAD_KERN
149 static void     dump_queues(struct kse *curkse);
150 #endif
151 static void     kse_check_completed(struct kse *kse);
152 static void     kse_check_waitq(struct kse *kse);
153 static void     kse_fini(struct kse *curkse);
154 static void     kse_reinit(struct kse *kse, int sys_scope);
155 static void     kse_sched_multi(struct kse_mailbox *kmbx);
156 static void     kse_sched_single(struct kse_mailbox *kmbx);
157 static void     kse_switchout_thread(struct kse *kse, struct pthread *thread);
158 static void     kse_wait(struct kse *kse, struct pthread *td_wait, int sigseq);
159 static void     kse_free_unlocked(struct kse *kse);
160 static void     kse_destroy(struct kse *kse);
161 static void     kseg_free_unlocked(struct kse_group *kseg);
162 static void     kseg_init(struct kse_group *kseg);
163 static void     kseg_reinit(struct kse_group *kseg);
164 static void     kseg_destroy(struct kse_group *kseg);
165 static void     kse_waitq_insert(struct pthread *thread);
166 static void     kse_wakeup_multi(struct kse *curkse);
167 static struct kse_mailbox *kse_wakeup_one(struct pthread *thread);
168 static void     thr_cleanup(struct kse *kse, struct pthread *curthread);
169 static void     thr_link(struct pthread *thread);
170 static void     thr_resume_wrapper(int sig, siginfo_t *, ucontext_t *);
171 static void     thr_resume_check(struct pthread *curthread, ucontext_t *ucp);
172 static int      thr_timedout(struct pthread *thread, struct timespec *curtime);
173 static void     thr_unlink(struct pthread *thread);
174 static void     thr_destroy(struct pthread *curthread, struct pthread *thread);
175 static void     thread_gc(struct pthread *thread);
176 static void     kse_gc(struct pthread *thread);
177 static void     kseg_gc(struct pthread *thread);
178
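/*
 * Charge the elapsed user and system ticks against the thread's time
 * slice, and mark the slice as expired (-1) once TIMESLICE_USEC is
 * exceeded.  SCHED_FIFO threads are not accounted this way.
 */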
179 static void __inline
180 thr_accounting(struct pthread *thread)
181 {
182         if ((thread->slice_usec != -1) &&
183             (thread->slice_usec <= TIMESLICE_USEC) &&
184             (thread->attr.sched_policy != SCHED_FIFO)) {
185                 thread->slice_usec += (thread->tcb->tcb_tmbx.tm_uticks
186                     + thread->tcb->tcb_tmbx.tm_sticks) * _clock_res_usec;
187                 /* Check for time quantum exceeded: */
188                 if (thread->slice_usec > TIMESLICE_USEC)
189                         thread->slice_usec = -1;
190         }
191         thread->tcb->tcb_tmbx.tm_uticks = 0;
192         thread->tcb->tcb_tmbx.tm_sticks = 0;
193 }
194
195 /*
196  * This is called after a fork().
197  * No locks need to be taken here since we are guaranteed to be
198  * single threaded.
199  * 
200  * XXX
201  * POSIX says that in a threaded process fork() should be used only
202  * to run new programs, and that the effects of calling functions
203  * that require certain resources between the call to fork() and
204  * the call to an exec function are undefined.
205  *
206  * It is not safe to free memory after fork(), because these data
207  * structures may be in an inconsistent state.
208  */
209 void
210 _kse_single_thread(struct pthread *curthread)
211 {
212 #ifdef NOTYET
213         struct kse *kse;
214         struct kse_group *kseg;
215         struct pthread *thread;
216
217         _thr_spinlock_init();
218         *__malloc_lock = (spinlock_t)_SPINLOCK_INITIALIZER;
219         if (__isthreaded) {
220                 _thr_rtld_fini();
221                 _thr_signal_deinit();
222         }
223         __isthreaded = 0;
224         /*
225          * Restore the signal mask early, so that any memory problems
226          * can still dump core.
227          */
228         __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
229         _thread_active_threads = 1;
230
231         curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
232         curthread->attr.flags &= ~PTHREAD_SCOPE_PROCESS;
233         curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;
234
235         /*
236          * Enter a loop to remove and free all threads other than
237          * the running thread from the active thread list:
238          */
239         while ((thread = TAILQ_FIRST(&_thread_list)) != NULL) {
240                 THR_GCLIST_REMOVE(thread);
241                 /*
242                  * Remove this thread from the list (the current
243                  * thread will be removed but re-added by libpthread
244                  * initialization).
245                  */
246                 TAILQ_REMOVE(&_thread_list, thread, tle);
247                 /* Make sure this isn't the running thread: */
248                 if (thread != curthread) {
249                         _thr_stack_free(&thread->attr);
250                         if (thread->specific != NULL)
251                                 free(thread->specific);
252                         thr_destroy(curthread, thread);
253                 }
254         }
255
256         TAILQ_INIT(&curthread->mutexq);         /* initialize mutex queue */
257         curthread->joiner = NULL;               /* no joining threads yet */
258         curthread->refcount = 0;
259         SIGEMPTYSET(curthread->sigpend);        /* clear pending signals */
260
261         /* Don't free thread-specific data as the caller may require it */
262
263         /* Free the free KSEs: */
264         while ((kse = TAILQ_FIRST(&free_kseq)) != NULL) {
265                 TAILQ_REMOVE(&free_kseq, kse, k_qe);
266                 kse_destroy(kse);
267         }
268         free_kse_count = 0;
269
270         /* Free the active KSEs: */
271         while ((kse = TAILQ_FIRST(&active_kseq)) != NULL) {
272                 TAILQ_REMOVE(&active_kseq, kse, k_qe);
273                 kse_destroy(kse);
274         }
275         active_kse_count = 0;
276
277         /* Free the free KSEGs: */
278         while ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
279                 TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
280                 kseg_destroy(kseg);
281         }
282         free_kseg_count = 0;
283
284         /* Free the active KSEGs: */
285         while ((kseg = TAILQ_FIRST(&active_kse_groupq)) != NULL) {
286                 TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
287                 kseg_destroy(kseg);
288         }
289         active_kseg_count = 0;
290
291         /* Free the free threads. */
292         while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
293                 TAILQ_REMOVE(&free_threadq, thread, tle);
294                 thr_destroy(curthread, thread);
295         }
296         free_thread_count = 0;
297
298         /* Free the to-be-gc'd threads. */
299         while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
300                 TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
301                 thr_destroy(curthread, thread);
302         }
303         TAILQ_INIT(&gc_ksegq);
304         _gc_count = 0;
305
306         if (inited != 0) {
307                 /*
308                  * Destroy these locks; they'll be recreated to assure they
309                  * are in the unlocked state.
310                  */
311                 _lock_destroy(&kse_lock);
312                 _lock_destroy(&thread_lock);
313                 _lock_destroy(&_thread_list_lock);
314                 inited = 0;
315         }
316
317         /* We're no longer part of any lists */
318         curthread->tlflags = 0;
319
320         /*
321          * After a fork, we are still operating on the thread's original
322          * stack.  Don't clear the THR_FLAGS_USER from the thread's
323          * attribute flags.
324          */
325
326         /* Initialize the threads library. */
327         curthread->kse = NULL;
328         curthread->kseg = NULL;
329         _kse_initial = NULL;
330         _libpthread_init(curthread);
331 #else
332         int i;
333
334         /* Reset the current thread and KSE lock data. */
335         for (i = 0; i < curthread->locklevel; i++) {
336                 _lockuser_reinit(&curthread->lockusers[i], (void *)curthread);
337         }
338         curthread->locklevel = 0;
339         for (i = 0; i < curthread->kse->k_locklevel; i++) {
340                 _lockuser_reinit(&curthread->kse->k_lockusers[i],
341                     (void *)curthread->kse);
342                 _LCK_SET_PRIVATE2(&curthread->kse->k_lockusers[i], NULL);
343         }
344         curthread->kse->k_locklevel = 0;
345         _thr_spinlock_init();
346         if (__isthreaded) {
347                 _thr_rtld_fini();
348                 _thr_signal_deinit();
349         }
350         __isthreaded = 0;
351         curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
352         curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;
353
354         /* After a fork(), the child should have no pending signals. */
355         sigemptyset(&curthread->sigpend);
356
357         /*
358          * Restore the signal mask early, so that any memory problems
359          * can still dump core.
360          */ 
361         sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
362         _thread_active_threads = 1;
363 #endif
364 }
365
366 /*
367  * This is used to initialize housekeeping and to initialize the
368  * KSD for the KSE.
369  */
370 void
371 _kse_init(void)
372 {
373         if (inited == 0) {
374                 TAILQ_INIT(&active_kseq);
375                 TAILQ_INIT(&active_kse_groupq);
376                 TAILQ_INIT(&free_kseq);
377                 TAILQ_INIT(&free_kse_groupq);
378                 TAILQ_INIT(&free_threadq);
379                 TAILQ_INIT(&gc_ksegq);
380                 if (_lock_init(&kse_lock, LCK_ADAPTIVE,
381                     _kse_lock_wait, _kse_lock_wakeup) != 0)
382                         PANIC("Unable to initialize free KSE queue lock");
383                 if (_lock_init(&thread_lock, LCK_ADAPTIVE,
384                     _kse_lock_wait, _kse_lock_wakeup) != 0)
385                         PANIC("Unable to initialize free thread queue lock");
386                 if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
387                     _kse_lock_wait, _kse_lock_wakeup) != 0)
388                         PANIC("Unable to initialize thread list lock");
389                 _pthread_mutex_init(&_tcb_mutex, NULL);
390                 active_kse_count = 0;
391                 active_kseg_count = 0;
392                 _gc_count = 0;
393                 inited = 1;
394         }
395 }
396
397 /*
398  * This is called when the first thread (other than the initial
399  * thread) is created.
400  */
401 int
402 _kse_setthreaded(int threaded)
403 {
404         sigset_t sigset;
405
406         if ((threaded != 0) && (__isthreaded == 0)) {
407                 SIGFILLSET(sigset);
408                 __sys_sigprocmask(SIG_SETMASK, &sigset, &_thr_initial->sigmask);
409
410                 /*
411                  * Tell the kernel to create a KSE for the initial thread
412                  * and enable upcalls in it.
413                  */
414                 _kse_initial->k_flags |= KF_STARTED;
415
416                 if (_thread_scope_system <= 0) {
417                         _thr_initial->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
418                         _kse_initial->k_kseg->kg_flags &= ~KGF_SINGLE_THREAD;
419                         _kse_initial->k_kcb->kcb_kmbx.km_curthread = NULL;
420                 }
421                 else {
422                         /*
423                          * For a bound thread the kernel reads the mailbox
424                          * pointer only once, so set it before kse_create.
425                          */
426                         _tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
427                         KSE_SET_MBOX(_kse_initial, _thr_initial);
428                         _kse_initial->k_kcb->kcb_kmbx.km_flags |= KMF_BOUND;
429                 }
430
431                 /*
432                  * Locking functions in libc are required when there are
433                  * threads other than the initial thread.
434                  */
435                 _thr_rtld_init();
436
437                 __isthreaded = 1;
438                 if (kse_create(&_kse_initial->k_kcb->kcb_kmbx, 0) != 0) {
439                         _kse_initial->k_flags &= ~KF_STARTED;
440                         __isthreaded = 0;
441                         PANIC("kse_create() failed\n");
442                         return (-1);
443                 }
444                 _thr_initial->tcb->tcb_tmbx.tm_lwp = 
445                         _kse_initial->k_kcb->kcb_kmbx.km_lwp;
446                 _thread_activated = 1;
447
448 #ifndef SYSTEM_SCOPE_ONLY
449                 if (_thread_scope_system <= 0) {
450                         /* Set current thread to initial thread */
451                         _tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
452                         KSE_SET_MBOX(_kse_initial, _thr_initial);
453                         _thr_start_sig_daemon();
454                         _thr_setmaxconcurrency();
455                 }
456                 else
457 #endif
458                         __sys_sigprocmask(SIG_SETMASK, &_thr_initial->sigmask,
459                             NULL);
460         }
461         return (0);
462 }
463
464 /*
465  * Lock wait and wakeup handlers for KSE locks.  These are only used by
466  * KSEs, and should never be used by threads.  KSE locks include the
467  * KSE group lock (used for locking the scheduling queue) and the
468  * kse_lock defined above.
469  *
470  * When a KSE lock attempt blocks, the entire KSE blocks allowing another
471  * KSE to run.  For the most part, it doesn't make much sense to try and
472  * schedule another thread because you need to lock the scheduling queue
473  * in order to do that.  And since the KSE lock is used to lock the scheduling
474  * queue, you would just end up blocking again.
475  */
476 void
477 _kse_lock_wait(struct lock *lock, struct lockuser *lu)
478 {
479         struct kse *curkse = (struct kse *)_LCK_GET_PRIVATE(lu);
480         struct timespec ts;
481         int saved_flags;
482
483         if (curkse->k_kcb->kcb_kmbx.km_curthread != NULL)
484                 PANIC("kse_lock_wait does not disable upcall.\n");
485         /*
486          * Enter a loop to wait until we get the lock.
487          */
488         ts.tv_sec = 0;
489         ts.tv_nsec = 1000000;  /* 1 msec */
490         while (!_LCK_GRANTED(lu)) {
491                 /*
492                  * Yield the kse and wait to be notified when the lock
493                  * is granted.
494                  */
495                 saved_flags = curkse->k_kcb->kcb_kmbx.km_flags;
496                 curkse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL |
497                     KMF_NOCOMPLETED;
498                 kse_release(&ts);
499                 curkse->k_kcb->kcb_kmbx.km_flags = saved_flags;
500         }
501 }
502
503 void
504 _kse_lock_wakeup(struct lock *lock, struct lockuser *lu)
505 {
506         struct kse *curkse;
507         struct kse *kse;
508         struct kse_mailbox *mbx;
509
510         curkse = _get_curkse();
511         kse = (struct kse *)_LCK_GET_PRIVATE(lu);
512
513         if (kse == curkse)
514                 PANIC("KSE trying to wake itself up in lock");
515         else {
516                 mbx = &kse->k_kcb->kcb_kmbx;
517                 _lock_grant(lock, lu);
518                 /*
519                  * Notify the owning kse that it has the lock.
520                  * It is safe to pass an invalid address to kse_wakeup
521                  * even if the mailbox is not known to the kernel at all,
522                  * and waking up the wrong kse is also harmless.
523                  */
524                 kse_wakeup(mbx);
525         }
526 }
527
528 /*
529  * Thread wait and wakeup handlers for thread locks.  These are only used
530  * by threads, never by KSEs.  Thread locks include the per-thread lock
531  * (defined in its structure), and condition variable and mutex locks.
532  */
533 void
534 _thr_lock_wait(struct lock *lock, struct lockuser *lu)
535 {
536         struct pthread *curthread = (struct pthread *)lu->lu_private;
537
538         do {
539                 THR_LOCK_SWITCH(curthread);
540                 THR_SET_STATE(curthread, PS_LOCKWAIT);
541                 _thr_sched_switch_unlocked(curthread);
542         } while (!_LCK_GRANTED(lu));
543 }
544
545 void
546 _thr_lock_wakeup(struct lock *lock, struct lockuser *lu)
547 {
548         struct pthread *thread;
549         struct pthread *curthread;
550         struct kse_mailbox *kmbx;
551
552         curthread = _get_curthread();
553         thread = (struct pthread *)_LCK_GET_PRIVATE(lu);
554
555         THR_SCHED_LOCK(curthread, thread);
556         _lock_grant(lock, lu);
557         kmbx = _thr_setrunnable_unlocked(thread);
558         THR_SCHED_UNLOCK(curthread, thread);
559         if (kmbx != NULL)
560                 kse_wakeup(kmbx);
561 }
562
563 kse_critical_t
564 _kse_critical_enter(void)
565 {
566         kse_critical_t crit;
567
568         crit = (kse_critical_t)_kcb_critical_enter();
569         return (crit);
570 }
571
572 void
573 _kse_critical_leave(kse_critical_t crit)
574 {
575         struct pthread *curthread;
576
577         _kcb_critical_leave((struct kse_thr_mailbox *)crit);
578         if ((crit != NULL) && ((curthread = _get_curthread()) != NULL))
579                 THR_YIELD_CHECK(curthread);
580 }
581
582 int
583 _kse_in_critical(void)
584 {
585         return (_kcb_in_critical());
586 }
587
588 void
589 _thr_critical_enter(struct pthread *thread)
590 {
591         thread->critical_count++;
592 }
593
594 void
595 _thr_critical_leave(struct pthread *thread)
596 {
597         thread->critical_count--;
598         THR_YIELD_CHECK(thread);
599 }
600
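/*
 * Voluntarily switch out the current thread: enter a critical region,
 * take the KSEG's scheduling lock, and hand off to the unlocked variant.
 */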
601 void
602 _thr_sched_switch(struct pthread *curthread)
603 {
604         struct kse *curkse;
605
606         (void)_kse_critical_enter();
607         curkse = _get_curkse();
608         KSE_SCHED_LOCK(curkse, curkse->k_kseg);
609         _thr_sched_switch_unlocked(curthread);
610 }
611
612 /*
613  * XXX - We may need to take the scheduling lock before calling
614  *       this, or perhaps take the lock within here before
615  *       doing anything else.
616  */
617 void
618 _thr_sched_switch_unlocked(struct pthread *curthread)
619 {
620         struct kse *curkse;
621         volatile int resume_once = 0;
622         ucontext_t *uc;
623
624         /* We're in the scheduler, 5 by 5: */
625         curkse = curthread->kse;
626
627         curthread->need_switchout = 1;  /* The thread yielded on its own. */
628         curthread->critical_yield = 0;  /* No need to yield anymore. */
629
630         /* Thread can unlock the scheduler lock. */
631         curthread->lock_switch = 1;
632
633         if (curthread->attr.flags & PTHREAD_SCOPE_SYSTEM)
634                 kse_sched_single(&curkse->k_kcb->kcb_kmbx);
635         else {
636                 if (__predict_false(_libkse_debug != 0)) {
637                         /*
638                          * Because the debugger saves the single-step status
639                          * in the thread mailbox's tm_dflags, we can safely
640                          * clear the single-step status here.  It will be
641                          * restored by kse_switchin when the thread is
642                          * switched in again.  This also lets the UTS run
643                          * at full speed.
644                          */
645                          ptrace(PT_CLEARSTEP, curkse->k_kcb->kcb_kmbx.km_lwp,
646                                 (caddr_t) 1, 0);
647                 }
648
649                 KSE_SET_SWITCH(curkse);
650                 _thread_enter_uts(curthread->tcb, curkse->k_kcb);
651         }
652         
653         /*
654          * Unlock the scheduling queue and leave the
655          * critical region.
656          */
657         /* Don't trust this after a switch! */
658         curkse = curthread->kse;
659
660         curthread->lock_switch = 0;
661         KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
662         _kse_critical_leave(&curthread->tcb->tcb_tmbx);
663
664         /*
665          * This thread is being resumed; check for cancellations.
666          */
667         if (THR_NEED_ASYNC_CANCEL(curthread) && !THR_IN_CRITICAL(curthread)) {
668                 uc = alloca(sizeof(ucontext_t));
669                 resume_once = 0;
670                 THR_GETCONTEXT(uc);
671                 if (resume_once == 0) {
672                         resume_once = 1;
673                         curthread->check_pending = 0;
674                         thr_resume_check(curthread, uc);
675                 }
676         }
677         THR_ACTIVATE_LAST_LOCK(curthread);
678 }
679
680 /*
681  * This is the scheduler for a KSE which runs a scope system thread.
682  * The multi-thread KSE scheduler should also work for a single threaded
683  * KSE, but we use a separate scheduler so that it can be fine-tuned
684  * to be more efficient (and perhaps not need a separate stack for
685  * the KSE, allowing it to use the thread's stack).
686  */
687
688 static void
689 kse_sched_single(struct kse_mailbox *kmbx)
690 {
691         struct kse *curkse;
692         struct pthread *curthread;
693         struct timespec ts;
694         sigset_t sigmask;
695         int i, sigseqno, level, first = 0;
696
697         curkse = (struct kse *)kmbx->km_udata;
698         curthread = curkse->k_curthread;
699
700         if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
701                 /* Setup this KSEs specific data. */
702                 _kcb_set(curkse->k_kcb);
703                 _tcb_set(curkse->k_kcb, curthread->tcb);
704                 curkse->k_flags |= KF_INITIALIZED;
705                 first = 1;
706                 curthread->active = 1;
707
708                 /* Setup kernel signal masks for new thread. */
709                 __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
710                 /*
711                  * Enter a critical region.  This is meaningless for a bound
712                  * thread, but it satisfies other code that expects the
713                  * mailbox to be cleared.
714                  */
715                 (void)_kse_critical_enter();
716         } else {
717                 /*
718                  * A bound thread always has its tcb set; this prevents
719                  * some code from blindly setting the bound thread's tcb
720                  * to NULL (buggy code?).
721                  */
722                 _tcb_set(curkse->k_kcb, curthread->tcb);
723         }
724
725         curthread->critical_yield = 0;
726         curthread->need_switchout = 0;
727
728         /*
729          * Lock the scheduling queue.
730          *
731          * There is no scheduling queue for single threaded KSEs,
732          * but we need a lock for protection regardless.
733          */
734         if (curthread->lock_switch == 0)
735                 KSE_SCHED_LOCK(curkse, curkse->k_kseg);
736
737         /*
738          * This has to do the job of kse_switchout_thread(), only
739          * for a single threaded KSE/KSEG.
740          */
741
742         switch (curthread->state) {
743         case PS_MUTEX_WAIT:
744         case PS_COND_WAIT:
745                 if (THR_NEED_CANCEL(curthread)) {
746                         curthread->interrupted = 1;
747                         curthread->continuation = _thr_finish_cancellation;
748                         THR_SET_STATE(curthread, PS_RUNNING);
749                 }
750                 break;
751
752         case PS_LOCKWAIT:
753                 /*
754                  * This state doesn't timeout.
755                  */
756                 curthread->wakeup_time.tv_sec = -1;
757                 curthread->wakeup_time.tv_nsec = -1;
758                 level = curthread->locklevel - 1;
759                 if (_LCK_GRANTED(&curthread->lockusers[level]))
760                         THR_SET_STATE(curthread, PS_RUNNING);
761                 break;
762
763         case PS_DEAD:
764                 /* Unlock the scheduling queue and exit the KSE and thread. */
765                 thr_cleanup(curkse, curthread);
766                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
767                 PANIC("bound thread shouldn't get here\n");
768                 break;
769
770         case PS_JOIN:
771                 if (THR_NEED_CANCEL(curthread)) {
772                         curthread->join_status.thread = NULL;
773                         THR_SET_STATE(curthread, PS_RUNNING);
774                 } else {
775                         /*
776                          * This state doesn't timeout.
777                          */
778                         curthread->wakeup_time.tv_sec = -1;
779                         curthread->wakeup_time.tv_nsec = -1;
780                 }
781                 break;
782
783         case PS_SUSPENDED:
784                 if (THR_NEED_CANCEL(curthread)) {
785                         curthread->interrupted = 1;
786                         THR_SET_STATE(curthread, PS_RUNNING);
787                 } else {
788                         /*
789                          * These states don't timeout.
790                          */
791                         curthread->wakeup_time.tv_sec = -1;
792                         curthread->wakeup_time.tv_nsec = -1;
793                 }
794                 break;
795
796         case PS_RUNNING:
797                 if ((curthread->flags & THR_FLAGS_SUSPENDED) != 0 &&
798                     !THR_NEED_CANCEL(curthread)) {
799                         THR_SET_STATE(curthread, PS_SUSPENDED);
800                         /*
801                          * These states don't timeout.
802                          */
803                         curthread->wakeup_time.tv_sec = -1;
804                         curthread->wakeup_time.tv_nsec = -1;
805                 }
806                 break;
807
808         case PS_SIGWAIT:
809                 PANIC("bound thread does not have SIGWAIT state\n");
810
811         case PS_SLEEP_WAIT:
812                 PANIC("bound thread does not have SLEEP_WAIT state\n");
813
814         case PS_SIGSUSPEND:
815                 PANIC("bound thread does not have SIGSUSPEND state\n");
816         
817         case PS_DEADLOCK:
818                 /*
819                  * These states don't timeout and don't need
820                  * to be in the waiting queue.
821                  */
822                 curthread->wakeup_time.tv_sec = -1;
823                 curthread->wakeup_time.tv_nsec = -1;
824                 break;
825
826         default:
827                 PANIC("Unknown state\n");
828                 break;
829         }
830
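        /*
         * Wait here until the thread becomes runnable again, delivering
         * any pending signals and honoring the wakeup timeout while we
         * wait.
         */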
831         while (curthread->state != PS_RUNNING) {
832                 sigseqno = curkse->k_sigseqno;
833                 if (curthread->check_pending != 0) {
834                         /*
835                          * Install pending signals into the frame; this may
836                          * cause a mutex or condvar backout.
837                          */
838                         curthread->check_pending = 0;
839                         SIGFILLSET(sigmask);
840
841                         /*
842                          * Lock out kernel signal code when we are processing
843                          * signals, and get a fresh copy of signal mask.
844                          */
845                         __sys_sigprocmask(SIG_SETMASK, &sigmask,
846                                           &curthread->sigmask);
847                         for (i = 1; i <= _SIG_MAXSIG; i++) {
848                                 if (SIGISMEMBER(curthread->sigmask, i))
849                                         continue;
850                                 if (SIGISMEMBER(curthread->sigpend, i))
851                                         (void)_thr_sig_add(curthread, i, 
852                                             &curthread->siginfo[i-1]);
853                         }
854                         __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask,
855                                 NULL);
856                         /* The above code might make the thread runnable. */
857                         if (curthread->state == PS_RUNNING)
858                                 break;
859                 }
860                 THR_DEACTIVATE_LAST_LOCK(curthread);
861                 kse_wait(curkse, curthread, sigseqno);
862                 THR_ACTIVATE_LAST_LOCK(curthread);
863                 if (curthread->wakeup_time.tv_sec >= 0) {
864                         KSE_GET_TOD(curkse, &ts);
865                         if (thr_timedout(curthread, &ts)) {
866                                 /* Indicate the thread timed out: */
867                                 curthread->timeout = 1;
868                                 /* Make the thread runnable. */
869                                 THR_SET_STATE(curthread, PS_RUNNING);
870                         }
871                 }
872         }
873
874         if (curthread->lock_switch == 0) {
875                 /* Unlock the scheduling queue. */
876                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
877         }
878
879         DBG_MSG("Continuing bound thread %p\n", curthread);
880         if (first) {
881                 _kse_critical_leave(&curthread->tcb->tcb_tmbx);
882                 pthread_exit(curthread->start_routine(curthread->arg));
883         }
884 }
885
886 #ifdef DEBUG_THREAD_KERN
887 static void
888 dump_queues(struct kse *curkse)
889 {
890         struct pthread *thread;
891
892         DBG_MSG("Threads in waiting queue:\n");
893         TAILQ_FOREACH(thread, &curkse->k_kseg->kg_schedq.sq_waitq, pqe) {
894                 DBG_MSG("  thread %p, state %d, blocked %d\n",
895                     thread, thread->state, thread->blocked);
896         }
897 }
898 #endif
899
900 /*
901  * This is the scheduler for a KSE which runs multiple threads.
902  */
903 static void
904 kse_sched_multi(struct kse_mailbox *kmbx)
905 {
906         struct kse *curkse;
907         struct pthread *curthread, *td_wait;
908         int ret;
909
910         curkse = (struct kse *)kmbx->km_udata;
911         THR_ASSERT(curkse->k_kcb->kcb_kmbx.km_curthread == NULL,
912             "Mailbox not null in kse_sched_multi");
913
914         /* Check for first time initialization: */
915         if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
916                 /* Setup this KSEs specific data. */
917                 _kcb_set(curkse->k_kcb);
918
919                 /* Set this before grabbing the context. */
920                 curkse->k_flags |= KF_INITIALIZED;
921         }
922
923         /*
924          * There is no current thread anymore; calling _get_curthread
925          * in the UTS should dump core.
926          */
927         _tcb_set(curkse->k_kcb, NULL);
928
929         /* If this is an upcall; take the scheduler lock. */
930         if (!KSE_IS_SWITCH(curkse))
931                 KSE_SCHED_LOCK(curkse, curkse->k_kseg);
932         else
933                 KSE_CLEAR_SWITCH(curkse);
934
935         if (KSE_IS_IDLE(curkse)) {
936                 KSE_CLEAR_IDLE(curkse);
937                 curkse->k_kseg->kg_idle_kses--;
938         }
939
940         /*
941          * Now that the scheduler lock is held, get the current
942          * thread.  The KSE's current thread cannot be safely
943          * examined without the lock because it could have returned
944          * as completed on another KSE.  See kse_check_completed().
945          */
946         curthread = curkse->k_curthread;
947
948         /*
949          * If the current thread was completed in another KSE, then
950          * it will be in the run queue.  Don't mark it as being blocked.
951          */
952         if ((curthread != NULL) &&
953             ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) &&
954             (curthread->need_switchout == 0)) {
955                 /*
956                  * Assume the current thread is blocked; when the
957                  * completed threads are checked and if the current
958                  * thread is among the completed, the blocked flag
959                  * will be cleared.
960                  */
961                 curthread->blocked = 1;
962                 DBG_MSG("Running thread %p is now blocked in kernel.\n",
963                     curthread);
964         }
965
966         /* Check for any unblocked threads in the kernel. */
967         kse_check_completed(curkse);
968
969         /*
970          * Check for threads that have timed-out.
971          */
972         kse_check_waitq(curkse);
973
974         /*
975          * Switchout the current thread, if necessary, as the last step
976          * so that it is inserted into the run queue (if it's runnable)
977          * _after_ any other threads that were added to it above.
978          */
979         if (curthread == NULL)
980                 ;  /* Nothing to do here. */
981         else if ((curthread->need_switchout == 0) && DBG_CAN_RUN(curthread) &&
982             (curthread->blocked == 0) && (THR_IN_CRITICAL(curthread))) {
983                 /*
984                  * Resume the thread and tell it to yield when
985                  * it leaves the critical region.
986                  */
987                 curthread->critical_yield = 1;
988                 curthread->active = 1;
989                 if ((curthread->flags & THR_FLAGS_IN_RUNQ) != 0)
990                         KSE_RUNQ_REMOVE(curkse, curthread);
991                 curkse->k_curthread = curthread;
992                 curthread->kse = curkse;
993                 DBG_MSG("Continuing thread %p in critical region\n",
994                     curthread);
995                 kse_wakeup_multi(curkse);
996                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
997                 ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
998                 if (ret != 0)
999                         PANIC("Can't resume thread in critical region\n");
1000         }
1001         else if ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) {
1002                 curthread->tcb->tcb_tmbx.tm_lwp = 0;
1003                 kse_switchout_thread(curkse, curthread);
1004         }
1005         curkse->k_curthread = NULL;
1006
1007 #ifdef DEBUG_THREAD_KERN
1008         dump_queues(curkse);
1009 #endif
1010
1011         /* Check if there are no threads ready to run: */
1012         while (((curthread = KSE_RUNQ_FIRST(curkse)) == NULL) &&
1013             (curkse->k_kseg->kg_threadcount != 0) &&
1014             ((curkse->k_flags & KF_TERMINATED) == 0)) {
1015                 /*
1016                  * Wait for a thread to become active or until there are
1017                  * no more threads.
1018                  */
1019                 td_wait = KSE_WAITQ_FIRST(curkse);
1020                 kse_wait(curkse, td_wait, 0);
1021                 kse_check_completed(curkse);
1022                 kse_check_waitq(curkse);
1023         }
1024
1025         /* Check for no more threads: */
1026         if ((curkse->k_kseg->kg_threadcount == 0) ||
1027             ((curkse->k_flags & KF_TERMINATED) != 0)) {
1028                 /*
1029                  * Normally this shouldn't return, but it will if there
1030                  * are other KSEs running that create new threads that
1031                  * are assigned to this KSE[G].  For instance, if a scope
1032                  * system thread were to create a scope process thread
1033                  * and this kse[g] is the initial kse[g], then that newly
1034                  * created thread would be assigned to us (the initial
1035                  * kse[g]).
1036                  */
1037                 kse_wakeup_multi(curkse);
1038                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1039                 kse_fini(curkse);
1040                 /* never returns */
1041         }
1042
1043         THR_ASSERT(curthread != NULL,
1044             "Return from kse_wait/fini without thread.");
1045         THR_ASSERT(curthread->state != PS_DEAD,
1046             "Trying to resume dead thread!");
1047         KSE_RUNQ_REMOVE(curkse, curthread);
1048
1049         /*
1050          * Make the selected thread the current thread.
1051          */
1052         curkse->k_curthread = curthread;
1053
1054         /*
1055          * Make sure the current thread's kse points to this kse.
1056          */
1057         curthread->kse = curkse;
1058
1059         /*
1060          * Reset the time slice if this thread is running for the first
1061          * time or running again after using its full time slice allocation.
1062          */
1063         if (curthread->slice_usec == -1)
1064                 curthread->slice_usec = 0;
1065
1066         /* Mark the thread active. */
1067         curthread->active = 1;
1068
1069         /*
1070          * The thread's current signal frame will only be NULL if it
1071          * is being resumed after being blocked in the kernel.  In
1072          * this case, and if the thread needs to run down pending
1073          * signals or needs a cancellation check, we need to add a
1074          * signal frame to the thread's context.
1075          */
1076         if (curthread->lock_switch == 0 && curthread->state == PS_RUNNING &&
1077             (curthread->check_pending != 0 ||
1078              THR_NEED_ASYNC_CANCEL(curthread)) &&
1079             !THR_IN_CRITICAL(curthread)) {
1080                 curthread->check_pending = 0;
1081                 signalcontext(&curthread->tcb->tcb_tmbx.tm_context, 0,
1082                     (__sighandler_t *)thr_resume_wrapper);
1083         }
1084         kse_wakeup_multi(curkse);
1085         /*
1086          * Continue the thread at its current frame:
1087          */
1088         if (curthread->lock_switch != 0) {
1089                 /*
1090                  * This thread came from a scheduler switch; it will
1091                  * unlock the scheduler lock and set the mailbox.
1092                  */
1093                 ret = _thread_switch(curkse->k_kcb, curthread->tcb, 0);
1094         } else {
1095                 /* This thread won't unlock the scheduler lock. */
1096                 KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1097                 ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1098         }
1099         if (ret != 0)
1100                 PANIC("Thread has returned from _thread_switch");
1101
1102         /* This point should not be reached. */
1103         PANIC("Thread has returned from _thread_switch");
1104 }
1105
1106 static void
1107 thr_resume_wrapper(int sig, siginfo_t *siginfo, ucontext_t *ucp)
1108 {
1109         struct pthread *curthread = _get_curthread();
1110         struct kse *curkse;
1111         int ret, err_save = errno;
1112
1113         DBG_MSG(">>> sig wrapper\n");
1114         if (curthread->lock_switch)
1115                 PANIC("thr_resume_wrapper, lock_switch != 0\n");
1116         thr_resume_check(curthread, ucp);
1117         errno = err_save;
1118         _kse_critical_enter();
1119         curkse = curthread->kse;
1120         curthread->tcb->tcb_tmbx.tm_context = *ucp;
1121         ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1122         if (ret != 0)
1123                 PANIC("thr_resume_wrapper: thread has returned "
1124                       "from _thread_switch");
1125         /* THR_SETCONTEXT(ucp); */ /* doesn't work -- why? */
1126 }
1127
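/*
 * Run down any pending signals and perform an asynchronous cancellation
 * check before the thread resumes.
 */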
1128 static void
1129 thr_resume_check(struct pthread *curthread, ucontext_t *ucp)
1130 {
1131         _thr_sig_rundown(curthread, ucp);
1132
1133         if (THR_NEED_ASYNC_CANCEL(curthread))
1134                 pthread_testcancel();
1135 }
1136
1137 /*
1138  * Clean up a thread.  This must be called with the thread's KSE
1139  * scheduling lock held.  The thread must be a thread from the
1140  * KSE's group.
1141  */
1142 static void
1143 thr_cleanup(struct kse *curkse, struct pthread *thread)
1144 {
1145         struct pthread *joiner;
1146         struct kse_mailbox *kmbx = NULL;
1147         int sys_scope;
1148
1149         thread->active = 0;
1150         thread->need_switchout = 0;
1151         thread->lock_switch = 0;
1152         thread->check_pending = 0;
1153
1154         if ((joiner = thread->joiner) != NULL) {
1155                 /* Joinee scheduler lock held; joiner won't leave. */
1156                 if (joiner->kseg == curkse->k_kseg) {
1157                         if (joiner->join_status.thread == thread) {
1158                                 joiner->join_status.thread = NULL;
1159                                 joiner->join_status.ret = thread->ret;
1160                                 (void)_thr_setrunnable_unlocked(joiner);
1161                         }
1162                 } else {
1163                         KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1164                         /* The joiner may have removed itself and exited. */
1165                         if (_thr_ref_add(thread, joiner, 0) == 0) {
1166                                 KSE_SCHED_LOCK(curkse, joiner->kseg);
1167                                 if (joiner->join_status.thread == thread) {
1168                                         joiner->join_status.thread = NULL;
1169                                         joiner->join_status.ret = thread->ret;
1170                                         kmbx = _thr_setrunnable_unlocked(joiner);
1171                                 }
1172                                 KSE_SCHED_UNLOCK(curkse, joiner->kseg);
1173                                 _thr_ref_delete(thread, joiner);
1174                                 if (kmbx != NULL)
1175                                         kse_wakeup(kmbx);
1176                         }
1177                         KSE_SCHED_LOCK(curkse, curkse->k_kseg);
1178                 }
1179                 thread->attr.flags |= PTHREAD_DETACHED;
1180         }
1181
1182         if (!(sys_scope = (thread->attr.flags & PTHREAD_SCOPE_SYSTEM))) {
1183                 /*
1184                  * Remove the thread from the KSEG's list of threads.
1185                  */
1186                 KSEG_THRQ_REMOVE(thread->kseg, thread);
1187                 /*
1188                  * Migrate the thread to the main KSE so that this
1189                  * KSE and KSEG can be cleaned when their last thread
1190                  * exits.
1191                  */
1192                 thread->kseg = _kse_initial->k_kseg;
1193                 thread->kse = _kse_initial;
1194         }
1195
1196         /*
1197          * We can't hold the thread list lock while holding the
1198          * scheduler lock.
1199          */
1200         KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1201         DBG_MSG("Adding thread %p to GC list\n", thread);
1202         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
1203         thread->tlflags |= TLFLAGS_GC_SAFE;
1204         THR_GCLIST_ADD(thread);
1205         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
1206         if (sys_scope) {
1207                 /*
1208                  * A system scope thread is in its own thread group;
1209                  * when the thread exits, its kse and ksegrp should
1210                  * be recycled as well.  The kse upcall stack belongs
1211                  * to the thread, so clear it here.
1212                  */
1213                 curkse->k_stack.ss_sp = 0;
1214                 curkse->k_stack.ss_size = 0;
1215                 kse_exit();
1216                 PANIC("kse_exit() failed for system scope thread");
1217         }
1218         KSE_SCHED_LOCK(curkse, curkse->k_kseg);
1219 }
1220
1221 void
1222 _thr_gc(struct pthread *curthread)
1223 {
1224         thread_gc(curthread);
1225         kse_gc(curthread);
1226         kseg_gc(curthread);
1227 }
1228
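/*
 * Reclaim threads on the GC list: free each thread's stack once it is
 * safe to do so, and fully destroy threads that are detached and no
 * longer referenced.
 */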
1229 static void
1230 thread_gc(struct pthread *curthread)
1231 {
1232         struct pthread *td, *td_next;
1233         kse_critical_t crit;
1234         TAILQ_HEAD(, pthread) worklist;
1235
1236         TAILQ_INIT(&worklist);
1237         crit = _kse_critical_enter();
1238         KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);
1239
1240         /* Check the threads waiting for GC. */
1241         for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = td_next) {
1242                 td_next = TAILQ_NEXT(td, gcle);
1243                 if ((td->tlflags & TLFLAGS_GC_SAFE) == 0)
1244                         continue;
1245                 else if (((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) &&
1246                     ((td->kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
1247                         /*
1248                          * The thread and KSE are operating on the same
1249                          * stack.  Wait for the KSE to exit before freeing
1250                          * the thread's stack as well as everything else.
1251                          */
1252                         continue;
1253                 }
1254                 /*
1255                  * Remove the thread from the GC list.  If the thread
1256                  * isn't yet detached, it will get added back to the
1257                  * GC list at a later time.
1258                  */
1259                 THR_GCLIST_REMOVE(td);
1260                 DBG_MSG("Freeing thread %p stack\n", td);
1261                 /*
1262                  * We can free the thread stack since it's no longer
1263                  * in use.
1264                  */
1265                 _thr_stack_free(&td->attr);
1266                 if (((td->attr.flags & PTHREAD_DETACHED) != 0) &&
1267                     (td->refcount == 0)) {
1268                         /*
1269                          * The thread has detached and is no longer
1270                          * referenced.  It is safe to remove all
1271                          * remnants of the thread.
1272                          */
1273                         THR_LIST_REMOVE(td);
1274                         TAILQ_INSERT_HEAD(&worklist, td, gcle);
1275                 }
1276         }
1277         KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
1278         _kse_critical_leave(crit);
1279
1280         while ((td = TAILQ_FIRST(&worklist)) != NULL) {
1281                 TAILQ_REMOVE(&worklist, td, gcle);
1282                 /*
1283                  * XXX we don't free the initial thread and its kse
1284                  * (if the thread is a bound thread), because there might
1285                  * be code still referencing the initial thread and kse.
1286                  */
1287                 if (td == _thr_initial) {
1288                         DBG_MSG("Initial thread won't be freed\n");
1289                         continue;
1290                 }
1291
1292                 if ((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1293                         crit = _kse_critical_enter();
1294                         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1295                         kse_free_unlocked(td->kse);
1296                         kseg_free_unlocked(td->kseg);
1297                         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1298                         _kse_critical_leave(crit);
1299                 }
1300                 DBG_MSG("Freeing thread %p\n", td);
1301                 _thr_free(curthread, td);
1302         }
1303 }
1304
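/*
 * Trim the free KSE cache down to MAX_CACHED_KSES, destroying the
 * excess KSEs outside of the kse_lock.
 */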
1305 static void
1306 kse_gc(struct pthread *curthread)
1307 {
1308         kse_critical_t crit;
1309         TAILQ_HEAD(, kse) worklist;
1310         struct kse *kse;
1311
1312         if (free_kse_count <= MAX_CACHED_KSES)
1313                 return;
1314         TAILQ_INIT(&worklist);
1315         crit = _kse_critical_enter();
1316         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1317         while (free_kse_count > MAX_CACHED_KSES) {
1318                 kse = TAILQ_FIRST(&free_kseq);
1319                 TAILQ_REMOVE(&free_kseq, kse, k_qe);
1320                 TAILQ_INSERT_HEAD(&worklist, kse, k_qe);
1321                 free_kse_count--;
1322         }
1323         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1324         _kse_critical_leave(crit);
1325
1326         while ((kse = TAILQ_FIRST(&worklist))) {
1327                 TAILQ_REMOVE(&worklist, kse, k_qe);
1328                 kse_destroy(kse);
1329         }
1330 }
1331
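/*
 * Trim the free KSE group cache down to MAX_CACHED_KSEGS, destroying
 * the excess groups outside of the kse_lock.
 */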
1332 static void
1333 kseg_gc(struct pthread *curthread)
1334 {
1335         kse_critical_t crit;
1336         TAILQ_HEAD(, kse_group) worklist;
1337         struct kse_group *kseg;
1338
1339         if (free_kseg_count <= MAX_CACHED_KSEGS)
1340                 return; 
1341         TAILQ_INIT(&worklist);
1342         crit = _kse_critical_enter();
1343         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1344         while (free_kseg_count > MAX_CACHED_KSEGS) {
1345                 kseg = TAILQ_FIRST(&free_kse_groupq);
1346                 TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
1347                 free_kseg_count--;
1348                 TAILQ_INSERT_HEAD(&worklist, kseg, kg_qe);
1349         }
1350         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1351         _kse_critical_leave(crit);
1352
1353         while ((kseg = TAILQ_FIRST(&worklist))) {
1354                 TAILQ_REMOVE(&worklist, kseg, kg_qe);
1355                 kseg_destroy(kseg);
1356         }
1357 }
1358
1359 /*
1360  * Only new threads that are running or suspended may be scheduled.
1361  */
1362 int
1363 _thr_schedule_add(struct pthread *curthread, struct pthread *newthread)
1364 {
1365         kse_critical_t crit;
1366         int ret;
1367
1368         /* Add the new thread. */
1369         thr_link(newthread);
1370
1371         /*
1372          * If this is the first time creating a thread, make sure
1373          * the mailbox is set for the current thread.
1374          */
1375         if ((newthread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1376                 /* We use the thread's stack as the KSE's stack. */
1377                 newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_sp =
1378                     newthread->attr.stackaddr_attr;
1379                 newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_size =
1380                     newthread->attr.stacksize_attr;
1381
1382                 /*
1383                  * No need to lock the scheduling queue since the
1384                  * KSE/KSEG pair have not yet been started.
1385                  */
1386                 KSEG_THRQ_ADD(newthread->kseg, newthread);
1387                 /* This thread never gives up its KSE. */
1388                 newthread->active = 1;
1389                 newthread->kse->k_curthread = newthread;
1390                 newthread->kse->k_kcb->kcb_kmbx.km_flags = KMF_BOUND;
1391                 newthread->kse->k_kcb->kcb_kmbx.km_func =
1392                     (kse_func_t *)kse_sched_single;
1393                 newthread->kse->k_kcb->kcb_kmbx.km_quantum = 0;
1394                 KSE_SET_MBOX(newthread->kse, newthread);
1395                 /*
1396                  * This thread needs a new KSE and KSEG.
1397                  */
1398                 newthread->kse->k_flags &= ~KF_INITIALIZED;
1399                 newthread->kse->k_flags |= KF_STARTED;
1400                 /* Fire up! */
1401                 ret = kse_create(&newthread->kse->k_kcb->kcb_kmbx, 1);
1402                 if (ret != 0)
1403                         ret = errno;
1404         }
1405         else {
1406                 /*
1407                  * Lock the KSE and add the new thread to its list of
1408                  * assigned threads.  If the new thread is runnable, also
1409                  * add it to the KSE's run queue.
1410                  */
1411                 crit = _kse_critical_enter();
1412                 KSE_SCHED_LOCK(curthread->kse, newthread->kseg);
1413                 KSEG_THRQ_ADD(newthread->kseg, newthread);
1414                 if (newthread->state == PS_RUNNING)
1415                         THR_RUNQ_INSERT_TAIL(newthread);
1416                 if ((newthread->kse->k_flags & KF_STARTED) == 0) {
1417                         /*
1418                          * This KSE hasn't been started yet.  Start it
1419                          * outside of holding the lock.
1420                          */
1421                         newthread->kse->k_flags |= KF_STARTED;
1422                         newthread->kse->k_kcb->kcb_kmbx.km_func =
1423                             (kse_func_t *)kse_sched_multi;
1424                         newthread->kse->k_kcb->kcb_kmbx.km_flags = 0;
1425                         kse_create(&newthread->kse->k_kcb->kcb_kmbx, 0);
1426                  } else if ((newthread->state == PS_RUNNING) &&
1427                      KSE_IS_IDLE(newthread->kse)) {
1428                         /*
1429                          * The thread is being scheduled on another KSEG.
1430                          */
1431                         kse_wakeup_one(newthread);
1432                 }
1433                 KSE_SCHED_UNLOCK(curthread->kse, newthread->kseg);
1434                 _kse_critical_leave(crit);
1435                 ret = 0;
1436         }
1437         if (ret != 0)
1438                 thr_unlink(newthread);
1439
1440         return (ret);
1441 }
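/*
 * Illustrative sketch (editorial note, not part of the original source):
 * a pthread_create()-style caller might hand a newly built thread to the
 * scheduler roughly as follows.  _thr_alloc(), THR_SET_STATE() and
 * _thr_schedule_add() are the routines defined in this file; everything
 * else (attribute setup, error values) is abbreviated.
 *
 *      struct pthread *curthread = _get_curthread();
 *      struct pthread *newthread;
 *      int ret;
 *
 *      if ((newthread = _thr_alloc(curthread)) == NULL)
 *              return (EAGAIN);
 *      (set up newthread's attributes, stack and start routine here)
 *      THR_SET_STATE(newthread, PS_RUNNING);
 *      ret = _thr_schedule_add(curthread, newthread);
 *      if (ret != 0)
 *              _thr_free(curthread, newthread);
 *      return (ret);
 */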
1442
1443 void
1444 kse_waitq_insert(struct pthread *thread)
1445 {
1446         struct pthread *td;
1447
1448         if (thread->wakeup_time.tv_sec == -1)
1449                 TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, thread,
1450                     pqe);
1451         else {
1452                 td = TAILQ_FIRST(&thread->kse->k_schedq->sq_waitq);
1453                 while ((td != NULL) && (td->wakeup_time.tv_sec != -1) &&
1454                     ((td->wakeup_time.tv_sec < thread->wakeup_time.tv_sec) ||
1455                     ((td->wakeup_time.tv_sec == thread->wakeup_time.tv_sec) &&
1456                     (td->wakeup_time.tv_nsec <= thread->wakeup_time.tv_nsec))))
1457                         td = TAILQ_NEXT(td, pqe);
1458                 if (td == NULL)
1459                         TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq,
1460                             thread, pqe);
1461                 else
1462                         TAILQ_INSERT_BEFORE(td, thread, pqe);
1463         }
1464         thread->flags |= THR_FLAGS_IN_WAITQ;
1465 }
1466
1467 /*
1468  * This must be called with the scheduling lock held.
1469  */
1470 static void
1471 kse_check_completed(struct kse *kse)
1472 {
1473         struct pthread *thread;
1474         struct kse_thr_mailbox *completed;
1475         int sig;
1476
1477         if ((completed = kse->k_kcb->kcb_kmbx.km_completed) != NULL) {
1478                 kse->k_kcb->kcb_kmbx.km_completed = NULL;
1479                 while (completed != NULL) {
1480                         thread = completed->tm_udata;
1481                         DBG_MSG("Found completed thread %p, name %s\n",
1482                             thread,
1483                             (thread->name == NULL) ? "none" : thread->name);
1484                         thread->blocked = 0;
1485                         if (thread != kse->k_curthread) {
1486                                 thr_accounting(thread);
1487                                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
1488                                         THR_SET_STATE(thread, PS_SUSPENDED);
1489                                 else
1490                                         KSE_RUNQ_INSERT_TAIL(kse, thread);
1491                                 if ((thread->kse != kse) &&
1492                                     (thread->kse->k_curthread == thread)) {
1493                                         /*
1494                                          * Remove this thread from its
1495                                          * previous KSE so that it (the KSE)
1496                                          * doesn't think it is still active.
1497                                          */
1498                                         thread->kse->k_curthread = NULL;
1499                                         thread->active = 0;
1500                                 }
1501                         }
1502                         if ((sig = thread->tcb->tcb_tmbx.tm_syncsig.si_signo)
1503                             != 0) {
1504                                 if (SIGISMEMBER(thread->sigmask, sig))
1505                                         SIGADDSET(thread->sigpend, sig);
1506                                 else if (THR_IN_CRITICAL(thread))
1507                                         kse_thr_interrupt(NULL, KSE_INTR_SIGEXIT, sig);
1508                                 else
1509                                         (void)_thr_sig_add(thread, sig,
1510                                             &thread->tcb->tcb_tmbx.tm_syncsig);
1511                                 thread->tcb->tcb_tmbx.tm_syncsig.si_signo = 0;
1512                         }
1513                         completed = completed->tm_next;
1514                 }
1515         }
1516 }
1517
1518 /*
1519  * This must be called with the scheduling lock held.
1520  */
1521 static void
1522 kse_check_waitq(struct kse *kse)
1523 {
1524         struct pthread  *pthread;
1525         struct timespec ts;
1526
1527         KSE_GET_TOD(kse, &ts);
1528
1529         /*
1530          * Wake up threads that have timed out.  This has to be
1531          * done before adding the current thread to the run queue
1532          * so that a CPU-intensive thread doesn't get preference
1533          * over waiting threads.
1534          */
1535         while (((pthread = KSE_WAITQ_FIRST(kse)) != NULL) &&
1536             thr_timedout(pthread, &ts)) {
1537                 /* Remove the thread from the wait queue: */
1538                 KSE_WAITQ_REMOVE(kse, pthread);
1539                 DBG_MSG("Found timedout thread %p in waitq\n", pthread);
1540
1541                 /* Indicate that the thread timed out: */
1542                 pthread->timeout = 1;
1543
1544                 /* Add the thread to the priority queue: */
1545                 if ((pthread->flags & THR_FLAGS_SUSPENDED) != 0)
1546                         THR_SET_STATE(pthread, PS_SUSPENDED);
1547                 else {
1548                         THR_SET_STATE(pthread, PS_RUNNING);
1549                         KSE_RUNQ_INSERT_TAIL(kse, pthread);
1550                 }
1551         }
1552 }
1553
1554 static int
1555 thr_timedout(struct pthread *thread, struct timespec *curtime)
1556 {
1557         if (thread->wakeup_time.tv_sec < 0)
1558                 return (0);
1559         else if (thread->wakeup_time.tv_sec > curtime->tv_sec)
1560                 return (0);
1561         else if ((thread->wakeup_time.tv_sec == curtime->tv_sec) &&
1562             (thread->wakeup_time.tv_nsec > curtime->tv_nsec))
1563                 return (0);
1564         else
1565                 return (1);
1566 }
1567
1568 /*
1569  * This must be called with the scheduling lock held.
1570  *
1571  * Each thread has a time slice, a wakeup time (used when it wants
1572  * to wait for a specified amount of time), a run state, and an
1573  * active flag.
1574  *
1575  * When a thread gets run by the scheduler, the active flag is
1576  * set to non-zero (1).  When a thread performs an explicit yield
1577  * or schedules a state change, it enters the scheduler and the
1578  * active flag is cleared.  When the active flag is still seen
1579  * set in the scheduler, that means that the thread is blocked in
1580  * the kernel (because it is cleared before entering the scheduler
1581  * in all other instances).
1582  *
1583  * The wakeup time is only set for those states that can time out.
1584  * It is set to (-1, -1) for all other instances.
1585  *
1586  * The thread's run state, aside from being useful when debugging,
1587  * is used to place the thread in an appropriate queue.  There
1588  * are 2 basic queues:
1589  *
1590  *   o run queue - queue ordered by priority for all threads
1591  *                 that are runnable
1592  *   o waiting queue - queue sorted by wakeup time for all threads
1593  *                     that are not otherwise runnable (not blocked
1594  *                     in kernel, not waiting for locks)
1595  *
1596  * The thread's time slice is used for round-robin scheduling
1597  * (the default scheduling policy).  While a SCHED_RR thread
1598  * is runnable, its time slice accumulates.  When it reaches
1599  * the time slice interval, the slice is reset and the thread is
1600  * added to the end of the queue of threads at its priority.  When
1601  * a thread is no longer runnable (blocks in the kernel, waits,
1602  * etc.), its time slice is reset.
1603  *
1604  * The job of kse_switchout_thread() is to handle all of the above.
1605  */
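/*
 * Illustrative sketch (editorial note, not part of the original source)
 * of the conventions described above, as a blocking code path might set
 * them up before switching into the scheduler.  _thr_set_timeout(),
 * THR_SET_STATE() and _thr_sched_switch() are the routines used in this
 * file; the five-second timeout is made up.
 *
 *      struct timespec timeout = { 5, 0 };
 *
 *      _thr_set_timeout(&timeout);             (wakeup_time = now + 5s)
 *      THR_SET_STATE(curthread, PS_COND_WAIT); (a state that can time out)
 *      _thr_sched_switch(curthread);           (scheduler clears "active"
 *                                               and moves us to the waitq)
 *
 * An untimed wait would instead call _thr_set_timeout(NULL), which stores
 * the (-1, -1) "no timeout" sentinel.
 */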
1606 static void
1607 kse_switchout_thread(struct kse *kse, struct pthread *thread)
1608 {
1609         int level;
1610         int i;
1611         int restart;
1612         siginfo_t siginfo;
1613
1614         /*
1615          * Place the currently running thread into the
1616          * appropriate queue(s).
1617          */
1618         DBG_MSG("Switching out thread %p, state %d\n", thread, thread->state);
1619
1620         THR_DEACTIVATE_LAST_LOCK(thread);
1621         if (thread->blocked != 0) {
1622                 thread->active = 0;
1623                 thread->need_switchout = 0;
1624                 /* This thread must have blocked in the kernel. */
1625                 /*
1626                  * Check for pending signals and cancellation for
1627                  * this thread to see if we need to interrupt it
1628                  * in the kernel.
1629                  */
1630                 if (THR_NEED_CANCEL(thread)) {
1631                         kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1632                                           KSE_INTR_INTERRUPT, 0);
1633                 } else if (thread->check_pending != 0) {
1634                         for (i = 1; i <= _SIG_MAXSIG; ++i) {
1635                                 if (SIGISMEMBER(thread->sigpend, i) &&
1636                                     !SIGISMEMBER(thread->sigmask, i)) {
1637                                         restart = _thread_sigact[i - 1].sa_flags & SA_RESTART;
1638                                         kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1639                                             restart ? KSE_INTR_RESTART : KSE_INTR_INTERRUPT, 0);
1640                                         break;
1641                                 }
1642                         }
1643                 }
1644         }
1645         else {
1646                 switch (thread->state) {
1647                 case PS_MUTEX_WAIT:
1648                 case PS_COND_WAIT:
1649                         if (THR_NEED_CANCEL(thread)) {
1650                                 thread->interrupted = 1;
1651                                 thread->continuation = _thr_finish_cancellation;
1652                                 THR_SET_STATE(thread, PS_RUNNING);
1653                         } else {
1654                                 /* Insert into the waiting queue: */
1655                                 KSE_WAITQ_INSERT(kse, thread);
1656                         }
1657                         break;
1658
1659                 case PS_LOCKWAIT:
1660                         /*
1661                          * This state doesn't time out.
1662                          */
1663                         thread->wakeup_time.tv_sec = -1;
1664                         thread->wakeup_time.tv_nsec = -1;
1665                         level = thread->locklevel - 1;
1666                         if (!_LCK_GRANTED(&thread->lockusers[level]))
1667                                 KSE_WAITQ_INSERT(kse, thread);
1668                         else
1669                                 THR_SET_STATE(thread, PS_RUNNING);
1670                         break;
1671
1672                 case PS_SLEEP_WAIT:
1673                 case PS_SIGWAIT:
1674                         if (THR_NEED_CANCEL(thread)) {
1675                                 thread->interrupted = 1;
1676                                 THR_SET_STATE(thread, PS_RUNNING);
1677                         } else {
1678                                 KSE_WAITQ_INSERT(kse, thread);
1679                         }
1680                         break;
1681
1682                 case PS_JOIN:
1683                         if (THR_NEED_CANCEL(thread)) {
1684                                 thread->join_status.thread = NULL;
1685                                 THR_SET_STATE(thread, PS_RUNNING);
1686                         } else {
1687                                 /*
1688                                  * This state doesn't time out.
1689                                  */
1690                                 thread->wakeup_time.tv_sec = -1;
1691                                 thread->wakeup_time.tv_nsec = -1;
1692
1693                                 /* Insert into the waiting queue: */
1694                                 KSE_WAITQ_INSERT(kse, thread);
1695                         }
1696                         break;
1697
1698                 case PS_SIGSUSPEND:
1699                 case PS_SUSPENDED:
1700                         if (THR_NEED_CANCEL(thread)) {
1701                                 thread->interrupted = 1;
1702                                 THR_SET_STATE(thread, PS_RUNNING);
1703                         } else {
1704                                 /*
1705                                  * These states don't time out.
1706                                  */
1707                                 thread->wakeup_time.tv_sec = -1;
1708                                 thread->wakeup_time.tv_nsec = -1;
1709
1710                                 /* Insert into the waiting queue: */
1711                                 KSE_WAITQ_INSERT(kse, thread);
1712                         }
1713                         break;
1714
1715                 case PS_DEAD:
1716                         /*
1717                          * The scheduler is operating on a different
1718                          * stack.  It is safe to do garbage collecting
1719                          * here.
1720                          */
1721                         thr_cleanup(kse, thread);
1722                         return;
1723                         break;
1724
1725                 case PS_RUNNING:
1726                         if ((thread->flags & THR_FLAGS_SUSPENDED) != 0 &&
1727                             !THR_NEED_CANCEL(thread))
1728                                 THR_SET_STATE(thread, PS_SUSPENDED);
1729                         break;
1730
1731                 case PS_DEADLOCK:
1732                         /*
1733                          * These states don't time out.
1734                          */
1735                         thread->wakeup_time.tv_sec = -1;
1736                         thread->wakeup_time.tv_nsec = -1;
1737
1738                         /* Insert into the waiting queue: */
1739                         KSE_WAITQ_INSERT(kse, thread);
1740                         break;
1741
1742                 default:
1743                         PANIC("Unknown state\n");
1744                         break;
1745                 }
1746
1747                 thr_accounting(thread);
1748                 if (thread->state == PS_RUNNING) {
1749                         if (thread->slice_usec == -1) {
1750                                 /*
1751                                  * The thread exceeded its time quantum or
1752                                  * it yielded the CPU; place it at the tail
1753                                  * of the queue for its priority.
1754                                  */
1755                                 KSE_RUNQ_INSERT_TAIL(kse, thread);
1756                         } else {
1757                                 /*
1758                                  * The thread hasn't exceeded its interval,
1759                                  * so place it at the head of the queue for
1760                                  * its priority.
1761                                  */
1762                                 KSE_RUNQ_INSERT_HEAD(kse, thread);
1763                         }
1764                 }
1765         }
1766         thread->active = 0;
1767         thread->need_switchout = 0;
1768         if (thread->check_pending != 0) {
1769                 /* Install pending signals into the frame. */
1770                 thread->check_pending = 0;
1771                 KSE_LOCK_ACQUIRE(kse, &_thread_signal_lock);
1772                 for (i = 1; i <= _SIG_MAXSIG; i++) {
1773                         if (SIGISMEMBER(thread->sigmask, i))
1774                                 continue;
1775                         if (SIGISMEMBER(thread->sigpend, i))
1776                                 (void)_thr_sig_add(thread, i,
1777                                     &thread->siginfo[i-1]);
1778                         else if (SIGISMEMBER(_thr_proc_sigpending, i) &&
1779                                 _thr_getprocsig_unlocked(i, &siginfo)) {
1780                                 (void)_thr_sig_add(thread, i, &siginfo);
1781                         }
1782                 }
1783                 KSE_LOCK_RELEASE(kse, &_thread_signal_lock);
1784         }
1785 }
1786
1787 /*
1788  * This function waits for the smallest timeout value of any waiting
1789  * thread, or until it receives a message from another KSE.
1790  *
1791  * This must be called with the scheduling lock held.
1792  */
1793 static void
1794 kse_wait(struct kse *kse, struct pthread *td_wait, int sigseqno)
1795 {
1796         struct timespec ts, ts_sleep;
1797         int saved_flags;
1798
1799         if ((td_wait == NULL) || (td_wait->wakeup_time.tv_sec < 0)) {
1800                 /* Limit sleep to no more than 1 minute. */
1801                 ts_sleep.tv_sec = 60;
1802                 ts_sleep.tv_nsec = 0;
1803         } else {
1804                 KSE_GET_TOD(kse, &ts);
1805                 TIMESPEC_SUB(&ts_sleep, &td_wait->wakeup_time, &ts);
1806                 if (ts_sleep.tv_sec > 60) {
1807                         ts_sleep.tv_sec = 60;
1808                         ts_sleep.tv_nsec = 0;
1809                 }
1810         }
1811         /* Don't sleep for negative times. */
1812         if ((ts_sleep.tv_sec >= 0) && (ts_sleep.tv_nsec >= 0)) {
1813                 KSE_SET_IDLE(kse);
1814                 kse->k_kseg->kg_idle_kses++;
1815                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1816                 if ((kse->k_kseg->kg_flags & KGF_SINGLE_THREAD) &&
1817                     (kse->k_sigseqno != sigseqno))
1818                         ; /* don't sleep */
1819                 else {
1820                         saved_flags = kse->k_kcb->kcb_kmbx.km_flags;
1821                         kse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL;
1822                         kse_release(&ts_sleep);
1823                         kse->k_kcb->kcb_kmbx.km_flags = saved_flags;
1824                 }
1825                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1826                 if (KSE_IS_IDLE(kse)) {
1827                         KSE_CLEAR_IDLE(kse);
1828                         kse->k_kseg->kg_idle_kses--;
1829                 }
1830         }
1831 }
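/*
 * Illustrative sketch (editorial note, not part of the original source):
 * with the scheduling lock held, an idle scheduler loop might combine
 * kse_wait() with the wait-queue helpers above roughly like this; the
 * sigseqno value would be the KSE's saved signal sequence number.
 *
 *      struct pthread *td_wait;
 *
 *      td_wait = KSE_WAITQ_FIRST(curkse);      (thread with nearest wakeup)
 *      kse_wait(curkse, td_wait, sigseqno);    (sleep, at most 60 seconds)
 *      kse_check_waitq(curkse);                (requeue threads that timed out)
 */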
1832
1833 /*
1834  * Avoid calling this kse_exit() so as not to confuse it with the
1835  * system call of the same name.
1836  */
1837 static void
1838 kse_fini(struct kse *kse)
1839 {
1840         /* struct kse_group *free_kseg = NULL; */
1841         struct timespec ts;
1842         struct pthread *td;
1843
1844         /*
1845          * Check to see if this is one of the main kses.
1846          */
1847         if (kse->k_kseg != _kse_initial->k_kseg) {
1848                 PANIC("shouldn't get here");
1849                 /* This is for supporting thread groups. */
1850 #ifdef NOT_YET
1851                 /* Remove this KSE from the KSEG's list of KSEs. */
1852                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1853                 TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1854                 kse->k_kseg->kg_ksecount--;
1855                 if (TAILQ_EMPTY(&kse->k_kseg->kg_kseq))
1856                         free_kseg = kse->k_kseg;
1857                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1858
1859                 /*
1860                  * Add this KSE to the list of free KSEs along with
1861                  * the KSEG if it is now orphaned.
1862                  */
1863                 KSE_LOCK_ACQUIRE(kse, &kse_lock);
1864                 if (free_kseg != NULL)
1865                         kseg_free_unlocked(free_kseg);
1866                 kse_free_unlocked(kse);
1867                 KSE_LOCK_RELEASE(kse, &kse_lock);
1868                 kse_exit();
1869                 /* Never returns. */
1870                 PANIC("kse_exit()");
1871 #endif
1872         } else {
1873                 /*
1874                  * We allow the program to kill KSEs in the initial group
1875                  * (by lowering the concurrency).
1876                  */
1877                 if ((kse != _kse_initial) &&
1878                     ((kse->k_flags & KF_TERMINATED) != 0)) {
1879                         KSE_SCHED_LOCK(kse, kse->k_kseg);
1880                         TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1881                         kse->k_kseg->kg_ksecount--;
1882                         /*
1883                          * Migrate threads to _kse_initial if the latest
1884                          * kse they ran on is this kse.
1885                          */
1886                         td = TAILQ_FIRST(&kse->k_kseg->kg_threadq);
1887                         while (td != NULL) {
1888                                 if (td->kse == kse)
1889                                         td->kse = _kse_initial;
1890                                 td = TAILQ_NEXT(td, kle);
1891                         }
1892                         KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1893                         KSE_LOCK_ACQUIRE(kse, &kse_lock);
1894                         kse_free_unlocked(kse);
1895                         KSE_LOCK_RELEASE(kse, &kse_lock);
1896                         /* Make sure there is always at least one KSE awake. */
1897                         KSE_WAKEUP(_kse_initial);
1898                         kse_exit();
1899                         /* Never returns. */
1900                         PANIC("kse_exit() failed for initial kseg");
1901                 }
1902                 KSE_SCHED_LOCK(kse, kse->k_kseg);
1903                 KSE_SET_IDLE(kse);
1904                 kse->k_kseg->kg_idle_kses++;
1905                 KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1906                 ts.tv_sec = 120;
1907                 ts.tv_nsec = 0;
1908                 kse->k_kcb->kcb_kmbx.km_flags = 0;
1909                 kse_release(&ts);
1910                 /* Never reached. */
1911         }
1912 }
1913
1914 void
1915 _thr_set_timeout(const struct timespec *timeout)
1916 {
1917         struct pthread  *curthread = _get_curthread();
1918         struct timespec ts;
1919
1920         /* Reset the timeout flag for the running thread: */
1921         curthread->timeout = 0;
1922
1923         /* Check if the thread is to wait forever: */
1924         if (timeout == NULL) {
1925                 /*
1926                  * Set the wakeup time to something that can be recognised as
1927                  * different to an actual time of day:
1928                  */
1929                 curthread->wakeup_time.tv_sec = -1;
1930                 curthread->wakeup_time.tv_nsec = -1;
1931         }
1932         /* Check if no waiting is required: */
1933         else if ((timeout->tv_sec == 0) && (timeout->tv_nsec == 0)) {
1934                 /* Set the wake up time to 'immediately': */
1935                 curthread->wakeup_time.tv_sec = 0;
1936                 curthread->wakeup_time.tv_nsec = 0;
1937         } else {
1938                 /* Calculate the time for the current thread to wakeup: */
1939                 KSE_GET_TOD(curthread->kse, &ts);
1940                 TIMESPEC_ADD(&curthread->wakeup_time, &ts, timeout);
1941         }
1942 }
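/*
 * Illustrative sketch (editorial note, not part of the original source)
 * of the three cases handled above; the timespec values are made up.
 *
 *      struct timespec zero = { 0, 0 };
 *      struct timespec five = { 5, 0 };
 *
 *      _thr_set_timeout(NULL);         (wait forever: wakeup_time = (-1, -1))
 *      _thr_set_timeout(&zero);        (wake immediately: wakeup_time = (0, 0))
 *      _thr_set_timeout(&five);        (wakeup_time = current TOD + 5 seconds)
 */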
1943
1944 void
1945 _thr_panic_exit(char *file, int line, char *msg)
1946 {
1947         char buf[256];
1948
1949         snprintf(buf, sizeof(buf), "(%s:%d) %s\n", file, line, msg);
1950         __sys_write(2, buf, strlen(buf));
1951         abort();
1952 }
1953
1954 void
1955 _thr_setrunnable(struct pthread *curthread, struct pthread *thread)
1956 {
1957         kse_critical_t crit;
1958         struct kse_mailbox *kmbx;
1959
1960         crit = _kse_critical_enter();
1961         KSE_SCHED_LOCK(curthread->kse, thread->kseg);
1962         kmbx = _thr_setrunnable_unlocked(thread);
1963         KSE_SCHED_UNLOCK(curthread->kse, thread->kseg);
1964         _kse_critical_leave(crit);
1965         if ((kmbx != NULL) && (__isthreaded != 0))
1966                 kse_wakeup(kmbx);
1967 }
1968
1969 struct kse_mailbox *
1970 _thr_setrunnable_unlocked(struct pthread *thread)
1971 {
1972         struct kse_mailbox *kmbx = NULL;
1973
1974         if ((thread->kseg->kg_flags & KGF_SINGLE_THREAD) != 0) {
1975                 /* No silly queues for these threads. */
1976                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
1977                         THR_SET_STATE(thread, PS_SUSPENDED);
1978                 else {
1979                         THR_SET_STATE(thread, PS_RUNNING);
1980                         kmbx = kse_wakeup_one(thread);
1981                 }
1982
1983         } else if (thread->state != PS_RUNNING) {
1984                 if ((thread->flags & THR_FLAGS_IN_WAITQ) != 0)
1985                         KSE_WAITQ_REMOVE(thread->kse, thread);
1986                 if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
1987                         THR_SET_STATE(thread, PS_SUSPENDED);
1988                 else {
1989                         THR_SET_STATE(thread, PS_RUNNING);
1990                         if ((thread->blocked == 0) && (thread->active == 0) &&
1991                             (thread->flags & THR_FLAGS_IN_RUNQ) == 0)
1992                                 THR_RUNQ_INSERT_TAIL(thread);
1993                         /*
1994                          * XXX - Threads are not yet assigned to specific
1995                          *       KSEs; they are assigned to the KSEG.  So
1996                          *       the fact that a thread's KSE is waiting
1997                          *       doesn't necessarily mean that it will be
1998                          *       the KSE that runs the thread after the
1999                          *       lock is granted.  But we don't know if the
2000                          *       other KSEs within the same KSEG are also
2001                          *       in a waiting state or not so we err on the
2002                          *       side of caution and wakeup the thread's
2003                          *       last known KSE.  We ensure that the
2004                          *       thread's KSE doesn't change while its
2005                          *       scheduling lock is held so it is safe to
2006                          *       reference it (the KSE).  If the KSE wakes
2007                          *       up and doesn't find any more work it will
2008                          *       again go back to waiting so no harm is
2009                          *       done.
2010                          */
2011                         kmbx = kse_wakeup_one(thread);
2012                 }
2013         }
2014         return (kmbx);
2015 }
2016
2017 static struct kse_mailbox *
2018 kse_wakeup_one(struct pthread *thread)
2019 {
2020         struct kse *ke;
2021
2022         if (KSE_IS_IDLE(thread->kse)) {
2023                 KSE_CLEAR_IDLE(thread->kse);
2024                 thread->kseg->kg_idle_kses--;
2025                 return (&thread->kse->k_kcb->kcb_kmbx);
2026         } else {
2027                 TAILQ_FOREACH(ke, &thread->kseg->kg_kseq, k_kgqe) {
2028                         if (KSE_IS_IDLE(ke)) {
2029                                 KSE_CLEAR_IDLE(ke);
2030                                 ke->k_kseg->kg_idle_kses--;
2031                                 return (&ke->k_kcb->kcb_kmbx);
2032                         }
2033                 }
2034         }
2035         return (NULL);
2036 }
2037
2038 static void
2039 kse_wakeup_multi(struct kse *curkse)
2040 {
2041         struct kse *ke;
2042         int tmp;
2043
2044         if ((tmp = KSE_RUNQ_THREADS(curkse)) && curkse->k_kseg->kg_idle_kses) {
2045                 TAILQ_FOREACH(ke, &curkse->k_kseg->kg_kseq, k_kgqe) {
2046                         if (KSE_IS_IDLE(ke)) {
2047                                 KSE_CLEAR_IDLE(ke);
2048                                 ke->k_kseg->kg_idle_kses--;
2049                                 KSE_WAKEUP(ke);
2050                                 if (--tmp == 0)
2051                                         break;
2052                         }
2053                 }
2054         }
2055 }
2056
2057 /*
2058  * Allocate a new KSEG.
2059  *
2060  * We allow the current thread to be NULL in the case that this
2061  * is the first time a KSEG is being created (library initialization).
2062  * In this case, we don't need to (and can't) take any locks.
2063  */
2064 struct kse_group *
2065 _kseg_alloc(struct pthread *curthread)
2066 {
2067         struct kse_group *kseg = NULL;
2068         kse_critical_t crit;
2069
2070         if ((curthread != NULL) && (free_kseg_count > 0)) {
2071                 /* Use the kse lock for the kseg queue. */
2072                 crit = _kse_critical_enter();
2073                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2074                 if ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
2075                         TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
2076                         free_kseg_count--;
2077                         active_kseg_count++;
2078                         TAILQ_INSERT_TAIL(&active_kse_groupq, kseg, kg_qe);
2079                 }
2080                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2081                 _kse_critical_leave(crit);
2082                 if (kseg)
2083                         kseg_reinit(kseg);
2084         }
2085
2086         /*
2087          * Attempt to allocate a new KSE group only if one wasn't
2088          * found in (or couldn't be taken from) the free list
2089          * above.
2090          */
2091         if ((kseg == NULL) &&
2092             ((kseg = (struct kse_group *)malloc(sizeof(*kseg))) != NULL)) {
2093                 if (_pq_alloc(&kseg->kg_schedq.sq_runq,
2094                     THR_MIN_PRIORITY, THR_LAST_PRIORITY) != 0) {
2095                         free(kseg);
2096                         kseg = NULL;
2097                 } else {
2098                         kseg_init(kseg);
2099                         /* Add the KSEG to the list of active KSEGs. */
2100                         if (curthread != NULL) {
2101                                 crit = _kse_critical_enter();
2102                                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2103                                 active_kseg_count++;
2104                                 TAILQ_INSERT_TAIL(&active_kse_groupq,
2105                                     kseg, kg_qe);
2106                                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2107                                 _kse_critical_leave(crit);
2108                         } else {
2109                                 active_kseg_count++;
2110                                 TAILQ_INSERT_TAIL(&active_kse_groupq,
2111                                     kseg, kg_qe);
2112                         }
2113                 }
2114         }
2115         return (kseg);
2116 }
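/*
 * Illustrative sketch (editorial note, not part of the original source):
 * during library initialization there is no current thread yet, so the
 * very first group would be allocated with a NULL curthread; later
 * allocations pass the caller so the locked free-list path above is used.
 *
 *      struct kse_group *kseg;
 *
 *      kseg = _kseg_alloc(NULL);               (library initialization)
 *      ...
 *      if ((kseg = _kseg_alloc(curthread)) == NULL)
 *              return (NULL);                  (out of memory)
 */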
2117
2118 static void
2119 kseg_init(struct kse_group *kseg)
2120 {
2121         kseg_reinit(kseg);
2122         _lock_init(&kseg->kg_lock, LCK_ADAPTIVE, _kse_lock_wait,
2123             _kse_lock_wakeup);
2124 }
2125
2126 static void
2127 kseg_reinit(struct kse_group *kseg)
2128 {
2129         TAILQ_INIT(&kseg->kg_kseq);
2130         TAILQ_INIT(&kseg->kg_threadq);
2131         TAILQ_INIT(&kseg->kg_schedq.sq_waitq);
2132         kseg->kg_threadcount = 0;
2133         kseg->kg_ksecount = 0;
2134         kseg->kg_idle_kses = 0;
2135         kseg->kg_flags = 0;
2136 }
2137
2138 /*
2139  * This must be called with the kse lock held and when there are
2140  * no more threads that reference it.
2141  */
2142 static void
2143 kseg_free_unlocked(struct kse_group *kseg)
2144 {
2145         TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
2146         TAILQ_INSERT_HEAD(&free_kse_groupq, kseg, kg_qe);
2147         free_kseg_count++;
2148         active_kseg_count--;
2149 }
2150
2151 void
2152 _kseg_free(struct kse_group *kseg)
2153 {
2154         struct kse *curkse;
2155         kse_critical_t crit;
2156
2157         crit = _kse_critical_enter();
2158         curkse = _get_curkse();
2159         KSE_LOCK_ACQUIRE(curkse, &kse_lock);
2160         kseg_free_unlocked(kseg);
2161         KSE_LOCK_RELEASE(curkse, &kse_lock);
2162         _kse_critical_leave(crit);
2163 }
2164
2165 static void
2166 kseg_destroy(struct kse_group *kseg)
2167 {
2168         _lock_destroy(&kseg->kg_lock);
2169         _pq_free(&kseg->kg_schedq.sq_runq);
2170         free(kseg);
2171 }
2172
2173 /*
2174  * Allocate a new KSE.
2175  *
2176  * We allow the current thread to be NULL in the case that this
2177  * is the first time a KSE is being created (library initialization).
2178  * In this case, we don't need to (and can't) take any locks.
2179  */
2180 struct kse *
2181 _kse_alloc(struct pthread *curthread, int sys_scope)
2182 {
2183         struct kse *kse = NULL;
2184         char *stack;
2185         kse_critical_t crit;
2186         int i;
2187
2188         if ((curthread != NULL) && (free_kse_count > 0)) {
2189                 crit = _kse_critical_enter();
2190                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2191                 /* Search for a finished KSE. */
2192                 kse = TAILQ_FIRST(&free_kseq);
2193                 while ((kse != NULL) &&
2194                     ((kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
2195                         kse = TAILQ_NEXT(kse, k_qe);
2196                 }
2197                 if (kse != NULL) {
2198                         DBG_MSG("found an unused kse.\n");
2199                         TAILQ_REMOVE(&free_kseq, kse, k_qe);
2200                         free_kse_count--;
2201                         TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2202                         active_kse_count++;
2203                 }
2204                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2205                 _kse_critical_leave(crit);
2206                 if (kse != NULL)
2207                         kse_reinit(kse, sys_scope);
2208         }
2209         if ((kse == NULL) &&
2210             ((kse = (struct kse *)malloc(sizeof(*kse))) != NULL)) {
2211                 if (sys_scope != 0)
2212                         stack = NULL;
2213                 else if ((stack = malloc(KSE_STACKSIZE)) == NULL) {
2214                         free(kse);
2215                         return (NULL);
2216                 }
2217                 bzero(kse, sizeof(*kse));
2218
2219                 /* Initialize KCB without the lock. */
2220                 if ((kse->k_kcb = _kcb_ctor(kse)) == NULL) {
2221                         if (stack != NULL)
2222                                 free(stack);
2223                         free(kse);
2224                         return (NULL);
2225                 }
2226
2227                 /* Initialize the lockusers. */
2228                 for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
2229                         _lockuser_init(&kse->k_lockusers[i], (void *)kse);
2230                         _LCK_SET_PRIVATE2(&kse->k_lockusers[i], NULL);
2231                 }
2232                 /* _lock_init(kse->k_lock, ...) */
2233
2234                 if (curthread != NULL) {
2235                         crit = _kse_critical_enter();
2236                         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2237                 }
2238                 kse->k_flags = 0;
2239                 TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2240                 active_kse_count++;
2241                 if (curthread != NULL) {
2242                         KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2243                         _kse_critical_leave(crit);
2244                 }
2245                 /*
2246                  * Create the KSE context.
2247                  * Scope system threads (one thread per KSE) are not required
2248                  * to have a stack for an unneeded kse upcall.
2249                  */
2250                 if (!sys_scope) {
2251                         kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2252                         kse->k_stack.ss_sp = stack;
2253                         kse->k_stack.ss_size = KSE_STACKSIZE;
2254                 } else {
2255                         kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2256                         kse->k_stack.ss_sp = NULL;
2257                         kse->k_stack.ss_size = 0;
2258                 }
2259                 kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2260                 kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2261                 /*
2262                  * We need to keep a copy of the stack in case it
2263                  * doesn't get used; a KSE running a scope system
2264                  * thread will use that thread's stack.
2265                  */
2266                 kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2267         }
2268         return (kse);
2269 }
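/*
 * Illustrative sketch (editorial note, not part of the original source):
 * the sys_scope argument selects the flavor of KSE built above.  A
 * non-zero value yields a bound KSE for a system scope thread, which
 * needs no upcall stack; zero yields a KSE for process scope threads
 * with its own KSE_STACKSIZE upcall stack.
 *
 *      struct kse *kse;
 *
 *      if ((kse = _kse_alloc(curthread, 0)) == NULL)
 *              return (NULL);
 *      ...
 *      _kse_free(curthread, kse);      (return it to the free KSE list)
 */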
2270
2271 static void
2272 kse_reinit(struct kse *kse, int sys_scope)
2273 {
2274         if (!sys_scope) {
2275                 kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2276                 if (kse->k_stack.ss_sp == NULL) {
2277                         /* XXX check allocation failure */
2278                         kse->k_stack.ss_sp = (char *) malloc(KSE_STACKSIZE);
2279                         kse->k_stack.ss_size = KSE_STACKSIZE;
2280                 }
2281                 kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2282         } else {
2283                 kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2284                 if (kse->k_stack.ss_sp)
2285                         free(kse->k_stack.ss_sp);
2286                 kse->k_stack.ss_sp = NULL;
2287                 kse->k_stack.ss_size = 0;
2288                 kse->k_kcb->kcb_kmbx.km_quantum = 0;
2289         }
2290         kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2291         kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2292         kse->k_kcb->kcb_kmbx.km_curthread = NULL;
2293         kse->k_kcb->kcb_kmbx.km_flags = 0;
2294         kse->k_curthread = NULL;
2295         kse->k_kseg = 0;
2296         kse->k_schedq = 0;
2297         kse->k_locklevel = 0;
2298         kse->k_flags = 0;
2299         kse->k_error = 0;
2300         kse->k_cpu = 0;
2301         kse->k_sigseqno = 0;
2302 }
2303
2304 void
2305 kse_free_unlocked(struct kse *kse)
2306 {
2307         TAILQ_REMOVE(&active_kseq, kse, k_qe);
2308         active_kse_count--;
2309         kse->k_kseg = NULL;
2310         kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2311         kse->k_flags = 0;
2312         TAILQ_INSERT_HEAD(&free_kseq, kse, k_qe);
2313         free_kse_count++;
2314 }
2315
2316 void
2317 _kse_free(struct pthread *curthread, struct kse *kse)
2318 {
2319         kse_critical_t crit;
2320
2321         if (curthread == NULL)
2322                 kse_free_unlocked(kse);
2323         else {
2324                 crit = _kse_critical_enter();
2325                 KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2326                 kse_free_unlocked(kse);
2327                 KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2328                 _kse_critical_leave(crit);
2329         }
2330 }
2331
2332 static void
2333 kse_destroy(struct kse *kse)
2334 {
2335         int i;
2336
2337         if (kse->k_stack.ss_sp != NULL)
2338                 free(kse->k_stack.ss_sp);
2339         _kcb_dtor(kse->k_kcb);
2340         for (i = 0; i < MAX_KSE_LOCKLEVEL; ++i)
2341                 _lockuser_destroy(&kse->k_lockusers[i]);
2342         _lock_destroy(&kse->k_lock);
2343         free(kse);
2344 }
2345
2346 struct pthread *
2347 _thr_alloc(struct pthread *curthread)
2348 {
2349         kse_critical_t  crit;
2350         struct pthread  *thread = NULL;
2351         int i;
2352
2353         if (curthread != NULL) {
2354                 if (GC_NEEDED())
2355                         _thr_gc(curthread);
2356                 if (free_thread_count > 0) {
2357                         crit = _kse_critical_enter();
2358                         KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2359                         if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
2360                                 TAILQ_REMOVE(&free_threadq, thread, tle);
2361                                 free_thread_count--;
2362                         }
2363                         KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2364                         _kse_critical_leave(crit);
2365                 }
2366         }
2367         if ((thread == NULL) &&
2368             ((thread = malloc(sizeof(struct pthread))) != NULL)) {
2369                 bzero(thread, sizeof(struct pthread));
2370                 thread->siginfo = calloc(_SIG_MAXSIG, sizeof(siginfo_t));
2371                 if (thread->siginfo == NULL) {
2372                         free(thread);
2373                         return (NULL);
2374                 }
2375                 if (curthread) {
2376                         _pthread_mutex_lock(&_tcb_mutex);
2377                         thread->tcb = _tcb_ctor(thread, 0 /* not initial tls */);
2378                         _pthread_mutex_unlock(&_tcb_mutex);
2379                 } else {
2380                         thread->tcb = _tcb_ctor(thread, 1 /* initial tls */);
2381                 }
2382                 if (thread->tcb == NULL) {
2383                         free(thread->siginfo);
2384                         free(thread);
2385                         return (NULL);
2386                 }
2387                 /*
2388                  * Initialize thread locking.
2389                  * Lock initialization needs malloc, so don't
2390                  * enter a critical region before doing this!
2391                  */
2392                 if (_lock_init(&thread->lock, LCK_ADAPTIVE,
2393                     _thr_lock_wait, _thr_lock_wakeup) != 0)
2394                         PANIC("Cannot initialize thread lock");
2395                 for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
2396                         _lockuser_init(&thread->lockusers[i], (void *)thread);
2397                         _LCK_SET_PRIVATE2(&thread->lockusers[i],
2398                             (void *)thread);
2399                 }
2400         }
2401         return (thread);
2402 }
2403
2404 void
2405 _thr_free(struct pthread *curthread, struct pthread *thread)
2406 {
2407         kse_critical_t crit;
2408
2409         DBG_MSG("Freeing thread %p\n", thread);
2410         if (thread->name) {
2411                 free(thread->name);
2412                 thread->name = NULL;
2413         }
2414         if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) {
2415                 thr_destroy(curthread, thread);
2416         } else {
2417                 /* Add the thread to the free thread list. */
2418                 crit = _kse_critical_enter();
2419                 KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2420                 TAILQ_INSERT_TAIL(&free_threadq, thread, tle);
2421                 free_thread_count++;
2422                 KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2423                 _kse_critical_leave(crit);
2424         }
2425 }
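/*
 * Illustrative sketch (editorial note, not part of the original source):
 * thread structures cycle between callers and the free list kept above;
 * actual destruction only happens when the cache already holds
 * MAX_CACHED_THREADS entries or when there is no current thread.
 *
 *      struct pthread *td;
 *
 *      if ((td = _thr_alloc(curthread)) == NULL)
 *              return (EAGAIN);
 *      ...
 *      _thr_free(curthread, td);       (usually just recycles the storage)
 */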
2426
2427 static void
2428 thr_destroy(struct pthread *curthread, struct pthread *thread)
2429 {
2430         int i;
2431
2432         for (i = 0; i < MAX_THR_LOCKLEVEL; i++)
2433                 _lockuser_destroy(&thread->lockusers[i]);
2434         _lock_destroy(&thread->lock);
2435         if (curthread) {
2436                 _pthread_mutex_lock(&_tcb_mutex);
2437                 _tcb_dtor(thread->tcb);
2438                 _pthread_mutex_unlock(&_tcb_mutex);
2439         } else {
2440                 _tcb_dtor(thread->tcb);
2441         }
2442         free(thread->siginfo);
2443         free(thread);
2444 }
2445
2446 /*
2447  * Add an active thread:
2448  *
2449  *   o Assign the thread a unique id (which GDB uses to track
2450  *     threads).
2451  *   o Add the thread to the list of all threads and increment
2452  *     the number of active threads.
2453  */
2454 static void
2455 thr_link(struct pthread *thread)
2456 {
2457         kse_critical_t crit;
2458         struct kse *curkse;
2459
2460         crit = _kse_critical_enter();
2461         curkse = _get_curkse();
2462         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2463         /*
2464          * Initialize the unique id (which GDB uses to track
2465          * threads), add the thread to the list of all threads,
2466          * and increment the number of active threads.
2467          */
2468         thread->uniqueid = next_uniqueid++;
2469         THR_LIST_ADD(thread);
2470         _thread_active_threads++;
2471         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2472         _kse_critical_leave(crit);
2473 }
2474
2475 /*
2476  * Remove an active thread.
2477  */
2478 static void
2479 thr_unlink(struct pthread *thread)
2480 {
2481         kse_critical_t crit;
2482         struct kse *curkse;
2483
2484         crit = _kse_critical_enter();
2485         curkse = _get_curkse();
2486         KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2487         THR_LIST_REMOVE(thread);
2488         _thread_active_threads--;
2489         KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2490         _kse_critical_leave(crit);
2491 }
2492
2493 void
2494 _thr_hash_add(struct pthread *thread)
2495 {
2496         struct thread_hash_head *head;
2497
2498         head = &thr_hashtable[THREAD_HASH(thread)];
2499         LIST_INSERT_HEAD(head, thread, hle);
2500 }
2501
2502 void
2503 _thr_hash_remove(struct pthread *thread)
2504 {
2505         LIST_REMOVE(thread, hle);
2506 }
2507
2508 struct pthread *
2509 _thr_hash_find(struct pthread *thread)
2510 {
2511         struct pthread *td;
2512         struct thread_hash_head *head;
2513
2514         head = &thr_hashtable[THREAD_HASH(thread)];
2515         LIST_FOREACH(td, head, hle) {
2516                 if (td == thread)
2517                         return (thread);
2518         }
2519         return (NULL);
2520 }
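/*
 * Illustrative sketch (editorial note, not part of the original source):
 * the hash above maps a pthread pointer back to itself, so callers can
 * check whether a handle still names a live thread.  The variable names
 * are placeholders.
 *
 *      _thr_hash_add(newthread);               (when the thread is created)
 *      if (_thr_hash_find(handle) == NULL)
 *              return (ESRCH);                 (no such thread)
 *      _thr_hash_remove(newthread);            (when the thread is destroyed)
 */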
2521
2522 void
2523 _thr_debug_check_yield(struct pthread *curthread)
2524 {
2525         /*
2526          * Note that TMDF_SUSPEND is set after the process is suspended.
2527          * When we are being debugged, every suspension of the process
2528          * causes all KSEs to schedule an upcall in the kernel, unless the
2529          * KSE is in a critical region.
2530          * If this function is being called, the KSE is no longer in a
2531          * critical region.  If the debugger sets TMDF_SUSPEND before the
2532          * KSE leaves its critical region, we will catch it here.  If the
2533          * flag changes while we are testing it, that is not a problem
2534          * either, because the change only occurs after a process
2535          * suspension event, and a suspension event always causes the KSE
2536          * to schedule an upcall.  In that case, because we are not in a
2537          * critical region, the upcall will be scheduled successfully, the
2538          * flag will be checked again in kse_sched_multi, and we won't
2539          * come back until the debugger clears the flag, which happens at
2540          * the next suspension event.
2541          */
2542         if (!DBG_CAN_RUN(curthread)) {
2543                 if ((curthread->attr.flags & PTHREAD_SCOPE_SYSTEM) == 0)
2544                         _thr_sched_switch(curthread);
2545                 else
2546                         kse_thr_interrupt(&curthread->tcb->tcb_tmbx,
2547                                 KSE_INTR_DBSUSPEND, 0);
2548         }
2549 }