2 * work_thread.c - threads implementation for blocking worker child.
5 #include "ntp_workimpl.h"
16 #include "ntp_stdlib.h"
17 #include "ntp_malloc.h"
18 #include "ntp_syslog.h"
21 #include "ntp_assert.h"
22 #include "ntp_unixtime.h"
23 #include "timespecops.h"
24 #include "ntp_worker.h"
26 #define CHILD_EXIT_REQ ((blocking_pipe_header *)(intptr_t)-1)
27 #define CHILD_GONE_RESP CHILD_EXIT_REQ
28 /* Queue size increments:
29 * The request queue grows a bit faster than the response queue -- the
30 * daemon can push requests and pull results faster on average than the
31 * worker can process requests and push results... If this really pays
34 #define WORKITEMS_ALLOC_INC 16
35 #define RESPONSES_ALLOC_INC 4
37 /* Fiddle with min/max stack sizes. 64kB minimum seems to work, so we
38 * set the maximum to 256kB. If the minimum goes below the
39 * system-defined minimum stack size, we have to adjust accordingly.
41 #ifndef THREAD_MINSTACKSIZE
42 # define THREAD_MINSTACKSIZE (64U * 1024)
45 #if defined(PTHREAD_STACK_MIN) && THREAD_MINSTACKSIZE < PTHREAD_STACK_MIN
46 # undef THREAD_MINSTACKSIZE
47 # define THREAD_MINSTACKSIZE PTHREAD_STACK_MIN
51 #ifndef THREAD_MAXSTACKSIZE
52 # define THREAD_MAXSTACKSIZE (256U * 1024)
54 #if THREAD_MAXSTACKSIZE < THREAD_MINSTACKSIZE
55 # undef THREAD_MAXSTACKSIZE
56 # define THREAD_MAXSTACKSIZE THREAD_MINSTACKSIZE
59 /* need a good integer to store a pointer... */
61 # if defined(UINTPTR_MAX)
62 # define UINTPTR_T uintptr_t
63 # elif defined(UINT_PTR)
64 # define UINTPTR_T UINT_PTR
66 # define UINTPTR_T size_t
73 # define thread_exit(c) _endthreadex(c)
74 # define tickle_sem(sh) ReleaseSemaphore((sh->shnd), 1, NULL)
75 u_int WINAPI blocking_thread(void *);
76 static BOOL same_os_sema(const sem_ref obj, void * osobj);
80 # define thread_exit(c) pthread_exit((void*)(UINTPTR_T)(c))
81 # define tickle_sem sem_post
82 void * blocking_thread(void *);
83 static void block_thread_signals(sigset_t *);
88 addremove_io_fd_func addremove_io_fd;
90 addremove_io_semaphore_func addremove_io_semaphore;
93 static void start_blocking_thread(blocking_child *);
94 static void start_blocking_thread_internal(blocking_child *);
95 static void prepare_child_sems(blocking_child *);
96 static int wait_for_sem(sem_ref, struct timespec *);
97 static int ensure_workitems_empty_slot(blocking_child *);
98 static int ensure_workresp_empty_slot(blocking_child *);
99 static int queue_req_pointer(blocking_child *, blocking_pipe_header *);
100 static void cleanup_after_child(blocking_child *);
102 static sema_type worker_mmutex;
103 static sem_ref worker_memlock;
105 /* --------------------------------------------------------------------
106 * locking the global worker state table (and other global stuff)
112 if (worker_memlock) {
114 wait_for_sem(worker_memlock, NULL);
116 tickle_sem(worker_memlock);
120 /* --------------------------------------------------------------------
121 * implementation isolation wrapper
128 thread_exit(exitcode); /* see #define thread_exit */
131 /* --------------------------------------------------------------------
132 * sleep for a given time or until the wakeup semaphore is tickled.
140 struct timespec until;
143 # ifdef HAVE_CLOCK_GETTIME
144 if (0 != clock_gettime(CLOCK_REALTIME, &until)) {
145 msyslog(LOG_ERR, "worker_sleep: clock_gettime() failed: %m");
149 if (0 != getclock(TIMEOFDAY, &until)) {
150 msyslog(LOG_ERR, "worker_sleep: getclock() failed: %m");
154 until.tv_sec += seconds;
155 rc = wait_for_sem(c->wake_scheduled_sleep, &until);
158 if (-1 == rc && ETIMEDOUT == errno)
160 msyslog(LOG_ERR, "worker_sleep: sem_timedwait: %m");
165 /* --------------------------------------------------------------------
166 * Wake up a worker that takes a nap.
169 interrupt_worker_sleep(void)
174 for (idx = 0; idx < blocking_children_alloc; idx++) {
175 c = blocking_children[idx];
176 if (NULL == c || NULL == c->wake_scheduled_sleep)
178 tickle_sem(c->wake_scheduled_sleep);
182 /* --------------------------------------------------------------------
183 * Make sure there is an empty slot at the head of the request
184 * queue. Tell if the queue is currently empty.
187 ensure_workitems_empty_slot(
192 ** !!! PRECONDITION: caller holds access lock!
194 ** This simply tries to increase the size of the buffer if it
195 ** becomes full. The resize operation does *not* maintain the
196 ** order of requests, but that should be irrelevant since the
197 ** processing is considered asynchronous anyway.
199 ** Return if the buffer is currently empty.
202 static const size_t each =
203 sizeof(blocking_children[0]->workitems[0]);
209 slots_used = c->head_workitem - c->tail_workitem;
210 if (slots_used >= c->workitems_alloc) {
211 new_alloc = c->workitems_alloc + WORKITEMS_ALLOC_INC;
212 c->workitems = erealloc(c->workitems, new_alloc * each);
213 for (sidx = c->workitems_alloc; sidx < new_alloc; ++sidx)
214 c->workitems[sidx] = NULL;
215 c->tail_workitem = 0;
216 c->head_workitem = c->workitems_alloc;
217 c->workitems_alloc = new_alloc;
219 INSIST(NULL == c->workitems[c->head_workitem % c->workitems_alloc]);
220 return (0 == slots_used);
223 /* --------------------------------------------------------------------
224 * Make sure there is an empty slot at the head of the response
225 * queue. Tell if the queue is currently empty.
228 ensure_workresp_empty_slot(
233 ** !!! PRECONDITION: caller holds access lock!
235 ** Works like the companion function above.
238 static const size_t each =
239 sizeof(blocking_children[0]->responses[0]);
245 slots_used = c->head_response - c->tail_response;
246 if (slots_used >= c->responses_alloc) {
247 new_alloc = c->responses_alloc + RESPONSES_ALLOC_INC;
248 c->responses = erealloc(c->responses, new_alloc * each);
249 for (sidx = c->responses_alloc; sidx < new_alloc; ++sidx)
250 c->responses[sidx] = NULL;
251 c->tail_response = 0;
252 c->head_response = c->responses_alloc;
253 c->responses_alloc = new_alloc;
255 INSIST(NULL == c->responses[c->head_response % c->responses_alloc]);
256 return (0 == slots_used);
260 /* --------------------------------------------------------------------
261 * queue_req_pointer() - append a work item or idle exit request to
262 * blocking_workitems[]. Employ proper locking.
267 blocking_pipe_header * hdr
272 /* >>>> ACCESS LOCKING STARTS >>>> */
273 wait_for_sem(c->accesslock, NULL);
274 ensure_workitems_empty_slot(c);
275 qhead = c->head_workitem;
276 c->workitems[qhead % c->workitems_alloc] = hdr;
277 c->head_workitem = 1 + qhead;
278 tickle_sem(c->accesslock);
279 /* <<<< ACCESS LOCKING ENDS <<<< */
281 /* queue consumer wake-up notification */
282 tickle_sem(c->workitems_pending);
287 /* --------------------------------------------------------------------
288 * API function to make sure a worker is running, a proper private copy
289 * of the data is made, the data entered into the queue and the worker
293 send_blocking_req_internal(
295 blocking_pipe_header * hdr,
299 blocking_pipe_header * threadcopy;
300 size_t payload_octets;
302 REQUIRE(hdr != NULL);
303 REQUIRE(data != NULL);
304 DEBUG_REQUIRE(BLOCKING_REQ_MAGIC == hdr->magic_sig);
306 if (hdr->octets <= sizeof(*hdr))
307 return 1; /* failure */
308 payload_octets = hdr->octets - sizeof(*hdr);
310 if (NULL == c->thread_ref)
311 start_blocking_thread(c);
312 threadcopy = emalloc(hdr->octets);
313 memcpy(threadcopy, hdr, sizeof(*hdr));
314 memcpy((char *)threadcopy + sizeof(*hdr), data, payload_octets);
316 return queue_req_pointer(c, threadcopy);
319 /* --------------------------------------------------------------------
320 * Wait for the 'incoming queue no longer empty' signal, lock the shared
321 * structure and dequeue an item.
323 blocking_pipe_header *
324 receive_blocking_req_internal(
328 blocking_pipe_header * req;
333 /* wait for tickle from the producer side */
334 wait_for_sem(c->workitems_pending, NULL);
336 /* >>>> ACCESS LOCKING STARTS >>>> */
337 wait_for_sem(c->accesslock, NULL);
338 qhead = c->head_workitem;
340 qtail = c->tail_workitem;
343 c->tail_workitem = qtail + 1;
344 qtail %= c->workitems_alloc;
345 req = c->workitems[qtail];
346 c->workitems[qtail] = NULL;
347 } while (NULL == req);
348 tickle_sem(c->accesslock);
349 /* <<<< ACCESS LOCKING ENDS <<<< */
351 } while (NULL == req);
354 if (CHILD_EXIT_REQ == req) { /* idled out */
355 send_blocking_resp_internal(c, CHILD_GONE_RESP);
362 /* --------------------------------------------------------------------
363 * Push a response into the return queue and eventually tickle the
367 send_blocking_resp_internal(
369 blocking_pipe_header * resp
375 /* >>>> ACCESS LOCKING STARTS >>>> */
376 wait_for_sem(c->accesslock, NULL);
377 empty = ensure_workresp_empty_slot(c);
378 qhead = c->head_response;
379 c->responses[qhead % c->responses_alloc] = resp;
380 c->head_response = 1 + qhead;
381 tickle_sem(c->accesslock);
382 /* <<<< ACCESS LOCKING ENDS <<<< */
384 /* queue consumer wake-up notification */
388 if (1 != write(c->resp_write_pipe, "", 1))
389 msyslog(LOG_WARNING, "async resolver: %s",
390 "failed to notify main thread!");
392 tickle_sem(c->responses_pending);
401 /* --------------------------------------------------------------------
402 * Check if a (Windows-)handle to a semaphore is actually the same one we
403 * are using inside the sema wrapper.
411 return obj && osh && (obj->shnd == (HANDLE)osh);
414 /* --------------------------------------------------------------------
415 * Find the shared context that associates to an OS handle and make sure
416 * the data is dequeued and processed.
419 handle_blocking_resp_sem(
427 for (idx = 0; idx < blocking_children_alloc; idx++) {
428 c = blocking_children[idx];
430 c->thread_ref != NULL &&
431 same_os_sema(c->responses_pending, context))
434 if (idx < blocking_children_alloc)
435 process_blocking_resp(c);
437 #endif /* !WORK_PIPE */
439 /* --------------------------------------------------------------------
440 * Fetch the next response from the return queue. In case of signalling
441 * via pipe, make sure the pipe is flushed, too.
443 blocking_pipe_header *
444 receive_blocking_resp_internal(
448 blocking_pipe_header * removed;
449 size_t qhead, qtail, slot;
456 rc = read(c->resp_read_pipe, scratch, sizeof(scratch));
457 while (-1 == rc && EINTR == errno);
460 /* >>>> ACCESS LOCKING STARTS >>>> */
461 wait_for_sem(c->accesslock, NULL);
462 qhead = c->head_response;
463 qtail = c->tail_response;
464 for (removed = NULL; !removed && (qhead != qtail); ++qtail) {
465 slot = qtail % c->responses_alloc;
466 removed = c->responses[slot];
467 c->responses[slot] = NULL;
469 c->tail_response = qtail;
470 tickle_sem(c->accesslock);
471 /* <<<< ACCESS LOCKING ENDS <<<< */
473 if (NULL != removed) {
474 DEBUG_ENSURE(CHILD_GONE_RESP == removed ||
475 BLOCKING_RESP_MAGIC == removed->magic_sig);
477 if (CHILD_GONE_RESP == removed) {
478 cleanup_after_child(c);
485 /* --------------------------------------------------------------------
486 * Light up a new worker.
489 start_blocking_thread(
494 DEBUG_INSIST(!c->reusable);
496 prepare_child_sems(c);
497 start_blocking_thread_internal(c);
500 /* --------------------------------------------------------------------
501 * Create a worker thread. There are several differences between POSIX
502 * and Windows, of course -- most notably the Windows thread is not a
503 * detached thread, and we keep the handle around until we want to get
504 * rid of the thread. The notification scheme also differs: Windows
505 * makes use of semaphores in both directions, POSIX uses a pipe for
506 * integration with 'select()' or the like.
509 start_blocking_thread_internal(
516 c->thread_ref = NULL;
517 (*addremove_io_semaphore)(c->responses_pending->shnd, FALSE);
518 c->thr_table[0].thnd =
519 (HANDLE)_beginthreadex(
527 if (NULL == c->thr_table[0].thnd) {
528 msyslog(LOG_ERR, "start blocking thread failed: %m");
531 /* remember the thread priority is only within the process class */
532 if (!SetThreadPriority(c->thr_table[0].thnd,
533 THREAD_PRIORITY_BELOW_NORMAL))
534 msyslog(LOG_ERR, "Error lowering blocking thread priority: %m");
536 resumed = ResumeThread(c->thr_table[0].thnd);
537 DEBUG_INSIST(resumed);
538 c->thread_ref = &c->thr_table[0];
540 #else /* pthreads start_blocking_thread_internal() follows */
542 # ifdef NEED_PTHREAD_INIT
543 static int pthread_init_called;
545 pthread_attr_t thr_attr;
547 int pipe_ends[2]; /* read then write */
552 sigset_t saved_sig_mask;
554 c->thread_ref = NULL;
556 # ifdef NEED_PTHREAD_INIT
558 * from lib/isc/unix/app.c:
559 * BSDI 3.1 seg faults in pthread_sigmask() if we don't do this.
561 if (!pthread_init_called) {
563 pthread_init_called = TRUE;
567 rc = pipe_socketpair(&pipe_ends[0], &is_pipe);
569 msyslog(LOG_ERR, "start_blocking_thread: pipe_socketpair() %m");
572 c->resp_read_pipe = move_fd(pipe_ends[0]);
573 c->resp_write_pipe = move_fd(pipe_ends[1]);
575 flags = fcntl(c->resp_read_pipe, F_GETFL, 0);
577 msyslog(LOG_ERR, "start_blocking_thread: fcntl(F_GETFL) %m");
580 rc = fcntl(c->resp_read_pipe, F_SETFL, O_NONBLOCK | flags);
583 "start_blocking_thread: fcntl(F_SETFL, O_NONBLOCK) %m");
586 (*addremove_io_fd)(c->resp_read_pipe, c->ispipe, FALSE);
587 pthread_attr_init(&thr_attr);
588 pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED);
589 #if defined(HAVE_PTHREAD_ATTR_GETSTACKSIZE) && \
590 defined(HAVE_PTHREAD_ATTR_SETSTACKSIZE)
591 rc = pthread_attr_getstacksize(&thr_attr, &ostacksize);
594 "start_blocking_thread: pthread_attr_getstacksize() -> %s",
597 if (ostacksize < THREAD_MINSTACKSIZE)
598 nstacksize = THREAD_MINSTACKSIZE;
599 else if (ostacksize > THREAD_MAXSTACKSIZE)
600 nstacksize = THREAD_MAXSTACKSIZE;
602 nstacksize = ostacksize;
603 if (nstacksize != ostacksize)
604 rc = pthread_attr_setstacksize(&thr_attr, nstacksize);
607 "start_blocking_thread: pthread_attr_setstacksize(0x%lx -> 0x%lx) -> %s",
608 (u_long)ostacksize, (u_long)nstacksize,
612 UNUSED_ARG(nstacksize);
613 UNUSED_ARG(ostacksize);
615 #if defined(PTHREAD_SCOPE_SYSTEM) && defined(NEED_PTHREAD_SCOPE_SYSTEM)
616 pthread_attr_setscope(&thr_attr, PTHREAD_SCOPE_SYSTEM);
618 c->thread_ref = emalloc_zero(sizeof(*c->thread_ref));
619 block_thread_signals(&saved_sig_mask);
620 rc = pthread_create(&c->thr_table[0], &thr_attr,
621 &blocking_thread, c);
622 pthread_sigmask(SIG_SETMASK, &saved_sig_mask, NULL);
623 pthread_attr_destroy(&thr_attr);
625 msyslog(LOG_ERR, "start_blocking_thread: pthread_create() -> %s",
629 c->thread_ref = &c->thr_table[0];
633 /* --------------------------------------------------------------------
634 * block_thread_signals()
636 * Temporarily block signals used by ntpd main thread, so that signal
637 * mask inherited by child threads leaves them blocked. Returns prior
638 * active signal mask via pmask, to be restored by the main thread
639 * after pthread_create().
643 block_thread_signals(
650 # ifdef HAVE_SIGNALED_IO
652 sigaddset(&block, SIGIO);
655 sigaddset(&block, SIGPOLL);
657 # endif /* HAVE_SIGNALED_IO */
658 sigaddset(&block, SIGALRM);
659 sigaddset(&block, MOREDEBUGSIG);
660 sigaddset(&block, LESSDEBUGSIG);
662 sigaddset(&block, SIGDIE1);
665 sigaddset(&block, SIGDIE2);
668 sigaddset(&block, SIGDIE3);
671 sigaddset(&block, SIGDIE4);
674 sigaddset(&block, SIGBUS);
677 pthread_sigmask(SIG_BLOCK, &block, pmask);
679 #endif /* !SYS_WINNT */
682 /* --------------------------------------------------------------------
683 * Create & destroy semaphores. This is sufficiently different between
684 * POSIX and Windows to warrant wrapper functions and close enough to
685 * use the concept of synchronization via semaphore for all platforms.
696 if (NULL != semptr) {
697 svini = (inival < LONG_MAX)
698 ? (long)inival : LONG_MAX;
699 svmax = (maxval < LONG_MAX && maxval > 0)
700 ? (long)maxval : LONG_MAX;
701 semptr->shnd = CreateSemaphore(NULL, svini, svmax, NULL);
702 if (NULL == semptr->shnd)
709 if (semptr && sem_init(semptr, FALSE, inival))
717 /* ------------------------------------------------------------------ */
727 CloseHandle(obj->shnd);
741 /* --------------------------------------------------------------------
742 * prepare_child_sems()
744 * create sync & access semaphores
746 * All semaphores are cleared, only the access semaphore has 1 unit.
747 * A child waits on 'workitems_pending', then grabs 'sema_access'
748 * and dequeues jobs. When done, 'sema_access' is given one unit back.
750 * The producer grabs 'sema_access', manages the queue, restores
751 * 'sema_access' and puts one unit into 'workitems_pending'.
753 * The story goes the same for the response queue.
760 if (NULL == worker_memlock)
761 worker_memlock = create_sema(&worker_mmutex, 1, 1);
763 c->accesslock = create_sema(&c->sem_table[0], 1, 1);
764 c->workitems_pending = create_sema(&c->sem_table[1], 0, 0);
765 c->wake_scheduled_sleep = create_sema(&c->sem_table[2], 0, 1);
767 c->responses_pending = create_sema(&c->sem_table[3], 0, 0);
771 /* --------------------------------------------------------------------
772 * wait for semaphore. Where the wait can be interrupted, it will
773 * internally resume -- When this function returns, there is either no
774 * semaphore at all, a timeout occurred, or the caller could
775 * successfully take a token from the semaphore.
777 * For untimed wait, not checking the result of this function at all is
778 * definitely an option.
783 struct timespec * timeout /* wall-clock */
788 struct timespec delta;
792 if (!(sem && sem->shnd)) {
797 if (NULL == timeout) {
800 getclock(TIMEOFDAY, &now);
801 delta = sub_tspec(*timeout, now);
802 if (delta.tv_sec < 0) {
804 } else if ((delta.tv_sec + 1) >= (MAXDWORD / 1000)) {
807 msec = 1000 * (DWORD)delta.tv_sec;
808 msec += delta.tv_nsec / (1000 * 1000);
811 rc = WaitForSingleObject(sem->shnd, msec);
812 if (WAIT_OBJECT_0 == rc)
814 if (WAIT_TIMEOUT == rc) {
818 msyslog(LOG_ERR, "WaitForSingleObject unexpected 0x%x", rc);
822 #else /* pthreads wait_for_sem() follows */
830 rc = sem_timedwait(sem, timeout);
831 } while (rc == -1 && errno == EINTR);
839 /* --------------------------------------------------------------------
840 * blocking_thread - thread functions have WINAPI (aka 'stdcall')
841 * calling conventions under Windows and POSIX-defined signature
856 exit_worker(blocking_child_common(c));
862 /* --------------------------------------------------------------------
863 * req_child_exit() runs in the parent.
865 * This function is called from the idle timer, too, and possibly
866 * without a thread being there any longer. Since we have folded up our
867 * tent in that case and all the semaphores are already gone, we simply
868 * ignore this request in this case.
870 * Since the existence of the semaphores is controlled exclusively by
871 * the parent, there's no risk of data race here.
878 return (c->accesslock)
879 ? queue_req_pointer(c, CHILD_EXIT_REQ)
883 /* --------------------------------------------------------------------
884 * cleanup_after_child() runs in parent.
891 DEBUG_INSIST(!c->reusable);
894 /* The thread was not created in detached state, so we better
897 if (c->thread_ref && c->thread_ref->thnd) {
898 WaitForSingleObject(c->thread_ref->thnd, INFINITE);
899 INSIST(CloseHandle(c->thread_ref->thnd));
900 c->thread_ref->thnd = NULL;
903 c->thread_ref = NULL;
905 /* remove semaphores and (if signalling via IO) pipes */
907 c->accesslock = delete_sema(c->accesslock);
908 c->workitems_pending = delete_sema(c->workitems_pending);
909 c->wake_scheduled_sleep = delete_sema(c->wake_scheduled_sleep);
912 DEBUG_INSIST(-1 != c->resp_read_pipe);
913 DEBUG_INSIST(-1 != c->resp_write_pipe);
914 (*addremove_io_fd)(c->resp_read_pipe, c->ispipe, TRUE);
915 close(c->resp_write_pipe);
916 close(c->resp_read_pipe);
917 c->resp_write_pipe = -1;
918 c->resp_read_pipe = -1;
920 DEBUG_INSIST(NULL != c->responses_pending);
921 (*addremove_io_semaphore)(c->responses_pending->shnd, TRUE);
922 c->responses_pending = delete_sema(c->responses_pending);
925 /* Is it necessary to check if there are pending requests and
926 * responses? If so, and if there are, what to do with them?
929 /* re-init buffer index sequencers */
930 c->head_workitem = 0;
931 c->tail_workitem = 0;
932 c->head_response = 0;
933 c->tail_response = 0;
939 #else /* !WORK_THREAD follows */
940 char work_thread_nonempty_compilation_unit;