/*
 * work_thread.c - threads implementation for blocking worker child.
 */

#include "ntp_workimpl.h"

#include "ntp_stdlib.h"
#include "ntp_malloc.h"
#include "ntp_syslog.h"

#include "ntp_assert.h"
#include "ntp_unixtime.h"
#include "timespecops.h"
#include "ntp_worker.h"

#define CHILD_EXIT_REQ	((blocking_pipe_header *)(intptr_t)-1)
#define CHILD_GONE_RESP	CHILD_EXIT_REQ
#define WORKITEMS_ALLOC_INC	16
#define RESPONSES_ALLOC_INC	4

#ifndef THREAD_MINSTACKSIZE
#define THREAD_MINSTACKSIZE	(64U * 1024)

# define thread_exit(c)	_endthreadex(c)
# define tickle_sem(sh)	ReleaseSemaphore((sh->shnd), 1, NULL)
u_int	WINAPI	blocking_thread(void *);
static BOOL	same_os_sema(const sem_ref obj, void * osobj);

# define thread_exit(c)	pthread_exit((void*)(size_t)(c))
# define tickle_sem	sem_post
void *	blocking_thread(void *);
static	void	block_thread_signals(sigset_t *);

addremove_io_fd_func		addremove_io_fd;

addremove_io_semaphore_func	addremove_io_semaphore;

static	void	start_blocking_thread(blocking_child *);
static	void	start_blocking_thread_internal(blocking_child *);
static	void	prepare_child_sems(blocking_child *);
static	int	wait_for_sem(sem_ref, struct timespec *);
static	int	ensure_workitems_empty_slot(blocking_child *);
static	int	ensure_workresp_empty_slot(blocking_child *);
static	int	queue_req_pointer(blocking_child *, blocking_pipe_header *);
static	void	cleanup_after_child(blocking_child *);

	thread_exit(exitcode);	/* see #define thread_exit */
/* --------------------------------------------------------------------
 * sleep for a given time or until the wakeup semaphore is tickled.
 */
	struct timespec	until;

# ifdef HAVE_CLOCK_GETTIME
	if (0 != clock_gettime(CLOCK_REALTIME, &until)) {
		msyslog(LOG_ERR, "worker_sleep: clock_gettime() failed: %m");

	if (0 != getclock(TIMEOFDAY, &until)) {
		msyslog(LOG_ERR, "worker_sleep: getclock() failed: %m");

	until.tv_sec += seconds;
	rc = wait_for_sem(c->wake_scheduled_sleep, &until);

	if (-1 == rc && ETIMEDOUT == errno)

	msyslog(LOG_ERR, "worker_sleep: sem_timedwait: %m");
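
/*
 * Note (editorial): the deadline handed to wait_for_sem() is an
 * absolute wall-clock time (CLOCK_REALTIME plus 'seconds'), matching
 * the absolute-timeout semantics of sem_timedwait(). This is also
 * what lets interrupt_worker_sleep() below cut a nap short: posting
 * the 'wake_scheduled_sleep' semaphore makes the wait return early.
 */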
/* --------------------------------------------------------------------
 * Wake up a worker that is taking a nap.
 */
interrupt_worker_sleep(void)

	for (idx = 0; idx < blocking_children_alloc; idx++) {
		c = blocking_children[idx];
		if (NULL == c || NULL == c->wake_scheduled_sleep)
			continue;
		tickle_sem(c->wake_scheduled_sleep);

/* --------------------------------------------------------------------
 * Make sure there is an empty slot at the head of the request
 * queue. Tell if the queue is currently empty.
 */
ensure_workitems_empty_slot(

	/*
	** !!! PRECONDITION: caller holds access lock!
	**
	** This simply tries to increase the size of the buffer if it
	** becomes full. The resize operation does *not* maintain the
	** order of requests, but that should be irrelevant since the
	** processing is considered asynchronous anyway.
	**
	** Returns whether the buffer is currently empty.
	*/
	static const size_t each =
		sizeof(blocking_children[0]->workitems[0]);

	slots_used = c->head_workitem - c->tail_workitem;
	if (slots_used >= c->workitems_alloc) {
		new_alloc = c->workitems_alloc + WORKITEMS_ALLOC_INC;
		c->workitems = erealloc(c->workitems, new_alloc * each);
		c->tail_workitem = 0;
		c->head_workitem = c->workitems_alloc;
		c->workitems_alloc = new_alloc;

	return (0 == slots_used);
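
/*
 * Illustration (not in the original source): with workitems_alloc == 4
 * and free-running indices tail == 5, head == 9, all four slots are
 * occupied. The occupied entries live at array positions 5..8 mod 4,
 * i.e. 0..3, so after growing to 8 slots, re-labeling tail = 0 and
 * head = 4 keeps every item reachable, merely in a rotated order.
 */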
/* --------------------------------------------------------------------
 * Make sure there is an empty slot at the head of the response
 * queue. Tell if the queue is currently empty.
 */
ensure_workresp_empty_slot(

	/*
	** !!! PRECONDITION: caller holds access lock!
	**
	** Works like the companion function above.
	*/
	static const size_t each =
		sizeof(blocking_children[0]->responses[0]);

	slots_used = c->head_response - c->tail_response;
	if (slots_used >= c->responses_alloc) {
		new_alloc = c->responses_alloc + RESPONSES_ALLOC_INC;
		c->responses = erealloc(c->responses, new_alloc * each);
		c->tail_response = 0;
		c->head_response = c->responses_alloc;
		c->responses_alloc = new_alloc;

	return (0 == slots_used);

/* --------------------------------------------------------------------
 * queue_req_pointer() - append a work item or idle exit request to
 * blocking_workitems[]. Employ proper locking.
 */
	blocking_pipe_header *	hdr

	/* >>>> ACCESS LOCKING STARTS >>>> */
	wait_for_sem(c->accesslock, NULL);
	ensure_workitems_empty_slot(c);
	qhead = c->head_workitem;
	c->workitems[qhead % c->workitems_alloc] = hdr;
	c->head_workitem = 1 + qhead;
	tickle_sem(c->accesslock);
	/* <<<< ACCESS LOCKING ENDS <<<< */

	/* queue consumer wake-up notification */
	tickle_sem(c->workitems_pending);
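
/*
 * Note (editorial): 'accesslock' is a binary semaphore used as a mutex
 * around the queue indices, while 'workitems_pending' is a counting
 * semaphore carrying one unit per queued request. Posting the counter
 * *after* releasing the lock helps avoid waking the consumer only to
 * have it block immediately on 'accesslock'.
 */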
/* --------------------------------------------------------------------
 * API function to make sure a worker is running, a proper private copy
 * of the data is made, the data entered into the queue and the worker
 * is woken up.
 */
send_blocking_req_internal(
	blocking_pipe_header *	hdr,

	blocking_pipe_header *	threadcopy;
	size_t			payload_octets;

	REQUIRE(hdr != NULL);
	REQUIRE(data != NULL);
	DEBUG_REQUIRE(BLOCKING_REQ_MAGIC == hdr->magic_sig);

	if (hdr->octets <= sizeof(*hdr))
		return 1;	/* failure */
	payload_octets = hdr->octets - sizeof(*hdr);

	if (NULL == c->thread_ref)
		start_blocking_thread(c);
	threadcopy = emalloc(hdr->octets);
	memcpy(threadcopy, hdr, sizeof(*hdr));
	memcpy((char *)threadcopy + sizeof(*hdr), data, payload_octets);

	return queue_req_pointer(c, threadcopy);
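
/*
 * For illustration only ('payload_len' is a hypothetical length of
 * 'data', not a name from this file): a caller frames a request so
 * that 'octets' counts header plus payload, which is exactly how the
 * copy above splits it again:
 *
 *	hdr->octets = sizeof(*hdr) + payload_len;
 *	send_blocking_req_internal(c, hdr, data);
 */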
/* --------------------------------------------------------------------
 * Wait for the 'incoming queue no longer empty' signal, lock the shared
 * structure and dequeue an item.
 */
blocking_pipe_header *
receive_blocking_req_internal(

	blocking_pipe_header *	req;

		/* wait for tickle from the producer side */
		wait_for_sem(c->workitems_pending, NULL);

		/* >>>> ACCESS LOCKING STARTS >>>> */
		wait_for_sem(c->accesslock, NULL);
		qhead = c->head_workitem;

			qtail = c->tail_workitem;

			c->tail_workitem = qtail + 1;
			qtail %= c->workitems_alloc;
			req = c->workitems[qtail];
			c->workitems[qtail] = NULL;
		} while (NULL == req);
		tickle_sem(c->accesslock);
		/* <<<< ACCESS LOCKING ENDS <<<< */

	} while (NULL == req);

	if (CHILD_EXIT_REQ == req) {	/* idled out */
		send_blocking_resp_internal(c, CHILD_GONE_RESP);
/* --------------------------------------------------------------------
 * Push a response into the return queue and eventually tickle the
 * consumer.
 */
send_blocking_resp_internal(
	blocking_pipe_header *	resp

	/* >>>> ACCESS LOCKING STARTS >>>> */
	wait_for_sem(c->accesslock, NULL);
	empty = ensure_workresp_empty_slot(c);
	qhead = c->head_response;
	c->responses[qhead % c->responses_alloc] = resp;
	c->head_response = 1 + qhead;
	tickle_sem(c->accesslock);
	/* <<<< ACCESS LOCKING ENDS <<<< */

	/* queue consumer wake-up notification */

		write(c->resp_write_pipe, "", 1);

	tickle_sem(c->responses_pending);
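
/*
 * Note (editorial): two notification paths appear here. With
 * pipe-based signalling (POSIX) a single byte is written to
 * 'resp_write_pipe' so the main loop's select()/poll() sees the read
 * end become readable; on Windows the 'responses_pending' semaphore
 * registered with the I/O loop is posted instead. Only the path
 * compiled in for the platform is used.
 */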
/* --------------------------------------------------------------------
 * Check if a (Windows-)handle to a semaphore is actually the same one
 * we are using inside the sema wrapper.
 */
	return obj && osh && (obj->shnd == (HANDLE)osh);

/* --------------------------------------------------------------------
 * Find the shared context that associates to an OS handle and make sure
 * the data is dequeued and processed.
 */
handle_blocking_resp_sem(

	for (idx = 0; idx < blocking_children_alloc; idx++) {
		c = blocking_children[idx];
		if (c != NULL &&
		    c->thread_ref != NULL &&
		    same_os_sema(c->responses_pending, context))
			break;

	if (idx < blocking_children_alloc)
		process_blocking_resp(c);

#endif	/* !WORK_PIPE */

/* --------------------------------------------------------------------
 * Fetch the next response from the return queue. In case of signalling
 * via pipe, make sure the pipe is flushed, too.
 */
blocking_pipe_header *
receive_blocking_resp_internal(

	blocking_pipe_header *	removed;
	size_t			qhead, qtail, slot;

		rc = read(c->resp_read_pipe, scratch, sizeof(scratch));
	while (-1 == rc && EINTR == errno);

	/* >>>> ACCESS LOCKING STARTS >>>> */
	wait_for_sem(c->accesslock, NULL);
	qhead = c->head_response;
	qtail = c->tail_response;
	for (removed = NULL; !removed && (qhead != qtail); ++qtail) {
		slot = qtail % c->responses_alloc;
		removed = c->responses[slot];
		c->responses[slot] = NULL;

	c->tail_response = qtail;
	tickle_sem(c->accesslock);
	/* <<<< ACCESS LOCKING ENDS <<<< */

	if (NULL != removed) {
		DEBUG_ENSURE(CHILD_GONE_RESP == removed ||
			     BLOCKING_RESP_MAGIC == removed->magic_sig);

	if (CHILD_GONE_RESP == removed) {
		cleanup_after_child(c);
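		/* see cleanup_after_child() below: semaphores, pipes and
		 * the thread handle are reclaimed so a fresh worker can
		 * be started for the next request. */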
/* --------------------------------------------------------------------
 * Light up a new worker.
 */
start_blocking_thread(

	DEBUG_INSIST(!c->reusable);

	prepare_child_sems(c);
	start_blocking_thread_internal(c);

/* --------------------------------------------------------------------
 * Create a worker thread. There are several differences between POSIX
 * and Windows, of course -- most notably the Windows thread is not a
 * detached thread, and we keep the handle around until we want to get
 * rid of the thread. The notification scheme also differs: Windows
 * makes use of semaphores in both directions, POSIX uses a pipe for
 * integration with 'select()' or the like.
 */
start_blocking_thread_internal(

	c->thread_ref = NULL;
	(*addremove_io_semaphore)(c->responses_pending->shnd, FALSE);
	c->thr_table[0].thnd =
		(HANDLE)_beginthreadex(

	if (NULL == c->thr_table[0].thnd) {
		msyslog(LOG_ERR, "start blocking thread failed: %m");

	/* remember the thread priority is only within the process class */
	if (!SetThreadPriority(c->thr_table[0].thnd,
			       THREAD_PRIORITY_BELOW_NORMAL))
		msyslog(LOG_ERR, "Error lowering blocking thread priority: %m");

	resumed = ResumeThread(c->thr_table[0].thnd);
	DEBUG_INSIST(resumed);
	c->thread_ref = &c->thr_table[0];
#else	/* pthreads start_blocking_thread_internal() follows */

# ifdef NEED_PTHREAD_INIT
	static int	pthread_init_called;

	pthread_attr_t	thr_attr;

	int		pipe_ends[2];	/* read then write */

	sigset_t	saved_sig_mask;

	c->thread_ref = NULL;

# ifdef NEED_PTHREAD_INIT
	/*
	 * from lib/isc/unix/app.c:
	 * BSDI 3.1 seg faults in pthread_sigmask() if we don't do this.
	 */
	if (!pthread_init_called) {
		pthread_init_called = TRUE;

	rc = pipe_socketpair(&pipe_ends[0], &is_pipe);
		msyslog(LOG_ERR, "start_blocking_thread: pipe_socketpair() %m");

	c->resp_read_pipe = move_fd(pipe_ends[0]);
	c->resp_write_pipe = move_fd(pipe_ends[1]);

	flags = fcntl(c->resp_read_pipe, F_GETFL, 0);
		msyslog(LOG_ERR, "start_blocking_thread: fcntl(F_GETFL) %m");

	rc = fcntl(c->resp_read_pipe, F_SETFL, O_NONBLOCK | flags);
		msyslog(LOG_ERR,
			"start_blocking_thread: fcntl(F_SETFL, O_NONBLOCK) %m");

	(*addremove_io_fd)(c->resp_read_pipe, c->ispipe, FALSE);
	pthread_attr_init(&thr_attr);
	pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED);
#if defined(HAVE_PTHREAD_ATTR_GETSTACKSIZE) && \
    defined(HAVE_PTHREAD_ATTR_SETSTACKSIZE)
	rc = pthread_attr_getstacksize(&thr_attr, &stacksize);
		msyslog(LOG_ERR,
			"start_blocking_thread: pthread_attr_getstacksize %m");
	} else if (stacksize < THREAD_MINSTACKSIZE) {
		rc = pthread_attr_setstacksize(&thr_attr,
					       THREAD_MINSTACKSIZE);
			msyslog(LOG_ERR,
				"start_blocking_thread: pthread_attr_setstacksize(0x%lx -> 0x%lx) %m",
				(u_long)stacksize,
				(u_long)THREAD_MINSTACKSIZE);

	UNUSED_ARG(stacksize);

#if defined(PTHREAD_SCOPE_SYSTEM) && defined(NEED_PTHREAD_SCOPE_SYSTEM)
	pthread_attr_setscope(&thr_attr, PTHREAD_SCOPE_SYSTEM);

	c->thread_ref = emalloc_zero(sizeof(*c->thread_ref));
	block_thread_signals(&saved_sig_mask);
	rc = pthread_create(&c->thr_table[0], &thr_attr,
			    &blocking_thread, c);

	pthread_sigmask(SIG_SETMASK, &saved_sig_mask, NULL);
	pthread_attr_destroy(&thr_attr);

		msyslog(LOG_ERR, "pthread_create() blocking child: %m");

	c->thread_ref = &c->thr_table[0];
/* --------------------------------------------------------------------
 * block_thread_signals()
 *
 * Temporarily block signals used by the ntpd main thread, so that the
 * signal mask inherited by child threads leaves them blocked. Returns
 * the prior active signal mask via pmask, to be restored by the main
 * thread after pthread_create().
 */
block_thread_signals(

# ifdef HAVE_SIGNALED_IO
	sigaddset(&block, SIGIO);

	sigaddset(&block, SIGPOLL);

# endif	/* HAVE_SIGNALED_IO */
	sigaddset(&block, SIGALRM);
	sigaddset(&block, MOREDEBUGSIG);
	sigaddset(&block, LESSDEBUGSIG);

	sigaddset(&block, SIGDIE1);

	sigaddset(&block, SIGDIE2);

	sigaddset(&block, SIGDIE3);

	sigaddset(&block, SIGDIE4);

	sigaddset(&block, SIGBUS);

	pthread_sigmask(SIG_BLOCK, &block, pmask);
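
/*
 * Note (editorial): pthread_create() gives a new thread a copy of the
 * creating thread's signal mask, so blocking these signals around the
 * create call (and restoring the old mask afterwards, as done above)
 * ensures the worker never receives signals meant for the main thread.
 */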
#endif	/* !SYS_WINNT */

/* --------------------------------------------------------------------
 * Create & destroy semaphores. This is sufficiently different between
 * POSIX and Windows to warrant wrapper functions and close enough to
 * use the concept of synchronization via semaphore for all platforms.
 */

	if (NULL != semptr) {
		svini = (inival < LONG_MAX)
		    ? (long)inival : LONG_MAX;
		svmax = (maxval < LONG_MAX && maxval > 0)
		    ? (long)maxval : LONG_MAX;
		semptr->shnd = CreateSemaphore(NULL, svini, svmax, NULL);
		if (NULL == semptr->shnd)

	if (semptr && sem_init(semptr, FALSE, inival))

/* ------------------------------------------------------------------ */

	CloseHandle(obj->shnd);

/* --------------------------------------------------------------------
 * prepare_child_sems()
 *
 * create sync & access semaphores
 *
 * All semaphores are cleared, only the access semaphore has 1 unit.
 * Children wait on 'workitems_pending', then grab 'sema_access'
 * and dequeue jobs. When done, 'sema_access' is given one unit back.
 *
 * The producer grabs 'sema_access', manages the queue, restores
 * 'sema_access' and puts one unit into 'workitems_pending'.
 *
 * The same story applies to the response queue.
 */
	c->accesslock           = create_sema(&c->sem_table[0], 1, 1);
	c->workitems_pending    = create_sema(&c->sem_table[1], 0, 0);
	c->wake_scheduled_sleep = create_sema(&c->sem_table[2], 0, 1);

	c->responses_pending    = create_sema(&c->sem_table[3], 0, 0);
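
/*
 * Illustration (not in the original source) of the unit flow for one
 * request, producer (parent) on the left, consumer (child) on the
 * right:
 *
 *	P: wait(accesslock)          C: wait(workitems_pending) [blocks]
 *	P: enqueue request
 *	P: post(accesslock)
 *	P: post(workitems_pending)   C: wakes, wait(accesslock)
 *	                             C: dequeue, post(accesslock)
 */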
/* --------------------------------------------------------------------
 * Wait for a semaphore. Where the wait can be interrupted, it will
 * internally resume -- when this function returns, there is either no
 * semaphore at all, a timeout occurred, or the caller could
 * successfully take a token from the semaphore.
 *
 * For an untimed wait, not checking the result of this function at all
 * is definitely an option.
 */
	struct timespec *	timeout	/* wall-clock */

	struct timespec	delta;

	if (!(sem && sem->shnd)) {

	if (NULL == timeout) {

	getclock(TIMEOFDAY, &now);
	delta = sub_tspec(*timeout, now);
	if (delta.tv_sec < 0) {
	} else if ((delta.tv_sec + 1) >= (MAXDWORD / 1000)) {

		msec  = 1000 * (DWORD)delta.tv_sec;
		msec += delta.tv_nsec / (1000 * 1000);

	rc = WaitForSingleObject(sem->shnd, msec);
	if (WAIT_OBJECT_0 == rc)
	if (WAIT_TIMEOUT == rc) {

	msyslog(LOG_ERR, "WaitForSingleObject unexpected 0x%x", rc);

#else	/* pthreads wait_for_sem() follows */

		rc = sem_timedwait(sem, timeout);
	} while (rc == -1 && errno == EINTR);
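
/*
 * Note (editorial): sem_timedwait() takes an *absolute* CLOCK_REALTIME
 * deadline, so simply retrying after EINTR, as above, cannot stretch
 * the total waiting time; the original timeout still applies.
 */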
/* --------------------------------------------------------------------
 * blocking_thread - thread functions have WINAPI (aka 'stdcall')
 * calling conventions under Windows and POSIX-defined signature
 * otherwise.
 */
	exit_worker(blocking_child_common(c));

/* --------------------------------------------------------------------
 * req_child_exit() runs in the parent.
 *
 * This function is called from the idle timer, too, and possibly
 * without a thread being there any longer. Since we have folded up our
 * tent in that case and all the semaphores are already gone, we simply
 * ignore the request.
 *
 * Since the existence of the semaphores is controlled exclusively by
 * the parent, there's no risk of a data race here.
 */
	return (c->accesslock)
	    ? queue_req_pointer(c, CHILD_EXIT_REQ)
	    : 0;
808 * cleanup_after_child() runs in parent.
815 DEBUG_INSIST(!c->reusable);
818 /* The thread was not created in detached state, so we better
821 if (c->thread_ref && c->thread_ref->thnd) {
822 WaitForSingleObject(c->thread_ref->thnd, INFINITE);
823 INSIST(CloseHandle(c->thread_ref->thnd));
824 c->thread_ref->thnd = NULL;
827 c->thread_ref = NULL;
829 /* remove semaphores and (if signalling vi IO) pipes */
831 c->accesslock = delete_sema(c->accesslock);
832 c->workitems_pending = delete_sema(c->workitems_pending);
833 c->wake_scheduled_sleep = delete_sema(c->wake_scheduled_sleep);
836 DEBUG_INSIST(-1 != c->resp_read_pipe);
837 DEBUG_INSIST(-1 != c->resp_write_pipe);
838 (*addremove_io_fd)(c->resp_read_pipe, c->ispipe, TRUE);
839 close(c->resp_write_pipe);
840 close(c->resp_read_pipe);
841 c->resp_write_pipe = -1;
842 c->resp_read_pipe = -1;
844 DEBUG_INSIST(NULL != c->responses_pending);
845 (*addremove_io_semaphore)(c->responses_pending->shnd, TRUE);
846 c->responses_pending = delete_sema(c->responses_pending);
849 /* Is it necessary to check if there are pending requests and
850 * responses? If so, and if there are, what to do with them?
853 /* re-init buffer index sequencers */
854 c->head_workitem = 0;
855 c->tail_workitem = 0;
856 c->head_response = 0;
857 c->tail_response = 0;
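
/*
 * Note (editorial): only the semaphores, pipes and the thread handle
 * are torn down here; the request/response arrays are kept, and
 * resetting head/tail to zero gives them a clean slate so a later
 * start_blocking_thread() can reuse this blocking_child without stale
 * indices.
 */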

#else	/* !WORK_THREAD follows */
char work_thread_nonempty_compilation_unit;