1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 ******************************************************************************
19 * This implementation is based on a design by John Brooks (IBM Pok) which uses
20 * the z/OS sockets async i/o facility. When a
21 * socket is added to the pollset, an async poll is issued for that individual
22 * socket. It specifies that the kernel should send an IPC message when the
23 * socket becomes ready. The IPC messages are sent to a single message queue
24 * that is part of the pollset. apr_pollset_poll waits on the arrival of IPC
25 * messages or the specified timeout.
27 * Since z/OS does not support async i/o for pipes or files at present, this
28 * implementation falls back to using ordinary poll() when
29 * APR_POLLSET_THREADSAFE is unset.
39 #include "apr_portable.h"
40 #include "apr_arch_inherit.h"
41 #include "apr_arch_file_io.h"
42 #include "apr_arch_networkio.h"
43 #include "apr_arch_poll_private.h"
47 #include <sys/msg.h> /* msgget etc */
48 #include <time.h> /* timestruct */
49 #include <poll.h> /* pollfd */
50 #include <limits.h> /* INT_MAX */
/*
 * Provider-private state attached to an apr_pollset_t (reached as pollset->p).
 *
 * The members fall into two groups: those used when the pollset is
 * APR_POLLSET_THREADSAFE (message queue, ring lock, element hash and the
 * ready / prior-ready / free rings of asio_elem_t) and those used by the
 * plain poll() fallback (the pollfd array and query_set).  result_set is
 * used by both modes: it is the array handed back to the caller through the
 * 'descriptors' out-parameter of the poll functions.
 *
 * NOTE(review): other code in this file also references priv->elems and
 * priv->size; confirm their declarations against the full definition.
 */
52 struct apr_pollset_private_t
54 int msg_q; /* IPC message queue. The z/OS kernel sends messages
55 * to this queue when our async polls on individual
56 * file descriptors complete
58 apr_pollfd_t *result_set;
62 /* A thread mutex to protect operations on the rings and the hash */
63 apr_thread_mutex_t *ring_lock;
66 /* A hash of all active elements used for O(1) _remove operations */
69 APR_RING_HEAD(ready_ring_t, asio_elem_t) ready_ring;
70 APR_RING_HEAD(prior_ready_ring_t, asio_elem_t) prior_ready_ring;
71 APR_RING_HEAD(free_ring_t, asio_elem_t) free_ring;
73 /* for pipes etc with no asio */
74 struct pollfd *pollset;
75 apr_pollfd_t *query_set;
/*
 * asio_msgbuf_t is the message layout used on the pollset's IPC queue: a
 * message type (which must be positive) followed by a pointer to the
 * asio_elem_t the message refers to.  Each asio_elem_t embeds one such
 * message ('msg') together with its ring linkage ('link'), so the element
 * can be recovered from a received message via msg_elem.
 *
 * NOTE(review): the asio_elem_t members shown below are only part of that
 * structure; other code in this file also uses its a, os_pfd, pfd and state
 * members.
 */
84 typedef struct asio_elem_t asio_elem_t;
86 struct asio_msgbuf_t {
87 long msg_type; /* must be > 0 */
88 asio_elem_t *msg_elem;
93 APR_RING_ENTRY(asio_elem_t) link;
98 struct asio_msgbuf_t msg;
103 /* DEBUG settings: 0 - no debug messages at all,
104 * 1 - should not occur messages,
105 * 2 - apr_pollset_* entry and exit messages,
106 * 3 - state changes, memory usage,
107 * 4 - z/OS, APR, and internal calls,
108 * 5 - everything else except the timer pop path,
109 * 6 - everything, including the Event 1 sec timer pop path
111 * each DEBUG level includes all messages produced by lower numbered levels
117 #include <unistd.h> /* getpid */
/*
 * Debug trace macros.
 *
 * DBG_BUFF declares a 256-byte local buffer named dbg_msg_buff.
 * DBG_TEST(lvl) opens an "if (lvl <= DEBUG) {" block around the trace.
 * DBG_CORE .. DBG_CORE4 format the process id, the enclosing function name
 * and the caller's message (with 0 to 4 arguments) into dbg_msg_buff using
 * sprintf, then write the buffer to stderr.
 * DBG .. DBG4 combine the level test with the matching DBG_CORE macro; when
 * DEBUG is 0 they all expand to ((void)0).
 *
 * The format string is built by placing __FUNCTION__ next to string
 * literals, so these macros depend on the compiler treating __FUNCTION__ as
 * a string literal.
 *
 * NOTE(review): sprintf into the fixed 256-byte buffer is unbounded; a long
 * message could overflow it.  Consider snprintf if this is ever touched.
 */
119 #define DBG_BUFF char dbg_msg_buff[256];
121 #define DBG_TEST(lvl) if (lvl <= DEBUG) {
123 #define DBG_CORE(msg) sprintf(dbg_msg_buff, "% 8d " __FUNCTION__ \
124 " " msg, getpid()), \
125 fprintf(stderr, "%s", dbg_msg_buff);
126 #define DBG_CORE1(msg, var1) sprintf(dbg_msg_buff, "% 8d " __FUNCTION__ \
127 " " msg, getpid(), var1), \
128 fprintf(stderr, "%s", dbg_msg_buff);
129 #define DBG_CORE2(msg, var1, var2) sprintf(dbg_msg_buff, "% 8d " __FUNCTION__ \
130 " " msg, getpid(), var1, var2), \
131 fprintf(stderr, "%s", dbg_msg_buff);
132 #define DBG_CORE3(msg, var1, var2, var3) \
133 sprintf(dbg_msg_buff, "% 8d " __FUNCTION__ \
134 " " msg, getpid(), var1, var2, var3), \
135 fprintf(stderr, "%s", dbg_msg_buff);
136 #define DBG_CORE4(msg, var1, var2, var3, var4) \
137 sprintf(dbg_msg_buff, "% 8d " __FUNCTION__ \
138 " " msg, getpid(), var1, var2, var3, var4),\
139 fprintf(stderr, "%s", dbg_msg_buff);
143 #define DBG(lvl, msg) DBG_TEST(lvl) \
147 #define DBG1(lvl, msg, var1) DBG_TEST(lvl) \
148 DBG_CORE1(msg, var1) \
151 #define DBG2(lvl, msg, var1, var2) DBG_TEST(lvl) \
152 DBG_CORE2(msg, var1, var2) \
155 #define DBG3(lvl, msg, var1, var2, var3) \
157 DBG_CORE3(msg, var1, var2, var3) \
160 #define DBG4(lvl, msg, var1, var2, var3, var4) \
162 DBG_CORE4(msg, var1, var2, var3, var4) \
165 #else /* DEBUG is 0 */
167 #define DBG(lvl, msg) ((void)0)
168 #define DBG1(lvl, msg, var1) ((void)0)
169 #define DBG2(lvl, msg, var1, var2) ((void)0)
170 #define DBG3(lvl, msg, var1, var2, var3) ((void)0)
171 #define DBG4(lvl, msg, var1, var2, var3, var4) ((void)0)
/*
 * asyncio: submit the request described by the aiocb 'a' through the AIO()
 * service, passing the size of the aiocb, the aiocb itself, and addresses
 * that receive the return value and errno, plus the address returned by
 * __err2ad().  Two level-4 debug traces report the outcome.
 *
 * NOTE(review): the statement producing this function's return value is not
 * shown here.  Callers in this file compare the result against 0, so confirm
 * the exact return convention against the full definition.
 */
175 static int asyncio(struct aiocb *a)
186 AIO(sizeof(struct aiocb), a, &rv, &errno, __err2ad());
187 DBG2(4, "BPX4AIO aiocb %p rv %d\n",
191 DBG2(4, "errno %d errnojr %08x\n",
/*
 * get_event: derive the event mask stored in a struct pollfd 'events' field
 * from an APR request mask.  The input is tested bit by bit for APR_POLLIN,
 * APR_POLLPRI, APR_POLLOUT, APR_POLLERR, APR_POLLHUP and APR_POLLNVAL.
 *
 * NOTE(review): presumably each test sets the matching native POLL* bit in
 * the returned value; confirm against the full definition.
 */
198 static apr_int16_t get_event(apr_int16_t event)
204 if (event & APR_POLLIN)
206 if (event & APR_POLLPRI)
208 if (event & APR_POLLOUT)
210 if (event & APR_POLLERR)
212 if (event & APR_POLLHUP)
214 if (event & APR_POLLNVAL)
/*
 * get_revent: translate a native 'revents' mask reported for a struct pollfd
 * into the value stored in an apr_pollfd_t 'rtnevents' field.
 *
 * NOTE(review): only the POLLNVAL test is shown here; presumably the other
 * native POLL* bits are handled the same way - confirm against the full
 * definition.
 */
221 static apr_int16_t get_revent(apr_int16_t event)
237 if (event & POLLNVAL)
/*
 * asio_pollset_cleanup: release the kernel resource held by the pollset.
 * For an APR_POLLSET_THREADSAFE pollset the IPC message queue is removed
 * with msgctl(IPC_RMID); the result of that call is traced at debug level 4.
 * A pollset without that flag has no message queue to remove.
 */
244 static apr_status_t asio_pollset_cleanup(apr_pollset_t *pollset)
250 if (pollset->flags & APR_POLLSET_THREADSAFE) {
251 rv = msgctl(pollset->p->msg_q, IPC_RMID, NULL);
252 DBG1(4, "asio_pollset_cleanup: msgctl(IPC_RMID) returned %d\n", rv);
/*
 * asio_pollset_create: initialize the provider-private part of 'pollset'.
 *
 * The private structure is zero-allocated from pool 'p'.  What else is set
 * up depends on 'flags':
 *   - APR_POLLSET_THREADSAFE set: create the ring_lock mutex, obtain an IPC
 *     message queue with msgget(IPC_PRIVATE, user read/write), create the
 *     elems hash and initialize the free and prior-ready rings.  A separate
 *     preprocessor branch covers an APR built without threads.
 *   - otherwise: allocate the pollfd array and the query_set array, each
 *     with 'size' entries, for use with ordinary poll().
 * In both modes the flags are recorded in the pollset and a result_set of
 * 'size' entries is allocated; if that allocation fails for a threadsafe
 * pollset the message queue is removed again.
 *
 * NOTE(review): only part of the parameter list and of the error paths is
 * shown here, so the exact status codes returned on failure are left
 * undocumented - confirm against the full definition.
 */
258 static apr_status_t asio_pollset_create(apr_pollset_t *pollset,
265 apr_pollset_private_t *priv;
267 DBG1(2, "entered, flags: %x\n", flags);
269 priv = pollset->p = apr_pcalloc(p, sizeof(*priv));
271 if (flags & APR_POLLSET_THREADSAFE) {
273 if ((rv = apr_thread_mutex_create(&(priv->ring_lock),
274 APR_THREAD_MUTEX_DEFAULT,
275 p)) != APR_SUCCESS) {
276 DBG1(1, "apr_thread_mutex_create returned %d\n", rv);
280 rv = msgget(IPC_PRIVATE, S_IWUSR+S_IRUSR); /* user r/w perms */
283 perror(__FUNCTION__ " msgget returned < 0 ");
289 DBG2(4, "pollset %p msgget was OK, rv=%d\n", pollset, rv);
291 priv->elems = apr_hash_make(p);
293 APR_RING_INIT(&priv->free_ring, asio_elem_t, link);
294 APR_RING_INIT(&priv->prior_ready_ring, asio_elem_t, link);
296 #else /* APR doesn't have threads but caller wants a threadsafe pollset */
301 } else { /* APR_POLLSET_THREADSAFE not set, i.e. no async i/o,
302 * init fields only needed in old style pollset
305 priv->pollset = apr_palloc(p, size * sizeof(struct pollfd));
306 priv->query_set = apr_palloc(p, size * sizeof(apr_pollfd_t));
308 if ((!priv->pollset) || (!priv->query_set)) {
315 pollset->flags = flags;
318 priv->result_set = apr_palloc(p, size * sizeof(apr_pollfd_t));
319 if (!priv->result_set) {
320 if (flags & APR_POLLSET_THREADSAFE) {
321 msgctl(priv->msg_q, IPC_RMID, NULL);
327 DBG2(2, "exiting, pollset: %p, type: %s\n",
329 flags & APR_POLLSET_THREADSAFE ? "async" : "POSIX");
334 } /* end of asio_pollset_create */
/*
 * posix_add: add 'descriptor' to a pollset that uses ordinary poll().
 *
 * The next free slot is index pollset->nelts.  A copy of the descriptor is
 * stored in query_set at that index, and the matching struct pollfd gets the
 * file descriptor (taken from the socket or from the file, depending on
 * desc_type) and the event mask produced by get_event().
 *
 * A full pollset (nelts equal to the private 'size') is detected before
 * anything is stored.
 *
 * NOTE(review): the status returned for a full pollset and the increment of
 * nelts are not shown here - confirm against the full definition.
 */
336 static apr_status_t posix_add(apr_pollset_t *pollset,
337 const apr_pollfd_t *descriptor)
341 apr_pool_t *p = pollset->pool;
342 apr_pollset_private_t *priv = pollset->p;
346 if (pollset->nelts == priv->size) {
350 priv->query_set[pollset->nelts] = *descriptor;
351 if (descriptor->desc_type == APR_POLL_SOCKET) {
352 fd = descriptor->desc.s->socketdes;
355 fd = descriptor->desc.f->filedes;
358 priv->pollset[pollset->nelts].fd = fd;
360 priv->pollset[pollset->nelts].events =
361 get_event(descriptor->reqevents);
365 DBG2(4, "exiting, fd %d added to pollset %p\n", fd, pollset);
368 } /* end of posix_add */
/*
 * asio_pollset_add: add 'descriptor' to the pollset.
 *
 * The rings are locked for the duration of the call.
 *
 * For an APR_POLLSET_THREADSAFE pollset an asio_elem_t is taken from the
 * free ring when one is available, otherwise it is zero-allocated from the
 * pollset's pool.  Its aiocb is set up as an AIO_SELPOLL request for one
 * pollfd (the descriptor's socket) that must not complete inline and that
 * notifies completion through the pollset's message queue.  The request is
 * then submitted with asyncio(); on success the element is entered in the
 * elems hash keyed by its file descriptor.
 *
 * For any other pollset the work is delegated to posix_add().
 *
 * NOTE(review): the failure trace prints rv (an apr_status_t) with a "%p"
 * conversion; the conversion does not match the argument type.
 * NOTE(review): the socket is read through descriptor->desc.s; whether
 * desc_type is checked first is not shown here - confirm.
 */
371 static apr_status_t asio_pollset_add(apr_pollset_t *pollset,
372 const apr_pollfd_t *descriptor)
376 apr_status_t rv = APR_SUCCESS;
377 apr_pollset_private_t *priv = pollset->p;
379 pollset_lock_rings();
382 if (pollset->flags & APR_POLLSET_THREADSAFE) {
384 if (!APR_RING_EMPTY(&(priv->free_ring), asio_elem_t, link)) {
385 elem = APR_RING_FIRST(&(priv->free_ring));
386 APR_RING_REMOVE(elem, link);
387 DBG1(3, "used recycled memory at %08p\n", elem);
388 elem->state = ASIO_INIT;
389 elem->a.aio_cflags = 0;
392 elem = (asio_elem_t *) apr_pcalloc(pollset->pool, sizeof(asio_elem_t));
393 DBG1(3, "alloced new memory at %08p\n", elem);
/* Completion is reported by posting elem->msg to the pollset's queue. */
395 elem->a.aio_notifytype = AIO_MSGQ;
396 elem->a.aio_msgev_qid = priv->msg_q;
397 DBG1(5, "aio_msgev_quid = %d \n", elem->a.aio_msgev_qid);
398 elem->a.aio_msgev_size = sizeof(asio_elem_t *);
399 elem->a.aio_msgev_flag = 0; /* wait if queue is full */
400 elem->a.aio_msgev_addr = &(elem->msg);
401 elem->a.aio_buf = &(elem->os_pfd);
402 elem->a.aio_nbytes = 1; /* number of pfds to poll */
403 elem->msg.msg_type = 1;
404 elem->msg.msg_elem = elem;
407 /* z/OS only supports async I/O for sockets for now */
408 elem->os_pfd.fd = descriptor->desc.s->socketdes;
410 APR_RING_ELEM_INIT(elem, link);
411 elem->a.aio_cmd = AIO_SELPOLL;
412 elem->a.aio_cflags &= ~AIO_OK2COMPIMD; /* not OK to complete inline*/
413 elem->pfd = *descriptor;
414 elem->os_pfd.events = get_event(descriptor->reqevents);
416 if (0 != asyncio(&elem->a)) {
418 DBG3(4, "pollset %p asio failed fd %d, errno %p\n",
419 pollset, elem->os_pfd.fd, rv);
421 perror(__FUNCTION__ " asio failure");
425 DBG2(4, "good asio call, adding fd %d to pollset %p\n",
426 elem->os_pfd.fd, pollset);
/* Key the element by fd so asio_pollset_remove can find it again. */
429 apr_hash_set(priv->elems, &(elem->os_pfd.fd), sizeof(int), elem);
433 /* APR_POLLSET_THREADSAFE isn't set. use POSIX poll in case
434 * pipes or files are used with this pollset
437 rv = posix_add(pollset, descriptor);
440 DBG1(2, "exiting, rv = %d\n", rv);
442 pollset_unlock_rings();
444 } /* end of asio_pollset_add */
/*
 * posix_remove: remove 'descriptor' from a pollset that uses ordinary poll().
 *
 * The query_set is searched for an entry whose desc.s pointer equals the
 * descriptor's.  When one is found, that entry and any later copies are
 * dropped by compacting both the pollfd array and the query_set in place:
 * surviving entries are copied down to index 'dst'.  Debug traces indicate
 * an OK result when a match was found and APR_NOTFOUND otherwise.
 *
 * NOTE(review): the function is declared with no return type, so it is
 * implicitly int, yet asio_pollset_remove returns its value as an
 * apr_status_t.  It should be declared "static apr_status_t".
 * NOTE(review): the match compares desc.s even for file descriptors;
 * presumably this relies on desc being a union of pointers - confirm.
 */
446 static posix_remove(apr_pollset_t *pollset, const apr_pollfd_t *descriptor)
450 apr_pollset_private_t *priv = pollset->p;
453 for (i = 0; i < pollset->nelts; i++) {
454 if (descriptor->desc.s == priv->query_set[i].desc.s) {
455 /* Found an instance of the fd: remove this and any other copies */
456 apr_uint32_t dst = i;
457 apr_uint32_t old_nelts = pollset->nelts;
459 for (i++; i < old_nelts; i++) {
460 if (descriptor->desc.s == priv->query_set[i].desc.s) {
464 priv->pollset[dst] = priv->pollset[i];
465 priv->query_set[dst] = priv->query_set[i];
469 DBG(4, "returning OK\n");
474 DBG(1, "returning APR_NOTFOUND\n");
477 } /* end of posix_remove */
/*
 * asio_pollset_remove: remove 'descriptor' from the pollset.
 *
 * A pollset without APR_POLLSET_THREADSAFE is handled by posix_remove().
 *
 * Otherwise the rings are locked and the descriptor, which must be a socket
 * (enforced with assert), is looked up in the elems hash by its file
 * descriptor.  A found element is deleted from the hash.  If its async poll
 * is still outstanding (state ASIO_INIT) an AIO_CANCEL request pointing at
 * the original aiocb is issued through asyncio().  The element is marked
 * ASIO_REMOVED; its memory is not recycled here, because the original
 * request is expected to arrive on the message queue first.
 *
 * NOTE(review): the status returned when the fd is not in the hash, and the
 * handling of a failed cancel, are not shown here - confirm against the
 * full definition.
 */
479 static apr_status_t asio_pollset_remove(apr_pollset_t *pollset,
480 const apr_pollfd_t *descriptor)
484 apr_status_t rv = APR_SUCCESS;
485 apr_pollset_private_t *priv = pollset->p;
486 /* AIO_CANCEL is synchronous, so autodata works fine. */
487 struct aiocb cancel_a = {0};
493 if (!(pollset->flags & APR_POLLSET_THREADSAFE)) {
494 return posix_remove(pollset, descriptor);
497 pollset_lock_rings();
500 assert(descriptor->desc_type == APR_POLL_SOCKET);
502 /* z/OS 1.12 doesn't support files for async i/o */
503 fd = descriptor->desc.s->socketdes;
505 elem = apr_hash_get(priv->elems, &(fd), sizeof(int));
507 DBG1(1, "couldn't find fd %d\n", fd);
510 DBG1(5, "hash found fd %d\n", fd);
511 /* delete this fd from the hash */
512 apr_hash_set(priv->elems, &(fd), sizeof(int), NULL);
514 if (elem->state == ASIO_INIT) {
515 /* asyncio call to cancel */
516 cancel_a.aio_cmd = AIO_CANCEL;
517 cancel_a.aio_buf = &elem->a; /* point to original aiocb */
519 cancel_a.aio_cflags = 0;
520 cancel_a.aio_cflags2 = 0;
522 /* we want the original aiocb to show up on the pollset message queue
523 * before recycling its memory to eliminate race conditions
526 rv = asyncio(&cancel_a);
527 DBG1(4, "asyncio returned %d\n", rv);
533 elem->state = ASIO_REMOVED;
537 DBG1(2, "exiting, rv: %d\n", rv);
539 pollset_unlock_rings();
542 } /* end of asio_pollset_remove */
/*
 * posix_poll: wait for events on a pollset that uses ordinary poll().
 *
 * poll() is called on the private pollfd array with pollset->nelts entries.
 * A failure is reported via apr_get_netos_error().  Otherwise every entry
 * with a non-zero revents is copied from query_set into result_set, with
 * rtnevents set from get_revent(), and *descriptors is pointed at
 * result_set.
 *
 * NOTE(review): the function is declared with no return type, so it is
 * implicitly int, yet asio_pollset_poll returns its value as an
 * apr_status_t.  It should be declared "static apr_status_t".
 * NOTE(review): 'timeout' is an apr_interval_time_t and is handed to poll(),
 * which takes milliseconds; any unit conversion is not shown here - confirm
 * one is performed before the call.
 */
544 static posix_poll(apr_pollset_t *pollset,
545 apr_interval_time_t timeout,
547 const apr_pollfd_t **descriptors)
552 apr_pollset_private_t *priv = pollset->p;
559 rv = poll(priv->pollset, pollset->nelts, timeout);
562 return apr_get_netos_error();
568 for (i = 0; i < pollset->nelts; i++) {
569 if (priv->pollset[i].revents != 0) {
570 priv->result_set[j] = priv->query_set[i];
571 priv->result_set[j].rtnevents =
572 get_revent(priv->pollset[i].revents);
577 *descriptors = priv->result_set;
579 DBG(4, "exiting ok\n");
582 } /* end of posix_poll */
/*
 * process_msg: act on one message received from the pollset's IPC queue.
 *
 * The message carries a pointer to the asio_elem_t it refers to, and the
 * element's state decides what happens:
 *   - an element that was cancelled is appended to the free ring so its
 *     memory can be reused;
 *   - an element that is still wanted is marked ASIO_COMPLETE and appended
 *     to the ready ring;
 *   - any other state is reported at debug level 1 as unexpected.
 *
 * NOTE(review): the case labels are not shown here; presumably they are
 * ASIO_REMOVED and ASIO_INIT respectively - confirm.
 * NOTE(review): the function is declared with no return type, so it is
 * implicitly int; callers in this file ignore its result.  Declare it
 * "static void" (or give it an explicit type).
 * The rings are modified without locking here; the callers in this file
 * hold the ring lock around the call.
 */
584 static process_msg(apr_pollset_t *pollset, struct asio_msgbuf_t *msg)
587 asio_elem_t *elem = msg->msg_elem;
589 switch(elem->state) {
591 DBG2(5, "for cancelled elem, recycling memory - elem %08p, fd %d\n",
592 elem, elem->os_pfd.fd);
593 APR_RING_INSERT_TAIL(&(pollset->p->free_ring), elem,
597 DBG2(4, "adding to ready ring: elem %08p, fd %d\n",
598 elem, elem->os_pfd.fd);
599 elem->state = ASIO_COMPLETE;
600 APR_RING_INSERT_TAIL(&(pollset->p->ready_ring), elem,
604 DBG3(1, "unexpected state: elem %08p, fd %d, state %d\n",
605 elem, elem->os_pfd.fd, elem->state);
/*
 * asio_pollset_poll: wait for events on the pollset and report them through
 * 'descriptors'.
 *
 * A pollset without APR_POLLSET_THREADSAFE is handled by posix_poll().
 * Otherwise the call proceeds in these steps, holding the ring lock except
 * while blocked:
 *   1. The ready ring is reset.
 *   2. Every element left on the prior-ready ring by the previous call is
 *      taken off it.  Elements marked ASIO_REMOVED go to the free ring; the
 *      others have their async poll re-issued with inline completion
 *      allowed.  A non-zero asyncio() result is either an inline completion
 *      (the element goes on the ready ring as ASIO_COMPLETE) or a failure
 *      (the lock is released on that path).
 *   3. Completion messages already on the queue are received and passed to
 *      process_msg().
 *   4. If nothing is ready, the lock is dropped and __msgrcv_timed() blocks
 *      for the requested timeout, or for INT_MAX seconds when blocking
 *      until something is ready.  A receive that fails with EAGAIN is
 *      reported as APR_TIMEUP, any other failure as the errno value.  On
 *      success the lock is retaken and the message is processed.
 *   5. The prior-ready ring is reset, ready elements are copied into
 *      result_set with rtnevents taken from get_revent(), *descriptors is
 *      pointed at result_set, and the ready ring is concatenated onto the
 *      prior-ready ring for the next call.
 *
 * NOTE(review): in step 4 errno is read after the DBG traces, which call
 * fprintf when debugging is enabled; the value may no longer be the one set
 * by __msgrcv_timed().  Save errno right after the failing call.
 * NOTE(review): how many results are reported per call, and how 'num' is
 * set, are not shown here - confirm against the full definition.
 */
612 static apr_status_t asio_pollset_poll(apr_pollset_t *pollset,
613 apr_interval_time_t timeout,
615 const apr_pollfd_t **descriptors)
619 asio_elem_t *elem, *next_elem;
620 struct asio_msgbuf_t msg_buff;
622 apr_status_t rv = APR_SUCCESS;
623 apr_pollset_private_t *priv = pollset->p;
625 DBG(6, "entered\n"); /* chatty - traces every second w/Event */
627 if ((pollset->flags & APR_POLLSET_THREADSAFE) == 0 ) {
628 return posix_poll(pollset, timeout, num, descriptors);
631 pollset_lock_rings();
/* Each call starts with an empty ready ring. */
632 APR_RING_INIT(&(priv->ready_ring), asio_elem_t, link);
634 while (!APR_RING_EMPTY(&(priv->prior_ready_ring), asio_elem_t, link)) {
635 elem = APR_RING_FIRST(&(priv->prior_ready_ring));
636 DBG3(5, "pollset %p elem %p fd %d on prior ready ring\n",
641 APR_RING_REMOVE(elem, link);
644 * since USS does not remember what's in our pollset, we have
645 * to re-add fds which have not been apr_pollset_remove'd
647 * there may have been too many ready fd's to return in the
648 * result set last time. re-poll inline for both cases
651 if (elem->state == ASIO_REMOVED) {
654 * async i/o is done since it was found on prior_ready
655 * the state says the caller is done with it too
656 * so recycle the elem
659 APR_RING_INSERT_TAIL(&(priv->free_ring), elem,
661 continue; /* do not re-add if it has been _removed */
664 elem->state = ASIO_INIT;
665 elem->a.aio_cflags = AIO_OK2COMPIMD;
667 if (0 != (ret = asyncio(&elem->a))) {
669 DBG(4, "asyncio() completed inline\n");
671 elem->state = ASIO_COMPLETE;
672 APR_RING_INSERT_TAIL(&(priv->ready_ring), elem, asio_elem_t,
676 DBG2(1, "asyncio() failed, ret: %d, errno: %d\n",
678 pollset_unlock_rings();
682 DBG1(4, "asyncio() completed rc %d\n", ret);
685 DBG(6, "after prior ready loop\n"); /* chatty w/timeouts, hence 6 */
687 /* Gather async poll completions that have occurred since the last call */
688 while (0 < msgrcv(priv->msg_q, &msg_buff, sizeof(asio_elem_t *), 0,
690 process_msg(pollset, &msg_buff);
693 /* Suspend if nothing is ready yet. */
694 if (APR_RING_EMPTY(&(priv->ready_ring), asio_elem_t, link)) {
697 tv.tv_sec = apr_time_sec(timeout);
698 tv.tv_nsec = apr_time_usec(timeout) * 1000;
700 tv.tv_sec = INT_MAX; /* block until something is ready */
703 DBG2(6, "nothing on the ready ring "
704 "- blocking for %d seconds %d ns\n",
705 tv.tv_sec, tv.tv_nsec);
707 pollset_unlock_rings(); /* allow other apr_pollset_* calls while blocked */
709 if (0 >= (ret = __msgrcv_timed(priv->msg_q, &msg_buff,
710 sizeof(asio_elem_t *), 0, NULL, &tv))) {
712 if (errno == EAGAIN) {
713 DBG(6, "__msgrcv_timed timed out\n"); /* timeout path, so 6 */
716 DBG(1, "__msgrcv_timed failed!\n");
719 return (errno == EAGAIN) ? APR_TIMEUP : errno;
722 pollset_lock_rings();
724 process_msg(pollset, &msg_buff);
727 APR_RING_INIT(&priv->prior_ready_ring, asio_elem_t, link);
730 elem = APR_RING_FIRST(&(priv->ready_ring));
735 && elem != APR_RING_SENTINEL(&(priv->ready_ring), asio_elem_t, link);
737 DBG2(5, "ready ring: elem %08p, fd %d\n", elem, elem->os_pfd.fd);
739 priv->result_set[i] = elem->pfd;
740 priv->result_set[i].rtnevents
741 = get_revent(elem->os_pfd.revents);
744 elem = APR_RING_NEXT(elem, link);
747 if (elem == APR_RING_SENTINEL(&(priv->ready_ring), asio_elem_t, link)) {
748 DBG(5, "end of ready ring reached\n");
754 *descriptors = priv->result_set;
757 /* if the result size is too small, remember which descriptors
758 * haven't had results reported yet. we will look
759 * at these descriptors on the next apr_pollset_poll call
762 APR_RING_CONCAT(&priv->prior_ready_ring, &(priv->ready_ring), asio_elem_t, link);
764 DBG1(2, "exiting, rv = %d\n", rv);
766 pollset_unlock_rings();
769 } /* end of asio_pollset_poll */
/*
 * Provider table for this pollset implementation, exported to the rest of
 * APR through apr_pollset_provider_aio_msgq.
 *
 * NOTE(review): only the cleanup entry is shown here; presumably the other
 * slots hold the create/add/remove/poll functions defined in this file -
 * confirm against the full initializer.
 */
771 static const apr_pollset_provider_t impl = {
776 asio_pollset_cleanup,
780 const apr_pollset_provider_t *apr_pollset_provider_aio_msgq = &impl;
782 #endif /* HAVE_AIO_MSGQ */