From cdbc8067a2e27ee7f9e0d8ff11cf415603fc0558 Mon Sep 17 00:00:00 2001
From: Andre Noll
Date: Sun, 10 Oct 2021 22:18:24 +0200
Subject: [PATCH] Switch from select(2) to poll(2).

The select(2) API is effectively obsolete because it does not work for
file descriptors greater than or equal to 1024. The general advice is
to switch to poll(2), which offers equivalent functionality and does
not suffer from this restriction. This patch implements this switch.

The fd sets of select(2) have one nice feature: one can determine in
O(1) time whether the bit for a given fd is turned on in an fd set. For
poll(2), the monitored file descriptors are organized in an array of
struct pollfd. Without information about the given fd's index in the
pollfd array, one can only perform a linear search, which requires O(n)
time, with n being the number of fds being watched. Since this would
have to be done for each fd, the running time becomes quadratic in the
number of monitored fds, which is bad. Keeping the pollfd array sorted
would reduce that to n * log(n) at the cost of additional work at
insert time.

This patch implements a different approach. The scheduler now maintains
an additional array of unsigned integers which maps fds to indices into
the pollfd array. This new index array is transparent to the individual
tasks, which still simply pass one or more fds from their
->pre_monitor() method to the scheduler. The length of the index array
equals the highest fd given. This might become prohibitive in theory,
but should not be an issue for the time being.

Care needs to be taken to deal with callers which ask for the readiness
of an fd without having called sched_monitor_readfd() or
sched_monitor_writefd() in the ->pre_monitor() step. Before the patch,
thanks to the FD_ZERO() call at the beginning of each iteration of the
scheduler's main loop, both sched_read_ok() and sched_write_ok()
returned false for fds which were not asked to be watched. We need to
keep it this way for a seamless transition. We achieve this by
replacing the FD_ZERO() call with a memset(3) call which fills the
index array with 0xff bytes. Both sched_read_ok() and sched_write_ok()
call the new get_revents() helper, which checks the fd argument against
the allocation sizes of the two arrays. If either function is called
with an fd that was not asked to be monitored in the ->pre_monitor()
step, these checks notice that the index of this fd, 0xffffffff,
exceeds the number of monitored fds, and we return "not ready for I/O".

Another issue is the case where the same file descriptor is submitted
twice in ->pre_monitor() to check for readiness with respect to both
reading and writing. The code in client_common.c currently does that.
To keep it working, the scheduler needs to detect this case and re-use
the existing slot in both arrays.
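To make the index array idea concrete, here is a deliberately
simplified, standalone sketch, not the code added by this patch: it
uses fixed-size arrays, omits the dynamic growth of both arrays and
does not merge duplicate fds. The names MAX_FDS, reset_iteration(),
monitor_fd() and fd_ready() are invented for the illustration; the real
implementation is add_pollfd() and get_revents() in sched.c below.

    #include <assert.h>
    #include <poll.h>
    #include <stdbool.h>
    #include <string.h>

    #define MAX_FDS 64 /* illustration only; the real arrays grow on demand */

    static struct pollfd pfd[MAX_FDS]; /* the array handed to poll(2) */
    static unsigned pidx[MAX_FDS];     /* maps fd -> index into pfd[] */
    static unsigned num_pfds;          /* pfd[] slots used this iteration */

    static void reset_iteration(void)
    {
            num_pfds = 0;
            /* 0xff bytes turn every index into a huge, invalid value */
            memset(pidx, 0xff, sizeof(pidx));
    }

    static void monitor_fd(int fd, short events)
    {
            assert(fd >= 0 && fd < MAX_FDS && num_pfds < MAX_FDS);
            pidx[fd] = num_pfds;
            pfd[num_pfds].fd = fd;
            pfd[num_pfds].events = events;
            pfd[num_pfds].revents = 0;
            num_pfds++;
    }

    /* O(1) lookup: no linear search through pfd[] is needed */
    static bool fd_ready(int fd, short events)
    {
            if (fd < 0 || fd >= MAX_FDS)
                    return false;
            if (pidx[fd] >= num_pfds) /* sentinel: fd was never monitored */
                    return false;
            return pfd[pidx[fd]].revents & events;
    }

One iteration of the main loop would call reset_iteration(), let each
task register its fds with monitor_fd(), call poll(pfd, num_pfds,
timeout), and finally answer the tasks' readiness queries in O(1) with
fd_ready().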
--- audioc.c | 2 +- audiod.c | 2 +- client.c | 2 +- fd.c | 48 +++++---------- fd.h | 2 +- interactive.c | 25 ++++---- interactive.h | 2 +- para.h | 2 + play.c | 4 +- sched.c | 155 +++++++++++++++++++++++++++++++++++------------- sched.h | 38 ++++++------ server.c | 15 +++-- wmadec_filter.c | 1 - 13 files changed, 171 insertions(+), 127 deletions(-) diff --git a/audioc.c b/audioc.c index 932f0d30..2506c3f8 100644 --- a/audioc.c +++ b/audioc.c @@ -250,7 +250,7 @@ __noreturn static void interactive_session(void) sigemptyset(&act.sa_mask); act.sa_flags = 0; sigaction(SIGINT, &act, NULL); - sched.select_function = i9e_select; + sched.poll_function = i9e_poll; sched.default_timeout = 1000; ret = i9e_open(&ici, &sched); diff --git a/audiod.c b/audiod.c index 311462e6..a084558b 100644 --- a/audiod.c +++ b/audiod.c @@ -123,7 +123,7 @@ enum vss_status_flags { * This is needed also in audiod_command.c (for the tasks command), so it can * not be made static. */ -struct sched sched = {.max_fileno = 0}; +struct sched sched = {.timeout = 0}; /* The task for obtaining para_server's status (para_client stat). */ struct status_task { diff --git a/client.c b/client.c index 827ff215..ed0d5e02 100644 --- a/client.c +++ b/client.c @@ -532,7 +532,7 @@ __noreturn static void interactive_session(void) sigemptyset(&act.sa_mask); act.sa_flags = 0; sigaction(SIGINT, &act, NULL); - sched.select_function = i9e_select; + sched.poll_function = i9e_poll; ret = i9e_open(&ici, &sched); if (ret < 0) diff --git a/fd.c b/fd.c index b8d1062d..800106e1 100644 --- a/fd.c +++ b/fd.c @@ -6,7 +6,6 @@ #include #include #include -#include #include "para.h" #include "error.h" @@ -309,37 +308,6 @@ bool file_exists(const char *fn) return !stat(fn, &statbuf); } -/** - * Paraslash's wrapper for select(2). - * - * It calls select(2) (with no exceptfds) and starts over if select() was - * interrupted by a signal. - * - * \param n The highest-numbered descriptor in any of the two sets, plus 1. - * \param readfds fds that should be checked for readability. - * \param writefds fds that should be checked for writablility. - * \param timeout Upper bound in milliseconds. - * - * \return The return value of the underlying select() call on success, the - * negative system error code on errors. - * - * All arguments are passed verbatim to select(2). - * \sa select(2) select_tut(2). - */ -int para_select(int n, fd_set *readfds, fd_set *writefds, int timeout) -{ - int ret; - struct timeval tv; - - ms2tv(timeout, &tv); - do - ret = select(n, readfds, writefds, NULL, &tv); - while (ret < 0 && errno == EINTR); - if (ret < 0) - return -ERRNO_TO_PARA_ERROR(errno); - return ret; -} - /** * Set a file descriptor to blocking mode. * @@ -599,7 +567,21 @@ int para_munmap(void *start, size_t length) return -ERRNO_TO_PARA_ERROR(err); } -static int xpoll(struct pollfd *fds, nfds_t nfds, int timeout) +/** + * Simple wrapper for poll(2). + * + * It calls poll(2) and starts over if the call was interrupted by a signal. + * + * \param fds See poll(2). + * \param nfds See poll(2). + * \param timeout See poll(2). + * + * \return The return value of the underlying poll() call on success, the + * negative paraslash error code on errors. + * + * All arguments are passed verbatim to poll(2). 
+ */ +int xpoll(struct pollfd *fds, nfds_t nfds, int timeout) { int ret; diff --git a/fd.h b/fd.h index ea6307b2..270d0ce2 100644 --- a/fd.h +++ b/fd.h @@ -6,7 +6,7 @@ int xrename(const char *oldpath, const char *newpath); int write_all(int fd, const char *buf, size_t len); __printf_2_3 int write_va_buffer(int fd, const char *fmt, ...); bool file_exists(const char *); -int para_select(int n, fd_set *readfds, fd_set *writefds, int timeout); +int xpoll(struct pollfd *fds, nfds_t nfds, int timeout); __must_check int mark_fd_nonblocking(int fd); __must_check int mark_fd_blocking(int fd); int para_mmap(size_t length, int prot, int flags, int fd, void *map); diff --git a/interactive.c b/interactive.c index 28937db4..812a7d5b 100644 --- a/interactive.c +++ b/interactive.c @@ -593,26 +593,21 @@ void i9e_signal_dispatch(int sig_num) } /** - * Wrapper for select(2) which does not restart on interrupts. + * Wrapper for poll(2) which handles EINTR and returns paraslash error codes. * - * \param n \sa \ref para_select(). - * \param readfds \sa \ref para_select(). - * \param writefds \sa \ref para_select(). - * \param timeout \sa \ref para_select(). + * \param fds See poll(2). + * \param nfds See poll(2). + * \param timeout See poll(2). * - * \return \sa \ref para_select(). + * \return See poll(2). * - * The only difference between this function and \ref para_select() is that - * \ref i9e_select() returns zero if the select call returned \p EINTR. + * The only difference between this function and \ref xpoll() is that \ref + * i9e_poll() returns zero if the system call was interrupted while xpoll() + * restarts the system call in this case. */ -int i9e_select(int n, fd_set *readfds, fd_set *writefds, int timeout) +int i9e_poll(struct pollfd *fds, nfds_t nfds, int timeout) { - struct timeval tv; - int ret; - - ms2tv(timeout, &tv); - ret = select(n, readfds, writefds, NULL, &tv); - + int ret = poll(fds, nfds, timeout); if (ret < 0) { if (errno == EINTR) ret = 0; diff --git a/interactive.h b/interactive.h index 4253f79c..53b1ad34 100644 --- a/interactive.h +++ b/interactive.h @@ -84,7 +84,7 @@ void i9e_print_status_bar(char *buf, unsigned len); void i9e_close(void); void i9e_signal_dispatch(int sig_num); __printf_2_3 void i9e_log(int ll, const char* fmt,...); -int i9e_select(int n, fd_set *readfds, fd_set *writefds, int timeout); +int i9e_poll(struct pollfd *fds, nfds_t nfds, int timeout); int i9e_extract_completions(const char *word, char **string_list, char ***result); char **i9e_complete_commands(const char *word, struct i9e_completer *completers); diff --git a/para.h b/para.h index b406818b..2525bee6 100644 --- a/para.h +++ b/para.h @@ -20,6 +20,8 @@ #include #include #include +#include + #include "gcc-compat.h" /** used in various contexts */ diff --git a/play.c b/play.c index 48da675c..66383ebe 100644 --- a/play.c +++ b/play.c @@ -117,7 +117,7 @@ INIT_STDERR_LOGGING(loglevel); char *stat_item_values[NUM_STAT_ITEMS] = {NULL}; -static struct sched sched = {.max_fileno = 0}; +static struct sched sched; static struct play_task play_task, *pt = &play_task; #define AFH_RECV_CMD (lls_cmd(LSG_RECV_CMD_CMD_AFH, recv_cmd_suite)) @@ -1075,7 +1075,7 @@ static void session_open(void) sigemptyset(&act.sa_mask); act.sa_flags = 0; sigaction(SIGWINCH, &act, NULL); - sched.select_function = i9e_select; + sched.poll_function = i9e_poll; ici.bound_keyseqs = get_mapped_keyseqs(); pt->btrn = ici.producer = btr_new_node(&(struct btr_node_description) diff --git a/sched.c b/sched.c index 261809c1..eb8d03c2 100644 --- 
a/sched.c +++ b/sched.c @@ -117,13 +117,13 @@ static unsigned sched_post_monitor(struct sched *s) * * \param s Pointer to the scheduler struct. * - * This function updates the global \a now pointer, calls all registered + * This function updates the global now pointer, calls all registered * pre_monitor hooks which may set the timeout and add any file descriptors to - * the fd sets of \a s. Next, it calls para_select() and makes the result + * the pollfd array. Next, it calls the poll function and makes the result * available to the registered tasks by calling their post_monitor hook. * * \return Zero if no more tasks are left in the task list, negative if the - * select function returned an error. + * poll function returned an error. * * \sa \ref now. */ @@ -132,29 +132,18 @@ int schedule(struct sched *s) int ret; unsigned num_running_tasks; - if (!s->select_function) - s->select_function = para_select; + if (!s->poll_function) + s->poll_function = xpoll; again: - FD_ZERO(&s->rfds); - FD_ZERO(&s->wfds); + s->num_pfds = 0; + if (s->pidx) + memset(s->pidx, 0xff, s->pidx_array_len * sizeof(unsigned)); s->timeout = s->default_timeout; - s->max_fileno = -1; clock_get_realtime(&now_struct); sched_pre_monitor(s); - ret = s->select_function(s->max_fileno + 1, &s->rfds, &s->wfds, - s->timeout); + ret = s->poll_function(s->pfd, s->num_pfds, s->timeout); if (ret < 0) return ret; - if (ret == 0) { - /* - * APUE: Be careful not to check the descriptor sets on return - * unless the return value is greater than zero. The return - * state of the descriptor sets is implementation dependent if - * either a signal is caught or the timer expires. - */ - FD_ZERO(&s->rfds); - FD_ZERO(&s->wfds); - } clock_get_realtime(&now_struct); num_running_tasks = sched_post_monitor(s); if (num_running_tasks == 0) @@ -226,6 +215,8 @@ void sched_shutdown(struct sched *s) t->name); unlink_and_free_task(t); } + free(s->pfd); + free(s->pidx); } /** @@ -362,11 +353,11 @@ void task_notify_all(struct sched *s, int err) } /** - * Set the select timeout to the minimal possible value. + * Set the I/O timeout to the minimal possible value. * * \param s Pointer to the scheduler struct. * - * This causes the next select() call to return immediately. + * This causes the next poll() call to return immediately. */ void sched_min_delay(struct sched *s) { @@ -374,14 +365,13 @@ void sched_min_delay(struct sched *s) } /** - * Impose an upper bound for the timeout of the next select() call. + * Impose an upper bound for the I/O timeout. * * \param to Maximal allowed timeout. * \param s Pointer to the scheduler struct. * - * If the current scheduler timeout is already smaller than \a to, this - * function does nothing. Otherwise the timeout for the next select() call is - * set to the given value. + * If the current I/O timeout is already smaller than to, this function does + * nothing. Otherwise the timeout is set to the given value. * * \sa \ref sched_request_timeout_ms(). */ @@ -393,13 +383,13 @@ void sched_request_timeout(struct timeval *to, struct sched *s) } /** - * Force the next select() call to return before the given amount of milliseconds. + * Bound the I/O timeout to at most the given amount of milliseconds. * * \param ms The maximal allowed timeout in milliseconds. * \param s Pointer to the scheduler struct. * - * Like sched_request_timeout() this imposes an upper bound on the timeout - * value for the next select() call. + * Like \ref sched_request_timeout() this imposes an upper bound on the I/O + * timeout. 
*/ void sched_request_timeout_ms(long unsigned ms, struct sched *s) { @@ -409,13 +399,13 @@ void sched_request_timeout_ms(long unsigned ms, struct sched *s) } /** - * Force the next select() call to return before the given future time. + * Bound the I/O timeout by an absolute time in the future. * - * \param barrier Absolute time before select() should return. + * \param barrier Defines the upper bound for the timeout. * \param s Pointer to the scheduler struct. * - * \return If \a barrier is in the past, this function does nothing and returns - * zero. Otherwise it returns one. + * \return If the barrier is in the past, this function does nothing and + * returns zero. Otherwise it returns one. * * \sa \ref sched_request_barrier_or_min_delay(). */ @@ -430,12 +420,12 @@ int sched_request_barrier(struct timeval *barrier, struct sched *s) } /** - * Force the next select() call to return before the given time. + * Bound the I/O timeout or request a minimal delay. * - * \param barrier Absolute time before select() should return. + * \param barrier Absolute time as in \ref sched_request_barrier(). * \param s Pointer to the scheduler struct. * - * \return If \a barrier is in the past, this function requests a minimal + * \return If the barrier is in the past, this function requests a minimal * timeout and returns zero. Otherwise it returns one. * * \sa \ref sched_min_delay(), \ref sched_request_barrier(). @@ -452,9 +442,9 @@ int sched_request_barrier_or_min_delay(struct timeval *barrier, struct sched *s) return 1; } -static void sched_fd_set(int fd, fd_set *fds, int *max_fileno) +static void add_pollfd(int fd, struct sched *s, short events) { - assert(fd >= 0 && fd < FD_SETSIZE); + assert(fd >= 0); #if 0 { int flags = fcntl(fd, F_GETFL); @@ -464,8 +454,41 @@ static void sched_fd_set(int fd, fd_set *fds, int *max_fileno) } } #endif - FD_SET(fd, fds); - *max_fileno = PARA_MAX(*max_fileno, fd); + if (s->pidx_array_len > fd) { /* is fd already registered? */ + if (s->pidx[fd] < s->pfd_array_len) { /* yes, it is */ + assert(s->pfd[s->pidx[fd]].fd == fd); + s->pfd[s->pidx[fd]].events |= events; + return; + } + } else { /* need to extend the index array */ + unsigned old_len = s->pidx_array_len; + while (s->pidx_array_len <= fd) + s->pidx_array_len = s->pidx_array_len * 2 + 1; + PARA_INFO_LOG("pidx array len: %u\n", s->pidx_array_len); + s->pidx = para_realloc(s->pidx, + s->pidx_array_len * sizeof(unsigned)); + memset(s->pidx + old_len, 0xff, + (s->pidx_array_len - old_len) * sizeof(unsigned)); + } + /* + * The given fd is not part of the pfd array yet. Initialize pidx[fd] + * to point at the next unused slot of this array and initialize the + * slot. 
+ */ + s->pidx[fd] = s->num_pfds; + if (s->pfd_array_len <= s->num_pfds) { + unsigned old_len = s->pfd_array_len; + s->pfd_array_len = old_len * 2 + 1; + PARA_INFO_LOG("pfd array len: %u\n", s->pfd_array_len); + s->pfd = para_realloc(s->pfd, + s->pfd_array_len * sizeof(struct pollfd)); + memset(s->pfd + old_len, 0, + (s->pfd_array_len - old_len) * sizeof(struct pollfd)); + } + s->pfd[s->num_pfds].fd = fd; + s->pfd[s->num_pfds].events = events; + s->pfd[s->num_pfds].revents = 0; + s->num_pfds++; } /** @@ -478,7 +501,7 @@ static void sched_fd_set(int fd, fd_set *fds, int *max_fileno) */ void sched_monitor_readfd(int fd, struct sched *s) { - sched_fd_set(fd, &s->rfds, &s->max_fileno); + add_pollfd(fd, s, POLLIN); } /** @@ -491,5 +514,53 @@ void sched_monitor_readfd(int fd, struct sched *s) */ void sched_monitor_writefd(int fd, struct sched *s) { - sched_fd_set(fd, &s->wfds, &s->max_fileno); + add_pollfd(fd, s, POLLOUT); +} + +static int get_revents(int fd, const struct sched *s) +{ + if (fd < 0) + return 0; + if (fd >= s->pidx_array_len) + return 0; + if (s->pidx[fd] >= s->num_pfds) + return 0; + if (s->pfd[s->pidx[fd]].fd != fd) + return 0; + assert((s->pfd[s->pidx[fd]].revents & POLLNVAL) == 0); + return s->pfd[s->pidx[fd]].revents; +} + +/** + * Check whether there is data to read on the given fd. + * + * To be called from the ->post_monitor() method of a task. + * + * \param fd Should have been monitored with \ref sched_monitor_readfd(). + * \param s The scheduler instance. + * + * \return True if the file descriptor is ready for reading, false otherwise. + * If fd is negative, or has not been monitored in the current iteration of the + * scheduler's main loop, the function also returns false. + * + * \sa \ref sched_write_ok(). + */ +bool sched_read_ok(int fd, const struct sched *s) +{ + return get_revents(fd, s) & (POLLIN | POLLERR | POLLHUP); +} + +/** + * Check whether writing is possible (i.e., does not block). + * + * \param fd Should have been monitored with \ref sched_monitor_writefd(). + * \param s The scheduler instance. + * + * \return True if the file descriptor is ready for writing, false otherwise. + * The comment in \ref sched_read_ok() about invalid file descriptors applies + * to this function as well. + */ +bool sched_write_ok(int fd, const struct sched *s) +{ + return get_revents(fd, s) & (POLLOUT | POLLERR | POLLHUP); } diff --git a/sched.h b/sched.h index 77dc5748..ede5e67e 100644 --- a/sched.h +++ b/sched.h @@ -9,24 +9,28 @@ * Designed with KISS in mind. It maintains a list of task structures which is * extended when a new task is registered. Each task may define a pre_monitor * function which is called from the scheduler main loop before it calls - * select(). Similarly, each task must define a post_monitor function which is - * called after the select call. + * poll(2). Similarly, each task must define a post_monitor function which is + * called after poll(2) returns. * * \sa select(2), poll(2). */ struct sched { /** Initial value (in milliseconds) before any pre_monitor call. */ int default_timeout; - /** The timeout (also in milliseconds) for the next select call. */ + /** The timeout (also in milliseconds) for the next iteration. */ int timeout; - /** fds that should be watched for readability. */ - fd_set rfds; - /** fds that should be watched for writability. */ - fd_set wfds; - /** Highest numbered file descriptor in any of the above fd sets. */ - int max_fileno; - /** If non-NULL, use this function instead of para_select. 
*/ - int (*select_function)(int, fd_set *, fd_set *, int timeout); + /** Passed to poll(2). */ + struct pollfd *pfd; + /** Number of elements in the above array, passed to poll(2). */ + unsigned pfd_array_len; + /** Number of fds registered for monitoring so far. */ + unsigned num_pfds; + /** Maps fds to indices of the pfd array. */ + unsigned *pidx; + /** Number of elements in the above pidx array. */ + unsigned pidx_array_len; + /** If non-NULL, use this function instead of \ref xpoll(). */ + int (*poll_function)(struct pollfd *fds, nfds_t nfds, int timeout); /** Tasks which have been registered to the scheduler. */ struct list_head task_list; }; @@ -93,13 +97,5 @@ int sched_request_barrier(struct timeval *barrier, struct sched *s); int sched_request_barrier_or_min_delay(struct timeval *barrier, struct sched *s); void sched_monitor_readfd(int fd, struct sched *s); void sched_monitor_writefd(int fd, struct sched *s); - -static inline bool sched_read_ok(int fd, const struct sched *s) -{ - return FD_ISSET(fd, &s->rfds); -} - -static inline bool sched_write_ok(int fd, const struct sched *s) -{ - return FD_ISSET(fd, &s->wfds); -} +bool sched_read_ok(int fd, const struct sched *s); +bool sched_write_ok(int fd, const struct sched *s); diff --git a/server.c b/server.c index 5da384d8..2c66cc27 100644 --- a/server.c +++ b/server.c @@ -388,8 +388,8 @@ static int command_task_accept(unsigned listen_idx, struct sched *s, * schedule() to return as there are no more runnable tasks. * * Note that semaphores are not inherited across a fork(), so we don't - * hold the lock at this point. Since server_select() drops the lock - * prior to calling para_select(), we need to acquire it here. + * hold the lock at this point. Since server_poll() drops the lock - * prior to calling poll(), we need to acquire it here. */ mutex_lock(mmd_mutex); return -E_CHILD_CONTEXT; @@ -617,14 +617,13 @@ out: killpg(0, SIGUSR1); } -static int server_select(int max_fileno, fd_set *readfds, fd_set *writefds, - int timeout) +static int server_poll(struct pollfd *fds, nfds_t nfds, int timeout) { int ret; status_refresh(); mutex_unlock(mmd_mutex); - ret = para_select(max_fileno + 1, readfds, writefds, timeout); + ret = xpoll(fds, nfds, timeout); mutex_lock(mmd_mutex); return ret; } @@ -659,14 +658,14 @@ int main(int argc, char *argv[]) *sct = &server_command_task_struct; sched.default_timeout = 1000; - sched.select_function = server_select; + sched.poll_function = server_poll; server_init(argc, argv, sct); mutex_lock(mmd_mutex); ret = schedule(&sched); /* - * We hold the mmd lock: it was re-acquired in server_select() - * after the select call. + * We hold the mmd lock: it was re-acquired in server_poll() + * after the poll(2) call. */ mutex_unlock(mmd_mutex); sched_shutdown(&sched); diff --git a/wmadec_filter.c b/wmadec_filter.c index 48160005..8061f9ae 100644 --- a/wmadec_filter.c +++ b/wmadec_filter.c @@ -16,7 +16,6 @@ #include #include -#include #include "para.h" #include "error.h" -- 2.39.5