From c8f409c2f297180ee06b474ef59653aebb817a4a Mon Sep 17 00:00:00 2001 From: "Justin M. Keyes" Date: Fri, 13 Apr 2018 22:27:00 +0200 Subject: loop: remove `children_stop_requests` It serves no purpose because process_stop() is already guarded by `proc->stopped_time`. --- src/nvim/event/loop.c | 1 - src/nvim/event/loop.h | 1 - src/nvim/event/process.c | 17 ++++++----------- 3 files changed, 6 insertions(+), 13 deletions(-) (limited to 'src/nvim') diff --git a/src/nvim/event/loop.c b/src/nvim/event/loop.c index d92464f17b..7998e0b8d0 100644 --- a/src/nvim/event/loop.c +++ b/src/nvim/event/loop.c @@ -21,7 +21,6 @@ void loop_init(Loop *loop, void *data) loop->recursive = 0; loop->uv.data = loop; loop->children = kl_init(WatcherPtr); - loop->children_stop_requests = 0; loop->events = multiqueue_new_parent(loop_on_put, loop); loop->fast_events = multiqueue_new_child(loop->events); loop->thread_events = multiqueue_new_parent(NULL, NULL); diff --git a/src/nvim/event/loop.h b/src/nvim/event/loop.h index d1a40d5cc9..c0e2f49e4f 100644 --- a/src/nvim/event/loop.h +++ b/src/nvim/event/loop.h @@ -37,7 +37,6 @@ typedef struct loop { // generic timer, used by loop_poll_events() uv_timer_t poll_timer; - size_t children_stop_requests; uv_async_t async; uv_mutex_t mutex; int recursive; diff --git a/src/nvim/event/process.c b/src/nvim/event/process.c index 60650344ce..6820194d5e 100644 --- a/src/nvim/event/process.c +++ b/src/nvim/event/process.c @@ -228,13 +228,10 @@ void process_stop(Process *proc) FUNC_ATTR_NONNULL_ALL } Loop *loop = proc->loop; - if (!loop->children_stop_requests++) { - // When there's at least one stop request pending, start a timer that - // will periodically check if a signal should be send to the job. - ILOG("starting job kill timer"); - uv_timer_start(&loop->children_kill_timer, children_kill_cb, - KILL_TIMEOUT_MS, KILL_TIMEOUT_MS); - } + // Start a timer to periodically check if a signal should be send to the job. + ILOG("starting job kill timer"); + uv_timer_start(&loop->children_kill_timer, children_kill_cb, + KILL_TIMEOUT_MS, KILL_TIMEOUT_MS); } /// Iterates the process list sending SIGTERM to stopped processes and SIGKILL @@ -383,10 +380,8 @@ static void process_close_handles(void **argv) static void on_process_exit(Process *proc) { Loop *loop = proc->loop; - if (proc->stopped_time && loop->children_stop_requests - && !--loop->children_stop_requests) { - // Stop the timer if no more stop requests are pending - DLOG("Stopping process kill timer"); + if (proc->stopped_time) { + DLOG("stopping process kill timer"); uv_timer_stop(&loop->children_kill_timer); } -- cgit From 8fa0b8051dbe2f4e4a00c162913886a34bf78bfa Mon Sep 17 00:00:00 2001 From: "Justin M. Keyes" Date: Sat, 14 Apr 2018 12:03:34 +0200 Subject: job-control: mitigate process-kill race children_kill_cb() is racey. One obvious problem is that process_close_handles() is *queued* by on_process_exit(), so when children_kill_cb() is invoked, the dead process might still be in the `loop->children` list. If the OS already reclaimed the dead PID, Nvim may try to SIGKILL it. Avoid that by checking `proc->status`. Vim doesn't have this problem because it doesn't attempt to kill processes that ignored SIGTERM after a timeout. closes #8269 --- src/nvim/channel.c | 6 +++--- src/nvim/event/process.c | 20 +++++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'src/nvim') diff --git a/src/nvim/channel.c b/src/nvim/channel.c index 776e2bfa86..4e6ca8d278 100644 --- a/src/nvim/channel.c +++ b/src/nvim/channel.c @@ -658,9 +658,9 @@ static void channel_process_exit_cb(Process *proc, int status, void *data) terminal_close(chan->term, msg); } - // if status is -1 the process did not really exit, - // we just closed the handle onto a detached process - if (status >= 0) { + // If process did not exit, we only closed the handle of a detached process. + bool exited = (status >= 0); + if (exited) { process_channel_event(chan, &chan->on_exit, "exit", NULL, 0, status); } diff --git a/src/nvim/event/process.c b/src/nvim/event/process.c index 6820194d5e..59bf2fccf3 100644 --- a/src/nvim/event/process.c +++ b/src/nvim/event/process.c @@ -188,8 +188,7 @@ int process_wait(Process *proc, int ms, MultiQueue *events) } if (proc->refcount == 1) { - // Job exited, collect status and manually invoke close_cb to free the job - // resources + // Job exited, free its resources. decref(proc); if (events) { // the decref call created an exit event, process it now @@ -205,7 +204,8 @@ int process_wait(Process *proc, int ms, MultiQueue *events) /// Ask a process to terminate and eventually kill if it doesn't respond void process_stop(Process *proc) FUNC_ATTR_NONNULL_ALL { - if (proc->stopped_time) { + bool exited = (proc->status >= 0); + if (exited || proc->stopped_time) { return; } @@ -228,14 +228,14 @@ void process_stop(Process *proc) FUNC_ATTR_NONNULL_ALL } Loop *loop = proc->loop; - // Start a timer to periodically check if a signal should be send to the job. + // Start a timer to periodically check if a signal should be sent to the job. ILOG("starting job kill timer"); uv_timer_start(&loop->children_kill_timer, children_kill_cb, KILL_TIMEOUT_MS, KILL_TIMEOUT_MS); } -/// Iterates the process list sending SIGTERM to stopped processes and SIGKILL -/// to those that didn't die from SIGTERM after a while(exit_timeout is 0). +/// Sends SIGKILL (or SIGTERM for PTY jobs) to processes that didn't terminate +/// after process_stop() requested them. static void children_kill_cb(uv_timer_t *handle) { Loop *loop = handle->loop->data; @@ -243,11 +243,11 @@ static void children_kill_cb(uv_timer_t *handle) kl_iter(WatcherPtr, loop->children, current) { Process *proc = (*current)->data; - if (!proc->stopped_time) { + bool exited = (proc->status >= 0); + if (exited || !proc->stopped_time) { continue; } uint64_t elapsed = (now - proc->stopped_time) / 1000000 + 1; - if (elapsed >= KILL_TIMEOUT_MS) { int sig = proc->type == kProcessTypePty && elapsed < KILL_TIMEOUT_MS * 2 ? SIGTERM @@ -380,8 +380,10 @@ static void process_close_handles(void **argv) static void on_process_exit(Process *proc) { Loop *loop = proc->loop; + ILOG("exited: pid=%d status=%d stoptime=%" PRId64, proc->pid, + proc->status, proc->stopped_time); if (proc->stopped_time) { - DLOG("stopping process kill timer"); + ILOG("stopping process kill timer"); uv_timer_stop(&loop->children_kill_timer); } -- cgit From 142ac021ac88387ef7319dc4eb1f430753480f44 Mon Sep 17 00:00:00 2001 From: "Justin M. Keyes" Date: Sat, 14 Apr 2018 17:23:17 +0200 Subject: job-control: one-shot timer instead of repeating Before f31c26f1afb5 the timer was used to try SIGTERM *and* SIGKILL, so a repeating timer was needed. After f31c26f1afb5 process_stop() sends SIGTERM immediately, and the timer only sends SIGKILL. So we don't need a repeating timer. - Simplifies the logic: don't need to call uv_timer_stop() explicitly. - Avoids a problem: if process_stop() is called more than once in the 2-second window, the first on_process_exit() would call uv_timer_stop() which stops the timer for all stopped processes. --- src/nvim/event/process.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'src/nvim') diff --git a/src/nvim/event/process.c b/src/nvim/event/process.c index 59bf2fccf3..312da54c76 100644 --- a/src/nvim/event/process.c +++ b/src/nvim/event/process.c @@ -111,6 +111,7 @@ int process_spawn(Process *proc, bool in, bool out, bool err) proc->internal_close_cb = decref; proc->refcount++; kl_push(WatcherPtr, proc->loop->children, proc); + DLOG("new: pid=%d argv=[%s]", proc->pid, *proc->argv); return 0; } @@ -227,11 +228,10 @@ void process_stop(Process *proc) FUNC_ATTR_NONNULL_ALL abort(); } - Loop *loop = proc->loop; - // Start a timer to periodically check if a signal should be sent to the job. + // Start a timer to verify that the job process terminated. ILOG("starting job kill timer"); - uv_timer_start(&loop->children_kill_timer, children_kill_cb, - KILL_TIMEOUT_MS, KILL_TIMEOUT_MS); + uv_timer_start(&proc->loop->children_kill_timer, children_kill_cb, + KILL_TIMEOUT_MS, 0); } /// Sends SIGKILL (or SIGTERM for PTY jobs) to processes that didn't terminate @@ -380,12 +380,8 @@ static void process_close_handles(void **argv) static void on_process_exit(Process *proc) { Loop *loop = proc->loop; - ILOG("exited: pid=%d status=%d stoptime=%" PRId64, proc->pid, - proc->status, proc->stopped_time); - if (proc->stopped_time) { - ILOG("stopping process kill timer"); - uv_timer_stop(&loop->children_kill_timer); - } + ILOG("exited: pid=%d status=%d stoptime=%" PRIu64, proc->pid, proc->status, + proc->stopped_time); // Process has terminated, but there could still be data to be read from the // OS. We are still in the libuv loop, so we cannot call code that polls for -- cgit From b2c066409d19deb6228b7448e5c0367117031753 Mon Sep 17 00:00:00 2001 From: "Justin M. Keyes" Date: Sun, 15 Apr 2018 02:03:01 +0200 Subject: job-control: children_kill_cb(): do not check elapsed time 1. Don't check elapsed time in children_kill_cb(), it's already implied by the start-time of the timer itself. 2. Restart timer from children_kill_cb() for PTY jobs, to send SIGKILL after SIGTERM. There is an edge case where SIGKILL might follow SIGTERM too quickly, if jobstop() is called near the 2-second timer window. But this edge case is not worth code complication. --- src/nvim/event/process.c | 27 ++++++++++++++------------- src/nvim/event/process.h | 3 +-- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src/nvim') diff --git a/src/nvim/event/process.c b/src/nvim/event/process.c index 312da54c76..23433cf495 100644 --- a/src/nvim/event/process.c +++ b/src/nvim/event/process.c @@ -23,7 +23,7 @@ #endif // Time for a process to exit cleanly before we send KILL. -// For pty processes SIGTERM is sent first (in case SIGHUP was not enough). +// For PTY processes SIGTERM is sent first (in case SIGHUP was not enough). #define KILL_TIMEOUT_MS 2000 static bool process_is_tearing_down = false; @@ -209,8 +209,8 @@ void process_stop(Process *proc) FUNC_ATTR_NONNULL_ALL if (exited || proc->stopped_time) { return; } - proc->stopped_time = os_hrtime(); + switch (proc->type) { case kProcessTypeUv: // Close the process's stdin. If the process doesn't close its own @@ -228,18 +228,16 @@ void process_stop(Process *proc) FUNC_ATTR_NONNULL_ALL abort(); } - // Start a timer to verify that the job process terminated. - ILOG("starting job kill timer"); + // (Re)start timer to verify that stopped process(es) died. uv_timer_start(&proc->loop->children_kill_timer, children_kill_cb, KILL_TIMEOUT_MS, 0); } -/// Sends SIGKILL (or SIGTERM for PTY jobs) to processes that didn't terminate -/// after process_stop() requested them. +/// Sends SIGKILL (or SIGTERM..SIGKILL for PTY jobs) to processes that did +/// not terminate after process_stop(). static void children_kill_cb(uv_timer_t *handle) { Loop *loop = handle->loop->data; - uint64_t now = os_hrtime(); kl_iter(WatcherPtr, loop->children, current) { Process *proc = (*current)->data; @@ -247,12 +245,15 @@ static void children_kill_cb(uv_timer_t *handle) if (exited || !proc->stopped_time) { continue; } - uint64_t elapsed = (now - proc->stopped_time) / 1000000 + 1; - if (elapsed >= KILL_TIMEOUT_MS) { - int sig = proc->type == kProcessTypePty && elapsed < KILL_TIMEOUT_MS * 2 - ? SIGTERM - : SIGKILL; - os_proc_tree_kill(proc->pid, sig); + uint64_t term_sent = UINT64_MAX == proc->stopped_time; + if (kProcessTypePty != proc->type || term_sent) { + os_proc_tree_kill(proc->pid, SIGKILL); + } else { + os_proc_tree_kill(proc->pid, SIGTERM); + proc->stopped_time = UINT64_MAX; // Flag: SIGTERM was sent. + // Restart timer. + uv_timer_start(&proc->loop->children_kill_timer, children_kill_cb, + KILL_TIMEOUT_MS, 0); } } } diff --git a/src/nvim/event/process.h b/src/nvim/event/process.h index 033ce3604b..ba2c2a6a11 100644 --- a/src/nvim/event/process.h +++ b/src/nvim/event/process.h @@ -19,8 +19,7 @@ struct process { Loop *loop; void *data; int pid, status, refcount; - // set to the hrtime of when process_stop was called for the process. - uint64_t stopped_time; + uint64_t stopped_time; // process_stop() timestamp const char *cwd; char **argv; Stream in, out, err; -- cgit