From 8dd83d2e7ed5c7ed6bdf393d859a017835967ad1 Mon Sep 17 00:00:00 2001 From: Oliver Jowett Date: Sun, 21 Mar 2021 01:51:08 +0800 Subject: [PATCH] Don't hang on exit if rtlsdr hardware stops sending samples. Give up and exit after 30 seconds of no sample data, rather than just warning and continuing. background & discussion: https://discussions.flightaware.com/t/cpu-hikes-crash-dump1090-fa/74759 The scenario this addresses is: * Hardware wedges, USB bulk endpoint stops providing data * librtlsdr remains in rtlsdr_read_async() waiting for either USB data which never arrives, or a cancellation via _a different thread_ calling rtlsdr_cancel_async(). * main thread notices the lack of SDR data and complains * something external e.g. piaware tries a restart and sends SIGTERM * the signal handler sets Modes.exit = 1; the main thread starts waiting for receive thread termination * because we're never getting callbacks from rtlsdr_read_async(), we never call rtlsdr_cancel_async * dump1090 hangs waiting on receive thread termination To fix this, add a sdrStop() handler function where the general contract is "make the receive thread terminate". In the rtlsdr case, this calls rtlsdr_cancel_async directly, which will make rtlsdr_read_async() return even if the hardware is stuck. The main thread then calls sdrStop() before waiting for receive thread termination. Also, as discussed in the thread above, there's not really much point in continuing to run if the SDR has wedged, so bail out after 30 seconds of no sample data. Also, if pthread_timedjoin_np is available, use it in preference to pthread_join so that we do not wait indefinitely for the receive thread on shutdown. If the join times out, give up and abort() as we can't safely continue a clean shutdown while the receive thread is running. --- dump1090.c | 16 +++++++++++----- sdr.c | 26 ++++++++++++++++++-------- sdr.h | 1 + sdr_rtlsdr.c | 9 +++++++++ sdr_rtlsdr.h | 1 + util.c | 12 ++++++++++++ util.h | 6 ++++++ 7 files changed, 58 insertions(+), 13 deletions(-) diff --git a/dump1090.c b/dump1090.c index 65f352f..1a509dd 100644 --- a/dump1090.c +++ b/dump1090.c @@ -761,7 +761,7 @@ int main(int argc, char **argv) { nanosleep(&slp, NULL); } } else { - int watchdogCounter = 10; // about 1 second + int watchdogCounter = 300; // about 30 seconds // Create the thread that will read the data from the device. pthread_create(&Modes.reader_thread, NULL, readerThreadEntryPoint, NULL); @@ -789,12 +789,12 @@ int main(int argc, char **argv) { fifo_release(buf); // We got something so reset the watchdog - watchdogCounter = 10; + watchdogCounter = 300; } else { // Nothing to process this time around. if (--watchdogCounter <= 0) { - log_with_timestamp("No data received from the SDR for a long time, it may have wedged"); - watchdogCounter = 600; + log_with_timestamp("No samples received from the SDR for a long time. Maybe the hardware is wedged? Giving up."); + Modes.exit = 2; // abnormal exit } } @@ -804,8 +804,14 @@ int main(int argc, char **argv) { } log_with_timestamp("Waiting for receive thread termination"); + sdrStop(); // tell reader thread to wake up and exit fifo_halt(); // Reader thread should do this anyway, but just in case.. - pthread_join(Modes.reader_thread,NULL); // Wait on reader thread exit + + // Wait on reader thread exit + if (join_thread(Modes.reader_thread, NULL, 30000) == ETIMEDOUT) { + log_with_timestamp("Receive thread did not shut down cleanly in 30 seconds, aborting."); + abort(); // Can't complete cleanup while the receive thread is active; bail out. + } } interactiveCleanup(); diff --git a/sdr.c b/sdr.c index 41e3ef2..4c33de1 100644 --- a/sdr.c +++ b/sdr.c @@ -42,6 +42,7 @@ typedef struct { bool (*handleOption)(int, char**, int*); bool (*open)(); void (*run)(); + void (*stop)(); void (*close)(); } sdr_handler; @@ -72,6 +73,10 @@ static void noRun() { } +static void noStop() +{ +} + static void noClose() { } @@ -84,24 +89,24 @@ static bool unsupportedOpen() static sdr_handler sdr_handlers[] = { #ifdef ENABLE_RTLSDR - { "rtlsdr", SDR_RTLSDR, rtlsdrInitConfig, rtlsdrShowHelp, rtlsdrHandleOption, rtlsdrOpen, rtlsdrRun, rtlsdrClose }, + { "rtlsdr", SDR_RTLSDR, rtlsdrInitConfig, rtlsdrShowHelp, rtlsdrHandleOption, rtlsdrOpen, rtlsdrRun, rtlsdrStop, rtlsdrClose }, #endif #ifdef ENABLE_BLADERF - { "bladerf", SDR_BLADERF, bladeRFInitConfig, bladeRFShowHelp, bladeRFHandleOption, bladeRFOpen, bladeRFRun, bladeRFClose }, + { "bladerf", SDR_BLADERF, bladeRFInitConfig, bladeRFShowHelp, bladeRFHandleOption, bladeRFOpen, bladeRFRun, noStop, bladeRFClose }, #endif #ifdef ENABLE_HACKRF - { "hackrf", SDR_HACKRF, hackRFInitConfig, hackRFShowHelp, hackRFHandleOption, hackRFOpen, hackRFRun, hackRFClose }, + { "hackrf", SDR_HACKRF, hackRFInitConfig, hackRFShowHelp, hackRFHandleOption, hackRFOpen, hackRFRun, noStop, hackRFClose }, #endif #ifdef ENABLE_LIMESDR - { "limesdr", SDR_LIMESDR, limesdrInitConfig, limesdrShowHelp, limesdrHandleOption, limesdrOpen, limesdrRun, limesdrClose }, + { "limesdr", SDR_LIMESDR, limesdrInitConfig, limesdrShowHelp, limesdrHandleOption, limesdrOpen, limesdrRun, noStop, limesdrClose }, #endif - { "none", SDR_NONE, noInitConfig, noShowHelp, noHandleOption, noOpen, noRun, noClose }, - { "ifile", SDR_IFILE, ifileInitConfig, ifileShowHelp, ifileHandleOption, ifileOpen, ifileRun, ifileClose }, + { "none", SDR_NONE, noInitConfig, noShowHelp, noHandleOption, noOpen, noRun, noStop, noClose }, + { "ifile", SDR_IFILE, ifileInitConfig, ifileShowHelp, ifileHandleOption, ifileOpen, ifileRun, noStop, ifileClose }, - { NULL, SDR_NONE, NULL, NULL, NULL, NULL, NULL, NULL } /* must come last */ + { NULL, SDR_NONE, NULL, NULL, NULL, NULL, NULL, NULL, NULL } /* must come last */ }; void sdrInitConfig() @@ -158,7 +163,7 @@ bool sdrHandleOption(int argc, char **argv, int *jptr) static sdr_handler *current_handler() { - static sdr_handler unsupported_handler = { "unsupported", SDR_NONE, noInitConfig, noShowHelp, noHandleOption, unsupportedOpen, noRun, noClose }; + static sdr_handler unsupported_handler = { "unsupported", SDR_NONE, noInitConfig, noShowHelp, noHandleOption, unsupportedOpen, noRun, noStop, noClose }; for (int i = 0; sdr_handlers[i].name; ++i) { if (Modes.sdr_type == sdr_handlers[i].sdr_type) { @@ -192,6 +197,11 @@ void sdrRun() pthread_mutex_unlock(&Modes.reader_cpu_mutex); } +void sdrStop() +{ + current_handler()->stop(); +} + void sdrClose() { pthread_mutex_destroy(&Modes.reader_cpu_mutex); diff --git a/sdr.h b/sdr.h index ba030e7..b73655b 100644 --- a/sdr.h +++ b/sdr.h @@ -28,6 +28,7 @@ void sdrShowHelp(); bool sdrHandleOption(int argc, char **argv, int *jptr); bool sdrOpen(); void sdrRun(); +void sdrStop(); void sdrClose(); // Call periodically from the SDR read thread to update reader thread CPU stats: diff --git a/sdr_rtlsdr.c b/sdr_rtlsdr.c index ef27f70..fd192c2 100644 --- a/sdr_rtlsdr.c +++ b/sdr_rtlsdr.c @@ -358,6 +358,15 @@ void rtlsdrRun() } } +void rtlsdrStop() +{ + if (!RTLSDR.dev) { + return; + } + + rtlsdr_cancel_async(RTLSDR.dev); +} + void rtlsdrClose() { if (RTLSDR.dev) { diff --git a/sdr_rtlsdr.h b/sdr_rtlsdr.h index 54999e7..af6c53d 100644 --- a/sdr_rtlsdr.h +++ b/sdr_rtlsdr.h @@ -25,6 +25,7 @@ void rtlsdrInitConfig(); void rtlsdrShowHelp(); bool rtlsdrOpen(); void rtlsdrRun(); +void rtlsdrStop(); void rtlsdrClose(); bool rtlsdrHandleOption(int argc, char **argv, int *jptr); diff --git a/util.c b/util.c index d0d2d07..dd0d570 100644 --- a/util.c +++ b/util.c @@ -132,3 +132,15 @@ void set_thread_name(const char *name) MODES_NOTUSED(name); #endif } + +int join_thread(pthread_t thread, void **retval, uint32_t timeout_ms) +{ +#if (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 3) + struct timespec abstime; + get_deadline(timeout_ms, &abstime); + return pthread_timedjoin_np(thread, retval, &abstime); +#else + MODES_NOTUSED(timeout_ms); + return pthread_join(thread, retval); +#endif +} diff --git a/util.h b/util.h index b24e44f..7a95f1f 100644 --- a/util.h +++ b/util.h @@ -21,6 +21,7 @@ #define DUMP1090_UTIL_H #include +#include /* Returns system time in milliseconds */ uint64_t mstime(void); @@ -60,4 +61,9 @@ void update_cpu_timing(struct timespec *start_time, struct timespec *add_to); /* set current thread name, if supported */ void set_thread_name(const char *name); +/* wait for the given thread to terminate, like pthread_join. If supported, + * if the thread has not terminated within `timeout_ms` milliseconds, return ETIMEDOUT. + */ +int join_thread(pthread_t thread, void **retval, uint32_t timeout_ms); + #endif