Added support for detecting critical driver errors

This commit is contained in:
Robinson 2023-08-09 21:18:47 -06:00
parent 3dcd2af495
commit 28d170c25c
No known key found for this signature in database
GPG Key ID: 8E7DB78588BD6F5C
3 changed files with 68 additions and 4 deletions

View File

@ -113,6 +113,11 @@ internal class AeronDriverInternal(endPoint: EndPoint<*>?, private val config: C
private val stateMutex = Mutex()
/**
 * Flag recording that the Aeron error handler has seen a critical network error (for example, a VPN connection
 * getting disconnected while running).
 *
 * The error handler sets this to `true` the first time it receives a `ChannelEndpointException`, or an error whose
 * cause is a `BindException`, `SocketException` or `IOException`. It also uses the flag as a guard so the
 * close-connections/notify work is only started once, even though the handler itself is invoked repeatedly.
 * `removeErrors()` resets it to `false`.
 *
 * Code that catches a failed send (see `EndPoint` and `Handshaker`) reads this flag to tell a hard network
 * disconnect apart from an ordinary serialization/send error.
 *
 * Marked `@Volatile` so a write made by the error handler is visible to other threads that read the flag.
 * NOTE(review): the check-then-set in the error handler is not atomic; confirm the handler is only ever invoked
 * from a single thread.
 */
@Volatile
internal var criticalDriverError = false
private var closed = false
suspend fun closed(): Boolean = stateMutex.withLock {
@ -128,7 +133,51 @@ internal class AeronDriverInternal(endPoint: EndPoint<*>?, private val config: C
// configure the aeron error handler
val filter = config.aeronErrorFilter
aeronErrorHandler = { error ->
if (filter(error)) {
// if the network interface is removed (for example, a VPN connection).
if (error is io.aeron.exceptions.ChannelEndpointException ||
error.cause is BindException ||
error.cause is SocketException ||
error.cause is IOException) {
// this is bad! We must close this connection. THIS WILL BE CALLED AS FAST AS THE CPU CAN RUN (because of how aeron works).
if (!criticalDriverError) {
criticalDriverError = true
logger.error { "Aeron Driver [$driverId]: Critical driver error!" }
// make a copy
val endpoints = endPointUsages.toTypedArray()
runBlocking {
endpoints.forEach {
it.connections.forEach {conn ->
conn.closeImmediately(false, false)
}
}
// closing the driver here will SEGFAULT the jvm!! (cannot have reentrant code on this thread)
}
if (error.message?.startsWith("ERROR - channel error - Network is unreachable") == true) {
val exception = AeronDriverException("Network is disconnected or unreachable.")
exception.cleanAllStackTrace()
notifyError(exception)
} else if (error.message?.startsWith("WARN - failed to send") == true) {
val exception = AeronDriverException("Network socket error, can't send data.")
exception.cleanAllStackTrace()
notifyError(exception)
}
else if (error.message == "Can't assign requested address") {
val exception = AeronDriverException("Network socket error, can't assign requested address.")
exception.cleanAllStackTrace()
notifyError(exception)
} else {
error.cleanStackTrace()
// send this out to the listener-manager so we can be notified of global errors
notifyError(AeronDriverException(error.cause!!))
}
}
}
else if (filter(error)) {
error.cleanStackTrace()
// send this out to the listener-manager so we can be notified of global errors
notifyError(AeronDriverException(error))
@ -163,6 +212,7 @@ internal class AeronDriverInternal(endPoint: EndPoint<*>?, private val config: C
}
private suspend fun removeErrors() = onErrorLocalMutex.withLock {
criticalDriverError = false
onErrorLocalList.forEach {
removeOnError(it)
}

View File

@ -506,8 +506,14 @@ abstract class EndPoint<CONNECTION : Connection> private constructor(val type: C
}
}
} catch (e: Throwable) {
// if the driver is closed due to a network disconnect or a remote-client termination, we also must close the connection.
if (aeronDriver.criticalDriverError) {
// we had a HARD network crash/disconnect, we close the driver and then reconnect automatically
//NOTE: notifyDisconnect IS NOT CALLED!
}
// make sure we atomically create the listener manager, if necessary
if (message is MethodResponse && message.result is Exception) {
else if (message is MethodResponse && message.result is Exception) {
val result = message.result as Exception
val newException = SerializationException("Error serializing message ${message.javaClass.simpleName}: '$message'", result)
listenerManager.notifyError(connection, newException)

View File

@ -79,14 +79,22 @@ internal class Handshaker<CONNECTION : Connection>(
return aeronDriver.send(publication, buffer, logInfo, listenerManager, handshakeSendIdleStrategy)
} catch (e: Exception) {
if (e is ClientException || e is ServerException) {
// if the driver is closed due to a network disconnect or a remote-client termination, we also must close the connection.
if (aeronDriver.criticalDriverError) {
// we had a HARD network crash/disconnect, we close the driver and then reconnect automatically
//NOTE: notifyDisconnect IS NOT CALLED!
}
else if (e is ClientException || e is ServerException) {
throw e
} else {
}
else {
val exception = newException("[$logInfo] Error serializing handshake message $message", e)
exception.cleanStackTrace(2) // 2 because we do not want to see the stack for the abstract `newException`
listenerManager.notifyError(exception)
throw exception
}
return false
} finally {
handshakeSendIdleStrategy.reset()
}