Fixed/cleaned up connection polling and restarts

This commit is contained in:
Robinson 2023-10-26 21:19:36 +02:00
parent ba57447169
commit 0c4c442b3a
No known key found for this signature in database
GPG Key ID: 8E7DB78588BD6F5C
2 changed files with 83 additions and 45 deletions

View File

@ -130,6 +130,15 @@ open class Client<CONNECTION : Connection>(config: ClientConfiguration = ClientC
var connectionTimeoutSec: Int = 0 var connectionTimeoutSec: Int = 0
private set private set
/**
* - if the client is internally going to reconnect (because of a network error)
* - we have specified that we will run the disconnect logic
* - there is reconnect logic in the disconnect handler
*
* Then ultimately, we want to ignore the disconnect-handler reconnect (we do not want to have multiple reconnects happening concurrently)
*/
@Volatile
private var autoReconnect = false
private val handshake = ClientHandshake(this, logger) private val handshake = ClientHandshake(this, logger)
@ -165,6 +174,15 @@ open class Client<CONNECTION : Connection>(config: ClientConfiguration = ClientC
*/ */
@Suppress("DuplicatedCode") @Suppress("DuplicatedCode")
fun reconnect() { fun reconnect() {
if (autoReconnect) {
// we must check if we should permit a MANUAL reconnect, because the auto-reconnect MIGHT ALSO re-connect!
// autoReconnect will be "reset" when the connection closes. If in a happy state, then a manual reconnect is permitted.
logger.info("Ignoring reconnect, auto-reconnect is in progress")
return
}
if (connectionTimeoutSec == 0) { if (connectionTimeoutSec == 0) {
logger.info("Reconnecting...") logger.info("Reconnecting...")
} else { } else {
@ -509,6 +527,7 @@ open class Client<CONNECTION : Connection>(config: ClientConfiguration = ClientC
val startTime = System.nanoTime() val startTime = System.nanoTime()
var success = false var success = false
while (!stopConnectOnShutdown && (connectionTimoutInNs == 0L || System.nanoTime() - startTime < connectionTimoutInNs)) { while (!stopConnectOnShutdown && (connectionTimoutInNs == 0L || System.nanoTime() - startTime < connectionTimoutInNs)) {
if (isShutdown()) { if (isShutdown()) {
resetOnError() resetOnError()
@ -639,11 +658,12 @@ open class Client<CONNECTION : Connection>(config: ClientConfiguration = ClientC
if (stopConnectOnShutdown) { if (stopConnectOnShutdown) {
val exception = ClientException("Client closed during connection attempt. Aborting connection attempts.").cleanStackTrace(3) val exception = ClientException("Client closed during connection attempt. Aborting connection attempts.").cleanStackTrace(3)
listenerManager.notifyError(exception) listenerManager.notifyError(exception)
// if we are waiting for this connection to connect (on a different thread, for example), make sure to release it.
closeLatch.countDown()
throw exception throw exception
} }
if (System.nanoTime() - startTime < connectionTimoutInNs) { if (System.nanoTime() - startTime < connectionTimoutInNs) {
val type = if (isIPC) { val type = if (isIPC) {
"IPC" "IPC"
} else { } else {
@ -703,7 +723,7 @@ open class Client<CONNECTION : Connection>(config: ClientConfiguration = ClientC
// NOTE: this can change depending on what the server specifies! // NOTE: this can change depending on what the server specifies!
// should we queue messages during a reconnect? This is important if the client/server connection is unstable // should we queue messages during a reconnect? This is important if the client/server connection is unstable
if (connectionInfo.enableSession && sessionManager is SessionManagerNoOp) { if (connectionInfo.enableSession && sessionManager is SessionManagerNoOp) {
sessionManager = SessionManagerFull(config, aeronDriver, connectionInfo.sessionTimeout) sessionManager = SessionManagerFull(config, listenerManager as ListenerManager<SessionConnection>, aeronDriver, connectionInfo.sessionTimeout)
} else if (!connectionInfo.enableSession && sessionManager is SessionManagerFull) { } else if (!connectionInfo.enableSession && sessionManager is SessionManagerFull) {
sessionManager = SessionManagerNoOp() sessionManager = SessionManagerNoOp()
} }
@ -809,12 +829,14 @@ open class Client<CONNECTION : Connection>(config: ClientConfiguration = ClientC
logger.debug("[${handshakeConnection.details}] $connType (${newConnection.id}) done with handshake.") logger.debug("[${handshakeConnection.details}] $connType (${newConnection.id}) done with handshake.")
} }
connection0 = newConnection
newConnection.setImage()
// in the specific case of using sessions, we don't want to call 'init' or `connect` for a connection that is resuming a session // in the specific case of using sessions, we don't want to call 'init' or `connect` for a connection that is resuming a session
// when applicable - we ALSO want to restore RMI objects BEFORE the connection is fully setup! // when applicable - we ALSO want to restore RMI objects BEFORE the connection is fully setup!
val newSession = sessionManager.onInit(newConnection) val newSession = sessionManager.onInit(newConnection)
newConnection.setImage()
// before we finish creating the connection, we initialize it (in case there needs to be logic that happens-before `onConnect` calls // before we finish creating the connection, we initialize it (in case there needs to be logic that happens-before `onConnect` calls
if (newSession) { if (newSession) {
listenerManager.notifyInit(newConnection) listenerManager.notifyInit(newConnection)
@ -822,29 +844,30 @@ open class Client<CONNECTION : Connection>(config: ClientConfiguration = ClientC
// this enables the connection to start polling for messages // this enables the connection to start polling for messages
addConnection(newConnection) addConnection(newConnection)
connection0 = newConnection
// if we shutdown/close before the poller starts, we don't want to block forever // if we shutdown/close before the poller starts, we don't want to block forever
pollerClosedLatch = CountDownLatch(1) pollerClosedLatch = CountDownLatch(1)
networkEventPoller.submit( networkEventPoller.submit(
action = object : EventActionOperator { action = object : EventActionOperator {
override fun invoke(): Int { override fun invoke(): Int {
val connection = connection0
// if we initiate a disconnect manually, then there is no need to wait for aeron to verify it's closed // if we initiate a disconnect manually, then there is no need to wait for aeron to verify it's closed
// we only want to wait for aeron to verify it's closed if we are SUPPOSED to be connected, but there's a network blip // we only want to wait for aeron to verify it's closed if we are SUPPOSED to be connected, but there's a network blip
return if (!(shutdownEventPoller || newConnection.isClosed() || newConnection.isClosedWithTimeout())) { return if (connection != null) {
newConnection.poll() if (!shutdownEventPoller && connection.canPoll()) {
} else { connection.poll()
// If the connection has either been closed, or has expired, it needs to be cleaned-up/deleted. } else {
if (logger.isDebugEnabled) { // If the connection has either been closed, or has expired, it needs to be cleaned-up/deleted.
logger.debug("[${connection}] connection expired (cleanup)") logger.error("[${connection}] connection expired (cleanup). shutdownEventPoller=$shutdownEventPoller isClosed()=${connection.isClosed()} isClosedWithTimeout=${connection.isClosedWithTimeout()}")
if (logger.isDebugEnabled) {
logger.debug("[{}] connection expired (cleanup)", connection)
}
// remove ourselves from processing
EventPoller.REMOVE
} }
} else {
// the connection MUST be removed in the same thread that is processing events (it will be removed again in close, and that is expected)
removeConnection(newConnection)
// we already removed the connection, we can call it again without side effects
newConnection.close()
// remove ourselves from processing // remove ourselves from processing
EventPoller.REMOVE EventPoller.REMOVE
} }
@ -852,14 +875,20 @@ open class Client<CONNECTION : Connection>(config: ClientConfiguration = ClientC
}, },
onClose = object : EventCloseOperator { onClose = object : EventCloseOperator {
override fun invoke() { override fun invoke() {
val mustRestartDriverOnError = aeronDriver.internal.mustRestartDriverOnError val connection = connection0
val uncleanDisconnect = !shutdownEventPoller && !newConnection.isClosed() && newConnection.isClosedWithTimeout() if (connection == null) {
val autoReconnect = mustRestartDriverOnError || uncleanDisconnect logger.error("Unable to continue, as the connection has been removed before event dispatch shutdown!")
return
}
val mustRestartDriverOnError = aeronDriver.internal.mustRestartDriverOnError
val dirtyDisconnectWithSession = (this@Client is SessionClient) && !shutdownEventPoller && connection.isDirtyClose()
autoReconnect = mustRestartDriverOnError || dirtyDisconnectWithSession
if (mustRestartDriverOnError) { if (mustRestartDriverOnError) {
logger.error("[{}] Critical driver error detected, reconnecting client", connection) logger.error("[{}] Critical driver error detected, reconnecting client", connection)
} else if (dirtyDisconnect) { } else if (dirtyDisconnectWithSession) {
logger.error("[{}] Dirty disconnect detected, reconnecting client", connection) logger.error("[{}] Dirty disconnect detected, reconnecting client", connection)
} }
@ -871,13 +900,7 @@ open class Client<CONNECTION : Connection>(config: ClientConfiguration = ClientC
// we only need to run shutdown methods if there was a network outage or D/C // we only need to run shutdown methods if there was a network outage or D/C
if (!shutdownInProgress.value) { if (!shutdownInProgress.value) {
// this is because we restart automatically on driver errors/weird timeouts // this is because we restart automatically on driver errors/weird timeouts
val standardClose = !autoReconnect this@Client.close(closeEverything = false, sendDisconnectMessage = true, releaseWaitingThreads = !autoReconnect)
this@Client.close(
closeEverything = false,
sendDisconnectMessage = standardClose,
notifyDisconnect = standardClose,
releaseWaitingThreads = standardClose
)
} }
@ -885,29 +908,49 @@ open class Client<CONNECTION : Connection>(config: ClientConfiguration = ClientC
endpointIsRunning.lazySet(false) endpointIsRunning.lazySet(false)
pollerClosedLatch.countDown() pollerClosedLatch.countDown()
connection0 = null
if (autoReconnect) { if (autoReconnect) {
EventDispatcher.launchSequentially(EventDispatcher.CONNECT) { // clients can reconnect automatically ONLY if there are driver errors, otherwise it's explicit!
eventDispatch.CLOSE.launch {
logger.error("MUST AUTORECONNECT STARTING ON CLOSE ***********************************")
waitForEndpointShutdown() waitForEndpointShutdown()
// also wait for everyone else to shutdown!! // also wait for everyone else to shutdown!!
aeronDriver.internal.endPointUsages.forEach { aeronDriver.internal.endPointUsages.forEach {
it.waitForEndpointShutdown() if (it !== this@Client) {
it.waitForEndpointShutdown()
}
} }
// if we restart/reconnect too fast, errors from the previous run will still be present! // if we restart/reconnect too fast, errors from the previous run will still be present!
aeronDriver.delayLingerTimeout() aeronDriver.delayLingerTimeout()
reconnect() if (connectionTimeoutSec == 0) {
logger.info("Reconnecting...", Exception())
} else {
logger.info("Reconnecting... (timeout in $connectionTimeoutSec seconds)", Exception())
}
connect(
remoteAddress = address,
remoteAddressString = addressString,
remoteAddressPrettyString = addressPrettyString,
port1 = port1,
port2 = port2,
connectionTimeoutSec = connectionTimeoutSec,
reliable = reliable,
)
} }
} else {
logger.debug("[{}] Closed the Network Event Poller...", connection)
} }
logger.debug("[{}] Closed the Network Event Poller task.", connection)
} }
}) })
if (newSession) { listenerManager.notifyConnect(newConnection)
listenerManager.notifyConnect(newConnection)
} else { if (!newSession) {
(newConnection as SessionConnection).sendPendingMessages() (newConnection as SessionConnection).sendPendingMessages()
} }
} }
@ -1007,12 +1050,7 @@ open class Client<CONNECTION : Connection>(config: ClientConfiguration = ClientC
*/ */
fun close(closeEverything: Boolean = true) { fun close(closeEverything: Boolean = true) {
stopConnectOnShutdown = true stopConnectOnShutdown = true
close( close(closeEverything = closeEverything, sendDisconnectMessage = true, releaseWaitingThreads = true)
closeEverything = closeEverything,
sendDisconnectMessage = true,
notifyDisconnect = true,
releaseWaitingThreads = true
)
} }
override fun toString(): String { override fun toString(): String {

View File

@ -233,7 +233,7 @@ open class Server<CONNECTION : Connection>(config: ServerConfiguration = ServerC
// this manages existing clients (for cleanup + connection polling). This has a concurrent iterator, // this manages existing clients (for cleanup + connection polling). This has a concurrent iterator,
// so we can modify this as we go // so we can modify this as we go
connections.forEach { connection -> connections.forEach { connection ->
if (!(connection.isClosed() || connection.isClosedWithTimeout()) ) { if (connection.canPoll()) {
// Otherwise, poll the connection for messages // Otherwise, poll the connection for messages
pollCount += connection.poll() pollCount += connection.poll()
} else { } else {