Creating publications and handshaking CANNOT be done on the main processing thread

This commit is contained in:
Robinson 2023-11-03 18:21:14 +01:00
parent af19049519
commit a5286899b7
No known key found for this signature in database
GPG Key ID: 8E7DB78588BD6F5C
2 changed files with 198 additions and 180 deletions

View File

@ -223,6 +223,8 @@ internal class ServerHandshake<CONNECTION : Connection>(
/**
* NOTE: This must not be called on the main thread because it is blocking!
*
* @return true if the connection succeeded; false if the handshake poller should immediately close the publication
*/
fun processIpcHandshakeMessageServer(
@ -331,6 +333,7 @@ internal class ServerHandshake<CONNECTION : Connection>(
var newConnection: CONNECTION? = null
try {
// Create a pub/sub at the given address and port, using the given stream ID.
// NOTE: This must not be called on the main thread because it is blocking!
val newConnectionDriver = ServerConnectionDriver.build(
aeronDriver = aeronDriver,
ipInfo = server.ipInfo,
@ -422,7 +425,7 @@ internal class ServerHandshake<CONNECTION : Connection>(
}
/**
* note: CANNOT be called in action dispatch. ALWAYS ON SAME THREAD
* NOTE: This must not be called on the main thread because it is blocking!
*
* @return true if the connection succeeded; false if the handshake poller should immediately close the publication
*/
@ -585,6 +588,7 @@ internal class ServerHandshake<CONNECTION : Connection>(
var newConnection: CONNECTION? = null
try {
// Create a pub/sub at the given address and port, using the given stream ID.
// NOTE: This must not be called on the main thread because it is blocking!
val newConnectionDriver = ServerConnectionDriver.build(
ipInfo = server.ipInfo,
aeronDriver = aeronDriver,

View File

@ -26,6 +26,7 @@ import dorkbox.network.aeron.AeronDriver
import dorkbox.network.aeron.AeronDriver.Companion.uriHandshake
import dorkbox.network.aeron.AeronPoller
import dorkbox.network.connection.Connection
import dorkbox.network.connection.EventDispatcher
import dorkbox.network.connection.IpInfo
import dorkbox.network.exceptions.ServerException
import dorkbox.network.exceptions.ServerHandshakeException
@ -115,16 +116,20 @@ internal object ServerHandshakePollers {
// we should immediately remove the logbuffer for this! Aeron will **EVENTUALLY** remove the logbuffer, but if errors
// and connections occur too quickly (within the cleanup/linger period), we can run out of memory!
driver.deleteLogFile(image)
return
}
// NOTE: This MUST happen in a separate thread so that we can take as long as we need when creating publications and handshaking,
// because under load this will REGULARLY time out! Under no circumstances can this happen in the main processing thread!!
EventDispatcher.MULTI.launch {
// we have read all the data, now dispatch it.
// HandshakeMessage.HELLO
// HandshakeMessage.DONE
val messageState = message.state
val connectKey = message.connectKey
if (messageState == HandshakeMessage.HELLO) {
// we create a NEW publication for the handshake, which connects directly to the client handshake subscription
@ -133,13 +138,14 @@ internal object ServerHandshakePollers {
// this will always connect to the CLIENT handshake subscription!
val publication = try {
driver.addExclusivePublication(publicationUri, message.streamId, logInfo, true)
} catch (e: Exception) {
}
catch (e: Exception) {
// we should immediately remove the logbuffer for this! Aeron will **EVENTUALLY** remove the logbuffer, but if errors
// and connections occur too quickly (within the cleanup/linger period), we can run out of memory!
driver.deleteLogFile(image)
server.listenerManager.notifyError(ServerHandshakeException("[$logInfo] Cannot create IPC publication back to client remote process", e))
return
return@launch
}
try {
@ -147,13 +153,14 @@ internal object ServerHandshakePollers {
driver.waitForConnection(publication, handshakeTimeoutNs, logInfo) { cause ->
ServerTimedoutException("$logInfo publication cannot connect with client in ${Sys.getTimePrettyFull(handshakeTimeoutNs)}", cause)
}
} catch (e: Exception) {
}
catch (e: Exception) {
// we should immediately remove the logbuffer for this! Aeron will **EVENTUALLY** remove the logbuffer, but if errors
// and connections occur too quickly (within the cleanup/linger period), we can run out of memory!
driver.deleteLogFile(image)
server.listenerManager.notifyError(ServerHandshakeException("[$logInfo] Cannot create IPC publication back to client remote process", e))
return
return@launch
}
@ -171,7 +178,8 @@ internal object ServerHandshakePollers {
if (success) {
publications[connectKey] = publication
} else {
}
else {
try {
// we might not be able to close this connection.
driver.close(publication, logInfo)
@ -180,7 +188,8 @@ internal object ServerHandshakePollers {
server.listenerManager.notifyError(e)
}
}
} catch (e: Exception) {
}
catch (e: Exception) {
// we should immediately remove the logbuffer for this! Aeron will **EVENTUALLY** remove the logbuffer, but if errors
// and connections occur too quickly (within the cleanup/linger period), we can run out of memory!
driver.deleteLogFile(image)
@ -205,7 +214,7 @@ internal object ServerHandshakePollers {
driver.deleteLogFile(image)
server.listenerManager.notifyError(ServerHandshakeException("[$logInfo] No publication back to IPC"))
return
return@launch
}
try {
@ -234,6 +243,7 @@ internal object ServerHandshakePollers {
}
}
}
}
fun close() {
publications.forEach { (connectKey, publication) ->
@ -365,7 +375,9 @@ internal object ServerHandshakePollers {
return
}
// NOTE: This MUST happen in a separate thread so that we can take as long as we need when creating publications and handshaking,
// because under load this will REGULARLY time out! Under no circumstances can this happen in the main processing thread!!
EventDispatcher.MULTI.launch {
// HandshakeMessage.HELLO
// HandshakeMessage.DONE
val messageState = message.state
@ -390,11 +402,12 @@ internal object ServerHandshakePollers {
driver.deleteLogFile(image)
server.listenerManager.notifyError(ServerHandshakeException("[$logInfo] Cannot create publication back to $clientAddressString", e))
return
return@launch
}
try {
// we actually have to wait for it to connect before we continue
// we actually have to wait for it to connect before we continue.
//
driver.waitForConnection(publication, handshakeTimeoutNs, logInfo) { cause ->
ServerTimedoutException("$logInfo publication cannot connect with client in ${Sys.getTimePrettyFull(handshakeTimeoutNs)}", cause)
}
@ -404,7 +417,7 @@ internal object ServerHandshakePollers {
driver.deleteLogFile(image)
server.listenerManager.notifyError(ServerHandshakeException("[$logInfo] Cannot create publication back to $clientAddressString", e))
return
return@launch
}
try {
@ -465,7 +478,7 @@ internal object ServerHandshakePollers {
driver.deleteLogFile(image)
server.listenerManager.notifyError(ServerHandshakeException("[$logInfo] No publication back to $clientAddressString"))
return
return@launch
}
try {
@ -494,6 +507,7 @@ internal object ServerHandshakePollers {
driver.deleteLogFile(image)
}
}
}
fun close() {
publications.forEach { (connectKey, publication) ->