1
0
mirror of https://github.com/CumulusNetworks/ifupdown2.git synced 2024-05-06 15:54:50 +00:00

nlmanager: rdnbrd "Interrupted system call" traceback in nlmanager

Signed-off-by: Daniel Walton <dwalton@cumulusnetworks.com>
Reviewed-by:   roopa@cumulusnetworks.com
Ticket: CM-12487

Signed-off-by: Julien Fortin <julien@cumulusnetworks.com>
This commit is contained in:
Julien Fortin
2016-08-20 20:43:12 -07:00
committed by Nikhil
parent 5859b3228e
commit cee2e13ffb
2 changed files with 103 additions and 58 deletions

View File

@@ -15,7 +15,11 @@ class NetlinkError(Exception):
pass
class NetlinkNoAddressError(Exception):
class NetlinkNoAddressError(NetlinkError):
pass
class NetlinkInterruptedSystemCall(NetlinkError):
pass
@@ -137,13 +141,15 @@ class NetlinkManager(object):
# packet via the decode_packet call...so avoid printing
# two messages for one packet.
if not nlpacket.debug:
log.debug("TXed %12s, pid %d, seq %d, %d bytes" %
log.info("TXed %12s, pid %d, seq %d, %d bytes" %
(nlpacket.get_type_string(), nlpacket.pid, nlpacket.seq, nlpacket.length))
header_PACK = NetlinkPacket.header_PACK
header_LEN = NetlinkPacket.header_LEN
null_read = 0
MAX_NULL_READS = 30
nle_intr_count = 0
MAX_NULL_READS = 3
MAX_ERROR_NLE_INTR = 3
msgs = []
# Now listen to our socket and wait for the reply
@@ -154,21 +160,51 @@ class NetlinkManager(object):
return msgs
# Only block for 1 second so we can wake up to see if self.shutdown_flag is True
(readable, writeable, exceptional) = select([self.tx_socket, ], [], [self.tx_socket, ], 1)
try:
(readable, writeable, exceptional) = select([self.tx_socket, ], [], [self.tx_socket, ], 1)
except Exception as e:
# errno 4 is EINTR (Interrupted system call)
if isinstance(e.args, tuple) and e[0] == 4:
nle_intr_count += 1
log.info("select() Interrupted system call %d/%d" % (nle_intr_count, MAX_ERROR_NLE_INTR))
if not readable:
if nle_intr_count >= MAX_ERROR_NLE_INTR:
raise NetlinkInterruptedSystemCall(error_str)
else:
continue
else:
raise
if readable:
null_read = 0
else:
null_read += 1
# Safety net to make sure we do not spend too much time in
# this while True loop
if null_read >= MAX_NULL_READS:
log.warning('Socket was not readable for %d attempts' % null_read)
log.info('Socket was not readable for %d attempts' % null_read)
return msgs
continue
else:
continue
for s in readable:
data = s.recv(4096)
data = []
try:
data = s.recv(4096)
except Exception as e:
# errno 4 is EINTR (Interrupted system call)
if isinstance(e.args, tuple) and e[0] == 4:
nle_intr_count += 1
log.info("%s: recv() Interrupted system call %d/%d" % (s, nle_intr_count, MAX_ERROR_NLE_INTR))
if nle_intr_count >= MAX_ERROR_NLE_INTR:
raise NetlinkInterruptedSystemCall(error_str)
else:
continue
else:
raise
if not data:
log.info('RXed zero length data, the socket is closed')
@@ -207,12 +243,20 @@ class NetlinkManager(object):
# 0 is NLE_SUCCESS...everything else is a true error
if error_code:
error_code_str = msg.error_to_string.get(error_code)
if error_code_str != 'None':
error_str = 'Operation failed with \'%s\' (%s)' % (error_code_str, debug_str)
else:
error_str = 'Operation failed with code %s (%s)' % (error_code, debug_str)
if error_code == Error.NLE_NOADDR:
raise NetlinkNoAddressError(error_str)
elif error_code == Error.NLE_INTR:
nle_intr_count += 1
log.info("%s: RXed NLE_INTR Interrupted system call %d/%d" % (s, nle_intr_count, MAX_ERROR_NLE_INTR))
if nle_intr_count >= MAX_ERROR_NLE_INTR:
raise NetlinkInterruptedSystemCall(error_str)
else:
if error_code_str == 'None':
try:
@@ -227,6 +271,7 @@ class NetlinkManager(object):
# No ACK...create a nlpacket object and append it to msgs
else:
nle_intr_count = 0
# If debugs are enabled we will print the contents of the
# packet via the decode_packet call...so avoid printing