Commit 43fe623f authored by Nicholas Shindler's avatar Nicholas Shindler
Browse files

(YAY) correctly handle socket errors and reconnect on fail

parent 11bb9f82
......@@ -87,7 +87,7 @@ class Client:
self.discover_probes()
time.sleep(0.1)
# toggle state to record that the probes have been looked up and stored
self.found_probes=False
self.found_probes=True
print "Protocol version = 0x%04x, sample rate is %d Hz, %d probes" % (
self.version,
......@@ -110,13 +110,44 @@ class Client:
print "Client Disconnet Failed"
raise e
def reconnect(self):
def reconnect(self, host, port=4400):
# attempt to reconnect the client to the server
try:
# reconnect to the server
self.stop_listener()
time.sleep(2)
# clear up previous listener/socket connection
#self.listener.join()
self.s.close()
self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.s.connect((host, port))
self.recvbuf = ""
print "Reconnected"
self.start_listener()
# Expect the server to send us the protocol version, unsolicited:
(t, l, v) = self.get_next_message()
if (t != MP_TLV_PROTOCOL_VERSION) or (l != 2):
raise RuntimeError("unexpected message t=%d l=%d" % (t, l))
self.version = struct.unpack("<H", v)[0]
# Query the tick rate:
self.send_message(make_tlv(MP_TLV_HZ))
(t, l, v) = self.get_next_message()
while t != MP_TLV_HZ:
print ("message id {}").format(t)
time.sleep(0.1)
(t, l, v) = self.get_next_message()
if (t != MP_TLV_HZ) or (l != 2):
raise RuntimeError("unexpected message t=%d l=%d" % (t, l))
self.hz = struct.unpack("<H", v)[0]
print "Protocol version = 0x%04x, sample rate is %d Hz, %d probes" % (
self.version,
self.hz,
len(self.probe_defs.keys()),
)
except Exception as e:
print "Client Connection Failed"
raise e
## ----- Probe Methods ----- ##
......@@ -203,12 +234,12 @@ class Client:
raise RuntimeWarning("Probe list has not been discovered yet")
if type(t) != int:
raise ValueError("probe id must be an int")
print(t in self.probe_defs)
if (t not in self.probe_defs):
#print(t in self.probe_defs.keys())
if (t not in self.probe_defs.keys()):
raise LookupError("Probe %i not found in probe defs" % t)
return (self.probe_defs[t][0], self.probe_defs[t][1])
except Exception as e:
raise e
except Exception:
raise
def enable_probes(self, probes):
# enables one or more probes.
......@@ -301,6 +332,20 @@ class Client:
message_text = message_text[0 : len(message_text) - 1]
print ("Debug message (t = %d): %s" % (tick, message_text))
def get_debug_message(self, message):
# handles debug msgs
(t, l, v, message) = next_tlv(message)
if t != MP_TLV_CURRENT_TICK:
raise RuntimeError("unexpected type = %d" % t)
tick = struct.unpack("<I", v)[0]
(t, l, v, message) = next_tlv(message)
if t != MP_TLV_MESSAGE_TEXT:
raise RuntimeError("unexpected type = %d" % t)
message_text = v[0 : len(v) - 1]
if message_text[-1] == "\n":
message_text = message_text[0 : len(message_text) - 1]
return ("Debug message (t = %d): %s" % (tick, message_text))
def do_async_message(self, t, l, v):
# async msg handler
if t == MP_TLV_PROBE_DATA:
......
......@@ -9,6 +9,8 @@ import os
import sys
import subprocess
import traceback
from socket import error as SocketError
import errno
# add local python scripts to the path so that they can be iported
sys.path.append(os.path.abspath(os.path.join(sys.path[0], "../")))
......@@ -31,6 +33,7 @@ class HVClient(Client):
lstnr = []
srvs = []
using_lidar = False
recording = False
# constructor for the hv bridge instance of the client connection
def __init__(self, hostname, services):
......@@ -107,7 +110,7 @@ class HVClient(Client):
self.enable_services()
time.sleep(1)
Client.start_probes(self)
#Client.start_probes(self)
except Exception as e:
err = traceback.format_exc()
rospy.logerr(err)
......@@ -138,6 +141,7 @@ class HVClient(Client):
self.lstnr = []
self.srvs = []
self.stopped = True
self.recording = False
def restart(self):
# stop everything and restart the connection
......@@ -165,11 +169,11 @@ class HVClient(Client):
def write(self):
# gather all messages from controll class instances and write to the robot
msgs = []
try:
get_msg_arr = (
lambda a, b: get_msg_arr(a + b[0], b[1:]) if len(b) > 1 else a + b[0]
)
msgs = []
# collect all subscription messages to write to robot
if(len(self.cmdr) > 0):
msgs = get_msg_arr([], map((lambda a: a.get_msgs()), self.cmdr))
......@@ -180,10 +184,14 @@ class HVClient(Client):
if len(msgs) == 0:
return
for (probe, value) in msgs:
if(type(value) == tuple or type(value) == list):
value_type = type(value)
if(value_type == tuple or value_type == list):
#raise TypeError("message value incorrectly formatted")
rospy.logerr("Probe [%s] has an incorrectly formatted value", probe)
continue
if(value_type != str and value_type != int and value_type != float):
rospy.logerr("Probe [%s] has an incorrectly type value", probe)
continue
probe_id = Client.lookup_probe_id(self, probe)
probe_type = Client.lookup_probe_type(self, probe_id)
......@@ -191,16 +199,25 @@ class HVClient(Client):
probe_id, Client.encode_probe_value(self, probe_type, value)
)
Client.send_message(self, make_tlv(MP_TLV_WRITE_PROBES, v))
except SocketError as e:
print msgs
if e.errno == errno.ECONNRESET:
rospy.logerr("CONNECTION RESET BY PEER WHILE WRITING TO SERVER")
rospy.loginfo("Reconnecting to Server")
#self.restart()
Client.reconnect(self, self.hostname)
else:
raise e
except Exception as e:
raise e
def do_async_message(self, t, l, v):
# async msg handler
if t == MP_TLV_PROBE_DATA:
self.read_probe_msg(v)
self.read_probe_msg(v) if self.recording else self.capture_data(v)
return True
elif t == MP_TLV_DEBUG_MESSAGE:
Client.do_debug_message(self, v)
rospy.logdebug(Client.get_debug_message(self, v))
return True
elif t == MP_TLV_MISSED_DATA:
print ("Missed %d probe messages" % struct.unpack("<I", v))
......@@ -218,10 +235,18 @@ class HVClient(Client):
(t, l, v, data) = next_tlv(data)
(probe_name, probe_type) = Client.lookup_probe(self, t)
if any([lst.needs(probe_name) for lst in self.lstnr]):
val = Client.decode_probe_data(self, probe_type, v)
# print ("New Message[name: " + str(probe_name) + " val: " + str(val) + "]")
[serv.add(probe_name, val) for serv in self.lstnr]
try:
# attempt to decode the probe data
val = Client.decode_probe_data(self, probe_type, v)
#print ("New Message[name: " + str(probe_name) + " val: " + str(val) + "]")
# if the data is valid assign it to relavent listeners
[serv.add(probe_name, val) for serv in self.lstnr]
except RuntimeError as e:
# Catch the runtime error emitted when the probe data decoding fails
rospy.logwarn("Error processing probe data of type %s", type_table[probe_type][0])
err = traceback.format_exc()
rospy.logwarn(err)
continue
else:
# msg can be used for debugging. otherwise this block is probably not needed
# rospy.loginfo("Probe info not needed: %s" % probe_name)
......
......@@ -2,7 +2,9 @@
import rospy
import argparse
import traceback
import errno
import sys, os, time
from socket import error as SocketError
# add local python scripts to the path so that they can be iported
sys.path.append(os.path.abspath(os.path.join(sys.path[0], "../")))
......@@ -167,51 +169,72 @@ if __name__ == "__main__":
# handle closing everything nicely when ros shutsdown
rospy.on_shutdown(hv_bot.stop)
# start the connection to the robot
hv_bot.start()
time.sleep(1)
# set rate based on the mindprobe refresh rate.
# default to 100. (mp runs at 200)
hz = 100
if hv_bot.hz:
hz = hv_bot.hz / 2
# Start the ROS main loop
rate = rospy.Rate(hz)
rospy.loginfo(
"starting publishing data from " + host_name + " at " + str(hz) + "hz"
)
while not rospy.is_shutdown():
# write any messages from ROS to the robot
# hv_bot.capture(1 / 200)
hv_bot.write()
hv_bot.publish()
rate.sleep()
try:
# start the connection to the robot
hv_bot.start()
time.sleep(1)
# set rate based on the mindprobe refresh rate.
# default to 100. (mp runs at 200)
hz = 100
if hv_bot.hz:
hz = hv_bot.hz / 2
# Start the ROS main loop
rate = rospy.Rate(hz)
rospy.loginfo(
"starting publishing data from " + host_name + " at " + str(hz) + "hz"
)
hv_bot.start_probes()
hv_bot.recording = True
while not rospy.is_shutdown():
# write any messages from ROS to the robot
# hv_bot.capture(1 / 200)
hv_bot.write()
hv_bot.publish()
rate.sleep()
except OSError as e:
rospy.logerr("Connection failed!")
err = traceback.format_exc()
rospy.logerr(e)
rospy.loginfo(err)
if e.errno == errno.ECONNRESET:
rospy.logerr("CONNECTION RESET BY PEER")
if(hv_bot != None and not hv_bot.stopped):
rospy.loginfo("Reconnecting to Server")
hv_bot.reconnect(host_name)
except SocketError as e:
# this means that the connection has failed at some point
# if the socket error has not been handled at a previous point, assume the connection is broken
err = traceback.format_exc()
rospy.logerr(e)
rospy.loginfo(err)
hv_bot.reconnect()
except RuntimeError as e:
err = traceback.format_exc()
rospy.logerr(e)
rospy.loginfo(err)
except RuntimeWarning as e:
rospy.logwarn(e)
pass
except:
# if not a specifically handled error, raise end service
raise
except rospy.ROSInterruptException:
rospy.logerr("ROS interupt called -- Ending Processes")
except OSError as e:
rospy.logerr("Connection failed!")
err = traceback.format_exc()
rospy.logerr(e)
rospy.loginfo(err)
if e.errno == errno.ECONNRESET:
rospy.logerr("CONNECTION RESET BY PEER")
if(hv_bot != None and not hv_bot.stopped):
rospy.loginfo("Reconnecting to Server")
hv_bot.reconnect()
except Exception as e:
# print the error type - to be used to add better error handling in
rospy.logdebug("the type should be: %s" % type(e))
# general error handling
err = traceback.format_exc()
rospy.logerr(e)
rospy.loginfo(err)
print("the type should be: %s" % type(e))
rospy.logerr(err)
rospy.logfatal(e)
pass
finally:
# last code to run before everything closes
if(hv_bot != None and not hv_bot.stopped):
rospy.loginfo("Stopping Client")
hv_bot.stop()
rospy.loginfo("GOODBYE")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment