Merge branch 'bugfix/test_idf_monitor_hangs' into 'master'
tools: Fix idf_monitor test case occasional hangup or failure Closes #39 See merge request idf/esp-idf!3651
This commit is contained in:
commit
03c59658cb
3 changed files with 128 additions and 77 deletions
|
@ -547,7 +547,7 @@ test_idf_monitor:
|
||||||
expire_in: 1 week
|
expire_in: 1 week
|
||||||
script:
|
script:
|
||||||
- cd ${IDF_PATH}/tools/test_idf_monitor
|
- cd ${IDF_PATH}/tools/test_idf_monitor
|
||||||
- ${IDF_PATH}/tools/ci/multirun_with_pyenv.sh ./run_test_idf_monitor.py
|
- ./run_test_idf_monitor.py
|
||||||
|
|
||||||
test_idf_size:
|
test_idf_size:
|
||||||
<<: *host_test_template
|
<<: *host_test_template
|
||||||
|
|
|
@ -373,7 +373,7 @@ class Monitor(object):
|
||||||
else:
|
else:
|
||||||
raise RuntimeError("Bad event data %r" % ((event_tag,data),))
|
raise RuntimeError("Bad event data %r" % ((event_tag,data),))
|
||||||
except SerialStopException:
|
except SerialStopException:
|
||||||
pass
|
sys.stderr.write(ANSI_NORMAL + "Stopping condition has been received\n")
|
||||||
finally:
|
finally:
|
||||||
try:
|
try:
|
||||||
self.console_reader.stop()
|
self.console_reader.stop()
|
||||||
|
|
|
@ -19,13 +19,16 @@ from __future__ import unicode_literals
|
||||||
from builtins import object
|
from builtins import object
|
||||||
from io import open
|
from io import open
|
||||||
import os
|
import os
|
||||||
import signal
|
import sys
|
||||||
import time
|
import time
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import socket
|
||||||
import pty
|
import pty
|
||||||
|
import filecmp
|
||||||
|
import threading
|
||||||
|
|
||||||
test_list = (
|
test_list = (
|
||||||
# Add new tests here. All files should be placed in in_dir. Columns are:
|
# Add new tests here. All files should be placed in IN_DIR. Columns are:
|
||||||
# Input file Filter string File with expected output
|
# Input file Filter string File with expected output
|
||||||
('in1.txt', '', 'in1f1.txt'),
|
('in1.txt', '', 'in1f1.txt'),
|
||||||
('in1.txt', '*:V', 'in1f1.txt'),
|
('in1.txt', '*:V', 'in1f1.txt'),
|
||||||
|
@ -35,89 +38,137 @@ test_list = (
|
||||||
('in2.txt', 'vfs', 'in2f2.txt'),
|
('in2.txt', 'vfs', 'in2f2.txt'),
|
||||||
)
|
)
|
||||||
|
|
||||||
in_dir = 'tests/' # tests are in this directory
|
IN_DIR = 'tests/' # tests are in this directory
|
||||||
out_dir = 'outputs/' # test results are written to this directory (kept only for debugging purposes)
|
OUT_DIR = 'outputs/' # test results are written to this directory (kept only for debugging purposes)
|
||||||
socat_in = './socatfile'# temporary socat file (deleted after run)
|
ERR_OUT = OUT_DIR + 'monitor_error_output'
|
||||||
err_out = out_dir + 'monitor_error_output'
|
ELF_FILE = './dummy.elf' # ELF file used for starting the monitor
|
||||||
elf_file = './dummy.elf' # ELF file used for starting the monitor
|
IDF_MONITOR = '{}/tools/idf_monitor.py'.format(os.getenv("IDF_PATH"))
|
||||||
idf_monitor = '{}/tools/idf_monitor.py'.format(os.getenv("IDF_PATH"))
|
|
||||||
|
|
||||||
class SocatRunner(object):
|
# connection related to communicating with idf_monitor through sockets
|
||||||
"""
|
HOST = 'localhost'
|
||||||
Runs socat in the background for creating a socket.
|
# blocking socket operations are used with timeout:
|
||||||
"""
|
SOCKET_TIMEOUT = 30
|
||||||
|
# idf_monitor is killed if it doesn't respond:
|
||||||
|
MONITOR_TIMEOUT = 60
|
||||||
|
# the test is restarted after failure (idf_monitor has to be killed):
|
||||||
|
RETRIES_PER_TEST = 5
|
||||||
|
|
||||||
|
def monitor_timeout(process):
|
||||||
|
if process.poll() is None:
|
||||||
|
# idf_monitor is still running
|
||||||
|
try:
|
||||||
|
process.kill()
|
||||||
|
print('\tidf_monitor was killed because it did not finish in {} seconds.'.format(MONITOR_TIMEOUT))
|
||||||
|
except OSError as e:
|
||||||
|
if e.errno == errno.ESRCH:
|
||||||
|
# ignores a possible race condition which can occur when the process exits between poll() and kill()
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
|
class TestRunner(object):
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
# Wait for a connection on port 2399 and then run "tail" which will send the file content to that port. Tail
|
self.serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||||
# is used because it can start even when the file doesn't exists and remains running after the file has been
|
self.serversocket.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1)
|
||||||
# processed. This way the idf_monitor can end the communication when it received the content. Using regular
|
self.serversocket.bind((HOST, 0))
|
||||||
# "cat" would invoke exception in idf_monitor.
|
self.port = self.serversocket.getsockname()[1]
|
||||||
# Note: "-c 1GB" option is used to force sending the whole file under the assumption that all testing files
|
self.serversocket.listen(5)
|
||||||
# will be much smaller than 1G.
|
|
||||||
# Note: A temporary file socat_in is used in order to be able to start socat only once instead of for each test.
|
|
||||||
socat_cmd = ['socat',
|
|
||||||
'-U', # unidirectional pipe from file to port
|
|
||||||
'TCP4-LISTEN:2399,reuseaddr,fork',
|
|
||||||
'exec:"tail -c 1GB -F ' + socat_in + '"']
|
|
||||||
print(' '.join(socat_cmd))
|
|
||||||
self._socat_process = subprocess.Popen(socat_cmd, preexec_fn=os.setsid) # See __exit__ for os.setsid
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def __exit__(self, type, value, traceback):
|
def __exit__(self, type, value, traceback):
|
||||||
# self._socat_process.terminate() doesn't enough because each connection to the port starts a new socat and a
|
self.serversocket.shutdown(socket.SHUT_RDWR)
|
||||||
# tail processes
|
self.serversocket.close()
|
||||||
os.killpg(os.getpgid(self._socat_process.pid), signal.SIGTERM)
|
print('Socket was closed successfully')
|
||||||
# Note: this terminates all the processes but makes the script UNIX-only
|
|
||||||
|
|
||||||
def cleanup():
|
def accept_connection(self):
|
||||||
try:
|
""" returns a socket for sending the input for idf_monitor which must be closed before calling this again. """
|
||||||
os.remove(socat_in)
|
(clientsocket, address) = self.serversocket.accept()
|
||||||
except:
|
# exception will be thrown here if the idf_monitor didn't connect in time
|
||||||
# ignore if the file doesn't exist
|
clientsocket.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1)
|
||||||
pass
|
return clientsocket
|
||||||
|
|
||||||
def main():
|
def test_iteration(runner, test, startup_timeout):
|
||||||
start = time.time()
|
print('\nRunning test on {} with filter "{}" and expecting {}'.format(test[0], test[1], test[2]))
|
||||||
cleanup() # avoid sending old content
|
|
||||||
if not os.path.exists(out_dir):
|
|
||||||
os.mkdir(out_dir)
|
|
||||||
try:
|
|
||||||
with SocatRunner():
|
|
||||||
# Sleep is necessary to make sure that socat is already listening. Only one sleep is used per run (this is
|
|
||||||
# another reason while the temporary socat_in file is used instead of directly reading the test files).
|
|
||||||
time.sleep(1)
|
|
||||||
for t in test_list:
|
|
||||||
print('Running test on {} with filter "{}" and expecting {}'.format(t[0], t[1], t[2]))
|
|
||||||
with open(in_dir + t[0], "r", encoding='utf-8') as i_f, open(socat_in, "w", encoding='utf-8') as s_f:
|
|
||||||
print('cat {} > {}'.format(i_f.name, s_f.name))
|
|
||||||
for line in i_f:
|
|
||||||
s_f.write(line)
|
|
||||||
idf_exit_sequence = b'\x1d\n'
|
|
||||||
print('echo "<exit>" >> {}'.format(s_f.name))
|
|
||||||
s_f.write(idf_exit_sequence.decode())
|
|
||||||
monitor_cmd = [idf_monitor,
|
|
||||||
'--port', 'socket://localhost:2399',
|
|
||||||
'--print_filter', t[1],
|
|
||||||
elf_file]
|
|
||||||
with open(out_dir + t[2], "w", encoding='utf-8') as o_f, open(err_out, "w", encoding='utf-8') as e_f:
|
|
||||||
try:
|
try:
|
||||||
|
with open(OUT_DIR + test[2], "w", encoding='utf-8') as o_f, open(ERR_OUT, "w", encoding='utf-8') as e_f:
|
||||||
|
monitor_cmd = [sys.executable,
|
||||||
|
IDF_MONITOR, '--port', 'socket://{}:{}'.format(HOST, runner.port), '--print_filter', test[1], ELF_FILE]
|
||||||
(master_fd, slave_fd) = pty.openpty()
|
(master_fd, slave_fd) = pty.openpty()
|
||||||
print(' '.join(monitor_cmd), end=' ')
|
print('\t', ' '.join(monitor_cmd), sep='')
|
||||||
print(' > {} 2> {} < {}'.format(o_f.name, e_f.name, os.ttyname(slave_fd)))
|
print('\tstdout="{}" stderr="{}" stdin="{}"'.format(o_f.name, e_f.name, os.ttyname(slave_fd)))
|
||||||
proc = subprocess.Popen(monitor_cmd, stdin=slave_fd, stdout=o_f, stderr=e_f,
|
start = time.time()
|
||||||
close_fds=True)
|
# the server socket is alive so idf_monitor can start now
|
||||||
proc.wait()
|
proc = subprocess.Popen(monitor_cmd, stdin=slave_fd, stdout=o_f, stderr=e_f, close_fds=True, bufsize=-1)
|
||||||
|
# - idf_monitor's stdin needs to be connected to some pseudo-tty in docker image even when it is not
|
||||||
|
# used at all
|
||||||
|
# - setting bufsize is needed because the default value is different on Python 2 and 3
|
||||||
|
# - the default close_fds is also different on Python 2 and 3
|
||||||
|
monitor_watchdog = threading.Timer(MONITOR_TIMEOUT, monitor_timeout, [proc])
|
||||||
|
monitor_watchdog.start()
|
||||||
|
client = runner.accept_connection()
|
||||||
|
# The connection is ready but idf_monitor cannot yet receive data. This seems to happen on Ubuntu 16.04 LTS
|
||||||
|
# and is not related to the version of Python or pyserial. There seems to be no reliable way other than to
|
||||||
|
# do a sleep. The idf_monitor header in ERR_OUT could be checked on Python 2 but the file is not flushed
|
||||||
|
# on Python 3.
|
||||||
|
print('\tSleeping for {:.2f} seconds'.format(startup_timeout))
|
||||||
|
time.sleep(startup_timeout)
|
||||||
|
with open(IN_DIR + test[0], "r", encoding='utf-8') as f:
|
||||||
|
print('\tSending {} to the socket'.format(f.name))
|
||||||
|
for line in f:
|
||||||
|
client.sendall(line.encode())
|
||||||
|
idf_exit_sequence = b'\x1d\n'
|
||||||
|
print('\tSending <exit> to the socket')
|
||||||
|
client.sendall(idf_exit_sequence)
|
||||||
|
ret = proc.wait()
|
||||||
|
end = time.time()
|
||||||
|
print('\tidf_monitor exited after {:.2f} seconds'.format(end - start))
|
||||||
|
if ret < 0:
|
||||||
|
raise RuntimeError('idf_monitor was terminated by signal {}'.format(-ret))
|
||||||
|
# idf_monitor needs to end before the socket is closed in order to exit without an exception.
|
||||||
finally:
|
finally:
|
||||||
|
if monitor_watchdog:
|
||||||
|
monitor_watchdog.cancel()
|
||||||
os.close(slave_fd)
|
os.close(slave_fd)
|
||||||
os.close(master_fd)
|
os.close(master_fd)
|
||||||
diff_cmd = ['diff', in_dir + t[2], out_dir + t[2]]
|
if client:
|
||||||
print(' '.join(diff_cmd))
|
client.close()
|
||||||
subprocess.check_call(diff_cmd)
|
print('\tThe client was closed successfully')
|
||||||
print('Test has passed')
|
f1 = IN_DIR + test[2]
|
||||||
finally:
|
f2 = OUT_DIR + test[2]
|
||||||
cleanup()
|
print('\tdiff {} {}'.format(f1, f2))
|
||||||
|
if filecmp.cmp(f1, f2, shallow=False):
|
||||||
|
print('\tTest has passed')
|
||||||
|
else:
|
||||||
|
raise RuntimeError("The contents of the files are different. Please examine the artifacts.")
|
||||||
|
|
||||||
end = time.time()
|
def main():
|
||||||
print('Execution took {:.2f} seconds'.format(end - start))
|
gstart = time.time()
|
||||||
|
if not os.path.exists(OUT_DIR):
|
||||||
|
os.mkdir(OUT_DIR)
|
||||||
|
|
||||||
|
socket.setdefaulttimeout(SOCKET_TIMEOUT)
|
||||||
|
|
||||||
|
for test in test_list:
|
||||||
|
startup_timeout = 1.0
|
||||||
|
for i in range(RETRIES_PER_TEST):
|
||||||
|
with TestRunner() as runner:
|
||||||
|
# Each test (and each retry) is run with a different port (and server socket). This is done for
|
||||||
|
# the CI run where retry with a different socket is necessary to pass the test. According to the
|
||||||
|
# experiments, retry with the same port (and server socket) is not sufficient.
|
||||||
|
try:
|
||||||
|
test_iteration(runner, test, startup_timeout)
|
||||||
|
# no more retries if test_iteration exited without an exception
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
if i < RETRIES_PER_TEST - 1:
|
||||||
|
print('Test has failed with exception:', e)
|
||||||
|
print('Another attempt will be made.')
|
||||||
|
startup_timeout += 0.5
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
|
gend = time.time()
|
||||||
|
print('Execution took {:.2f} seconds\n'.format(gend - gstart))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
Loading…
Reference in a new issue