diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 97d29cb4a..599958f39 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -547,7 +547,7 @@ test_idf_monitor: expire_in: 1 week script: - cd ${IDF_PATH}/tools/test_idf_monitor - - ${IDF_PATH}/tools/ci/multirun_with_pyenv.sh ./run_test_idf_monitor.py + - ./run_test_idf_monitor.py test_idf_size: <<: *host_test_template diff --git a/tools/idf_monitor.py b/tools/idf_monitor.py index f09e934a8..92ef71d96 100755 --- a/tools/idf_monitor.py +++ b/tools/idf_monitor.py @@ -373,7 +373,7 @@ class Monitor(object): else: raise RuntimeError("Bad event data %r" % ((event_tag,data),)) except SerialStopException: - pass + sys.stderr.write(ANSI_NORMAL + "Stopping condition has been received\n") finally: try: self.console_reader.stop() diff --git a/tools/test_idf_monitor/run_test_idf_monitor.py b/tools/test_idf_monitor/run_test_idf_monitor.py index da59bea86..748c37903 100755 --- a/tools/test_idf_monitor/run_test_idf_monitor.py +++ b/tools/test_idf_monitor/run_test_idf_monitor.py @@ -19,13 +19,16 @@ from __future__ import unicode_literals from builtins import object from io import open import os -import signal +import sys import time import subprocess +import socket import pty +import filecmp +import threading test_list = ( - # Add new tests here. All files should be placed in in_dir. Columns are: + # Add new tests here. All files should be placed in IN_DIR. Columns are: # Input file Filter string File with expected output ('in1.txt', '', 'in1f1.txt'), ('in1.txt', '*:V', 'in1f1.txt'), @@ -35,89 +38,137 @@ test_list = ( ('in2.txt', 'vfs', 'in2f2.txt'), ) -in_dir = 'tests/' # tests are in this directory -out_dir = 'outputs/' # test results are written to this directory (kept only for debugging purposes) -socat_in = './socatfile'# temporary socat file (deleted after run) -err_out = out_dir + 'monitor_error_output' -elf_file = './dummy.elf' # ELF file used for starting the monitor -idf_monitor = '{}/tools/idf_monitor.py'.format(os.getenv("IDF_PATH")) +IN_DIR = 'tests/' # tests are in this directory +OUT_DIR = 'outputs/' # test results are written to this directory (kept only for debugging purposes) +ERR_OUT = OUT_DIR + 'monitor_error_output' +ELF_FILE = './dummy.elf' # ELF file used for starting the monitor +IDF_MONITOR = '{}/tools/idf_monitor.py'.format(os.getenv("IDF_PATH")) -class SocatRunner(object): - """ - Runs socat in the background for creating a socket. - """ +# connection related to communicating with idf_monitor through sockets +HOST = 'localhost' +# blocking socket operations are used with timeout: +SOCKET_TIMEOUT = 30 +# idf_monitor is killed if it doesn't respond: +MONITOR_TIMEOUT = 60 +# the test is restarted after failure (idf_monitor has to be killed): +RETRIES_PER_TEST = 5 + +def monitor_timeout(process): + if process.poll() is None: + # idf_monitor is still running + try: + process.kill() + print('\tidf_monitor was killed because it did not finish in {} seconds.'.format(MONITOR_TIMEOUT)) + except OSError as e: + if e.errno == errno.ESRCH: + # ignores a possible race condition which can occur when the process exits between poll() and kill() + pass + else: + raise + +class TestRunner(object): def __enter__(self): - # Wait for a connection on port 2399 and then run "tail" which will send the file content to that port. Tail - # is used because it can start even when the file doesn't exists and remains running after the file has been - # processed. This way the idf_monitor can end the communication when it received the content. Using regular - # "cat" would invoke exception in idf_monitor. - # Note: "-c 1GB" option is used to force sending the whole file under the assumption that all testing files - # will be much smaller than 1G. - # Note: A temporary file socat_in is used in order to be able to start socat only once instead of for each test. - socat_cmd = ['socat', - '-U', # unidirectional pipe from file to port - 'TCP4-LISTEN:2399,reuseaddr,fork', - 'exec:"tail -c 1GB -F ' + socat_in + '"'] - print(' '.join(socat_cmd)) - self._socat_process = subprocess.Popen(socat_cmd, preexec_fn=os.setsid) # See __exit__ for os.setsid + self.serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.serversocket.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1) + self.serversocket.bind((HOST, 0)) + self.port = self.serversocket.getsockname()[1] + self.serversocket.listen(5) return self def __exit__(self, type, value, traceback): - # self._socat_process.terminate() doesn't enough because each connection to the port starts a new socat and a - # tail processes - os.killpg(os.getpgid(self._socat_process.pid), signal.SIGTERM) - # Note: this terminates all the processes but makes the script UNIX-only + self.serversocket.shutdown(socket.SHUT_RDWR) + self.serversocket.close() + print('Socket was closed successfully') -def cleanup(): + def accept_connection(self): + """ returns a socket for sending the input for idf_monitor which must be closed before calling this again. """ + (clientsocket, address) = self.serversocket.accept() + # exception will be thrown here if the idf_monitor didn't connect in time + clientsocket.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1) + return clientsocket + +def test_iteration(runner, test, startup_timeout): + print('\nRunning test on {} with filter "{}" and expecting {}'.format(test[0], test[1], test[2])) try: - os.remove(socat_in) - except: - # ignore if the file doesn't exist - pass + with open(OUT_DIR + test[2], "w", encoding='utf-8') as o_f, open(ERR_OUT, "w", encoding='utf-8') as e_f: + monitor_cmd = [sys.executable, + IDF_MONITOR, '--port', 'socket://{}:{}'.format(HOST, runner.port), '--print_filter', test[1], ELF_FILE] + (master_fd, slave_fd) = pty.openpty() + print('\t', ' '.join(monitor_cmd), sep='') + print('\tstdout="{}" stderr="{}" stdin="{}"'.format(o_f.name, e_f.name, os.ttyname(slave_fd))) + start = time.time() + # the server socket is alive so idf_monitor can start now + proc = subprocess.Popen(monitor_cmd, stdin=slave_fd, stdout=o_f, stderr=e_f, close_fds=True, bufsize=-1) + # - idf_monitor's stdin needs to be connected to some pseudo-tty in docker image even when it is not + # used at all + # - setting bufsize is needed because the default value is different on Python 2 and 3 + # - the default close_fds is also different on Python 2 and 3 + monitor_watchdog = threading.Timer(MONITOR_TIMEOUT, monitor_timeout, [proc]) + monitor_watchdog.start() + client = runner.accept_connection() + # The connection is ready but idf_monitor cannot yet receive data. This seems to happen on Ubuntu 16.04 LTS + # and is not related to the version of Python or pyserial. There seems to be no reliable way other than to + # do a sleep. The idf_monitor header in ERR_OUT could be checked on Python 2 but the file is not flushed + # on Python 3. + print('\tSleeping for {:.2f} seconds'.format(startup_timeout)) + time.sleep(startup_timeout) + with open(IN_DIR + test[0], "r", encoding='utf-8') as f: + print('\tSending {} to the socket'.format(f.name)) + for line in f: + client.sendall(line.encode()) + idf_exit_sequence = b'\x1d\n' + print('\tSending to the socket') + client.sendall(idf_exit_sequence) + ret = proc.wait() + end = time.time() + print('\tidf_monitor exited after {:.2f} seconds'.format(end - start)) + if ret < 0: + raise RuntimeError('idf_monitor was terminated by signal {}'.format(-ret)) + # idf_monitor needs to end before the socket is closed in order to exit without an exception. + finally: + if monitor_watchdog: + monitor_watchdog.cancel() + os.close(slave_fd) + os.close(master_fd) + if client: + client.close() + print('\tThe client was closed successfully') + f1 = IN_DIR + test[2] + f2 = OUT_DIR + test[2] + print('\tdiff {} {}'.format(f1, f2)) + if filecmp.cmp(f1, f2, shallow=False): + print('\tTest has passed') + else: + raise RuntimeError("The contents of the files are different. Please examine the artifacts.") def main(): - start = time.time() - cleanup() # avoid sending old content - if not os.path.exists(out_dir): - os.mkdir(out_dir) - try: - with SocatRunner(): - # Sleep is necessary to make sure that socat is already listening. Only one sleep is used per run (this is - # another reason while the temporary socat_in file is used instead of directly reading the test files). - time.sleep(1) - for t in test_list: - print('Running test on {} with filter "{}" and expecting {}'.format(t[0], t[1], t[2])) - with open(in_dir + t[0], "r", encoding='utf-8') as i_f, open(socat_in, "w", encoding='utf-8') as s_f: - print('cat {} > {}'.format(i_f.name, s_f.name)) - for line in i_f: - s_f.write(line) - idf_exit_sequence = b'\x1d\n' - print('echo "" >> {}'.format(s_f.name)) - s_f.write(idf_exit_sequence.decode()) - monitor_cmd = [idf_monitor, - '--port', 'socket://localhost:2399', - '--print_filter', t[1], - elf_file] - with open(out_dir + t[2], "w", encoding='utf-8') as o_f, open(err_out, "w", encoding='utf-8') as e_f: - try: - (master_fd, slave_fd) = pty.openpty() - print(' '.join(monitor_cmd), end=' ') - print(' > {} 2> {} < {}'.format(o_f.name, e_f.name, os.ttyname(slave_fd))) - proc = subprocess.Popen(monitor_cmd, stdin=slave_fd, stdout=o_f, stderr=e_f, - close_fds=True) - proc.wait() - finally: - os.close(slave_fd) - os.close(master_fd) - diff_cmd = ['diff', in_dir + t[2], out_dir + t[2]] - print(' '.join(diff_cmd)) - subprocess.check_call(diff_cmd) - print('Test has passed') - finally: - cleanup() + gstart = time.time() + if not os.path.exists(OUT_DIR): + os.mkdir(OUT_DIR) - end = time.time() - print('Execution took {:.2f} seconds'.format(end - start)) + socket.setdefaulttimeout(SOCKET_TIMEOUT) + + for test in test_list: + startup_timeout = 1.0 + for i in range(RETRIES_PER_TEST): + with TestRunner() as runner: + # Each test (and each retry) is run with a different port (and server socket). This is done for + # the CI run where retry with a different socket is necessary to pass the test. According to the + # experiments, retry with the same port (and server socket) is not sufficient. + try: + test_iteration(runner, test, startup_timeout) + # no more retries if test_iteration exited without an exception + break + except Exception as e: + if i < RETRIES_PER_TEST - 1: + print('Test has failed with exception:', e) + print('Another attempt will be made.') + startup_timeout += 0.5 + else: + raise + + gend = time.time() + print('Execution took {:.2f} seconds\n'.format(gend - gstart)) if __name__ == "__main__": main()