New test that restarts the entire ONOS cluster before verifying it. New cord tester API, used by the test, to restart the entire cluster. Changed the restarts to check for ONOS startup using wait_for_onos_start instead of a hard delay. Added a robot test for restarting the cluster. Change-Id: I8dbc163462570a6a8eaf8e7684c790fc3fea8f48

commit: 2560f04b9d13eb042b4229324b1f9ab7cd23365e [log] [tgz]
author: A.R Karthick <kramanar@ciena.com> Wed Nov 30 14:38:52 2016 -0800
committer: A.R Karthick <kramanar@ciena.com> Wed Nov 30 14:38:52 2016 -0800
tree: 9ff5789914867df560e49adf4b00e1ce67e1362a
parent: 45ab3e129320aa1cc257bd64d77b4344c05a1553 [diff] [blame]
diff --git a/src/test/cluster/clusterTest.py b/src/test/cluster/clusterTest.py
index c3005ee..d5ad5fb 100644
--- a/src/test/cluster/clusterTest.py
+++ b/src/test/cluster/clusterTest.py

@@ -23,7 +23,7 @@
 from twisted.internet import defer
 from onosclidriver import OnosCliDriver
 from CordContainer import Container, Onos, Quagga
-from CordTestServer import cord_test_onos_restart, cord_test_onos_shutdown, cord_test_onos_add_cluster, cord_test_quagga_restart
+from CordTestServer import cord_test_onos_restart, cord_test_onos_shutdown, cord_test_onos_add_cluster, cord_test_quagga_restart, cord_test_restart_cluster
 from portmaps import g_subscriber_port_map
 from scapy.all import *
 import time, monotonic
@@ -61,7 +61,7 @@
     acl = cluster_acl()
     dhcprelay = cluster_dhcprelay()
     subscriber = cluster_subscriber()
-    testcaseLoggers = ('test_cluster_controller_restarts',)
+    testcaseLoggers = ('test_cluster_controller_restarts', 'test_cluster_single_controller_restarts', 'test_cluster_restarts')
 
     def setUp(self):
         if self._testMethodName not in self.testcaseLoggers:
@@ -450,6 +450,60 @@
             time.sleep(60)
             check_exception(controller, inclusive = True)
 
+    def test_cluster_restarts(self):
+        '''Test the cluster by repeatedly restarting the entire cluster'''
+        controllers = self.get_controllers()
+        ctlr_len = len(controllers)
+        if ctlr_len <= 1:
+            log.info('ONOS is not running in cluster mode. This test only works for cluster mode')
+            assert_greater(ctlr_len, 1) #ctlr_len <= 1 on this path, so the assertion is expected to stop the test
+
+        #per the original note this call verifies the cluster once up front; onos_map is not read again in this method
+        onos_map = self.get_cluster_container_names_ips()
+
+        def check_exception(): #nested helper: inspects every controller after a cluster restart
+            controller_list = controllers
+            storage_exceptions = [] #nodes whose log output contained 'StorageException$Timeout'
+            for node in controller_list:
+                onosLog = OnosLog(host = node)
+                ##look for a 'StorageException$Timeout' entry in this node's ERROR/Exception log output
+                _, output = onosLog.get_log(('ERROR', 'Exception',))
+                if output and output.find('StorageException$Timeout') >= 0:
+                    log.info('\nStorage Exception Timeout found on node: %s\n' %node)
+                    log.info('Dumping the ERROR and Exception logs for node: %s\n' %node)
+                    log.info('\n' + '-' * 50 + '\n')
+                    log.info('%s' %output)
+                    log.info('\n' + '-' * 50 + '\n')
+                    storage_exceptions.append(node)
+
+            failed = self.verify_leaders(controller_list)
+            if failed:
+                log.info('Leaders command failed on nodes: %s' %failed)
+                if storage_exceptions:
+                    log.info('Storage exception seen on nodes: %s' %storage_exceptions)
+                    assert_equal(len(failed), 0) #failed is truthy here; for a non-empty list this assertion cannot pass
+                    return
+
+            for ctlr in controller_list: #each controller must report as many ACTIVE/READY members as there are controllers
+                ips = self.get_cluster_current_member_ips(controller = ctlr,
+                                                          nodes_filter = \
+                                                          lambda nodes: [ n for n in nodes if n['state'] in [ 'ACTIVE', 'READY'] ])
+                log.info('ONOS cluster on node %s formed with controllers: %s' %(ctlr, ips))
+                assert_equal(len(ips), len(controllers))
+
+        tries = 10
+        for num in range(tries):
+            log.info('ITERATION: %d. Restarting cluster with controllers at %s' %(num+1, controllers))
+            try:
+                cord_test_restart_cluster()
+                log.info('Delaying before verifying cluster status')
+                time.sleep(60)
+            except: #NOTE(review): bare except; any error is swallowed unlogged and this iteration skips verification
+                time.sleep(10)
+                continue
+            #check all controllers for storage exceptions, leaders status and cluster membership
+            check_exception()
+
     #pass
     def test_cluster_formation_and_verification(self,onos_instances = ONOS_INSTANCES):
 	status = self.verify_cluster_status(onos_instances = onos_instances)
commit	2560f04b9d13eb042b4229324b1f9ab7cd23365e	[log] [tgz]
author	A.R Karthick <kramanar@ciena.com>	Wed Nov 30 14:38:52 2016 -0800
committer	A.R Karthick <kramanar@ciena.com>	Wed Nov 30 14:38:52 2016 -0800
tree	9ff5789914867df560e49adf4b00e1ce67e1362a
parent	45ab3e129320aa1cc257bd64d77b4344c05a1553 [diff] [blame]