diff --git a/ChangeLog b/ChangeLog index a3be16b..c4572d8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,6 @@ +2017-05-24 Aurelien Degremont + * On error, still apply tuning on other nodes (ticket #46) + 2017-04-03 Aurelien Degremont * Smart analysis of nid_map updates (ticket #111) diff --git a/lib/Shine/Commands/Base/FSLiveCommand.py b/lib/Shine/Commands/Base/FSLiveCommand.py index 690778d..b866c6e 100644 --- a/lib/Shine/Commands/Base/FSLiveCommand.py +++ b/lib/Shine/Commands/Base/FSLiveCommand.py @@ -50,22 +50,6 @@ class FSLiveCommand(RemoteCommand): def fs_status_to_rc(self, status): return self.TARGET_STATUS_RC_MAP.get(status, RC_RUNTIME_ERROR) - def copy_tuning(self, fs, comps=None): - """Copy tuning.conf if defined.""" - if not self.has_local_flag(): - tuning_conf = Globals().get_tuning_file() - if tuning_conf: - servers = None - if comps: - # take into account -n and -x options - servers = comps.allservers() - if self.options.nodes is not None: - servers.intersection_update(self.options.nodes) - if self.options.excludes is not None: - servers.difference_update(self.options.excludes) - - fs.install(tuning_conf, servers=servers) - def _open_fs(self, fsname, eh): return open_lustrefs(fsname, self.options.targets, diff --git a/lib/Shine/Commands/Mount.py b/lib/Shine/Commands/Mount.py index 0641a19..048c8fb 100644 --- a/lib/Shine/Commands/Mount.py +++ b/lib/Shine/Commands/Mount.py @@ -69,8 +69,6 @@ def execute_fs(self, fs, fs_conf, eh, vlevel): if hasattr(eh, 'pre'): eh.pre(fs) - self.copy_tuning(fs, comps=comps) - status = fs.mount(addopts=self.options.additional, fanout=self.options.fanout, dryrun=self.options.dryrun, diff --git a/lib/Shine/Commands/Start.py b/lib/Shine/Commands/Start.py index 454c5e0..861655c 100644 --- a/lib/Shine/Commands/Start.py +++ b/lib/Shine/Commands/Start.py @@ -85,8 +85,6 @@ def execute_fs(self, fs, fs_conf, eh, vlevel): if hasattr(eh, 'pre'): eh.pre(fs) - self.copy_tuning(fs, comps=comps) - status = fs.start(mount_options=mount_options, mount_paths=mount_paths, addopts=self.options.additional, diff --git a/lib/Shine/Commands/Tune.py b/lib/Shine/Commands/Tune.py index 63d48e3..34a71f8 100644 --- a/lib/Shine/Commands/Tune.py +++ b/lib/Shine/Commands/Tune.py @@ -64,8 +64,6 @@ def execute_fs(self, fs, fs_conf, eh, vlevel): if vlevel > 1: print "Tuning filesystem %s..." % fs.fs_name - self.copy_tuning(fs, comps=comps) - if not self.options.remote and vlevel > 1: print tuning @@ -76,12 +74,10 @@ def execute_fs(self, fs, fs_conf, eh, vlevel): status = fs.tune(tuning, addopts=self.options.additional, dryrun=self.options.dryrun, fanout=self.options.fanout) - if status == RUNTIME_ERROR: - self.display_proxy_errors(fs) - return RC_RUNTIME_ERROR - elif status == MOUNTED: + if status == MOUNTED: print "Filesystem %s successfully tuned." % fs.fs_name else: + self.display_proxy_errors(fs) print "Tuning of filesystem %s failed." % fs.fs_name return RC_RUNTIME_ERROR @@ -101,7 +97,8 @@ def get_tuning(cls, fs_conf, comps): # Is the tuning configuration file name specified? if Globals().get_tuning_file(): # Load the tuning configuration file - tuning.parse(filename=Globals().get_tuning_file()) + tuning.filename = Globals().get_tuning_file() + tuning.parse() # Add the quota tuning parameters to the tuning model. if Globals().lustre_version_is_smaller('2.4'): diff --git a/lib/Shine/Lustre/Actions/Install.py b/lib/Shine/Lustre/Actions/Install.py index d00c1aa..f31e862 100644 --- a/lib/Shine/Lustre/Actions/Install.py +++ b/lib/Shine/Lustre/Actions/Install.py @@ -29,11 +29,14 @@ class Install(CommonAction): Action class: install file configuration requirements on remote nodes. """ - def __init__(self, nodes, fs, config_file, **kwargs): + NAME = 'install' + + def __init__(self, nodes, fs, config_file, comps=None, **kwargs): CommonAction.__init__(self) self.nodes = nodes self.fs = fs self.config_file = config_file + self._comps = comps self.dryrun = kwargs.get('dryrun', False) def _launch(self): @@ -54,7 +57,7 @@ def ev_start(self, worker): (name, len(self.nodes)) else: msg = "Updating configuration file `%s' on %s" % (name, self.nodes) - self.fs.hdlr.log('info', msg) + self.fs.hdlr.log('verbose', msg) def ev_close(self, worker): """ @@ -77,6 +80,11 @@ def ev_close(self, worker): for rc, nodes in worker.iter_retcodes(): if rc == 0: continue + + # Avoid warnings, flag this component in error state + for comp in self._comps or []: + comp.sanitize_state(nodes=worker.nodes) + for output, nodes in worker.iter_buffers(match_keys=nodes): nodes = NodeSet.fromlist(nodes) msg = "Copy failed: %s" % output diff --git a/lib/Shine/Lustre/FileSystem.py b/lib/Shine/Lustre/FileSystem.py index 416ad3b..81c4048 100644 --- a/lib/Shine/Lustre/FileSystem.py +++ b/lib/Shine/Lustre/FileSystem.py @@ -383,6 +383,8 @@ def _prepare(self, action, comps=None, groupby=None, reverse=False, graph = ActionGroup() + comps = comps or self.components + first_comps = None last_comps = None localsrv = None @@ -411,6 +413,11 @@ def _prepare(self, action, comps=None, groupby=None, reverse=False, else: act = self._proxy_action(action, srv.hostname, comps, **kwargs) + if tunings: + copy = Install(srv.hostname, self, tunings.filename, + comps=comps, **kwargs) + act.depends_on(copy) + proxygrp.add(copy) proxygrp.add(act) if len(compgrp) > 0: @@ -575,8 +582,13 @@ def tune(self, tuning_model, comps=None, **kwargs): actions.add(server.tune(tuning_model, srvcomps, self.fs_name, **kwargs)) else: - actions.add(self._proxy_action('tune', server.hostname, - srvcomps, **kwargs)) + act = self._proxy_action('tune', server.hostname, srvcomps, + **kwargs) + copy = Install(server.hostname, self, tuning_model.filename, + comps=srvcomps, **kwargs) + act.depends_on(copy) + actions.add(act) + actions.add(copy) # Run local actions and FSProxyAction actions.launch() diff --git a/tests/Lustre/FileSystemTest.py b/tests/Lustre/FileSystemTest.py index 947ac9d..8e04c64 100644 --- a/tests/Lustre/FileSystemTest.py +++ b/tests/Lustre/FileSystemTest.py @@ -26,30 +26,128 @@ from Shine.Lustre.FileSystem import FileSystem, Server, FSRemoteError, \ MOUNTED, OFFLINE, MIGRATED -class SimpleFileSystemTest(unittest.TestCase): - """Tests which do not setup a real Lustre filesystem.""" + +def _graph2obj(graph): + try: + return [_graph2obj(item) for item in graph] + except TypeError: + result = {} + for key in ('NAME', 'comp', 'action', 'config_file'): + if hasattr(graph, key): + result[key] = getattr(graph, key) + return result + +class FakeTunings(object): + def __init__(self): + self.filename = 'foo' + + +class PrepareTest(unittest.TestCase): + """Verify graph from _prepare()""" def setUp(self): - self.fs = FileSystem('testfs') + self.fs = FileSystem('prepare') + self.remotesrv = Server('remote', ['remote@tcp']) + self.localsrv = Server(Utils.HOSTNAME, ['%s@tcp' % Utils.HOSTNAME]) + self.fs.local_server = self.localsrv + + def test_simple_local_action(self): + """prepare a simple action on a local component""" + comp = self.fs.new_target(self.localsrv, 'mgt', 0, '/dev/fakedev') + graph = self.fs._prepare('start') + + self.assertEqual(_graph2obj(graph), + [[[{'NAME': 'start', 'comp': comp}], + [{'NAME': 'load modules'}, + {'NAME': 'load modules'}]]]) + self.assertEqual(graph[0][1][0]._modname, 'lustre') + + def test_simple_remote_action(self): + """prepare a simple action on a remote component""" + self.fs.new_target(self.remotesrv, 'mgt', 0, '/dev/fakedev') + graph = self.fs._prepare('start') + + self.assertEqual(_graph2obj(graph), + [[[{'NAME': 'proxy', 'action': 'start'}]]]) + self.assertEqual(str(graph[0][0][0].nodes), 'remote') + + def test_proxy_tunings(self): + """prepare is ok with or without tunings""" + self.fs.new_target(self.remotesrv, 'mgt', 0, '/dev/fakedev') + + # Without tunings + graph = self.fs._prepare('dummy', tunings=None) + self.assertEqual(_graph2obj(graph), + [[[{'NAME': 'proxy', 'action': 'dummy'}]]]) + self.assertEqual(str(graph[0][0][0].nodes), 'remote') + + # With tunings + graph = self.fs._prepare('dummy', tunings=FakeTunings()) + self.assertEqual(_graph2obj(graph), + [[[{'NAME': 'install', 'config_file': 'foo'}, + {'NAME': 'proxy', 'action': 'dummy'}]]]) + self.assertEqual(str(graph[0][0][1].nodes), 'remote') + + def test_local_tunings(self): + """prepare is ok with or without tunings""" + comp = self.fs.new_target(self.localsrv, 'mgt', 0, '/dev/fakedev') + + # Without tunings + graph = self.fs._prepare('start', tunings=None) + self.assertEqual(_graph2obj(graph), + [[[{'NAME': 'start', 'comp': comp}], + [{'NAME': 'load modules'}, + {'NAME': 'load modules'}]]]) + self.assertEqual(graph[0][1][0]._modname, 'lustre') + + # With tunings + graph = self.fs._prepare('start', tunings=FakeTunings()) + self.assertEqual(_graph2obj(graph), + [[[{'NAME': 'start', 'comp': comp}], + [{'NAME': 'load modules'}, + {'NAME': 'load modules'}], []]]) + self.assertEqual(graph[0][1][0]._modname, 'lustre') + self.assertEqual(graph[0][2].NAME, 'tune') + + def test_need_unload(self): + """prepare handles need_unload correctly""" + comp = self.fs.new_target(self.localsrv, 'mgt', 0, '/dev/fakedev') + + # Without module unload + graph = self.fs._prepare('stop', need_unload=False) + self.assertEqual(_graph2obj(graph), + [[[{'NAME': 'stop', 'comp': comp}]]]) + + # With module unload + graph = self.fs._prepare('stop', need_unload=True) + self.assertEqual(_graph2obj(graph), + [[[{'NAME': 'stop', 'comp': comp}], + {'NAME': 'unload modules'}]]) + + +class SimpleFileSystemTest(unittest.TestCase): + """Tests which do not setup a real Lustre filesystem.""" def test_install_nothing(self): """install only using local node does nothing""" class MyFS(FileSystem): - def _run_actions(self): + def _run_actions(obj): self.fail("should not be called") + fs = MyFS('testfs') srv = Server(Utils.HOSTNAME, ['%s@tcp' % Utils.HOSTNAME]) - self.fs.local_server = srv - self.fs.new_target(srv, 'mgt', 0, '/dev/fakedev') - self.fs.install(fs_config_file=Utils.makeTempFilename()) + fs.local_server = srv + fs.new_target(srv, 'mgt', 0, '/dev/fakedev') + fs.install(fs_config_file=Utils.makeTempFilename()) def test_install_unreachable(self): """install on unreachable nodes raises an error""" + fs = FileSystem('testfs') badsrv1 = Server('badnode1', ['127.0.0.2@tcp']) badsrv2 = Server('badnode2', ['127.0.0.3@tcp']) - self.fs.new_target(badsrv1, 'mgt', 0, '/dev/fakedev') - self.fs.new_client(badsrv2, '/testfs') + fs.new_target(badsrv1, 'mgt', 0, '/dev/fakedev') + fs.new_client(badsrv2, '/testfs') try: - self.fs.install(fs_config_file=Utils.makeTempFilename()) + fs.install(fs_config_file=Utils.makeTempFilename()) except FSRemoteError, ex: self.assertEqual(str(ex.nodes), 'badnode[1-2]') self.assertEqual(ex.rc, 1)