Skip to content

Commit

Permalink
Ticket #46: Call Install() in _prepare for start,mount,tune
Browse files Browse the repository at this point in the history
Install, responsible for copying tuning.conf before running commands,
is now run inside FileSystem._prepare() action graph.
An Install action is created for each Proxy action, as a result, when
a copy failed, only the related Proxy action is cancelled.

This patch adapts the code to have better error messages for that.

Change-Id: Icf5cf57e3385ec0854cb6d639db0dc319a71ce5e
  • Loading branch information
degremont committed May 24, 2017
1 parent b983408 commit dd1f42f
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 41 deletions.
3 changes: 3 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
2017-05-24 Aurelien Degremont <[email protected]>
* On error, still apply tuning on other nodes (ticket #46)

2017-04-03 Aurelien Degremont <[email protected]>
* Smart analysis of nid_map updates (ticket #111)

Expand Down
16 changes: 0 additions & 16 deletions lib/Shine/Commands/Base/FSLiveCommand.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,22 +50,6 @@ class FSLiveCommand(RemoteCommand):
def fs_status_to_rc(self, status):
return self.TARGET_STATUS_RC_MAP.get(status, RC_RUNTIME_ERROR)

def copy_tuning(self, fs, comps=None):
"""Copy tuning.conf if defined."""
if not self.has_local_flag():
tuning_conf = Globals().get_tuning_file()
if tuning_conf:
servers = None
if comps:
# take into account -n and -x options
servers = comps.allservers()
if self.options.nodes is not None:
servers.intersection_update(self.options.nodes)
if self.options.excludes is not None:
servers.difference_update(self.options.excludes)

fs.install(tuning_conf, servers=servers)

def _open_fs(self, fsname, eh):
return open_lustrefs(fsname,
self.options.targets,
Expand Down
2 changes: 0 additions & 2 deletions lib/Shine/Commands/Mount.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,6 @@ def execute_fs(self, fs, fs_conf, eh, vlevel):
if hasattr(eh, 'pre'):
eh.pre(fs)

self.copy_tuning(fs, comps=comps)

status = fs.mount(addopts=self.options.additional,
fanout=self.options.fanout,
dryrun=self.options.dryrun,
Expand Down
2 changes: 0 additions & 2 deletions lib/Shine/Commands/Start.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@ def execute_fs(self, fs, fs_conf, eh, vlevel):
if hasattr(eh, 'pre'):
eh.pre(fs)

self.copy_tuning(fs, comps=comps)

status = fs.start(mount_options=mount_options,
mount_paths=mount_paths,
addopts=self.options.additional,
Expand Down
11 changes: 4 additions & 7 deletions lib/Shine/Commands/Tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ def execute_fs(self, fs, fs_conf, eh, vlevel):
if vlevel > 1:
print "Tuning filesystem %s..." % fs.fs_name

self.copy_tuning(fs, comps=comps)

if not self.options.remote and vlevel > 1:
print tuning

Expand All @@ -76,12 +74,10 @@ def execute_fs(self, fs, fs_conf, eh, vlevel):
status = fs.tune(tuning, addopts=self.options.additional,
dryrun=self.options.dryrun,
fanout=self.options.fanout)
if status == RUNTIME_ERROR:
self.display_proxy_errors(fs)
return RC_RUNTIME_ERROR
elif status == MOUNTED:
if status == MOUNTED:
print "Filesystem %s successfully tuned." % fs.fs_name
else:
self.display_proxy_errors(fs)
print "Tuning of filesystem %s failed." % fs.fs_name
return RC_RUNTIME_ERROR

Expand All @@ -101,7 +97,8 @@ def get_tuning(cls, fs_conf, comps):
# Is the tuning configuration file name specified?
if Globals().get_tuning_file():
# Load the tuning configuration file
tuning.parse(filename=Globals().get_tuning_file())
tuning.filename = Globals().get_tuning_file()
tuning.parse()

# Add the quota tuning parameters to the tuning model.
if Globals().lustre_version_is_smaller('2.4'):
Expand Down
12 changes: 10 additions & 2 deletions lib/Shine/Lustre/Actions/Install.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ class Install(CommonAction):
Action class: install file configuration requirements on remote nodes.
"""

def __init__(self, nodes, fs, config_file, **kwargs):
NAME = 'install'

def __init__(self, nodes, fs, config_file, comps=None, **kwargs):
CommonAction.__init__(self)
self.nodes = nodes
self.fs = fs
self.config_file = config_file
self._comps = comps
self.dryrun = kwargs.get('dryrun', False)

def _launch(self):
Expand All @@ -54,7 +57,7 @@ def ev_start(self, worker):
(name, len(self.nodes))
else:
msg = "Updating configuration file `%s' on %s" % (name, self.nodes)
self.fs.hdlr.log('info', msg)
self.fs.hdlr.log('verbose', msg)

def ev_close(self, worker):
"""
Expand All @@ -77,6 +80,11 @@ def ev_close(self, worker):
for rc, nodes in worker.iter_retcodes():
if rc == 0:
continue

# Avoid warnings, flag this component in error state
for comp in self._comps or []:
comp.sanitize_state(nodes=worker.nodes)

for output, nodes in worker.iter_buffers(match_keys=nodes):
nodes = NodeSet.fromlist(nodes)
msg = "Copy failed: %s" % output
Expand Down
16 changes: 14 additions & 2 deletions lib/Shine/Lustre/FileSystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,8 @@ def _prepare(self, action, comps=None, groupby=None, reverse=False,

graph = ActionGroup()

comps = comps or self.components

first_comps = None
last_comps = None
localsrv = None
Expand Down Expand Up @@ -411,6 +413,11 @@ def _prepare(self, action, comps=None, groupby=None, reverse=False,
else:
act = self._proxy_action(action, srv.hostname,
comps, **kwargs)
if tunings:
copy = Install(srv.hostname, self, tunings.filename,
comps=comps, **kwargs)
act.depends_on(copy)
proxygrp.add(copy)
proxygrp.add(act)

if len(compgrp) > 0:
Expand Down Expand Up @@ -575,8 +582,13 @@ def tune(self, tuning_model, comps=None, **kwargs):
actions.add(server.tune(tuning_model, srvcomps, self.fs_name,
**kwargs))
else:
actions.add(self._proxy_action('tune', server.hostname,
srvcomps, **kwargs))
act = self._proxy_action('tune', server.hostname, srvcomps,
**kwargs)
copy = Install(server.hostname, self, tuning_model.filename,
comps=srvcomps, **kwargs)
act.depends_on(copy)
actions.add(act)
actions.add(copy)

# Run local actions and FSProxyAction
actions.launch()
Expand Down
118 changes: 108 additions & 10 deletions tests/Lustre/FileSystemTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,30 +26,128 @@
from Shine.Lustre.FileSystem import FileSystem, Server, FSRemoteError, \
MOUNTED, OFFLINE, MIGRATED

class SimpleFileSystemTest(unittest.TestCase):
"""Tests which do not setup a real Lustre filesystem."""

def _graph2obj(graph):
try:
return [_graph2obj(item) for item in graph]
except TypeError:
result = {}
for key in ('NAME', 'comp', 'action', 'config_file'):
if hasattr(graph, key):
result[key] = getattr(graph, key)
return result

class FakeTunings(object):
def __init__(self):
self.filename = 'foo'


class PrepareTest(unittest.TestCase):
"""Verify graph from _prepare()"""

def setUp(self):
self.fs = FileSystem('testfs')
self.fs = FileSystem('prepare')
self.remotesrv = Server('remote', ['remote@tcp'])
self.localsrv = Server(Utils.HOSTNAME, ['%s@tcp' % Utils.HOSTNAME])
self.fs.local_server = self.localsrv

def test_simple_local_action(self):
"""prepare a simple action on a local component"""
comp = self.fs.new_target(self.localsrv, 'mgt', 0, '/dev/fakedev')
graph = self.fs._prepare('start')

self.assertEqual(_graph2obj(graph),
[[[{'NAME': 'start', 'comp': comp}],
[{'NAME': 'load modules'},
{'NAME': 'load modules'}]]])
self.assertEqual(graph[0][1][0]._modname, 'lustre')

def test_simple_remote_action(self):
"""prepare a simple action on a remote component"""
self.fs.new_target(self.remotesrv, 'mgt', 0, '/dev/fakedev')
graph = self.fs._prepare('start')

self.assertEqual(_graph2obj(graph),
[[[{'NAME': 'proxy', 'action': 'start'}]]])
self.assertEqual(str(graph[0][0][0].nodes), 'remote')

def test_proxy_tunings(self):
"""prepare is ok with or without tunings"""
self.fs.new_target(self.remotesrv, 'mgt', 0, '/dev/fakedev')

# Without tunings
graph = self.fs._prepare('dummy', tunings=None)
self.assertEqual(_graph2obj(graph),
[[[{'NAME': 'proxy', 'action': 'dummy'}]]])
self.assertEqual(str(graph[0][0][0].nodes), 'remote')

# With tunings
graph = self.fs._prepare('dummy', tunings=FakeTunings())
self.assertEqual(_graph2obj(graph),
[[[{'NAME': 'install', 'config_file': 'foo'},
{'NAME': 'proxy', 'action': 'dummy'}]]])
self.assertEqual(str(graph[0][0][1].nodes), 'remote')

def test_local_tunings(self):
"""prepare is ok with or without tunings"""
comp = self.fs.new_target(self.localsrv, 'mgt', 0, '/dev/fakedev')

# Without tunings
graph = self.fs._prepare('start', tunings=None)
self.assertEqual(_graph2obj(graph),
[[[{'NAME': 'start', 'comp': comp}],
[{'NAME': 'load modules'},
{'NAME': 'load modules'}]]])
self.assertEqual(graph[0][1][0]._modname, 'lustre')

# With tunings
graph = self.fs._prepare('start', tunings=FakeTunings())
self.assertEqual(_graph2obj(graph),
[[[{'NAME': 'start', 'comp': comp}],
[{'NAME': 'load modules'},
{'NAME': 'load modules'}], []]])
self.assertEqual(graph[0][1][0]._modname, 'lustre')
self.assertEqual(graph[0][2].NAME, 'tune')

def test_need_unload(self):
"""prepare handles need_unload correctly"""
comp = self.fs.new_target(self.localsrv, 'mgt', 0, '/dev/fakedev')

# Without module unload
graph = self.fs._prepare('stop', need_unload=False)
self.assertEqual(_graph2obj(graph),
[[[{'NAME': 'stop', 'comp': comp}]]])

# With module unload
graph = self.fs._prepare('stop', need_unload=True)
self.assertEqual(_graph2obj(graph),
[[[{'NAME': 'stop', 'comp': comp}],
{'NAME': 'unload modules'}]])


class SimpleFileSystemTest(unittest.TestCase):
"""Tests which do not setup a real Lustre filesystem."""

def test_install_nothing(self):
"""install only using local node does nothing"""
class MyFS(FileSystem):
def _run_actions(self):
def _run_actions(obj):
self.fail("should not be called")
fs = MyFS('testfs')
srv = Server(Utils.HOSTNAME, ['%s@tcp' % Utils.HOSTNAME])
self.fs.local_server = srv
self.fs.new_target(srv, 'mgt', 0, '/dev/fakedev')
self.fs.install(fs_config_file=Utils.makeTempFilename())
fs.local_server = srv
fs.new_target(srv, 'mgt', 0, '/dev/fakedev')
fs.install(fs_config_file=Utils.makeTempFilename())

def test_install_unreachable(self):
"""install on unreachable nodes raises an error"""
fs = FileSystem('testfs')
badsrv1 = Server('badnode1', ['127.0.0.2@tcp'])
badsrv2 = Server('badnode2', ['127.0.0.3@tcp'])
self.fs.new_target(badsrv1, 'mgt', 0, '/dev/fakedev')
self.fs.new_client(badsrv2, '/testfs')
fs.new_target(badsrv1, 'mgt', 0, '/dev/fakedev')
fs.new_client(badsrv2, '/testfs')
try:
self.fs.install(fs_config_file=Utils.makeTempFilename())
fs.install(fs_config_file=Utils.makeTempFilename())
except FSRemoteError, ex:
self.assertEqual(str(ex.nodes), 'badnode[1-2]')
self.assertEqual(ex.rc, 1)
Expand Down

0 comments on commit dd1f42f

Please sign in to comment.