Commit 9cb97bdb by Máhonfai Bálint

Merge branch 'batmanFix' into 'master'

Propagate the last valid node metric instead of always using the second-to-last ([-2]) value, and modify the garbage collector to limit its batch size

See merge request !412
parents c3b43f25 012aa298
Pipeline #966 passed with stage
in 0 seconds
...@@ -360,15 +360,21 @@ class Node(OperatedMixin, TimeStampedModel): ...@@ -360,15 +360,21 @@ class Node(OperatedMixin, TimeStampedModel):
# Example: # Example:
# {"target": "circle.szianode.cpu.usage", # {"target": "circle.szianode.cpu.usage",
# "datapoints": [[0.6, 1403045700], [0.5, 1403045760] # "datapoints": [[0.6, 1403045700], [0.5, 1403045760]
logger.info('MONITOR_TARGET: %s', target)
try: try:
metric = target['target'] metric = target['target']
if metric.startswith(prefix): if metric.startswith(prefix):
metric = metric[len(prefix):] metric = metric[len(prefix):]
else: else:
logger.info('MONITOR_MET: %s %s', target, metric)
continue continue
value = target['datapoints'][-2][0] value = target['datapoints'][-1][0]
if value is None:
value = target['datapoints'][-2][0]
retval[metric] = float(value) retval[metric] = float(value)
logger.info('MONITOR_RETVAL: %s %s, %s', target['target'], metric, retval[metric])
except (KeyError, IndexError, ValueError, TypeError): except (KeyError, IndexError, ValueError, TypeError):
logger.info('MONITOR_ERR: %s %s', metric, value)
continue continue
return retval return retval
...@@ -435,7 +441,7 @@ class Node(OperatedMixin, TimeStampedModel): ...@@ -435,7 +441,7 @@ class Node(OperatedMixin, TimeStampedModel):
vm_state_changed hook. vm_state_changed hook.
""" """
domains = {} domains = {}
domain_list = self.remote_query(vm_tasks.list_domains_info, timeout=5, domain_list = self.remote_query(vm_tasks.list_domains_info, timeout=10,
priority="fast") priority="fast")
if domain_list is None: if domain_list is None:
logger.info("Monitoring failed at: %s", self.name) logger.info("Monitoring failed at: %s", self.name)
......
...@@ -35,7 +35,7 @@ def update_domain_states(): ...@@ -35,7 +35,7 @@ def update_domain_states():
@celery.task(ignore_result=True) @celery.task(ignore_result=True)
def garbage_collector(timeout=15): def garbage_collector(offset=timezone.timedelta(seconds=20)):
"""Garbage collector for instances. """Garbage collector for instances.
Suspends and destroys expired instances. Suspends and destroys expired instances.
...@@ -44,8 +44,10 @@ def garbage_collector(timeout=15): ...@@ -44,8 +44,10 @@ def garbage_collector(timeout=15):
:type timeout: int :type timeout: int
""" """
now = timezone.now() now = timezone.now()
bw = 0
for i in Instance.objects.filter(destroyed_at=None).all(): for i in Instance.objects.filter(destroyed_at=None).all():
if i.time_of_delete and now > i.time_of_delete: if i.time_of_delete and now > i.time_of_delete + offset and bw < 20:
bw = bw + 1
i.destroy.async(system=True) i.destroy.async(system=True)
logger.info("Expired instance %d destroyed.", i.pk) logger.info("Expired instance %d destroyed.", i.pk)
try: try:
...@@ -59,10 +61,11 @@ def garbage_collector(timeout=15): ...@@ -59,10 +61,11 @@ def garbage_collector(timeout=15):
logger.debug('Could not notify owner of instance %d .%s', logger.debug('Could not notify owner of instance %d .%s',
i.pk, unicode(e)) i.pk, unicode(e))
elif (i.time_of_suspend and now > i.time_of_suspend and elif (i.time_of_suspend and now > i.time_of_suspend and
i.state == 'RUNNING'): i.state == 'RUNNING' and bw < 20):
bw = bw + 1
logger.info("Expired instance %d suspended." % i.pk) logger.info("Expired instance %d suspended." % i.pk)
try: try:
i.sleep.async(system=True) i.sleep.async(system=True)
i.owner.profile.notify( i.owner.profile.notify(
ugettext_noop('%(instance)s suspended'), ugettext_noop('%(instance)s suspended'),
ugettext_noop( ugettext_noop(
...@@ -70,8 +73,8 @@ def garbage_collector(timeout=15): ...@@ -70,8 +73,8 @@ def garbage_collector(timeout=15):
'has been suspended due to expiration. ' 'has been suspended due to expiration. '
'You can resume or destroy it.'), 'You can resume or destroy it.'),
instance=i.name, url=i.get_absolute_url()) instance=i.name, url=i.get_absolute_url())
except ActivityInProgressError: except ActivityInProgressError:
logger.error("Expired instance %d can't be destroyed due the AtctivityInPorgressError.", i.pk) logger.error("Expired instance %d can't be destroyed due the AtctivityInPorgressError.", i.pk)
except Exception as e: except Exception as e:
logger.info('Could not notify owner of instance %d .%s', logger.info('Could not notify owner of instance %d .%s',
i.pk, unicode(e)) i.pk, unicode(e))
...@@ -79,7 +82,7 @@ def garbage_collector(timeout=15): ...@@ -79,7 +82,7 @@ def garbage_collector(timeout=15):
logger.debug("Instance %d expires soon." % i.pk) logger.debug("Instance %d expires soon." % i.pk)
i.notify_owners_about_expiration() i.notify_owners_about_expiration()
else: else:
logger.debug("Instance %d didn't expire." % i.pk) logger.debug("Instance %d didn't expire. bw:%d", i.pk, bw)
@celery.task(ignore_result=True) @celery.task(ignore_result=True)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment