Commit e183c87f by Máhonfai Bálint

Resolve merge conflict

parents a5572890 c3b43f25
Pipeline #963 failed with stage
in 0 seconds
......@@ -576,7 +576,7 @@ SESSION_COOKIE_NAME = "csessid%x" % (((getnode() // 139) ^
MAX_NODE_RAM = get_env_variable("MAX_NODE_RAM", 1024)
MAX_NODE_CPU_CORE = get_env_variable("MAX_NODE_CPU_CORE", 10)
SCHEDULER_METHOD = get_env_variable("SCHEDULER_METHOD", 'random')
SCHEDULER_METHOD = get_env_variable("SCHEDULER_METHOD", 'advanced')
# Url to download the client: (e.g. http://circlecloud.org/client/download/)
CLIENT_DOWNLOAD_URL = get_env_variable('CLIENT_DOWNLOAD_URL', 'http://circlecloud.org/client/download/')
......@@ -590,3 +590,12 @@ REQUEST_HOOK_URL = get_env_variable("REQUEST_HOOK_URL", "")
SSHKEY_EMAIL_ADD_KEY = False
TWO_FACTOR_ISSUER = get_env_variable("TWO_FACTOR_ISSUER", "CIRCLE")
# Default value is every day at midnight
AUTO_MIGRATION_CRONTAB = get_env_variable("AUTO_MIGRATION_CRONTAB", "0 0 * * *")
AUTO_MIGRATION_TIME_LIMIT_IN_HOURS = (
get_env_variable("AUTO_MIGRATION_TIME_LIMIT_IN_HOURS", "2"))
# Maximum time difference until the monitor's values get valid
SCHEDULER_TIME_SENSITIVITY_IN_SECONDS = (
get_env_variable("SCHEDULER_TIME_SENSITIVITY_IN_SECONDS", "60"))
......@@ -65,7 +65,10 @@
"modified": "2014-02-19T21:11:34.671Z",
"priority": 1,
"traits": [],
"host": 1
"host": 1,
"ram_weight": 1.0,
"cpu_weight": 1.0,
"time_stamp": "2017-12-13T21:08:08.819Z"
}
}
]
......@@ -1697,3 +1697,11 @@ class TwoFactorConfirmationForm(forms.Form):
totp = pyotp.TOTP(self.user.profile.two_factor_secret)
if not totp.verify(self.cleaned_data.get('confirmation_code')):
raise ValidationError(_("Invalid confirmation code."))
class AutoMigrationForm(forms.Form):
minute = forms.CharField()
hour = forms.CharField()
day_of_month = forms.CharField()
month_of_year = forms.CharField()
day_of_week = forms.CharField()
......@@ -162,7 +162,7 @@ class ConnectCommand(Model):
validators=[connect_command_template_validator])
class Meta:
ordering = ('id', )
ordering = ('id',)
def __unicode__(self):
return self.template
......@@ -218,7 +218,7 @@ class Profile(Model):
'id': command.id,
'cmd': command.template % {
'port': instance.get_connect_port(use_ipv6=use_ipv6),
'host': instance.get_connect_host(use_ipv6=use_ipv6),
'host': instance.get_connect_host(use_ipv6=use_ipv6),
'password': instance.pw,
'username': 'cloud',
}} for command in commands]
......@@ -263,7 +263,7 @@ class Profile(Model):
super(Profile, self).save(*args, **kwargs)
class Meta:
ordering = ('id', )
ordering = ('id',)
permissions = (
('use_autocomplete', _('Can use autocomplete.')),
)
......@@ -275,7 +275,7 @@ class FutureMember(Model):
group = ForeignKey(Group)
class Meta:
ordering = ('id', )
ordering = ('id',)
unique_together = ('org_id', 'group')
def __unicode__(self):
......@@ -295,7 +295,7 @@ class GroupProfile(AclBase):
description = TextField(blank=True)
class Meta:
ordering = ('id', )
ordering = ('id',)
def __unicode__(self):
return self.group.name
......@@ -331,7 +331,11 @@ def create_profile(user):
profile, created = Profile.objects.get_or_create(user=user)
try:
Store(user).create_user(profile.smb_password, None, profile.disk_quota)
store = Store(user)
if store.user_exist():
profile.disk_quota = store.get_quota()['soft']
profile.save()
store.create_user(profile.smb_password, None, profile.disk_quota)
except:
logger.exception("Can't create user %s", unicode(user))
return created
......
......@@ -1079,6 +1079,10 @@ textarea[name="new_members"] {
max-width: 100%;
}
#node-list-auto-migration-body {
padding: 20px;
}
#vm-list-table td.state,
#vm-list-table td.memory {
white-space: nowrap;
......
......@@ -3,4 +3,11 @@ $(function() {
// find disabled nodes, set danger (red) on the rows
$('.node-disabled').closest("tr").addClass('danger');
});
$('#reschedule-now').click(function() {
$.get($(this).attr('href'), function(data){
highlight = data.result === 'ok' ? 'success' : 'danger';
addMessage(data.message, highlight);
});
return false;
});
});
......@@ -44,9 +44,19 @@ class NoStoreException(StoreApiException):
pass
class NoOrgIdException(StoreApiException):
pass
class Store(object):
def __init__(self, user, default_timeout=0.5):
self.store_url = settings.STORE_URL
if not self.store_url:
raise NoStoreException
if not user.profile.org_id:
raise NoOrgIdException
self.username = 'u-%s' % user.profile.org_id
self.request_args = {'verify': settings.STORE_VERIFY_SSL}
if settings.STORE_SSL_AUTH:
self.request_args['cert'] = (settings.STORE_CLIENT_CERT,
......@@ -54,18 +64,15 @@ class Store(object):
if settings.STORE_BASIC_AUTH:
self.request_args['auth'] = (settings.STORE_CLIENT_USER,
settings.STORE_CLIENT_PASSWORD)
self.username = "u-%d" % user.pk
self.default_timeout = default_timeout
self.store_url = settings.STORE_URL
if not self.store_url:
raise NoStoreException
def _request(self, url, method=get, timeout=None,
raise_status_code=True, **kwargs):
url = urljoin(self.store_url, url)
if timeout is None:
timeout = self.default_timeout
payload = json.dumps(kwargs) if kwargs else None
kwargs['USER'] = self.username
payload = json.dumps(kwargs)
try:
headers = {'content-type': 'application/json'}
response = method(url, data=payload, headers=headers,
......@@ -83,7 +90,7 @@ class Store(object):
return response
def _request_cmd(self, cmd, **kwargs):
return self._request(self.username, post, CMD=cmd, **kwargs)
return self._request("/user/", post, CMD=cmd, **kwargs)
def list(self, path, process=True):
r = self._request_cmd("LIST", PATH=path)
......@@ -106,8 +113,8 @@ class Store(object):
return r.json()['LINK']
def request_upload(self, path):
r = self._request_cmd("UPLOAD", PATH=path)
return r.json()['LINK']
r = self._request_cmd("UPLOAD", PATH=path)
return r.json()['LINK']
def remove(self, path):
self._request_cmd("REMOVE", PATH=path)
......@@ -119,7 +126,7 @@ class Store(object):
self._request_cmd("RENAME", PATH=old_path, NEW_NAME=new_name)
def get_quota(self): # no CMD? :o
r = self._request(self.username)
r = self._request("/user/")
quota = r.json()
quota.update({
'readable_used': filesizeformat(float(quota['used'])),
......@@ -129,17 +136,17 @@ class Store(object):
return quota
def set_quota(self, quota):
self._request("/quota/" + self.username, post, QUOTA=quota)
self._request("/quota/", post, QUOTA=quota)
def user_exist(self):
try:
self._request(self.username)
self._request("/user/")
return True
except NotOkException:
return False
def create_user(self, password, keys, quota):
self._request("/new/" + self.username, method=post,
self._request("/new/", method=post,
SMBPASSWD=password, KEYS=keys, QUOTA=quota)
@staticmethod
......
......@@ -41,4 +41,23 @@
</div><!-- -col-md-12 -->
</div><!-- .row -->
<div class="row">
<div class="col-md-12">
<div class="panel panel-default">
<div class="panel-heading">
<a id="reschedule-now" class="btn btn-danger pull-right" href="{% url "dashboard.views.reschedule" %}">
<i class="fa fa-magic"></i> {% trans "Reschedule now" %}
</a>
<h3 class="no-margin"><i class="fa fa-truck"></i> {% trans "Virtual machine auto migration" %}</h3>
</div>
<div id="node-list-auto-migration-body">
<h1>Crontab</h1>
<form>
{{ auto_migration_form.as_p }}
</form>
</div>
</div>
</div><!-- -col-md-12 -->
</div><!-- .row -->
{% endblock %}
......@@ -56,6 +56,7 @@ from .views import (
MessageList, MessageDetail, MessageCreate, MessageDelete,
EnableTwoFactorView, DisableTwoFactorView,
AclUserGroupAutocomplete, AclUserAutocomplete,
RescheduleView,
)
from .views.vm import vm_ops, vm_mass_ops
from .views.node import node_ops
......@@ -153,6 +154,8 @@ urlpatterns = [
r'(?P<time>[0-9]{1,2}[hdwy])$'),
NodeListGraphView.as_view(),
name='dashboard.views.node-list-graph'),
url(r'^node/reschedule/$', RescheduleView.as_view(),
name="dashboard.views.reschedule"),
url((r'^template/(?P<pk>\d+)/graph/(?P<metric>[a-z]+)/'
r'(?P<time>[0-9]{1,2}[hdwy])$'),
TemplateGraphView.as_view(),
......
......@@ -25,7 +25,7 @@ from django.core.exceptions import PermissionDenied
from django.core.urlresolvers import reverse_lazy
from django.db.models import Count
from django.forms.models import inlineformset_factory
from django.http import HttpResponse
from django.http import HttpResponse, JsonResponse
from django.shortcuts import redirect
from django.template.loader import render_to_string
from django.utils.translation import ugettext as _
......@@ -37,11 +37,14 @@ from django_tables2 import SingleTableView
from firewall.models import Host
from vm.models import Node, NodeActivity, Trait
from vm.tasks.vm_tasks import check_queue
from vm.tasks.local_periodic_tasks import auto_migrate
from ..forms import TraitForm, HostForm, NodeForm
from ..forms import TraitForm, HostForm, NodeForm, AutoMigrationForm
from ..tables import NodeListTable
from .util import AjaxOperationMixin, OperationView, GraphMixin, DeleteViewBase
from manager.mancelery import crontab_parser
def get_operations(instance, user):
ops = []
......@@ -190,6 +193,14 @@ class NodeList(LoginRequiredMixin, GraphMixin, SingleTableView):
table_class = NodeListTable
table_pagination = False
def get_crontab(self):
return crontab_parser(settings.AUTO_MIGRATION_CRONTAB)
def get_context_data(self):
context = super(NodeList, self).get_context_data()
context["auto_migration_form"] = AutoMigrationForm(self.get_crontab())
return context
def get(self, *args, **kwargs):
if not self.request.user.has_perm('vm.view_statistics'):
raise PermissionDenied()
......@@ -367,3 +378,23 @@ class NodeActivityDetail(LoginRequiredMixin, SuperuserRequiredMixin,
).order_by('-started').select_related())
ctx['icon'] = _get_activity_icon(self.object)
return ctx
class RescheduleView(SuperuserRequiredMixin, View):
def get(self, *args, **kwargs):
try:
auto_migrate.apply_async(queue='localhost.man.slow')
except Exception as e:
msg = str(e)
result = 'error'
else:
result = 'ok'
msg = _('Reschedule has started.')
if self.request.is_ajax():
return JsonResponse({'result': result, 'message': msg})
else:
if result == 'ok':
messages.success(self.request, msg)
else:
messages.error(self.request, msg)
return redirect('dashboard.views.node-list')
......@@ -35,7 +35,8 @@ from django.views.generic import TemplateView
from braces.views import LoginRequiredMixin
from ..store_api import Store, NoStoreException, NotOkException
from ..store_api import (Store, NoStoreException,
NotOkException, NoOrgIdException)
logger = logging.getLogger(__name__)
......@@ -70,6 +71,11 @@ class StoreList(LoginRequiredMixin, TemplateView):
return super(StoreList, self).get(*args, **kwargs)
except NoStoreException:
messages.warning(self.request, _("No store."))
except NoOrgIdException:
messages.warning(self.request,
_("Your organization ID is not set."
" To use the store, you need a"
" unique organization ID."))
except NotOkException:
messages.warning(self.request, _("Store has some problems now."
" Try again later."))
......
......@@ -17,13 +17,27 @@
from celery import Celery
from celery.signals import worker_ready
from celery.schedules import crontab
from datetime import timedelta
from celery.schedules import crontab
from kombu import Queue, Exchange
from os import getenv
HOSTNAME = "localhost"
QUEUE_NAME = HOSTNAME + '.man'
AUTO_MIGRATION_CRONTAB = getenv('AUTO_MIGRATION_CRONTAB', '0 0 * * *')
def crontab_parser(crontab):
fields = crontab.split(' ')
return dict(
minute=fields[0],
hour=fields[1],
day_of_month=fields[2],
month_of_year=fields[3],
day_of_week=fields[4],
)
celery = Celery('manager',
......@@ -56,6 +70,11 @@ celery.conf.update(
'schedule': crontab(minute=10, hour=1),
'options': {'queue': 'localhost.man'}
},
'vm.local_periodic_tasks': {
'task': 'vm.tasks.local_periodic_tasks.auto_migrate',
'schedule': crontab(**crontab_parser(AUTO_MIGRATION_CRONTAB)),
'options': {'queue': 'localhost.man.slow'},
},
}
)
......
......@@ -15,15 +15,18 @@
# You should have received a copy of the GNU General Public License along
# with CIRCLE. If not, see <http://www.gnu.org/licenses/>.
import datetime
import json
import random
from logging import getLogger
from django.conf import settings
from django.core.cache import cache
from django.utils import timezone
from django.utils.translation import ugettext_noop
from common.models import HumanReadableException
from circle.settings.base import SCHEDULER_METHOD
import random
from common.models import HumanReadableException
logger = getLogger(__name__)
......@@ -69,14 +72,14 @@ def common_select(instance, nodes):
logger.warning('select_node: no enough RAM for %s', unicode(instance))
raise NotEnoughMemoryException()
# sort nodes first by processor usage, then priority
# sort nodes first by priority
nodes.sort(key=lambda n: n.priority, reverse=True)
nodes.sort(key=free_cpu_time, reverse=True)
return nodes
def common_evenly(instance, nodes):
nodes = common_select(instance, nodes)
nodes.sort(key=free_cpu_time, reverse=True)
result = nodes[0]
return result
......@@ -87,6 +90,16 @@ def common_random(instance, nodes):
return result
def advanced_with_time_stamp(instance, nodes):
nodes = common_select(instance, nodes)
nodes.sort(key=sorting_key, reverse=True)
logger.info("SCHEDLOG: {}".format(json.dumps({
"event": "after_sort",
"list": map(lambda node: unicode(node), nodes)})))
result = nodes[0]
return result
def select_node(instance, nodes):
''' Select a node for hosting an instance based on its requirements.
'''
......@@ -94,14 +107,72 @@ def select_node(instance, nodes):
result = common_evenly(instance, nodes)
elif SCHEDULER_METHOD == 'random':
result = common_random(instance, nodes)
elif SCHEDULER_METHOD == 'advanced':
result = advanced_with_time_stamp(instance, nodes)
else: # Default method is the random
result = common_random(instance, nodes)
logger.info('Scheduler method: %s selected', unicode(SCHEDULER_METHOD))
logger.info('select_node: %s for %s', unicode(result), unicode(instance))
logger.info("SCHEDLOG: {}".format(json.dumps(
{"event": "select",
"node": unicode(result),
"vm": unicode(instance)})))
set_time_stamp(result)
return result
def sorting_key(node):
"""Determines how valuable a node is for scheduling.
"""
key = 0
corr = last_scheduled_correction_factor(node)
if free_cpu_time(node) < free_ram(node):
key = free_cpu_time(node) * corr
else:
key = free_ram(node) * corr
logger.info("SCHEDLOG: {}".format(json.dumps({
"event": "sort",
"node": unicode(node),
"sorting_key": unicode(key),
"free_cpu_time": unicode(free_cpu_time(node)),
"free_ram": unicode(free_ram(node)),
"last_scheduled_correction_factor": unicode(last_scheduled_correction_factor(node))})))
return key
def set_time_stamp(node):
cache.set('time_stamp{}'.format(node.id), timezone.now())
def get_time_stamp(node):
time_stamp = cache.get('time_stamp{}'.format(node.id))
if time_stamp:
return time_stamp
return datetime.datetime(1970, 1, 1, tzinfo=timezone.get_current_timezone())
def last_scheduled_correction_factor(node):
"""Returns the time correction factor for a node.
The monitor data may be outdated, because of recent scheduling for a given node.
The return value is between 0 and 1, higher value indicates more time since the
last scheduling for the given node.
"""
factor = 0
max_time_diff = settings.SCHEDULER_TIME_SENSITIVITY_IN_SECONDS
current_time = timezone.now()
time_difference_in_seconds = (
current_time - get_time_stamp(node)).total_seconds()
factor = time_difference_in_seconds/float(max_time_diff)
if factor > 1:
factor = 1
elif factor < 0:
factor = 1
logger.info('Scheduler set factor to %s', unicode(factor))
return factor
def has_traits(traits, node):
"""True, if the node has all specified traits; otherwise, false.
"""
......@@ -142,11 +213,27 @@ def free_cpu_time(node):
Higher values indicate more idle time.
"""
try:
activity = node.cpu_usage / 100
inactivity = 1 - activity
cores = node.num_cores
return cores * inactivity
free_cpu_percent = 1 - node.cpu_usage
weight = node.cpu_weight
weighted_value = free_cpu_percent * weight
return weighted_value
except TypeError as e:
logger.exception('Got incorrect monitoring data for node %s. %s',
unicode(node), unicode(e))
return 0 # will result lowest priority
def free_ram(node):
"""Get an indicator number for free RAM on the node.
Higher value indicates more RAM.
"""
try:
free_ram_percent = 1 - node.ram_usage
weight = node.ram_weight
weighted_value = free_ram_percent * weight
return weighted_value
except TypeError as e:
logger.warning('Got incorrect monitoring data for node %s. %s',
unicode(node), unicode(e))
return False # monitoring data is incorrect
logger.exception('Got incorrect monitoring data for node %s. %s',
unicode(node), unicode(e))
return 0 # will result lowest priority
# -*- coding: utf-8 -*-
# Generated by Django 1.11.6 on 2017-12-13 20:18
from __future__ import unicode_literals
from django.db import migrations, models
import django.utils.timezone
class Migration(migrations.Migration):
dependencies = [
('vm', '0002_interface_model'),
]
operations = [
migrations.AddField(
model_name='node',
name='cpu_weight',
field=models.FloatField(default=1.0, help_text='Indicates the relative CPU power of this node.', verbose_name='CPU Weight'),
),
migrations.AddField(
model_name='node',
name='ram_weight',
field=models.FloatField(default=1.0, help_text='Indicates the relative RAM quantity of this node.', verbose_name='RAM Weight'),
),
migrations.AddField(
model_name='node',
name='time_stamp',
field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now, help_text='A timestamp for the node, used by the scheduler.', verbose_name='Last Scheduled Time Stamp'),
preserve_default=False,
),
]
......@@ -865,6 +865,53 @@ class Instance(AclBase, VirtualMachineDescModel, StatusModel, OperatedMixin,
def metric_prefix(self):
return 'vm.%s' % self.vm_name
class MonitorUnavailableException(Exception):
"""Exception for monitor_info()
Indicates the unavailability of the monitoring server.
"""
pass
def monitor_info(self):
metrics = ('cpu.percent', 'memory.usage')
prefix = self.metric_prefix
params = [('target', '%s.%s' % (prefix, metric))
for metric in metrics]
params.append(('from', '-5min'))
params.append(('format', 'json'))
try:
logger.info('%s %s', settings.GRAPHITE_URL, params)
response = requests.get(settings.GRAPHITE_URL, params=params)
retval = {}
for target in response.json():
# Example:
# {"target": "circle.vm.{name}.cpu.usage",
# "datapoints": [[0.6, 1403045700], [0.5, 1403045760]
try:
metric = target['target']
if metric.startswith(prefix):
metric = metric[len(prefix):]
else:
continue
value = target['datapoints'][-2][0]
retval[metric] = float(value)
except (KeyError, IndexError, ValueError):
continue
return retval
except Exception:
logger.exception('Monitor server unavailable: ')
raise Instance.MonitorUnavailableException()
def cpu_usage(self):
return self.monitor_info().get('cpu.percent')
def ram_usage(self):
return self.monitor_info().get('memory.usage')
@contextmanager
def activity(self, code_suffix, readable_name, on_abort=None,
on_commit=None, task_uuid=None, user=None,
......
......@@ -30,7 +30,7 @@ from time import time, sleep
from django.conf import settings
from django.db.models import (
CharField, IntegerField, ForeignKey, BooleanField, ManyToManyField,
FloatField, permalink, Sum
FloatField, DateTimeField, permalink, Sum
)
from django.utils import timezone
from django.utils.translation import ugettext_lazy as _
......@@ -128,11 +128,15 @@ class Node(OperatedMixin, TimeStampedModel):
enabled = BooleanField(verbose_name=_('enabled'), default=False,
help_text=_('Indicates whether the node can '
'be used for hosting.'))
schedule_enabled = BooleanField(verbose_name=_('schedule enabled'),
default=False, help_text=_(
'Indicates whether a vm can be '
'automatically scheduled to this '
'node.'))
schedule_enabled = BooleanField(
verbose_name=_('schedule enabled'),
default=False,
help_text=_(
'Indicates whether a vm can be '
'automatically scheduled to this '
'node.'
)
)
traits = ManyToManyField(Trait, blank=True,
help_text=_("Declared traits."),
verbose_name=_('traits'))
......@@ -140,6 +144,21 @@ class Node(OperatedMixin, TimeStampedModel):
overcommit = FloatField(default=1.0, verbose_name=_("overcommit ratio"),
help_text=_("The ratio of total memory with "
"to without overcommit."))
ram_weight = FloatField(
default=1.0,
help_text=_("Indicates the relative RAM quantity of this node."),
verbose_name=_("RAM Weight")
)
cpu_weight = FloatField(
default=1.0,
help_text=_("Indicates the relative CPU power of this node."),
verbose_name=_("CPU Weight")
)
time_stamp = DateTimeField(
auto_now_add=True,
help_text=_("A timestamp for the node, used by the scheduler."),
verbose_name=_("Last Scheduled Time Stamp")
)
class Meta:
app_label = 'vm'
......@@ -162,7 +181,7 @@ class Node(OperatedMixin, TimeStampedModel):
self.get_remote_queue_name("vm", "fast")
self.get_remote_queue_name("vm", "slow")
self.get_remote_queue_name("net", "fast")
except:
except Exception:
return False
else:
return True
......@@ -359,7 +378,7 @@ class Node(OperatedMixin, TimeStampedModel):
continue
return retval
except:
except Exception:
logger.exception('Unhandled exception: ')
return self.remote_query(vm_tasks.get_node_metrics, timeout=30,
priority="fast")
......@@ -431,7 +450,7 @@ class Node(OperatedMixin, TimeStampedModel):
# [{'name': 'cloud-1234', 'state': 'RUNNING', ...}, ...]
try:
id = int(i['name'].split('-')[1])
except:
except Exception:
pass # name format doesn't match
else:
domains[id] = i['state']
......
......@@ -17,7 +17,9 @@
import logging
from django.utils import timezone
from datetime import timedelta
from django.utils.translation import ugettext_noop
from django.conf import settings
from manager.mancelery import celery
from vm.models import Node, Instance
......@@ -63,7 +65,7 @@ def garbage_collector(offset=timezone.timedelta(seconds=20)):
bw = bw + 1
logger.info("Expired instance %d suspended." % i.pk)
try:
i.sleep.async(system=True)
i.sleep.async(system=True)
i.owner.profile.notify(
ugettext_noop('%(instance)s suspended'),
ugettext_noop(
......@@ -71,8 +73,8 @@ def garbage_collector(offset=timezone.timedelta(seconds=20)):
'has been suspended due to expiration. '
'You can resume or destroy it.'),
instance=i.name, url=i.get_absolute_url())
except ActivityInProgressError:
logger.error("Expired instance %d can't be destroyed due the AtctivityInPorgressError.", i.pk)
except ActivityInProgressError:
logger.error("Expired instance %d can't be destroyed due the AtctivityInPorgressError.", i.pk)
except Exception as e:
logger.info('Could not notify owner of instance %d .%s',
i.pk, unicode(e))
......@@ -81,3 +83,43 @@ def garbage_collector(offset=timezone.timedelta(seconds=20)):