scheduler.py 4.36 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# Copyright 2014 Budapest University of Technology and Economics (BME IK)
#
# This file is part of CIRCLE Cloud.
#
# CIRCLE is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# CIRCLE is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with CIRCLE.  If not, see <http://www.gnu.org/licenses/>.

18 19
from logging import getLogger

20
from django.db.models import Sum
21 22 23
from django.utils.translation import ugettext_noop

from common.models import HumanReadableException
24

25 26
logger = getLogger(__name__)

27

28 29
class SchedulerError(HumanReadableException):
    """Common base for the errors raised by the node scheduler.

    Subclasses provide a class-level ``message`` and may additionally set
    ``admin_message``; when the latter is left falsy, ``message`` is used
    in its place.
    """

    # Optional alternative text; falls back to ``message`` when not set.
    admin_message = None

    def __init__(self, params=None, level=None, **kwargs):
        """Initialise the exception from the class-level message texts.

        params -- optional mapping merged into the keyword arguments;
                  its entries override keyword arguments of the same name.
        level  -- passed through unchanged as the first positional
                  argument of HumanReadableException.__init__.
        """
        if params:
            kwargs.update(params)
        user_text = self.message
        admin_text = self.admin_message or user_text
        super(SchedulerError, self).__init__(
            level, user_text, admin_text, kwargs)
36 37


38 39 40 41
class NotEnoughMemoryException(SchedulerError):
    """Raised by select_node when no candidate node passes the RAM check."""

    message = ugettext_noop(
        "The resources required for launching the virtual machine "
        "are not available currently. Please try again later.")

    # More specific wording used instead of ``message`` as the admin text.
    admin_message = ugettext_noop(
        "The required free memory for launching the virtual machine "
        "is not available on any usable node currently. "
        "Please try again later.")
46

47

48 49 50 51
class TraitsUnsatisfiableException(SchedulerError):
    """Raised by select_node when no enabled, online node has the traits."""

    # NOTE(review): "vitual" is a typo, kept deliberately -- the text is
    # marked for translation, so changing it would presumably orphan any
    # existing catalog entry keyed on it.  Fix together with the catalogs.
    message = ugettext_noop(
        "No node can satisfy the required traits of the new vitual machine "
        "currently.")
52 53


54
def select_node(instance, nodes):
    """Select a node for hosting an instance based on its requirements.

    Candidates are narrowed in two steps: a node must be enabled, online
    and carry every trait in ``instance.req_traits``; then it must pass
    ``has_enough_ram`` for ``instance.ram_size``.  Among the remaining
    nodes the one with the highest ``free_cpu_time`` is returned, ties
    being broken by the higher ``priority``.

    Raises TraitsUnsatisfiableException when the first step leaves no node
    and NotEnoughMemoryException when the second one does.
    """
    # The required traits do not depend on the node, so evaluate them once
    # here instead of once per candidate inside the comprehension.
    required_traits = set(instance.req_traits.all())

    # check required traits
    nodes = [n for n in nodes
             if n.enabled and n.online and has_traits(required_traits, n)]
    if not nodes:
        logger.warning('select_node: no usable node for %s', unicode(instance))
        raise TraitsUnsatisfiableException()

    # check required RAM
    nodes = [n for n in nodes if has_enough_ram(instance.ram_size, n)]
    if not nodes:
        logger.warning('select_node: no enough RAM for %s', unicode(instance))
        raise NotEnoughMemoryException()

    # list.sort() is stable: sorting by the secondary key (priority) first
    # and by the primary key (idle CPU) last orders the nodes by idle CPU,
    # then by priority, both descending.
    nodes.sort(key=lambda n: n.priority, reverse=True)
    nodes.sort(key=free_cpu_time, reverse=True)
    result = nodes[0]

    logger.info('select_node: %s for %s', unicode(result), unicode(instance))
    return result
78 79 80 81 82 83 84 85 86 87 88 89 90


def has_traits(traits, node):
    """Tell whether the node provides every one of the given traits.

    traits -- iterable of the required traits (may be empty, in which
              case the result is always True).
    node   -- object whose ``traits.all()`` yields the traits it has.
    """
    required = set(traits)
    available = set(node.traits.all())
    return required <= available


def has_enough_ram(ram_size, node):
    """True, if the node has enough memory to accommodate a guest requiring
       ram_size mebibytes of memory; otherwise, false.

    Both of the following must hold, each as a strict inequality:
      * the request is below the currently unused memory
        (``node.ram_size - node.byte_ram_usage``);
      * the request is below ``node.ram_size_with_overcommit`` minus the
        summed ``ram_size`` of the instances in ``node.instance_set``.

    A TypeError raised while computing these is logged and treated as
    "not enough RAM" (False).
    """
    requested = ram_size * 1024 * 1024  # same scaling as applied below
    try:
        total = node.ram_size
        unused = total - node.byte_ram_usage

        overcommit = node.ram_size_with_overcommit
        # The aggregate is None when there is nothing to sum; count as 0.
        reserved_sum = node.instance_set.aggregate(r=Sum('ram_size'))['r']
        free = overcommit - (reserved_sum or 0) * 1024 * 1024

        fits_unused = requested < unused
        fits_free = requested < free
        retval = fits_unused and fits_free

        logger.debug('has_enough_ram(%d, %s)=%s (total=%s unused=%s'
                     ' overcommit=%s free=%s free_ok=%s overcommit_ok=%s)',
                     requested, node, retval, total, unused, overcommit,
                     free, fits_unused, fits_free)
        return retval
    except TypeError as e:
        logger.exception('Got incorrect monitoring data for node %s. %s',
                         unicode(node), unicode(e))
        return False
113 114


115 116
def free_cpu_time(node):
    """Get an indicator number for idle processor time on the node.

    Higher values indicate more idle time.

    The value is ``node.num_cores`` multiplied by the idle fraction, where
    ``node.cpu_usage`` is treated as a percentage.  When a TypeError is
    raised during the computation, a warning is logged and False is
    returned, which compares equal to 0 (no idle time) when used as a
    sort key.
    """
    try:
        # Divide by a float: under Python 2 an integer percentage divided
        # by the integer 100 would truncate to 0 (or 1).
        activity = node.cpu_usage / 100.0
        inactivity = 1 - activity
        return node.num_cores * inactivity
    except TypeError as e:
        logger.warning('Got incorrect monitoring data for node %s. %s',
                       unicode(node), unicode(e))
        return False  # monitoring data is incorrect