# scheduler.py
# Copyright 2014 Budapest University of Technology and Economics (BME IK)
#
# This file is part of CIRCLE Cloud.
#
# CIRCLE is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# CIRCLE is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with CIRCLE.  If not, see <http://www.gnu.org/licenses/>.

18 19
from logging import getLogger

20 21 22
from django.utils.translation import ugettext_noop

from common.models import HumanReadableException
23

24 25
logger = getLogger(__name__)

26

27 28
class SchedulerError(HumanReadableException):
    """Base class of the errors raised by select_node.

    Subclasses define ``message`` and may override ``admin_message``.
    """

    # Alternative text handed to HumanReadableException as its third
    # argument; while it is None, ``message`` is passed in its place.
    admin_message = None

    def __init__(self, params=None, level=None, **kwargs):
        """Initialise the error from the class-level message attributes.

        ``params`` is an optional mapping merged into ``kwargs`` (on a key
        clash the value from ``params`` wins). The merged mapping is passed
        on to HumanReadableException together with ``level``.
        """
        kwargs.update(params or {})
        super(SchedulerError, self).__init__(
            level, self.message, self.admin_message or self.message,
            kwargs)
35 36


37 38 39 40
class NotEnoughMemoryException(SchedulerError):
    """Raised by select_node when no candidate node passes the RAM check."""

    message = ugettext_noop(
        "The resources required for launching the virtual machine are not "
        "available currently. Please try again later.")

    # More specific wording; SchedulerError passes it on instead of
    # ``message`` as the third argument of the base initialiser.
    admin_message = ugettext_noop(
        "The required free memory for launching the virtual machine is not "
        "available on any usable node currently. Please try again later.")
45

46

47 48 49
class TraitsUnsatisfiableException(SchedulerError):
    """Raised by select_node when no enabled, online node has the traits
    required by the instance.
    """

    message = ugettext_noop(
        "No node can satisfy the required traits of the "
        "new virtual machine currently.")
51 52


53
def select_node(instance, nodes):
    """Select a node for hosting an instance based on its requirements.

    Candidates are narrowed in two steps: first to the nodes that are
    schedule-enabled, online and have every trait in
    ``instance.req_traits``; then to those for which has_enough_ram()
    accepts ``instance.ram_size``. Among the remaining nodes the one with
    the highest free_cpu_time() is returned; ties are decided by the higher
    ``priority``.

    Raises:
        TraitsUnsatisfiableException: no node survives the first step.
        NotEnoughMemoryException: no node survives the second step.
    """
    # The required traits do not depend on the candidate node, so fetch
    # them once instead of once per node.
    required_traits = set(instance.req_traits.all())

    # check required traits
    nodes = [n for n in nodes
             if n.schedule_enabled and n.online
             and has_traits(required_traits, n)]
    if not nodes:
        logger.warning('select_node: no usable node for %s', unicode(instance))
        raise TraitsUnsatisfiableException()

    # check required RAM
    nodes = [n for n in nodes if has_enough_ram(instance.ram_size, n)]
    if not nodes:
        logger.warning('select_node: no enough RAM for %s', unicode(instance))
        raise NotEnoughMemoryException()

    # Rank by idle processor time first, then by priority. max() returns
    # the first maximal element, i.e. the same node a stable descending
    # sort on these keys would place at index 0.
    result = max(nodes, key=lambda n: (free_cpu_time(n), n.priority))

    logger.info('select_node: %s for %s', unicode(result), unicode(instance))
    return result
77 78 79 80 81 82 83 84 85 86 87 88 89


def has_traits(traits, node):
    """Tell whether every trait in ``traits`` is among the node's traits.

    Returns True when ``traits`` is empty.
    """
    required = set(traits)
    available = node.traits.all()
    return required.issubset(available)


def has_enough_ram(ram_size, node):
    """True, if the node has enough memory to accommodate a guest requiring
       ram_size mebibytes of memory; otherwise, false.

    The request must be strictly smaller than both
      * the unused memory (``node.ram_size - node.byte_ram_usage``), and
      * the unreserved overcommitted capacity
        (``node.ram_size_with_overcommit - node.allocated_ram``).

    If the node's figures raise TypeError during the computation, the
    error is logged and False is returned.
    """
    ram_size = ram_size * 1024 * 1024  # MiB -> bytes
    try:
        total = node.ram_size
        used = node.byte_ram_usage
        unused = total - used

        overcommit = node.ram_size_with_overcommit
        reserved = node.allocated_ram
        free = overcommit - reserved

        # Evaluate each comparison once; both the result and the debug
        # line below use them.
        fits_unused = ram_size < unused
        fits_free = ram_size < free
        retval = fits_unused and fits_free

        logger.debug('has_enough_ram(%d, %s)=%s (total=%s unused=%s'
                     ' overcommit=%s free=%s free_ok=%s overcommit_ok=%s)',
                     ram_size, node, retval, total, unused, overcommit, free,
                     fits_unused, fits_free)
        return retval
    except TypeError as e:
        logger.exception('Got incorrect monitoring data for node %s. %s',
                         unicode(node), unicode(e))
        return False
111 112


113 114
def free_cpu_time(node):
    """Get an indicator number for idle processor time on the node.

    Higher values indicate more idle time. The value is
    ``node.num_cores * (1 - node.cpu_usage / 100)``.

    If the node's figures raise TypeError during the computation, a
    warning is logged and 0 is returned.
    """
    try:
        # Divide by a float: with an integer cpu_usage, ``/ 100`` would
        # truncate to 0 under Python 2 and every node would look idle.
        activity = node.cpu_usage / 100.0
        inactivity = 1 - activity
        cores = node.num_cores
        return cores * inactivity
    except TypeError as e:
        logger.warning('Got incorrect monitoring data for node %s. %s',
                       unicode(node), unicode(e))
        # monitoring data is incorrect; 0 keeps the return value numeric
        # and compares equal to the False returned previously.
        return 0