scheduler.py 4.95 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# Copyright 2014 Budapest University of Technology and Economics (BME IK)
#
# This file is part of CIRCLE Cloud.
#
# CIRCLE is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# CIRCLE is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with CIRCLE.  If not, see <http://www.gnu.org/licenses/>.

18 19
from logging import getLogger

20 21 22
from django.utils.translation import ugettext_noop

from common.models import HumanReadableException
23

24 25 26 27
from circle.settings.base import SCHEDULER_METHOD

import random

28 29
logger = getLogger(__name__)

30

31 32
class SchedulerError(HumanReadableException):
    """Base class of the errors raised by the scheduler.

    Subclasses are expected to define ``message`` and may override
    ``admin_message``; when ``admin_message`` is left falsy, ``message`` is
    passed to the parent constructor in its place.
    """
    admin_message = None

    def __init__(self, params=None, level=None, **kwargs):
        """Initialise the error from the class-level message attributes.

        ``params`` is optional; when given, its items are merged into the
        keyword arguments (overriding duplicates). The merged mapping is
        handed to the parent constructor together with ``level`` and the
        two message texts.
        """
        if params:
            kwargs.update(params)
        admin_text = self.admin_message or self.message
        super(SchedulerError, self).__init__(
            level, self.message, admin_text, kwargs)
39 40


41 42 43 44
class NotEnoughMemoryException(SchedulerError):
    """Scheduler error for the case when no node has enough memory.

    ``message`` is worded generically, while ``admin_message`` names free
    memory as the missing resource.
    """

    message = ugettext_noop(
        "The resources required for launching the virtual machine "
        "are not available currently. Please try again later.")

    admin_message = ugettext_noop(
        "The required free memory for launching the virtual machine "
        "is not available on any usable node currently. "
        "Please try again later.")
49

50

51 52 53
class TraitsUnsatisfiableException(SchedulerError):
    """Scheduler error for the case when no node has the required traits."""

    message = ugettext_noop(
        "No node can satisfy the required traits "
        "of the new virtual machine currently.")
55 56


57
def common_select(instance, nodes):
    """Filter and rank the nodes that are able to host the instance.

    A node is kept only if it has scheduling enabled, is online, has every
    trait required by the instance and passes the has_enough_ram() check
    for the instance's ram_size.

    Returns the remaining nodes as a list ordered by free_cpu_time()
    (highest first); nodes with equal values are ordered by priority
    (highest first).

    Raises TraitsUnsatisfiableException if no enabled, online node has the
    required traits, and NotEnoughMemoryException if none of those nodes
    has enough memory.
    """
    # Evaluate the required traits once; building them inside the
    # comprehension would repeat the lookup for every candidate node.
    required_traits = set(instance.req_traits.all())

    # check required traits
    nodes = [n for n in nodes
             if n.schedule_enabled and n.online and
             has_traits(required_traits, n)]
    if not nodes:
        logger.warning('select_node: no usable node for %s', unicode(instance))
        raise TraitsUnsatisfiableException()

    # check required RAM
    nodes = [n for n in nodes if has_enough_ram(instance.ram_size, n)]
    if not nodes:
        logger.warning('select_node: no enough RAM for %s', unicode(instance))
        raise NotEnoughMemoryException()

    # list.sort() is stable, so sorting by the secondary key (priority)
    # first and by the primary key (free CPU time) last yields an ordering
    # by free CPU time with ties broken by priority.
    nodes.sort(key=lambda n: n.priority, reverse=True)
    nodes.sort(key=free_cpu_time, reverse=True)
    return nodes


def common_evenly(instance, nodes):
    """Return the best-ranked node from common_select().

    Any exception raised by common_select() propagates to the caller.
    """
    candidates = common_select(instance, nodes)
    return candidates[0]
82

83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100

def common_random(instance, nodes):
    """Return a randomly chosen node from those accepted by common_select().

    Any exception raised by common_select() propagates to the caller.
    """
    return random.choice(common_select(instance, nodes))


def select_node(instance, nodes):
    ''' Select a node for hosting an instance based on its requirements.

    The strategy is taken from the SCHEDULER_METHOD setting: 'evenly' uses
    common_evenly(); 'random' and every other value use common_random().
    '''
    if SCHEDULER_METHOD == 'evenly':
        chooser = common_evenly
    else:  # 'random' is also the fallback for unrecognised values
        chooser = common_random
    result = chooser(instance, nodes)

    logger.info('Scheduler method: %s selected', unicode(SCHEDULER_METHOD))
    logger.info('select_node: %s for %s', unicode(result), unicode(instance))
    return result
103 104 105 106 107 108 109 110 111 112 113 114 115


def has_traits(traits, node):
    """Tell whether the node provides every one of the given traits.

    Returns True when no requested trait is absent from
    ``node.traits.all()`` (this includes an empty ``traits``); otherwise
    False.
    """
    required = set(traits)
    missing = required.difference(node.traits.all())
    return not missing


def has_enough_ram(ram_size, node):
    """Tell whether the node can accommodate a guest needing ram_size
       mebibytes of memory.

    The request (converted to bytes) must be strictly smaller than both the
    node's currently unused memory (ram_size - byte_ram_usage) and its
    unreserved memory (ram_size_with_overcommit - allocated_ram).

    Returns False, after logging the problem, when the arithmetic on the
    node's values raises TypeError.
    """
    required_bytes = ram_size * 1024 * 1024
    try:
        total = node.ram_size
        unused = total - node.byte_ram_usage

        overcommit = node.ram_size_with_overcommit
        free = overcommit - node.allocated_ram

        fits_unused = required_bytes < unused
        fits_free = required_bytes < free
        retval = fits_unused and fits_free

        logger.debug('has_enough_ram(%d, %s)=%s (total=%s unused=%s'
                     ' overcommit=%s free=%s free_ok=%s overcommit_ok=%s)',
                     required_bytes, node, retval, total, unused, overcommit,
                     free, fits_unused, fits_free)
        return retval
    except TypeError as e:
        logger.exception('Got incorrect monitoring data for node %s. %s',
                         unicode(node), unicode(e))
        return False
137 138


139 140
def free_cpu_time(node):
    """Get an indicator number for idle processor time on the node.

    Higher values indicate more idle time.

    The value is ``num_cores * (1 - cpu_usage / 100)``. If the node's
    values cannot be used in this arithmetic (TypeError), a warning is
    logged and 0 is returned.
    """
    try:
        # Divide by a float: with an integer cpu_usage, ``/ 100`` would be
        # truncating integer division under Python 2.
        activity = node.cpu_usage / 100.0
        inactivity = 1 - activity
        return node.num_cores * inactivity
    except TypeError as e:
        logger.warning('Got incorrect monitoring data for node %s. %s',
                       unicode(node), unicode(e))
        # monitoring data is incorrect; return a numeric 0 rather than a
        # boolean so the result type stays consistent
        return 0