Commit 2dd60e88 by Szeberényi Imre

Make metric collection more exception-safe

parent b7d0105b
......@@ -5,6 +5,7 @@ After=network.target
[Service]
User=cloud
Group=cloud
SyslogIdentifier=monitor-client
WorkingDirectory=/home/cloud/monitor-client
ExecStart=/bin/bash -c "source /etc/profile; workon monitor-client; exec python manage.py run"
Restart=always
......
......@@ -156,53 +156,69 @@ class Client:
now = time.time()
running_vms = []
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument('-name')
# '-m ' (with trailing space) is intentional to avoid collision with '-machine'
parser.add_argument('--memory-size', '-m ', dest='memory_size',
type=parse_memory_size)
try:
for entry in psutil.process_iter():
if entry.name() in ('kvm', 'qemu-system-x86_64'):
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument('-name')
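# IMPORTANT: no space at the end of '-m', and we use our own parse function
# NOTE(review): the option string below does contain a trailing space ('-m ') - confirm which is intended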
parser.add_argument('--memory-size', '-m ', dest='memory_size',
type=parse_memory_size)
args, unknown = parser.parse_known_args(entry.cmdline()[1:])
# for Red Hat style parametering of kvm
if isinstance(args.name, str):
args.name = re.sub(r"^guest=", "", args.name)
args.name = re.sub(r",debug-threads=.*$", "", args.name)
process = self.processes.get(entry.pid, None)
if not process or process.cmdline() != entry.cmdline():
process = psutil.Process(entry.pid)
logger.info('New process: %s', process)
self.processes[entry.pid] = process
if args.memory_size:
# args.memory_size is in MiB
rss_bytes = float(process.memory_info().rss)
mem_bytes_total = args.memory_size * 1024 ** 2
mem_perc = (rss_bytes / mem_bytes_total) * 90.0
metrics.append('vm.%(name)s.memory.usage %(value)f %(time)d' % {
'name': args.name,
'value': mem_perc,
'time': now,
})
else:
logger.warning('Pid %d: no memory size found for VM %s', entry.pid, args.name)
for entry in psutil.process_iter(attrs=['pid', 'name', 'cmdline']):
info = entry.info
pid = info.get('pid')
name = info.get('name')
cmdline = info.get('cmdline')
if not name or name not in ('kvm', 'qemu-system-x86_64'):
continue
if not cmdline or len(cmdline) < 2:
logger.debug('Pid %s: empty or too short cmdline: %r', pid, cmdline)
continue
try:
args, unknown = parser.parse_known_args(cmdline[1:])
except SystemExit:
logger.warning("Argparse failed pid=%s cmdline=%r", pid, cmdline)
continue
if isinstance(args.name, str):
args.name = re.sub(r"^guest=", "", args.name)
args.name = re.sub(r",debug-threads=.*$", "", args.name)
process = self.processes.get(pid, None)
if not process:
process = psutil.Process(pid)
logger.info('New process: %s', process)
self.processes[pid] = process
# no cpu_perc info when process appears first
else:
cpu_perc = process.cpu_percent()
metrics.append('vm.%(name)s.cpu.percent %(value)f %(time)d' % {
'name': args.name,
'value': cpu_perc,
'time': now,
})
running_vms.append(args.name)
if args.memory_size:
# args.memory_size is in MiB
rss_bytes = float(process.memory_info().rss)
mem_bytes_total = args.memory_size * 1024 ** 2
mem_perc = (rss_bytes / mem_bytes_total) * 90.0
metrics.append('vm.%(name)s.memory.usage %(value)f %(time)d' % {
'name': args.name,
'value': mem_perc,
'time': now,
})
else:
logger.warning('Pid: %d args.name is empty?: %s %s',
entry.pid, entry.name(), entry.cmdline())
except (psutil.NoSuchProcess, TypeError):
logger.warning('Process %d lost. Entry: %s', entry.pid, entry.cmdline()[0:])
logger.warning('Pid %d: no memory size found for VM %s', pid, args.name)
running_vms.append(args.name)
else:
logger.warning('Pid: %d args.name is empty?: %s %s', pid, name, cmdline)
except Exception:
logger.exception(
"collect_vms failed: pid=%s name=%r cmdline=%r",
locals().get("pid", "?"),
locals().get("name", None),
locals().get("cmdline", None),
)
interfaces = psutil.net_io_counters(pernic=True)
for interface, data in interfaces.items():
......@@ -243,8 +259,10 @@ class Client:
metricCollectors parameter that should be provided by the collectables
module to work properly.
"""
CACHE_TTL = 1 * 24 * 3600 # One day
self.connect()
self.processes = {}
self.last_cache_cleaup = time.time()
try:
while True:
metrics = self.collect_node() + self.collect_vms()
......@@ -253,7 +271,11 @@ class Client:
self.send(chunk)
logger.debug("metrics: %r", metrics)
logger.info("%d metrics sent", len(metrics))
if time.time() - self.last_cache_cleaup > CACHE_TTL:
self.processes.clear()
self.last_cache_cleaup = time.time()
time.sleep(10)
except KeyboardInterrupt:
logger.info("Reporting has stopped by the user. Exiting...")
finally:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment