Merge pull request #17 from com6056/jrodgers-expire-tasks

fix expire bug, cleanup tasks
This commit is contained in:
Jordan Rodgers 2019-02-19 22:34:39 -08:00 committed by GitHub
commit a8703ec53c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 152 additions and 116 deletions

View file

@ -1,5 +1,6 @@
import os import os
import subprocess import subprocess
from flask import Flask from flask import Flask
app = Flask(__name__) app = Flask(__name__)

View file

@ -3,6 +3,7 @@ import json
import time import time
import psutil import psutil
import atexit import atexit
import logging
import psycopg2 import psycopg2
import subprocess import subprocess
import rq_dashboard import rq_dashboard
@ -20,6 +21,9 @@ from proxstar.starrs import *
from proxstar.ldapdb import * from proxstar.ldapdb import *
from proxstar.proxmox import * from proxstar.proxmox import *
logging.basicConfig(
format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO)
app = Flask(__name__) app = Flask(__name__)
app.config.from_object(rq_dashboard.default_settings) app.config.from_object(rq_dashboard.default_settings)
if os.path.exists( if os.path.exists(
@ -38,10 +42,16 @@ with open('proxmox_ssh_key', 'w') as key:
ssh_tunnels = [] ssh_tunnels = []
auth = OIDCAuthentication( # Keep on retrying until we have auth defined since SSO sucks
app, while True:
issuer=app.config['OIDC_ISSUER'], try:
client_registration_info=app.config['OIDC_CLIENT_CONFIG']) auth
break
except:
auth = OIDCAuthentication(
app,
issuer=app.config['OIDC_ISSUER'],
client_registration_info=app.config['OIDC_CLIENT_CONFIG'])
redis_conn = Redis(app.config['REDIS_HOST'], app.config['REDIS_PORT']) redis_conn = Redis(app.config['REDIS_HOST'], app.config['REDIS_PORT'])
q = Queue(connection=redis_conn) q = Queue(connection=redis_conn)
@ -62,6 +72,7 @@ from proxstar.user import User
from proxstar.tasks import generate_pool_cache_task, process_expiring_vms_task, cleanup_vnc_task, delete_vm_task, create_vm_task, setup_template_task from proxstar.tasks import generate_pool_cache_task, process_expiring_vms_task, cleanup_vnc_task, delete_vm_task, create_vm_task, setup_template_task
if 'generate_pool_cache' not in scheduler: if 'generate_pool_cache' not in scheduler:
logging.info('adding generate pool cache task to scheduler')
scheduler.schedule( scheduler.schedule(
id='generate_pool_cache', id='generate_pool_cache',
scheduled_time=datetime.datetime.utcnow(), scheduled_time=datetime.datetime.utcnow(),
@ -69,10 +80,12 @@ if 'generate_pool_cache' not in scheduler:
interval=90) interval=90)
if 'process_expiring_vms' not in scheduler: if 'process_expiring_vms' not in scheduler:
logging.info('adding process expiring VMs task to scheduler')
scheduler.cron( scheduler.cron(
'0 5 * * *', id='process_expiring_vms', func=process_expiring_vms_task) '0 5 * * *', id='process_expiring_vms', func=process_expiring_vms_task)
if 'cleanup_vnc' not in scheduler: if 'cleanup_vnc' not in scheduler:
logging.info('adding cleanup VNC task to scheduler')
scheduler.schedule( scheduler.schedule(
id='cleanup_vnc', id='cleanup_vnc',
scheduled_time=datetime.datetime.utcnow(), scheduled_time=datetime.datetime.utcnow(),
@ -238,7 +251,7 @@ def vm_console(vmid):
port = str(5900 + int(vmid)) port = str(5900 + int(vmid))
token = add_vnc_target(port) token = add_vnc_target(port)
node = "{}.csh.rit.edu".format(vm.node) node = "{}.csh.rit.edu".format(vm.node)
print("Creating SSH tunnel to {} for VM {}.".format(node, vm.id)) logging.info("creating SSH tunnel to %s for VM %s", node, vm.id)
tunnel = start_ssh_tunnel(node, port) tunnel = start_ssh_tunnel(node, port)
ssh_tunnels.append(tunnel) ssh_tunnels.append(tunnel)
vm.start_vnc(port) vm.start_vnc(port)

View file

@ -1,8 +1,11 @@
import datetime import datetime
from sqlalchemy import exists
from dateutil.relativedelta import relativedelta from dateutil.relativedelta import relativedelta
from sqlalchemy import exists
from proxstar.ldapdb import * from proxstar.ldapdb import *
from proxstar.models import VM_Expiration, Usage_Limit, Pool_Cache, Ignored_Pools, Template, Allowed_Users, Base from proxstar.models import (Allowed_Users, Base, Ignored_Pools, Pool_Cache,
Template, Usage_Limit, VM_Expiration)
def get_vm_expire(db, vmid, months): def get_vm_expire(db, vmid, months):

View file

@ -1,12 +1,14 @@
from csh_ldap import CSHLDAP from csh_ldap import CSHLDAP
from flask import current_app as app from flask import current_app as app
from proxstar import logging
def connect_ldap(): def connect_ldap():
try: try:
ldap = CSHLDAP(app.config['LDAP_BIND_DN'], app.config['LDAP_BIND_PW']) ldap = CSHLDAP(app.config['LDAP_BIND_DN'], app.config['LDAP_BIND_PW'])
except: except Exception as e:
print("Unable to connect to LDAP.") logging.error("unable to connect to LDAP: %s", e)
raise raise
return ldap return ldap

View file

@ -1,7 +1,7 @@
import smtplib import smtplib
from email.utils import formatdate
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formatdate
def send_email(toaddr, subject, body): def send_email(toaddr, subject, body):

View file

@ -1,7 +1,7 @@
from sqlalchemy import Column, Integer, String, Date from sqlalchemy import Column, Date, Integer, String
from sqlalchemy.types import JSON, Text
from sqlalchemy.dialects import postgresql from sqlalchemy.dialects import postgresql
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.types import JSON, Text
Base = declarative_base() Base = declarative_base()

View file

@ -1,7 +1,9 @@
from proxmoxer import ProxmoxAPI
from proxstar.ldapdb import is_user
from proxstar.db import get_user_usage_limits, get_ignored_pools
from flask import current_app as app from flask import current_app as app
from proxmoxer import ProxmoxAPI
from proxstar import logging
from proxstar.db import get_ignored_pools, get_user_usage_limits
from proxstar.ldapdb import is_user
def connect_proxmox(): def connect_proxmox():
@ -17,7 +19,8 @@ def connect_proxmox():
except: except:
if app.config['PROXMOX_HOSTS'].index(host) == ( if app.config['PROXMOX_HOSTS'].index(host) == (
len(app.config['PROXMOX_HOSTS']) - 1): len(app.config['PROXMOX_HOSTS']) - 1):
print('Unable to connect to any of the given Proxmox servers!') logging.error(
'unable to connect to any of the given Proxmox servers')
raise raise
@ -35,7 +38,8 @@ def connect_proxmox_ssh():
except: except:
if app.config['PROXMOX_HOSTS'].index(host) == ( if app.config['PROXMOX_HOSTS'].index(host) == (
len(app.config['PROXMOX_HOSTS']) - 1): len(app.config['PROXMOX_HOSTS']) - 1):
print('Unable to connect to any of the given Proxmox servers!') logging.error(
'unable to connect to any of the given Proxmox servers')
raise raise

View file

@ -1,20 +1,26 @@
import logging
import os import os
import time import time
import requests
import paramiko import paramiko
import psycopg2 import psycopg2
import requests
from flask import Flask from flask import Flask
from rq import get_current_job from rq import get_current_job
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
from proxstar.db import * from proxstar.db import *
from proxstar.util import *
from proxstar.mail import * from proxstar.mail import *
from proxstar.starrs import *
from proxstar.vnc import send_stop_ssh_tunnel
from proxstar.vm import VM, create_vm, clone_vm
from proxstar.user import User, get_vms_for_rtp
from proxstar.proxmox import connect_proxmox, get_pools from proxstar.proxmox import connect_proxmox, get_pools
from proxstar.starrs import *
from proxstar.user import User, get_vms_for_rtp
from proxstar.util import *
from proxstar.vm import VM, clone_vm, create_vm
from proxstar.vnc import send_stop_ssh_tunnel
logging.basicConfig(
format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO)
app = Flask(__name__) app = Flask(__name__)
if os.path.exists( if os.path.exists(
@ -43,25 +49,46 @@ def connect_starrs():
return starrs return starrs
def set_job_status(job, status):
job.meta['status'] = status
job.save_meta()
def create_vm_task(user, name, cores, memory, disk, iso): def create_vm_task(user, name, cores, memory, disk, iso):
with app.app_context(): with app.app_context():
job = get_current_job() job = get_current_job()
proxmox = connect_proxmox() proxmox = connect_proxmox()
db = connect_db() db = connect_db()
starrs = connect_starrs() starrs = connect_starrs()
job.meta['status'] = 'creating VM' logging.info("[{}] Creating VM.".format(name))
job.save_meta() set_job_status(job, 'creating VM')
vmid, mac = create_vm(proxmox, user, name, cores, memory, disk, iso) vmid = create_vm(proxmox, user, name, cores, memory, disk, iso)
job.meta['status'] = 'registering in STARRS' logging.info(
job.save_meta() "[{}] Waiting until Proxmox is done provisioning.".format(name))
register_starrs(starrs, name, app.config['STARRS_USER'], mac, set_job_status(job, 'waiting for Proxmox')
get_next_ip(starrs, app.config['STARRS_IP_RANGE'])) timeout = 20
job.meta['status'] = 'setting VM expiration' retry = 0
job.save_meta() while retry < timeout:
delete_vm_expire(db, vmid) if not VM(vmid).is_provisioned():
retry += 1
time.sleep(3)
continue
break
if retry == timeout:
logging.info("[{}] Failed to provision, deleting.".format(name))
set_job_status(job, 'failed to provision')
delete_vm_task(vmid)
return
logging.info("[{}] Registering in STARRS.".format(name))
set_job_status(job, 'registering in STARRS')
vm = VM(vmid)
ip = get_next_ip(starrs, app.config['STARRS_IP_RANGE'])
register_starrs(starrs, name, app.config['STARRS_USER'], vm.get_mac(),
ip)
set_job_status(job, 'setting VM expiration')
get_vm_expire(db, vmid, app.config['VM_EXPIRE_MONTHS']) get_vm_expire(db, vmid, app.config['VM_EXPIRE_MONTHS'])
job.meta['status'] = 'complete' logging.info("[{}] VM successfully provisioned.".format(name))
job.save_meta() set_job_status(job, 'complete')
def delete_vm_task(vmid): def delete_vm_task(vmid):
@ -78,8 +105,8 @@ def delete_vm_task(vmid):
break break
retry += 1 retry += 1
vm.delete() vm.delete()
delete_starrs(starrs, vm.name)
delete_vm_expire(db, vmid) delete_vm_expire(db, vmid)
delete_starrs(starrs, vm.name)
def process_expiring_vms_task(): def process_expiring_vms_task():
@ -103,7 +130,7 @@ def process_expiring_vms_task():
expired_vms.append([vm.id, vm.name, days]) expired_vms.append([vm.id, vm.name, days])
vm.stop() vm.stop()
elif days <= -7: elif days <= -7:
print( logging.info(
"Deleting {} ({}) as it has been at least a week since expiration." "Deleting {} ({}) as it has been at least a week since expiration."
.format(vm.name, vm.id)) .format(vm.name, vm.id))
send_stop_ssh_tunnel(vm.id) send_stop_ssh_tunnel(vm.id)
@ -128,16 +155,15 @@ def setup_template_task(template_id, name, user, ssh_key, cores, memory):
proxmox = connect_proxmox() proxmox = connect_proxmox()
starrs = connect_starrs() starrs = connect_starrs()
db = connect_db() db = connect_db()
print("[{}] Retrieving template info for template {}.".format( logging.info("[{}] Retrieving template info for template {}.".format(
name, template_id)) name, template_id))
template = get_template(db, template_id) template = get_template(db, template_id)
print("[{}] Cloning template {}.".format(name, template_id)) logging.info("[{}] Cloning template {}.".format(name, template_id))
job.meta['status'] = 'cloning template' set_job_status(job, 'cloning template')
job.save_meta() vmid = clone_vm(proxmox, template_id, name, user)
vmid, mac = clone_vm(proxmox, template_id, name, user) logging.info(
print("[{}] Waiting until Proxmox is done provisioning.".format(name)) "[{}] Waiting until Proxmox is done provisioning.".format(name))
job.meta['status'] = 'waiting for Proxmox' set_job_status(job, 'waiting for Proxmox')
job.save_meta()
timeout = 20 timeout = 20
retry = 0 retry = 0
while retry < timeout: while retry < timeout:
@ -147,40 +173,38 @@ def setup_template_task(template_id, name, user, ssh_key, cores, memory):
continue continue
break break
if retry == timeout: if retry == timeout:
print("[{}] Failed to provision, deleting.".format(name)) logging.info("[{}] Failed to provision, deleting.".format(name))
job.meta['status'] = 'failed to provision' set_job_status(job, 'failed to provision')
job.save_meta()
delete_vm_task(vmid) delete_vm_task(vmid)
return return
print("[{}] Registering in STARRS.".format(name)) logging.info("[{}] Registering in STARRS.".format(name))
job.meta['status'] = 'registering in STARRS' set_job_status(job, 'registering in STARRS')
job.save_meta()
ip = get_next_ip(starrs, app.config['STARRS_IP_RANGE'])
register_starrs(starrs, name, app.config['STARRS_USER'], mac, ip)
get_vm_expire(db, vmid, app.config['VM_EXPIRE_MONTHS'])
print("[{}] Setting CPU and memory.".format(name))
job.meta['status'] = 'setting CPU and memory'
job.save_meta()
vm = VM(vmid) vm = VM(vmid)
ip = get_next_ip(starrs, app.config['STARRS_IP_RANGE'])
register_starrs(starrs, name, app.config['STARRS_USER'], vm.get_mac(),
ip)
get_vm_expire(db, vmid, app.config['VM_EXPIRE_MONTHS'])
logging.info("[{}] Setting CPU and memory.".format(name))
set_job_status(job, 'setting CPU and memory')
vm.set_cpu(cores) vm.set_cpu(cores)
vm.set_mem(memory) vm.set_mem(memory)
print("[{}] Applying cloud-init config.".format(name)) logging.info("[{}] Applying cloud-init config.".format(name))
job.meta['status'] = 'applying cloud-init' set_job_status(job, 'applying cloud-init')
job.save_meta()
vm.set_ci_user(user) vm.set_ci_user(user)
vm.set_ci_ssh_key(ssh_key) vm.set_ci_ssh_key(ssh_key)
vm.set_ci_network() vm.set_ci_network()
print("[{}] Waiting for STARRS to propogate before starting VM.". logging.info(
format(name)) "[{}] Waiting for STARRS to propogate before starting VM.".format(
job.meta['status'] = 'waiting for STARRS' name))
set_job_status(job, 'waiting for STARRS')
job.save_meta() job.save_meta()
time.sleep(90) time.sleep(90)
print("[{}] Starting VM.".format(name)) logging.info("[{}] Starting VM.".format(name))
job.meta['status'] = 'starting VM' set_job_status(job, 'starting VM')
job.save_meta() job.save_meta()
vm.start() vm.start()
print("[{}] Template successfully provisioned.".format(name)) logging.info("[{}] Template successfully provisioned.".format(name))
job.meta['status'] = 'completed' set_job_status(job, 'completed')
job.save_meta() job.save_meta()

View file

@ -1,10 +1,11 @@
from proxmoxer.core import ResourceException
from rq.registry import StartedJobRegistry
from proxstar import db, q, redis_conn from proxstar import db, q, redis_conn
from proxstar.db import * from proxstar.db import *
from proxstar.vm import VM
from proxstar.util import *
from proxstar.proxmox import * from proxstar.proxmox import *
from rq.registry import StartedJobRegistry from proxstar.util import *
from proxmoxer.core import ResourceException from proxstar.vm import VM
class User(object): class User(object):
@ -23,7 +24,7 @@ class User(object):
vms = proxmox.pools(self.name).get()['members'] vms = proxmox.pools(self.name).get()['members']
except ResourceException: except ResourceException:
# they likely don't have a pool yet, try to create it # they likely don't have a pool yet, try to create it
if is_user(self.name) and not is_rtp(self.name): if is_user(self.name):
proxmox.pools.post( proxmox.pools.post(
poolid=self.name, comment='Managed by Proxstar') poolid=self.name, comment='Managed by Proxstar')
# if created, their pool is empty so return empty array # if created, their pool is empty so return empty array

View file

@ -1,5 +1,5 @@
import string
import random import random
import string
def gen_password( def gen_password(

View file

@ -1,13 +1,16 @@
import time
import json import json
import time
import urllib import urllib
from tenacity import retry, wait_fixed, stop_after_attempt
from proxstar import db, starrs
from proxstar.db import get_vm_expire
from proxstar.util import lazy_property
from proxstar.starrs import get_ip_for_mac
from proxstar.proxmox import connect_proxmox, connect_proxmox_ssh, get_node_least_mem, get_free_vmid, get_vm_node
from flask import current_app as app from flask import current_app as app
from tenacity import retry, stop_after_attempt, wait_fixed
from proxstar import db, starrs
from proxstar.db import delete_vm_expire, get_vm_expire
from proxstar.proxmox import (connect_proxmox, connect_proxmox_ssh,
get_free_vmid, get_node_least_mem, get_vm_node)
from proxstar.starrs import get_ip_for_mac
from proxstar.util import lazy_property
class VM(object): class VM(object):
@ -248,9 +251,13 @@ class VM(object):
proxmox.nodes(self.node).qemu(self.id).config.put(ipconfig0='ip=dhcp') proxmox.nodes(self.node).qemu(self.id).config.put(ipconfig0='ip=dhcp')
# Will create a new VM with the given parameters, does not guarantee
# the VM is done provisioning when returning
def create_vm(proxmox, user, name, cores, memory, disk, iso): def create_vm(proxmox, user, name, cores, memory, disk, iso):
node = proxmox.nodes(get_node_least_mem(proxmox)) node = proxmox.nodes(get_node_least_mem(proxmox))
vmid = get_free_vmid(proxmox) vmid = get_free_vmid(proxmox)
# Make sure lingering expirations are deleted
delete_vm_expire(db, vmid)
node.qemu.create( node.qemu.create(
vmid=vmid, vmid=vmid,
name=name, name=name,
@ -262,34 +269,22 @@ def create_vm(proxmox, user, name, cores, memory, disk, iso):
net0='virtio,bridge=vmbr0', net0='virtio,bridge=vmbr0',
pool=user, pool=user,
description='Managed by Proxstar') description='Managed by Proxstar')
retry = 0 return vmid
while retry < 20:
try:
mac = VM(vmid).get_mac()
break
except:
retry += 1
time.sleep(3)
return vmid, mac
# Will clone a new VM from a template, does not guarantee the
# VM is done provisioning when returning
def clone_vm(proxmox, template_id, name, pool): def clone_vm(proxmox, template_id, name, pool):
node = proxmox.nodes(get_vm_node(proxmox, template_id)) node = proxmox.nodes(get_vm_node(proxmox, template_id))
newid = get_free_vmid(proxmox) vmid = get_free_vmid(proxmox)
# Make sure lingering expirations are deleted
delete_vm_expire(db, vmid)
target = get_node_least_mem(proxmox) target = get_node_least_mem(proxmox)
node.qemu(template_id).clone.post( node.qemu(template_id).clone.post(
newid=newid, newid=vmid,
name=name, name=name,
pool=pool, pool=pool,
full=1, full=1,
description='Managed by Proxstar', description='Managed by Proxstar',
target=target) target=target)
retry = 0 return vmid
while retry < 100:
try:
mac = VM(newid).get_mac()
break
except:
retry += 1
time.sleep(3)
return newid, mac

View file

@ -1,20 +1,13 @@
import os import os
import time
import requests
import subprocess import subprocess
from sshtunnel import SSHTunnelForwarder import time
from proxstar.util import *
import requests
from flask import current_app as app from flask import current_app as app
from sshtunnel import SSHTunnelForwarder
from proxstar import logging
def start_websockify(websockify_path, target_file): from proxstar.util import *
result = subprocess.run(['pgrep', 'websockify'], stdout=subprocess.PIPE)
if not result.stdout:
subprocess.call([
websockify_path, '8081', '--token-plugin', 'TokenFile',
'--token-source', target_file, '-D'
],
stdout=subprocess.PIPE)
def stop_websockify(): def stop_websockify():
@ -28,7 +21,7 @@ def stop_websockify():
time.sleep(10) time.sleep(10)
if subprocess.run(['pgrep', 'websockify'], if subprocess.run(['pgrep', 'websockify'],
stdout=subprocess.PIPE).stdout: stdout=subprocess.PIPE).stdout:
print('Websockify didn\'t stop, killing forcefully.') logging.info('websockify didn\'t stop, killing forcefully')
subprocess.run(['kill', '-9', pid], stdout=subprocess.PIPE) subprocess.run(['kill', '-9', pid], stdout=subprocess.PIPE)
@ -93,7 +86,7 @@ def stop_ssh_tunnel(vmid, ssh_tunnels):
(tunnel for tunnel in ssh_tunnels if tunnel.local_bind_port == port), (tunnel for tunnel in ssh_tunnels if tunnel.local_bind_port == port),
None) None)
if tunnel: if tunnel:
print("Tearing down SSH tunnel for VM {}.".format(vmid)) logging.info('tearing down SSH tunnel for VM %s', vmid)
try: try:
tunnel.stop() tunnel.stop()
except: except: