Source code for scripts.cronjob

#!/usr/bin/env python
# Bootstrap for running this file as a stand-alone script against the Django
# ORM. The statements below are order-dependent: sys.path and
# DJANGO_SETTINGS_MODULE are set up before the djamon / Django imports run.
import sys
import os
""" Setup the script to use django ORM, by adding the current folder and its 
    parent folder to the python path and importing the relevant crud.
"""
# PARENT_FOLDER is two directory levels above this file's directory and
# PROJECT_ROOT is one level above it.
PARENT_FOLDER = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
# Both go to the front of sys.path; PROJECT_ROOT is inserted last, so it is
# searched first.
sys.path.insert(0, PARENT_FOLDER)
sys.path.insert(0, PROJECT_ROOT)
import django 
# NOTE(review): setup_environ was deprecated in Django 1.4 and removed in
# 1.6 -- confirm which Django version this script is pinned to.
from django.core.management import setup_environ
os.environ['DJANGO_SETTINGS_MODULE']="djamon.settings"

""" Then import everything we need to allow us to run this. """
from django.core.mail import send_mail
# NOTE(review): django.core.urlresolvers was removed in Django 2.0
# (replaced by django.urls) -- confirm against the pinned Django version.
from django.core.urlresolvers import reverse
from djamon import settings
# Presumably provides Site, SiteView, Outage and OutageSites, which are used
# further down -- verify against djamon/monitor/models.py.
from djamon.monitor.models import *
from djamon.scripts.status_code import get_status_code
from djamon.monitor.templatetags.readable_time import readable_time
from datetime import datetime
from uuid import uuid4
# Presumably provides Twitter and OAuth, which tweet() uses -- verify.
from twitter import *
from random import randint

# Configure Django with the imported settings module before the rest of the
# script runs.
setup_environ(settings)

# Dry-run switch: when True, tweet() prints the message instead of posting
# it, and the script prints instead of calling send_mail().
DEBUG = False

# Notification settings copied from the Django settings module.
# TO_EMAIL / FROM_EMAIL are passed to send_mail() as recipient and sender.
TO_EMAIL = settings.TO_EMAIL
FROM_EMAIL = settings.FROM_EMAIL
# tweet() only posts to Twitter when this is exactly True.
TWITTER = settings.TWITTER

""" We need to setup some metadata so we know what to do with the new data we 
    find.
"""
# Opening sentence of the "it is fixed" tweet; make_fix_tweet() picks one
# entry at random and appends further sentences directly after it, so every
# entry must be a complete sentence ending in ". ".
# Repeated entries are kept as found: each copy raises the odds of that
# wording being picked.
FIXED_TWEETS = [
    "The website looks to be working again. ",
    "We have fixed the problem with the website. ",
    "The website should be working again now. ",
    "It seems to be working again. ",
    "You should be able to access the website again. ",
    "We have fixed the problem with the website. ",
    "Things are working again on the website. ",
    "The website looks to be working again. ",
    "It should be working again now. ",
    "The problems should be resolved now. ",
]

# Lead-ins for the "how long it took" sentence of the fix tweet.
# make_fix_tweet() picks one at random and appends the duration text, so
# each entry ends with a space.
TIME_TWEETS = [
    "We fixed it in ",
    "It was fixed in about ",
    "We fixed it in about ",
    "It was up and running in ",
    "It was working inside ",
    "It was back up in ",
    "The fix took about ",
    "It was down for about ",
]

# Sign-offs for the fix tweet; make_fix_tweet() picks one at random.
# "Thanks for your patience." is listed three times, which makes it the
# most likely pick.
THANKS_TWEETS = [
    "Thanks for your patience.",
    "Thanks for your patience.",
    "Thanks for your time.",
    "Thanks.",
    "Cheers.",
    "Regards.",
    "Thanks for your patience.",
    "Kind regards.",
]

def tweet(message):
    """ Send the message to twitter.

    message -- the text to post as a status update.

    Returns True when the message was posted, or when DEBUG is on (in which
    case the message is only printed). Returns False when the TWITTER
    setting is not True or when the status update raises an error.
    """
    if DEBUG is True:
        print('tweeting %s' % (message))
        return True

    if TWITTER is not True:
        return False

    client = Twitter(auth=OAuth(settings.OAUTH_TOKEN,
                                settings.OAUTH_SECRET,
                                settings.CONSUMER_KEY,
                                settings.CONSUMER_SECRET))
    try:
        client.statuses.update(status=message)
    except Exception:
        # Posting is best-effort: callers act on the False return value.
        # Only Exception is caught so that KeyboardInterrupt and SystemExit
        # still stop the script.
        return False
    return True
def connection_check():
    """ Report whether this server appears to have internet access.

    Each URL in settings.SANDBOX_URLS is requested in turn, stopping at the
    first one whose status code is in settings.GOOD_CODES.

    Returns True as soon as one URL answers with a good code, and False if
    none do (including when the URL list is empty).
    """
    return any(get_status_code(url) in settings.GOOD_CODES
               for url in settings.SANDBOX_URLS)
def make_fix_tweet(down_time=None, code=None):
    """ Build the tweet announcing that the problem is fixed.

    The wording is assembled from randomly chosen fragments (and the issue
    code is appended) so that consecutive tweets differ and Twitter accepts
    them.

    down_time -- length of the outage in seconds, or None/0 if unknown.
    code      -- outage code to append as " (Issue #<code>)", or None.

    The "how long it took" sentence is only included for outages of between
    1 and 180 whole minutes. Shorter outages used to be reported as
    "0 minutes"; they now simply omit the sentence, as longer ones do.

    Returns the assembled tweet text.
    """
    time_info = ''
    if down_time:
        minutes = down_time // 60
        if 1 <= minutes <= 180:
            if minutes == 1:
                duration = 'a minute. '
            else:
                duration = '%s minutes. ' % (minutes)
            time_info = TIME_TWEETS[randint(0, len(TIME_TWEETS) - 1)] + duration

    thanks = THANKS_TWEETS[randint(0, len(THANKS_TWEETS) - 1)]
    fix_msg = FIXED_TWEETS[randint(0, len(FIXED_TWEETS) - 1)]

    code_msg = ''
    if code is not None:
        code_msg = ' (Issue #%s)' % (code)

    return fix_msg + time_info + thanks + code_msg
# Main cron run. Everything below only happens when connection_check() says
# this server is online -- presumably so that a local connectivity loss is
# not recorded as an outage of the monitored sites.
#
# Steps: poll every active site, store the result, open an outage record
# when a site fails for the third run in a row, notify by tweet and email,
# and close the outage (with a "fixed" tweet and email) once no site fails.
if connection_check() is True:
    tweeted = False
    message = None
    error_count = 0
    outage_text = ''

    # Look for an outage that is still open (no end time).
    try:
        outage = Outage.objects.get(end_time=None)
        new_outage = False
    except (Outage.DoesNotExist, Outage.MultipleObjectsReturned):
        # No open outage. An ambiguous result (several open outages) is
        # treated the same way, as the previous catch-all handler did;
        # other errors, e.g. database failures, now propagate.
        outage = None
        new_outage = True

    # All the sites that are actively being monitored, most important first.
    sites = Site.objects.filter(is_active=True).order_by('priority')

    # Count the sites already flagged as down *before* this run updates
    # them, so we can tell later whether something has just been fixed.
    current_errors = sites.filter(down_count__gte=3).count()

    for site in sites:
        # Fetch the status code of the site and store it in the database.
        status = get_status_code(site.url)
        SiteView(uuid=uuid4(), site=site, status_code=int(status)).save()

        if status in settings.GOOD_CODES:
            # The site is OK: reset the failure counter and note the time.
            if DEBUG is True:
                print('>>> %s is OK' % (site.url))
            site.down_count = 0
            site.last_seen = datetime.now()
        else:
            # The site is not OK: count one more consecutive failure.
            error_count += 1
            site.down_count += 1
            if DEBUG is True:
                print('>>> There was a problem with %s' % (site.url))
                print('>>> This has been down %s times' % (site.down_count))

            # Exactly three failures in a row flags the site as down.
            if site.down_count == 3:
                # Open a new outage record unless one already exists.
                if outage is None:
                    outage = Outage(content='',
                                    code=str(uuid4())[:6],
                                    start_time=site.last_seen)
                    outage.save()
                # Record every site that went down during the outage.
                OutageSites(site=site,
                            outage=outage,
                            primary=new_outage,
                            start_time=site.last_seen).save()

            # Only the first down site (sites are in priority order)
            # supplies the notification text, and only if the outage has
            # not been announced yet.
            if (site.down_count >= 3
                    and message is None
                    and outage is not None
                    and outage.notification_sent is False):
                message = '%s (Issue #%s)' % (site.down_message, outage.code)

        # Log every site, healthy or not, so the outage text shows the
        # whole picture.
        outage_text += '%s [%s] "%s (%s)"\n' % (status, datetime.now(),
                                                site.title, site.url)
        site.save()

        # Keep only the 20 most recent status records for this site.
        for stale_view in SiteView.objects.filter(site=site).order_by('-attempt_time')[20:]:
            stale_view.delete()

    if outage is not None:
        # If we have a message to send, send it.
        if message is not None and error_count != 0:
            tweeted = tweet(message)
            if tweeted is True:
                outage_text += "Notification Sent\n=====================\n%s\n\n" % (message)
            else:
                # The old format string had no placeholder, so this line
                # raised TypeError whenever a tweet failed. Log the message
                # that could not be sent instead.
                outage_text += ">>>Notification Failed\n%s\n\n" % (message)

        # Email the log if this run is the one that recorded the outage.
        if new_outage is True:
            email_msg = '%s\n\n%s' % (reverse('outage_code', args=[outage.code]),
                                      outage_text)
            if DEBUG is True:
                print("emailing")
            else:
                send_mail('WEBSITE MONITORING - site down', email_msg,
                          FROM_EMAIL, TO_EMAIL, fail_silently=False)

        # Append this run's log to the outage's existing log.
        outage.content = outage.content + outage_text
        if outage.notification_sent is False:
            outage.notification_sent = tweeted
        outage.save()

    # Sites were flagged as down before this run but nothing failed now:
    # close the outage and tell people it is fixed.
    if error_count == 0 and current_errors != 0 and outage:
        # Record the end time and the total length of the outage in seconds.
        outage.end_time = datetime.now()
        d = outage.end_time - outage.start_time
        down_time = d.seconds + (d.days * 86400)
        outage.seconds_down = down_time
        outage.save()

        # Send a randomised tweet letting people know it has been fixed.
        fix_msg = make_fix_tweet(down_time=down_time, code=outage.code)
        tweeted = tweet(fix_msg)

        # NOTE(review): this template is a single line in the source this
        # was restored from -- confirm whether line breaks were intended.
        email_msg = """Outage Started: %s Outage Ended: %s The following systems were effected by this outage: %s This outage lasted %s """ % (
            outage.start_time,
            outage.end_time,
            list(outage.sites.all()),
            readable_time(outage.seconds_down))
        if tweeted is False:
            email_msg = "The tweet failed \n\n" + email_msg
        email_msg = '%s\n\n%s' % (reverse('outage_code', args=[outage.code]),
                                  email_msg)
        if DEBUG is True:
            print("emailing \n\n%s" % (email_msg))
        else:
            send_mail('WEBSITE MONITORING - site up', email_msg,
                      FROM_EMAIL, TO_EMAIL, fail_silently=False)