# Source code for scripts.status_code

import httplib
import random
import string
from urlparse import urlparse

def get_status_code(url):
    """ This function gets the status code of a website (host) by requesting
        HEAD data from the host. This means that it only requests the headers.
        If the host cannot be reached or something else goes wrong while
        making the request, it returns 500 instead.
        
        Thanks to Evan Fosmark - 
        http://stackoverflow.com/questions/1140661/python-get-http-response-code-from-a-url
        
        Works as following::
        
            >>> url = 'http://www.bbc.co.uk/'
            >>> get_status_code(url)
            200
            >>> url = 'http://www.bbc.co.uk/404-page/'
            >>> get_status_code(url)
            404
            >>> url = 'http://www.bbc.co.uk/500-error/'
            >>> get_status_code(url)
            500
        
        If it takes more than 5 seconds to get the header we assume that there 
        is a problem with the server and return a 500 code.  The server might 
        be OK, but if it's taking that long to respond, we should assume that 
        it's down - our users won't tolerate it taking that long.

        :param url: absolute URL to check.  A scheme of ``http`` uses a plain
            connection; any other scheme is treated as HTTPS.
        :returns: the integer HTTP status code from the response, or 500 if
            the request could not be completed.
    """
    parts = urlparse(url)

    # Add a cache-breaking code to any URL that doesn't already carry a query
    # string.  A lot of unauthenticated pages are served from a cache; making
    # the URL semi-unique on every call bypasses it.
    if '?' in url:
        query = parts.query
    else:
        query = 'cb=' + ''.join(
            random.choice(string.ascii_lowercase + string.digits)
            for _ in range(8))

    # Build the full request target.  Sending only the path would drop the
    # query string, so the cache-breaker (or the caller's own query) would
    # never reach the server.
    target = parts.path or '/'
    if parts.params:
        target += ';' + parts.params
    if query:
        target += '?' + query

    conn = None
    try:
        # Timeout of 5 seconds - if it takes longer than that the users will
        # be unhappy anyway and we need to find out why it's running slowly.
        if parts.scheme == 'http':
            conn = httplib.HTTPConnection(parts.netloc, timeout=5)
        else:
            conn = httplib.HTTPSConnection(parts.netloc, timeout=5)
        conn.request("HEAD", target)
        return conn.getresponse().status
    except Exception:
        # If we can't get a code, treat it as a 500 - the site is probably
        # down.  Deliberately broad: this also covers httplib.HTTPException
        # (bad status line, invalid port, ...), which StandardError does not.
        return 500
    finally:
        # Always release the socket, whether or not the request succeeded.
        if conn is not None:
            conn.close()

if __name__ == "__main__":
    # Quick manual check: print the status code reported for each sample URL,
    # one per line, in order.
    sample_urls = (
        'http://www.bbc.co.uk',
        'https://nailsea/test/testcard.html',
    )
    for sample_url in sample_urls:
        print(get_status_code(sample_url))
# Navigation
#
# Source code for scripts.status_code
#
# Project Versions
#
# RTD Search
#
# Quick search
#
# Navigation