# Source code for scripts.status_code
import httplib
import random
import string
from urlparse import urlparse
def get_status_code(url):
    """Return the HTTP status code a HEAD request to *url* produces.

    Only the headers are requested, so the check is cheap.  If the host
    cannot be reached, does not answer within 5 seconds, or anything else
    goes wrong while talking to it, 500 is returned: the server might be
    OK, but if it takes that long to respond our users won't tolerate it,
    so it is treated as down.

    Thanks to Evan Fosmark -
    http://stackoverflow.com/questions/1140661/python-get-http-response-code-from-a-url

    Works as following::

        >>> url = 'http://www.bbc.co.uk/'
        >>> get_status_code(url)
        200
        >>> url = 'http://www.bbc.co.uk/404-page/'
        >>> get_status_code(url)
        404
        >>> url = 'http://www.bbc.co.uk/500-error/'
        >>> get_status_code(url)
        500

    :param url: absolute URL to probe.  The ``http`` scheme uses a plain
        connection; any other scheme is attempted over HTTPS.
    :returns: the integer status code of the response, or 500 when no
        response could be obtained.
    """
    parsed = urlparse(url)

    # A lot of unauthenticated pages are served from a cache.  When the
    # URL carries no query string, add a semi-unique one so the request
    # bypasses that cache.  Working on the parsed query (rather than
    # appending to the raw string) keeps the token out of any #fragment.
    query = parsed.query
    if not query:
        alphabet = string.ascii_lowercase + string.digits
        query = 'cb=' + ''.join(random.choice(alphabet) for _ in range(8))

    # The request line needs path *and* query: sending the path alone
    # would silently drop the cache-breaker, and an empty path is not a
    # valid request target, so fall back to '/'.
    target = parsed.path or '/'
    if parsed.params:
        # urlparse splits ';params' off the last path segment.
        target += ';' + parsed.params
    target += '?' + query

    conn = None
    try:
        # Timeout of 5 seconds - if it takes longer than that people will
        # be annoyed anyway and we need to find out why it is this slow.
        if parsed.scheme == 'http':
            conn = httplib.HTTPConnection(parsed.netloc, timeout=5)
        else:
            conn = httplib.HTTPSConnection(parsed.netloc, timeout=5)
        conn.request("HEAD", target)
        return conn.getresponse().status
    except Exception:
        # Deliberately broad: socket/SSL failures, timeouts and protocol
        # errors (httplib.HTTPException, e.g. a bad status line) all mean
        # "no usable answer".  It might not really be a 500, but it is
        # treated as one.  KeyboardInterrupt/SystemExit still propagate.
        return 500
    finally:
        # Always release the socket, whether or not a response arrived.
        if conn is not None:
            conn.close()
if __name__ == "__main__":
    # Manual smoke check: probe one public site and one internal test
    # page, printing the status code obtained for each in turn.
    for sample_url in ('http://www.bbc.co.uk',
                       'https://nailsea/test/testcard.html'):
        status = get_status_code(sample_url)
        print(status)