What’s the best way to get an HTTP response code from a URL?
Python Problem Overview
I’m looking for a quick way to get an HTTP response code from a URL (i.e. 200, 404, etc). I’m not sure which library to use.
Python Solutions
Solution 1 - Python
Update using the wonderful requests library. Note we are using the HEAD request, which should happen more quickly than a full GET or POST request.
import requests
# Ask for the headers only (HEAD) so no response body has to be transferred.
try:
    # Without a timeout the call can block indefinitely on an unresponsive host.
    r = requests.head("https://stackoverflow.com", timeout=10)
except (requests.ConnectionError, requests.Timeout):
    print("failed to connect")
else:
    print(r.status_code)
    # prints the int of the status code. Find more at httpstatusrappers.com :)
Solution 2 - Python
Here's a solution that uses httplib
instead.
import httplib
def get_status_code(host, path="/"):
    """Return the HTTP status code of ``path`` on ``host``, or None on failure.

    Only the headers are requested (HEAD), so no response body is downloaded.

    Args:
        host: Host to contact, passed unchanged to ``httplib.HTTPConnection``.
        path: Path to request on that host; defaults to "/".

    Returns:
        The status code of the response, or None if the host cannot be
        reached or something else goes wrong.
    """
    conn = None
    try:
        conn = httplib.HTTPConnection(host)
        conn.request("HEAD", path)
        return conn.getresponse().status
    except Exception:
        # httplib's own errors (InvalidURL, BadStatusLine, ...) derive from
        # Exception, not StandardError, so catching StandardError let them
        # escape despite the "returns None" contract.
        return None
    finally:
        # Release the socket on every path, including failures.
        if conn is not None:
            conn.close()
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(get_status_code("stackoverflow.com"))  # prints 200
print(get_status_code("stackoverflow.com", "/nonexistant"))  # prints 404
Solution 3 - Python
You should use urllib2, like this:
import urllib2
# Probe one URL that exists and one that does not, printing each status code.
for url in ["http://entrian.com/", "http://entrian.com/does-not-exist/"]:
    try:
        connection = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        # Error statuses surface as HTTPError; the status is on its `code` attribute.
        print(e.code)
    else:
        try:
            print(connection.getcode())
        finally:
            # Close the response even if printing fails.
            connection.close()
# Prints:
# 200 [from the else block]
# 404 [from the except block]
Solution 4 - Python
For those using Python 3 or later, here is another way to find the response code.
import urllib.request
def getResponseCode(url):
    """Open ``url`` and return the response code reported for it.

    Args:
        url: URL passed unchanged to ``urllib.request.urlopen``.

    Returns:
        The value of the response's ``getcode()``.

    Raises:
        Nothing is caught here: any error raised by ``urlopen`` (for example
        ``urllib.error.HTTPError``) propagates to the caller.
    """
    # The context manager closes the response instead of leaking it.
    with urllib.request.urlopen(url) as conn:
        return conn.getcode()
Solution 5 - Python
The urllib2.HTTPError
exception does not contain a getcode()
method. Use the code
attribute instead.
Solution 6 - Python
Addressing @Niklas R's comment to @nickanor's answer:
from urllib.error import HTTPError
import urllib.request
def getResponseCode(url):
    """Open ``url`` and return its response code, including error statuses.

    Args:
        url: URL passed unchanged to ``urllib.request.urlopen``.

    Returns:
        The value of the response's ``getcode()`` on success, or the ``code``
        attribute of the ``HTTPError`` when ``urlopen`` raises one.

    Raises:
        Only ``HTTPError`` is handled; any other exception from ``urlopen``
        propagates to the caller.
    """
    try:
        # The context manager closes the response instead of leaking it.
        with urllib.request.urlopen(url) as conn:
            return conn.getcode()
    except HTTPError as e:
        return e.code
Solution 7 - Python
Here's an httplib
solution that behaves like urllib2. You can just give it a URL and it just works. No need to mess about splitting up your URLs into hostname and path. This function already does that.
import httplib
import socket
def get_link_status(url):
    """
    Gets the HTTP status of the url or returns an error associated with it. Always returns a string.

    The URL is split by hand into scheme, host, optional port and path, and a
    HEAD request is sent with a 10 second timeout.

    Args:
        url: Absolute ``http://`` or ``https://`` URL. Any ``#fragment`` is
            ignored. The host part is split at the first ``:``, so
            ``user:pass@`` prefixes and bracketed IPv6 literals are not
            handled.

    Returns:
        The status code as a string (e.g. ``"200"``), or a human-readable
        error message when the URL cannot be parsed or the request fails.
    """
    # The fragment is never sent to the server; cut it off first.
    url = url.split('#', 1)[0]

    # "scheme://host[:port]/path" splits into [scheme + ':', '', netloc, path].
    parts = url.split('/', 3)
    if len(parts) < 3 or parts[1] or not parts[2]:
        return "Invalid URL: expected scheme://host[:port][/path]"

    scheme = parts[0].lower()
    netloc = parts[2]
    path = '/' + parts[3] if len(parts) > 3 else '/'

    if scheme == 'http:':
        connection_class = httplib.HTTPConnection
        port = 80
    elif scheme == 'https:':
        connection_class = httplib.HTTPSConnection
        port = 443
    else:
        # Previously `port` stayed unbound here and surfaced as a NameError.
        return "Unsupported URL scheme: %s" % scheme.rstrip(':')

    if ':' in netloc:
        host, _, port_text = netloc.partition(':')
        if port_text:
            try:
                port = int(port_text)
            except ValueError:
                return "Invalid port in URL: %s" % port_text
        # An empty port ("host:") keeps the scheme default.
    else:
        host = netloc

    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0',
        'Host': host,
    }

    conn = None
    try:
        conn = connection_class(host=host, port=port, timeout=10)
        conn.request(method="HEAD", url=path, headers=headers)
        response = str(conn.getresponse().status)
    except socket.gaierror as e:
        response = "Socket Error (%d): %s" % (e.args[0], e.args[1])
    except Exception as e:
        # Catch Exception rather than StandardError: httplib's own errors
        # derive from Exception and would otherwise escape. Use the first
        # non-empty description available on the exception.
        message = getattr(e, 'message', '') or getattr(e, 'msg', '') or str(e)
        if message:
            response = str(message)
        else:
            response = "Exception occurred without a good error message. Manually check the URL to see the status. If it is believed this URL is 100% good then file a issue for a potential bug."
    finally:
        # Close the connection on every path, not only on success.
        if conn is not None:
            conn.close()
    return response
Solution 8 - Python
It depends on multiple factors, but try to test these methods:
import requests
def url_code_status(url, timeout=10):
    """Return the HTTP status code for ``url`` using a HEAD request.

    Redirects are not followed (``allow_redirects=False``), so the status of
    the first response is returned.

    Args:
        url: URL to probe.
        timeout: Seconds to wait before giving up; without one the request
            could block indefinitely.

    Returns:
        The response's ``status_code``, or None if the request failed (the
        error is printed).
    """
    try:
        response = requests.head(url, allow_redirects=False, timeout=timeout)
    except Exception as e:
        # Deliberately broad: this is a best-effort probe that reports and moves on.
        print(f'[ERROR]: {e}')
        return None
    return response.status_code
or:
import http.client as httplib
import urllib.parse
def url_code_status(url, timeout=10):
    """Return the HTTP status code for ``url`` using a HEAD request.

    Args:
        url: Absolute ``http`` or ``https`` URL.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The response status, or None if anything failed (the error is
        printed). An unsupported scheme is reported the same way.
    """
    conn = None
    try:
        protocol, host, path, query, _fragment = urllib.parse.urlsplit(url)
        if protocol == "http":
            conntype = httplib.HTTPConnection
        elif protocol == "https":
            conntype = httplib.HTTPSConnection
        else:
            raise ValueError("unsupported protocol: " + protocol)
        # urlsplit separates the query from the path; re-attach it so the
        # server sees the full request target.
        target = path or "/"
        if query:
            target += "?" + query
        conn = conntype(host, timeout=timeout)
        conn.request("HEAD", target)
        return conn.getresponse().status
    except Exception as e:
        # Deliberately broad: this is a best-effort probe that reports and moves on.
        print(f'[ERROR]: {e}')
        return None
    finally:
        # Close the connection on failure as well as on success.
        if conn is not None:
            conn.close()
Benchmark results for 100 URLs:
- First method: 20.90 seconds
- Second method: 23.15 seconds