1 回答
TA贡献1876条经验 获得超7个赞
如果请求返回的 status_code 为 200,就说明这是一个有效的地址。
关于 https://:如果您没有按照本指南中的答案进行配置,将会收到 SSL 错误。配置完成之后,程序就会为您找到正确的 URL。
import requests
import traceback
# Prefixes tried when probing an address, in order of preference. The
# "www." variants come first so that a scheme-plus-www prefix is matched
# as a whole before the bare scheme.
validProtocols = ["https://www.", "http://www.", "https://", "http://"]


def removeAnyProtocol(url):
    """Return *url* without its leading scheme and/or ``www.`` prefix.

    Only a prefix at the very start of the string is removed, so text
    such as ``awww.example.com`` or a URL embedded in a query string is
    left untouched. Matching is case-insensitive (``HTTPS://WWW.`` is
    stripped too); the remainder of the string keeps its original case.

    Args:
        url: Address with or without a scheme / ``www.`` prefix.

    Returns:
        The address with at most one leading entry of ``validProtocols``
        and a leading ``www.`` removed.
    """
    for protocol in validProtocols:
        # Compare only the leading slice so the rest of the URL is never
        # inspected or modified.
        if url[:len(protocol)].lower() == protocol:
            url = url[len(protocol):]
            break
    # Inputs such as "www.example.com" carry no scheme but still have the
    # host prefix that the candidates re-add themselves.
    if url[:len("www.")].lower() == "www.":
        url = url[len("www."):]
    return url
def validateUrl(url, timeout=10):
    """Return the first variant of *url* that answers a HEAD request with 200.

    The address is reduced to its bare form with ``removeAnyProtocol`` and
    then re-prefixed with every entry of ``validProtocols`` in order. Each
    candidate is probed with ``requests.head`` (following redirects); the
    first one whose final status code is 200 is returned.

    Args:
        url: Address to check, with or without a scheme / ``www.`` prefix.
        timeout: Seconds passed to ``requests.head`` as its ``timeout``
            argument, so an unresponsive host cannot block forever.

    Returns:
        The working URL including its prefix, or ``None`` when no
        candidate responded with status 200.
    """
    bare = removeAnyProtocol(url)
    if not bare:
        # Nothing left to probe; avoid four requests that can only fail.
        return None

    for protocol in validProtocols:
        candidate = protocol + bare
        try:
            response = requests.head(
                candidate, allow_redirects=True, timeout=timeout
            )
        except (requests.RequestException, ValueError):
            # Best-effort probing: report the failure and move on to the
            # next prefix instead of aborting the whole search.
            print(traceback.format_exc())
            continue
        if response.status_code == 200:
            return candidate
    return None
用法:
correctUrl = validateUrl("google.com") # e.g. "https://www.google.com" when that first candidate answers with status 200
添加回答
举报