cursors
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import pymysql.cursors

# Fetch the Chinese Wikipedia main page and collect all internal /wiki/ links
url = "https://zh.wikipedia.org/wiki/Wikipedia:%E9%A6%96%E9%A1%B5"
resp = urlopen(url).read().decode("UTF-8")
soup = BeautifulSoup(resp, "html.parser")
listurls = soup.findAll('a', href=re.compile("^/wiki/"))

for url in listurls:
    # Skip links that point to images (the original pattern had a "JGP" typo)
    if not re.search(r"\.(jpg|JPG)$", url["href"]):
        print(url.get_text(), "<---->", "https://zh.wikipedia.org" + url["href"])
        # Store each link in the wiki database
        connection = pymysql.connect(host='localhost',
                                     user='root',
                                     password='',
                                     db='wiki',
                                     charset='utf8mb4')
        try:
            with connection.cursor() as cursor:
                sql = "insert into `wikiurls`(`urlname`,`urlhref`) values (%s,%s)"
                cursor.execute(sql, (url.get_text(), "https://zh.wikipedia.org" + url["href"]))
                connection.commit()
        finally:
            connection.close()
# print(soup)
The error:
Traceback (most recent call last):
File "C:\Program Files (x86)\Python36-32\lib\site-packages\pymysql\connections.py", line 920, in connect
**kwargs)
File "C:\Program Files (x86)\Python36-32\lib\socket.py", line 704, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "C:\Program Files (x86)\Python36-32\lib\socket.py", line 745, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\Program Files\eclipse2017\HelloPython\test\wikitosave.py", line 18, in <module>
charset = 'utf8mb4')
File "C:\Program Files (x86)\Python36-32\lib\site-packages\pymysql\__init__.py", line 90, in Connect
return Connection(*args, **kwargs)
File "C:\Program Files (x86)\Python36-32\lib\site-packages\pymysql\connections.py", line 699, in __init__
self.connect()
File "C:\Program Files (x86)\Python36-32\lib\site-packages\pymysql\connections.py", line 967, in connect
raise exc
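The root cause in the traceback is socket.gaierror: [Errno 11001] getaddrinfo failed, which means the value passed as host to pymysql.connect could not be resolved to an address before any MySQL handshake took place. Below is a minimal diagnostic sketch, assuming MySQL is running locally on the default port 3306; the host, user, and password values simply mirror the script above and are placeholders, not a confirmed fix:

import socket
import pymysql

host = "localhost"  # same value handed to pymysql.connect in the script above

# Step 1: check that the host name resolves at all -- this is the same
# getaddrinfo call that failed inside pymysql's connect().
try:
    socket.getaddrinfo(host, 3306, 0, socket.SOCK_STREAM)
    print(host, "resolves")
except socket.gaierror as e:
    print(host, "does not resolve:", e)
    host = "127.0.0.1"  # fall back to the loopback address directly

# Step 2: try the connection itself with the resolvable address.
connection = pymysql.connect(host=host,
                             user="root",
                             password="",
                             db="wiki",
                             charset="utf8mb4")
try:
    with connection.cursor() as cursor:
        cursor.execute("select version()")
        print("connected, MySQL version:", cursor.fetchone())
finally:
    connection.close()

If step 1 already fails for "localhost", the problem is name resolution on the machine (for example a broken hosts file entry) rather than anything in the scraping code; if step 2 fails with a different error, the MySQL server or credentials are the next thing to check.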