Traceback (most recent call last):
File "D:\pythonxxhj\pycode\imooc\wiki2mysql.py", line 6, in <module>
import pymysql.cursors
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pymysql\__init__.py", line 92, in <module>
from . import connections as _orig_conn
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pymysql\connections.py", line 22, in <module>
from .cursors import Cursor
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pymysql\cursors.py", line 138
connection = self._get_db()
^
IndentationError: unindent does not match any outer indentation level
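# The code is as follows.
# Note: the IndentationError in the traceback above is reported for the installed package file site-packages\pymysql\cursors.py (line 138), not for a line of this script.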
#! /usr/local/bin/python3
# -*- coding:utf-8 -*-
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import pymysql.cursors
WIKI_BASE_URL = "https://en.wikipedia.org"
START_PAGE_URL = WIKI_BASE_URL + "/wiki/Main_Page"

# Compiled once at import time. Raw strings keep the backslash literal so the
# pattern does not rely on Python tolerating the invalid "\." string escape.
WIKI_LINK_PATTERN = re.compile(r"^/wiki/")
JPG_LINK_PATTERN = re.compile(r"\.(jpg|JPG)$")

# Column order is (urlhref, urlname); parameters must be supplied in that order.
INSERT_URL_SQL = "insert into `urls`(`urlhref`,`urlname`)values(%s,%s)"


def is_jpg_link(href: str) -> bool:
    """Return True when *href* ends in ``.jpg`` or ``.JPG``."""
    return JPG_LINK_PATTERN.search(href) is not None


def absolute_wiki_url(href: str) -> str:
    """Return *href* (a site-relative path) prefixed with the Wikipedia host."""
    return WIKI_BASE_URL + href


def fetch_wiki_links(page_url: str = START_PAGE_URL):
    """Download *page_url* and collect its internal ``/wiki/`` links.

    Returns a list of ``(link_text, absolute_url)`` tuples, in document
    order, for every ``<a>`` whose ``href`` starts with ``/wiki/`` and does
    not end in ``.jpg``/``.JPG``.
    """
    # The context manager closes the HTTP response even if decoding fails.
    with urlopen(page_url) as response:
        html = response.read().decode("utf-8")

    soup = BeautifulSoup(html, "html.parser")
    return [
        (anchor.get_text(), absolute_wiki_url(anchor["href"]))
        for anchor in soup.find_all("a", href=WIKI_LINK_PATTERN)
        if not is_jpg_link(anchor["href"])
    ]


def store_links(links) -> None:
    """Insert ``(link_text, absolute_url)`` pairs into the ``urls`` table.

    One connection is opened for the whole batch (instead of one per link)
    and the batch is committed once, so either every row is stored or none
    is. Nothing is opened when *links* is empty.
    """
    if not links:
        return

    connection = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='wikiurl',
        charset='utf8mb4',
    )
    try:
        with connection.cursor() as cursor:
            # Reorder each pair to (href, name) so the values line up with
            # the (`urlhref`, `urlname`) column list of INSERT_URL_SQL.
            cursor.executemany(
                INSERT_URL_SQL,
                [(href, name) for name, href in links],
            )
        connection.commit()
    finally:
        connection.close()


def main() -> None:
    """Scrape the Wikipedia main page, print each link, then store them."""
    links = fetch_wiki_links()
    for name, href in links:
        print(name, "<---->", href)
    store_links(links)


if __name__ == "__main__":
    main()