只能抓到一个第二个就异常了。KeyError: 'url'
craw 1 : http://baike.baidu.com/view/21087.htm
craw 2 : http://baike.baidu.com/view/10812319.htm
Traceback (most recent call last):
File "D:\System Files\Documents\EclipseWorkspace\Test\src\Baike_spider\spider_main.py", line 42, in <module>
obj_spider.craw(root_url)
File "D:\System Files\Documents\EclipseWorkspace\Test\src\Baike_spider\spider_main.py", line 37, in craw
self.outputer.ouput_html()
File "D:\System Files\Documents\EclipseWorkspace\Test\src\Baike_spider\html_outputer.py", line 27, in ouput_html
fout.write("<td>%s</td>" % data['url'])
KeyError: 'url'
#coding:utf-8
class HtmlOutputer(object):
def __init__(self):
self.datas=[]
def collect_data(self,data):
if data is None:
return
self.datas.append(data)
def ouput_html(self):
fout=open('output.html','w')
fout.write("<html>")
fout.write("<body>")
fout.write("<table>")
for data in self.datas:
fout.write("<tr>")
fout.write("<td>%s</td>" % data['url'])
fout.write("<td>%s</td>" % data['title'].encode('utf-8'))
fout.write("<td>%s</td>" % data['summary'].encode('utf-8'))
fout.write("</tr>")
fout.write("</table>")
fout.write("</body>")
fout.write("</html>")
fout.close()