# coding=utf-8
"""Extract image URLs from a saved HTML page and store them in a text file.

The page is read line by line. For every line that mentions ``img`` the
double-quoted value following the last ``src=`` is taken; values containing
``http`` are printed and written to the output file, one per line.
"""


def getHTMLlines(htmlpath):
    """Return all lines of the UTF-8 text file at *htmlpath*.

    Args:
        htmlpath: Path of the HTML file to read.

    Returns:
        A list of the file's lines, each keeping its line ending.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the file even if reading fails.
    with open(htmlpath, "r", encoding="utf-8") as f:
        return f.readlines()


def extractImageUrls(htmllist):
    """Collect image URLs from an iterable of HTML lines.

    Only lines containing the text ``img`` are inspected. The candidate is
    the text between the first pair of double quotes after the last
    ``src=`` on the line; it is kept when it contains ``http``. At most one
    URL is taken per line.

    Args:
        htmllist: Iterable of HTML source lines.

    Returns:
        A list of the extracted URLs, in input order.
    """
    urls = []
    for line in htmllist:
        if "img" not in line:
            continue
        pieces = line.split("src=")[-1].split('"')
        # A line can mention "img" without any quoted value; skip it rather
        # than fail with IndexError on pieces[1].
        if len(pieces) < 2:
            continue
        url = pieces[1]
        if "http" in url:
            urls.append(url)
    return urls


def showResults(urls):
    """Print every URL in *urls* together with its 0-based position.

    Args:
        urls: Iterable of URL strings.
    """
    for count, url in enumerate(urls):
        print("第{:2}个的URL:{}".format(count, url))


def saveResults(filepath, urls):
    """Write *urls* to *filepath*, one URL per line, replacing the file.

    Args:
        filepath: Path of the text file to create or overwrite.
        urls: Iterable of URL strings.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # UTF-8 matches the encoding used when reading the page, so the result
    # does not depend on the platform's default encoding.
    with open(filepath, "w", encoding="utf-8") as f:
        f.writelines(url + "\n" for url in urls)


def main():
    """Read the HTML page, then print and save the image URLs found in it."""
    inputfile = "F:/html/nationalgeographic.html"
    outputfile = "F:/html/ues.txt"
    htmlLines = getHTMLlines(inputfile)
    imageUrls = extractImageUrls(htmlLines)
    showResults(imageUrls)
    saveResults(outputfile, imageUrls)


# Run only when executed as a script so that importing this module does not
# touch the file system.
if __name__ == "__main__":
    main()
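# NOTE: the original paste ended with UI residue from the source web page
# ("Add answer", "Report", "0/150", "Submit", "Cancel"); it is not part of
# the program.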