首页 > 古诗词 > 1.2python网络爬虫实践(1)爬取89文学网小说
2020
12-06

1.2python网络爬虫实践(1)爬取89文学网小说

1.2python网络爬虫实践(1)爬取89文学网小说
网站url:(原文此处的链接在页面提取时丢失)

# 注:以下导入语句在提取文本中缺失,根据代码中用到的名称补全
import os
import time

import requests
from bs4 import BeautifulSoup


# 获取所有章节的链接
def get_novel_chapters():
    # 注:本函数开头几行在提取时损坏(仅剩 "()==()=="),
    # 此处参照 get_chapter_content 的写法还原;小说目录页地址已丢失,需自行填写
    root_url = ''
    r = requests.get(root_url)
    r.encoding = 'gbk'
    soup = BeautifulSoup(r.text, 'html.parser')
    # 储存数据
    data = []
    for dd in soup.find_all('dd'):
        link = dd.find('a')
        if not link:
            continue
        data.append(('http://www.89wxw.cn%s' % link['href'], link.get_text()))
    return data


# 抓取章节的正文
def get_chapter_content(url):
    r = requests.get(url)
    r.encoding = 'gbk'
    soup = BeautifulSoup(r.text, 'html.parser')
    # 解析数据
    return soup.find('div', id='content').get_text().strip().replace('\xa0' * 4, '\n')


# 小说保存文件名称
path = '太古星辰诀'
if not os.path.exists(path):
    os.makedirs(path)

novel_chapters = get_novel_chapters()
total_cnt = len(novel_chapters)
idx = 0
for chapter in get_novel_chapters():
    idx += 1
    url, title = chapter
    print(chapter)
    print('下载中----------', title)
    time.sleep(3)
    filename = path + '/' + '{}.txt'.format(title)
    # 保存数据
    with open(filename, 'w', encoding='utf-8') as fout:
        fout.write(get_chapter_content(url))
资料.rar

资料1.rar

资料2.rar


本文有 0 条评论

留下一个回复