首页 > 猿问 > 请问在 Node 中如何实现 HTTP 小爬虫？

请问在node中如何实现http小爬虫?

Node.js

米琪卡哇伊 2018-08-02 16:05:23

在node中如何实现http小爬虫

查看完整描述

2 回答

呼如林

TA贡献1798条经验，获得超3个赞

// Small HTTP crawler: downloads one course page, extracts the chapter and
// video list from its HTML, and prints the result to the console.
// The overall flow is similar to a jQuery $.ajax call followed by DOM queries.

var http = require('http'); // Node's built-in HTTP client
var cheerio = require('cheerio'); // third-party jQuery-like HTML parser (npm install cheerio)

/**
 * Prints every chapter title followed by its videos.
 *
 * Each title and each video line is written with a trailing '\n', so the
 * console output has a blank line after every entry.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>} courseData
 *   Chapter list in the shape returned by fliterChapters.
 */
function printCourseInfo(courseData) {
  courseData.forEach(function (chapter) {
    console.log(chapter.chapterTitle + '\n');

    chapter.videos.forEach(function (video) {
      console.log('[' + video.id + ']' + video.title + '\n');
    });
  });
}

/**
 * Extracts the chapter/video structure from the page HTML.
 *
 * Result shape:
 *   [{ chapterTitle: '...', videos: [{ title: '...', id: '...' }] }]
 *
 * For every element matching '.chapter', the title is the text of its
 * <strong> descendants and the videos are the <li> children of its '.video'
 * element. A video's id is whatever follows 'video/' in the href of the
 * '.J-media-item' element inside that <li>.
 *
 * @param {string} html Raw HTML of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>}
 *   One entry per chapter; id is undefined when the link is missing, has no
 *   href, or the href does not contain 'video/'.
 */
function fliterChapters(html) {
  var $ = cheerio.load(html);
  var courseData = [];

  $('.chapter').each(function () {
    var chapter = $(this);
    var chapterData = {
      chapterTitle: chapter.find('strong').text(),
      videos: []
    };

    chapter.find('.video').children('li').each(function () {
      var link = $(this).find('.J-media-item');
      // attr() returns undefined when no element matched or the attribute is
      // absent; guard so one malformed <li> does not abort the whole parse.
      var href = link.attr('href');

      chapterData.videos.push({
        title: link.text(),
        id: href ? href.split('video/')[1] : undefined
      });
    });

    courseData.push(chapterData);
  });

  return courseData;
}

// Page to crawl (a course page on imooc.com).
var url = 'http://www.imooc.com/learn/348';

http.get(url, function (res) {
  if (res.statusCode !== 200) {
    // Redirects and error pages do not contain the course markup; report them
    // instead of silently printing nothing. resume() drains the body so the
    // socket is released.
    res.resume();
    console.error(
      '获取错误！',
      'HTTP ' + res.statusCode +
        (res.headers.location ? ' -> ' + res.headers.location : '')
    );
    return;
  }

  // Decode as UTF-8 at the stream level so a multi-byte character split
  // across two chunks is not corrupted by per-chunk Buffer-to-string conversion.
  res.setEncoding('utf8');

  var html = '';

  res.on('data', function (chunk) {
    html += chunk;
  });

  res.on('end', function () {
    printCourseInfo(fliterChapters(html));
  });
}).on('error', function (err) {
  // Include the underlying reason rather than only a generic message.
  console.error('获取错误！', err.message);
});

反对回复 2018-08-20

慕少森

TA贡献2019条经验，获得超9个赞

你这个问题范围很大，简单说下思路吧：在 Node 里发起一个 HTTP 请求，访问你需要爬取的网站；得到返回的 HTML 文本之后，再对文本进行解析，提取出你想要的内容就可以了。

反对回复 2018-08-20

2 回答
0 关注
936 浏览

关注

添加回答

0/150

提交

取消

热搜

最近搜索清空

请问在node中如何实现http小爬虫?

请问在node中如何实现http小爬虫?

2 回答

添加回答