// 本章节源码,并附有部分注解,希望可以帮助到有需要的同学
//爬取imooc单章内容,并整理筛选打印
var http = require('http')
var cheerio = require('cheerio')
var url = 'http://www.imooc.com/learn/348'
//把html作为参数传递给一个函数,让函数对数据做相应的过滤
/**
 * Extract the chapter/video outline from a course page.
 *
 * Every element matching `.chapter` produces one entry. Its title is the
 * trimmed text of the `h3` found inside it, and its videos are read from the
 * `li` children of the `.video` element(s) inside it.
 *
 * Result shape:
 *   [{ chapterTitle: '...', videos: [{ videoTitle: '...', videoId: '...' }] }]
 *
 * @param {string} html - Markup of the course page; handed to `cheerio.load`.
 * @returns {Array<{chapterTitle: string, videos: Array<{videoTitle: string, videoId: (string|undefined)}>}>}
 *   Chapters in document order. `videoId` is the piece of the link's `href`
 *   that follows `video/`; it is `undefined` when the link has no `href` or
 *   the `href` does not contain `video/`. List items that contain no
 *   `.J-media-item` link at all are skipped.
 */
function filterChapters(html) {
  var $ = cheerio.load(html)
  var courseData = []

  $('.chapter').each(function () {
    var chapter = $(this)
    var chapterData = {
      chapterTitle: chapter.find('h3').text().trim(),
      videos: []
    }

    chapter.find('.video').children('li').each(function () {
      var video = $(this).find('.J-media-item')

      // A list item without a media link carries no video information;
      // skip it instead of recording an empty entry.
      if (video.length === 0) {
        return
      }

      // attr() yields undefined when the attribute is absent, so guard the
      // split() call rather than letting one malformed link abort the parse.
      var href = video.attr('href')

      chapterData.videos.push({
        videoTitle: video.text().trim(),
        videoId: typeof href === 'string' ? href.split('video/')[1] : undefined
      })
    })

    courseData.push(chapterData)
  })

  return courseData
}
/**
 * Write a course outline to the console.
 *
 * For each chapter, logs the chapter title followed by a newline, then one
 * line per video in the form `[videoId]videoTitle` followed by a newline.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{videoId: string, videoTitle: string}>}>} courseData
 *   Chapters to print, in the order they should appear.
 * @returns {void}
 */
function printCourseInfo(courseData) {
  // Logs a single video as "[id]title".
  function printVideo(entry) {
    console.log('[' + entry.videoId + ']' + entry.videoTitle + '\n')
  }

  // Logs a chapter heading, then each of its videos.
  function printChapter(section) {
    console.log(section.chapterTitle + '\n')
    section.videos.forEach(printVideo)
  }

  courseData.forEach(printChapter)
}
/**
 * Report that fetching the course page failed.
 *
 * @param {Error} e - The error emitted by the request or the response stream.
 */
function reportFetchFailure(e) {
  console.error('extrat 348 chapter action is failed', e)
}

// Download the course page, then parse it and print the outline.
http.get(url, function (res) {
  var html = ''

  // Let the stream decode UTF-8 itself: appending raw Buffer chunks converts
  // each chunk separately and corrupts any multi-byte character that happens
  // to be split across two chunks.
  res.setEncoding('utf8')

  res.on('data', function (chunk) {
    html += chunk
  })

  res.on('end', function () {
    printCourseInfo(filterChapters(html))
  })

  // Failures while the body is being read surface on the response.
  res.on('error', reportFetchFailure)
}).on('error', reportFetchFailure) // Connection-level failures (DNS, refused, reset) surface on the request.