var http = require('http')
var Promise = require('bluebird')
var url = 'http://www.imooc.com/learn/348'
var cheerio = require('cheerio')
var baseUrl = 'http://www.imooc.com/learn/'
// Course ids to crawl; each one is appended to baseUrl to form a course page URL.
// Declared with `var` so it is not an implicit global (which throws in strict mode).
var vedioIds = ['348', '637']
/**
 * Parse a course page and extract its title and chapter/video outline.
 *
 * Returned shape:
 *   {
 *     title  : string,
 *     number : 0,              // hard-coded; no count is parsed from the page
 *     vedios : [{              // (sic) printCourse2 reads this exact key
 *       chapterTitle : string,
 *       videos : [{ title : string, id : string|undefined }]
 *     }]
 *   }
 *
 * Side effect: logs the text of the last <span> inside the second
 * '.static-item.l' element to the console.
 *
 * @param {string} html - HTML source of a course page.
 * @returns {Object} Course data in the shape described above.
 */
function filterChapters(html){
  var $ = cheerio.load(html)
  var chapters = $('.chapter')

  var title = $('.path').children('a').children('span').text().trim()
  var level = $($('.static-item.l')[1]).find('span').last().text().trim()
  console.log(level)

  var courseData = {
    title : title,
    number : 0,
    vedios : []
  }

  // These removals are document-wide, so do them once instead of once per
  // chapter. Presumably they keep description/button text out of the
  // .text() results read below -- TODO confirm against the live markup.
  $('.chapter-content').remove()
  $('.moco-btn').remove()

  chapters.each(function(){
    var chapter = $(this)
    var chapterData = {
      chapterTitle : chapter.find('strong').text().trim(),
      videos : []
    }

    chapter.find('ul').children('li').each(function(){
      var vedio = $(this)
      // attr() yields undefined when the <li> has no anchor/href; fall back to
      // an empty string so one malformed entry does not abort the whole parse.
      var href = vedio.find('a').attr('href') || ''
      chapterData.videos.push({
        title : vedio.find('.J-media-item').text().trim(),
        // Everything after 'video/' in the link; undefined when that marker is absent.
        id : href.split('video/')[1]
      })
    })

    courseData.vedios.push(chapterData)
  })

  return courseData
}
/**
 * Print a list of chapters to the console: each chapter title on its own
 * line, followed by one '[id]title' line per video in that chapter.
 *
 * @param {Array} courseData - Chapters, each with `chapterTitle` and a
 *   `videos` array of `{ id, title }` entries.
 */
function printCourse(courseData){
  // Emits a single video line.
  function showVideo(video){
    console.log('[' + video.id + ']' + video.title);
  }

  // Emits the chapter heading, then all of its videos.
  function showChapter(chapter){
    console.log(chapter.chapterTitle);
    chapter.videos.forEach(showVideo)
  }

  courseData.forEach(showChapter)
}
/**
 * Fetch a page with an HTTP GET and resolve with the response body as a string.
 *
 * Logs a progress line before the request starts and a failure line when the
 * request itself errors.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<string>} Resolves with the full body once the response
 *   ends; rejects with the underlying error if the request or the response
 *   stream fails.
 */
function getPageAsync(url){
  return new Promise(function(resolve, reject){
    console.log('正在爬取 ' + url);
    http
      .get(url, function(res){
        var html = ''
        // Decode at the stream level: stringifying each Buffer chunk on its
        // own corrupts a multi-byte UTF-8 character that straddles two chunks.
        res.setEncoding('utf8')
        res.on('data', function(data){
          html += data
        })
        res.on('end', function(){
          resolve(html)
        })
        // Without this, a failure in the middle of the body would leave the
        // promise pending forever ('end' is never emitted in that case).
        res.on('error', reject)
      })
      .on('error', function(e){
        console.log('获取课程异常')
        reject(e)
      })
  })
}
/**
 * Print several courses to the console. For every course: its title, then for
 * each chapter the chapter title followed by one '[id] title' line per video.
 *
 * @param {Array} coursesData - Courses, each with `title` and a `vedios`
 *   array of chapters (`{ chapterTitle, videos: [{ id, title }] }`).
 */
function printCourse2(coursesData){
  // Emits a single video line.
  function showVideo(video){
    console.log('['+video.id+'] ' + video.title)
  }

  // Emits the chapter heading, then all of its videos.
  function showChapter(chapter){
    console.log(chapter.chapterTitle)
    chapter.videos.forEach(showVideo)
  }

  // Emits the course heading, then all of its chapters.
  function showCourse(course){
    console.log(course.title)
    course.vedios.forEach(showChapter)
  }

  coursesData.forEach(showCourse)
}
// Start one download per course id; the requests run concurrently.
var fetchCourseArray = vedioIds.map(function(id){
  return getPageAsync(baseUrl + id)
})

Promise
  .all(fetchCourseArray)
  .then(function(pages){
    var coursesData = pages.map(function(html){
      return filterChapters(html)
    })
    // Print once, after every page has been parsed. Printing inside the loop
    // re-printed all previously parsed courses on each iteration.
    printCourse2(coursesData)
  })
  .catch(function(e){
    // Surface download/parse failures instead of leaving an unhandled rejection.
    console.error(e)
  })
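// Residue from the web page this script was copied from (not part of the program):
// 共同学习,写下你的评论  ("Learn together -- write your comment")
// 评论加载中...  ("Loading comments...")
// 作者其他优质文章  ("More quality articles by this author")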