var http = require('http');
var cheerio = require('cheerio');
// Why bluebird: `window.Promise` cannot be used because `window` is not defined in Node,
// and `require('Promise')` fails because no module with that name can be found.
var Promise = require('bluebird');
// Base address of a course page; a course id is appended to it to form the full URL.
var url = 'http://www.imooc.com/learn/';
// Ids of the courses whose pages are fetched.
var courseId =[56, 38, 637, 348, 259, 197, 134, 75];
/**
 * Extract the course title, learner count and chapter/video outline from the
 * HTML of a course page.
 *
 * Shape of the returned object:
 *   {
 *     title:  text of `#main .hd h2`,
 *     number: text of `#main .static-item span.js-learn-num` (a string),
 *     videos: [{
 *       chapterTitle: '',
 *       videos: [{ title: '', id: '' }]
 *     }]
 *   }
 *
 * @param {string} html - Markup of a single course page.
 * @returns {Object} Course data in the shape described above.
 */
function filterChapters(html){
  var $ = cheerio.load(html);
  var outline = [];

  $('.chapter').each(function(chapterIndex, chapterNode){
    var $chapter = $(chapterNode);
    var lessons = [];

    // One entry per <li> directly under the chapter's <ul>.
    $chapter.find('ul').children('li').each(function(lessonIndex, lessonNode){
      var $lesson = $(lessonNode);
      lessons.push({
        title: $lesson.find('a').text().trim(),
        id: $lesson.attr('data-media-id')
      });
    });

    // NOTE(review): `.not('strong div')` filters the matched <strong> elements
    // themselves, so it probably does not strip text of nested <div>s — confirm.
    outline.push({
      chapterTitle: $chapter.find('strong').not('strong div').text().trim(),
      videos: lessons
    });
  });

  // The result is a plain object; chapters live in its `videos` array.
  return {
    title: $('#main .hd h2').text(),
    number: $('#main .static-item span.js-learn-num').text(),
    videos: outline
  };
}
/**
 * Print a summary of every course to the console.
 *
 * For each course two `console.log` calls are made: a headline with the
 * learner count and title, then the outline (each chapter title followed by
 * its videos as "【id】title", one per line).
 *
 * @param {Array<Object>} coursesData - Course objects with `number`, `title`
 *   and `videos` (chapters, each with `chapterTitle` and `videos`).
 * @returns {undefined}
 */
function printCourseInfo(coursesData){
  coursesData.forEach(function(course){
    console.log(course.number + ' 人学过 ' + course.title + '\n');

    // Fold chapters, and the videos inside each chapter, into one string.
    var outline = course.videos.reduce(function(text, chapter){
      var withHeading = text + (chapter.chapterTitle + '\n');
      return chapter.videos.reduce(function(chapterText, video){
        return chapterText + ('【' + video.id + '】' + video.title + '\n');
      }, withHeading);
    }, '');

    console.log(outline);
  });
}
/**
 * Download a page over HTTP and deliver its body as a string.
 *
 * Despite the "Sync" suffix the work is asynchronous: the function returns a
 * promise immediately and the body arrives later.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<string>} Resolves with the complete response body decoded
 *   as UTF-8; rejects with the error emitted by the request or the response.
 */
function getPageSync(url){
  return new Promise(function(resolve, reject){
    console.log('正在爬取' + url);

    // Shared failure path for request-level and response-level errors.
    function fail(e){
      console.log('获取课程数据失败');
      reject(e);
    }

    http.get(url, function(res){
      var html = '';
      // Let the stream decode UTF-8 itself. Appending raw Buffer chunks to a
      // string decodes each chunk on its own, which corrupts any multi-byte
      // character that happens to be split across two chunks.
      res.setEncoding('utf8');
      res.on('data', function(chunk){
        html += chunk;
      });
      res.on('end', function(){
        resolve(html); // hand the full page to the caller
      });
      res.on('error', fail);
    }).on('error', fail);
  });
}
// One pending page download per course id; all requests start right away.
var fetchCourseArray = courseId.map(function(id){
  return getPageSync(url + id); // full address = base url + course id
});

Promise
  .all(fetchCourseArray) // settles once every page has been downloaded
  .then(function(pages){ // `pages` holds the HTML strings, in courseId order
    var coursesData = pages.map(function(html){
      return filterChapters(html);
    });

    // `number` is scraped text, so convert it before comparing; unparsable
    // text counts as 0.
    function learnerCount(course){
      return parseInt(course.number, 10) || 0;
    }

    // Most-learned course first. A comparator must return a negative, zero or
    // positive number — returning a boolean does not give a reliable order.
    coursesData.sort(function(a, b){
      return learnerCount(b) - learnerCount(a);
    });

    printCourseInfo(coursesData);
  })
  .catch(function(e){
    // Without a handler a failed download or parse is an unhandled rejection.
    console.error(e);
  });