var http = require('http');
var Promise = require('bluebird'); // 第三方 Promises 模块
var cheerio = require('cheerio'); // 爬虫分析模块
var BufferHelper = require('bufferhelper'); // buffer 组装模块
var iconv = require('iconv-lite'); // 字符转码模块
// Base address of a course page; the course ID is appended to it.
var baseUrl = 'http://www.imooc.com/learn/';
// IDs of the courses to crawl.
var courseIds = [348, 637, 259, 75, 197];
// One pending page request per course ID, in the same order as courseIds.
// Building the list starts all of the requests immediately.
var pagesArr = courseIds.map(function (courseId) {
    var pageUrl = baseUrl + courseId;
    return grabPageAsync(pageUrl);
});
/**
 * Fetch a page over HTTP and resolve with its body decoded as UTF-8 text.
 *
 * Progress, success and failure are reported on the console.
 *
 * @param {string} url - Address handed to `http.get`.
 * @returns {Promise<string>} Resolves with the decoded HTML. Rejects when the
 *   request or the response stream emits an error, or when the server answers
 *   with a non-2xx status code.
 */
function grabPageAsync(url) {
    return new Promise(function (resolve, reject) {
        // Shared failure path: report the failed URL, then reject.
        function fail(err) {
            console.log('爬取 ' + url + ' 失败');
            reject(err);
        }

        console.log('正在爬取 ' + url);
        http.get(url, function (res) {
            var status = res.statusCode;
            if (status < 200 || status >= 300) {
                // Drain the unread body so the connection is released, and do
                // not treat an error/redirect page as course HTML.
                res.resume();
                fail(new Error('Request for ' + url + ' failed with HTTP status ' + status));
                return;
            }

            var bufferHelper = new BufferHelper();
            res.on('data', function (chunk) {
                bufferHelper.concat(chunk);
            });
            res.on('end', function () {
                console.log('爬取 ' + url + ' 成功');
                // Decode only after all chunks are joined, so a multi-byte
                // character split across chunks is not corrupted.
                // iconv.decode already returns a string.
                var html = iconv.decode(bufferHelper.toBuffer(), 'UTF-8');
                resolve(html);
            });
            // The response stream failed part-way through the body.
            res.on('error', fail);
        }).on('error', fail); // The request itself failed (DNS, connection, ...).
    });
}
// Once every page has been fetched, extract the course data from each page
// and print it. Promise.all keeps the results in the order of pagesArr.
Promise
    .all(pagesArr)
    .then(function (pages) {
        // Extract the course information from each page.
        var coursesData = pages.map(function (html) {
            return filterChapters(html);
        });
        // Print the course information.
        printCourseInfo(coursesData);
    })
    .catch(function (err) {
        // Without this handler a failed request or a parsing error would
        // surface only as an unhandled rejection.
        console.error('获取课程信息失败: ' + (err && err.message ? err.message : err));
    });
/**
 * Extract the course information from the HTML of one course page.
 *
 * @param {string} html - Markup of the course page, loaded with cheerio.
 * @returns {{title: string, number: number, videos: Array}} Course record:
 *   `title` is the text of `.hd .l`; `number` is the integer parsed from the
 *   fourth `.meta-value strong` element (NaN when it cannot be parsed);
 *   `videos` holds one `{chapterTitle, videos: [{title, id}]}` entry per
 *   `.chapter` element.
 */
function filterChapters(html) {
    var $ = cheerio.load(html);
    var courseData = {
        title: $('.hd .l').text(),
        number: parseInt($($(".meta-value strong")[3]).text().trim(), 10),
        videos: []
    };

    $('.chapter').each(function () {
        var $chapter = $(this);
        var chapterData = {
            chapterTitle: $chapter.find('strong').text(),
            videos: []
        };

        $chapter.find('.video').children('li').each(function () {
            var $video = $(this).find('.studyvideo');
            // A list item without a video link carries nothing to record.
            if ($video.length === 0) {
                return;
            }
            // attr() yields undefined when the link has no href; fall back to
            // an empty string so split() cannot throw and abort the whole run.
            var href = $video.attr('href') || '';
            chapterData.videos.push({
                title: $video.text(),
                // Part of the href after 'video/'; undefined when absent.
                id: href.split('video/')[1]
            });
        });

        courseData.videos.push(chapterData);
    });

    return courseData;
}
/**
 * Print the collected course information to the console.
 *
 * For each course a header line and a separator are written, followed by each
 * chapter title and its trimmed video titles. When the argument is not a
 * non-empty array, a single "no course information" message is written.
 *
 * @param {Array} coursesData - Course records with `number`, `title` and
 *   `videos` (chapters, each with `chapterTitle` and `videos`).
 */
function printCourseInfo(coursesData) {
    var isArray = Object.prototype.toString.call(coursesData) === '[object Array]';
    if (!isArray || coursesData.length < 1) {
        console.log('暂无课程信息');
        return;
    }

    // Write one video title, trimmed of surrounding whitespace.
    function printVideo(video) {
        console.log(' ' + video.title.trim());
    }

    // Write a chapter title followed by its videos.
    function printChapter(chapter) {
        console.log('\n' + chapter.chapterTitle);
        chapter.videos.forEach(printVideo);
    }

    // Write the course header, a separator, then every chapter.
    function printCourse(course) {
        console.log('\n\n【' + course.number + '】人学过《' + course.title + '》');
        console.log('----------------------------------------------');
        course.videos.forEach(printChapter);
    }

    coursesData.forEach(printCourse);
}
// 打开App,阅读手记
// 热门评论
// 讲的非常好,太感谢老师的无私付出
// BufferHelper 不用这个模块组装好像也可以啊…