手记

HTTP小爬虫——慕课网课程

/**
 * Created by fu on 2017/10/2.
 */
var http = require('http')
var cheerio = require('cheerio')
// Address of the course page that is fetched below and handed to fitlerChapters.
var url = 'http://www.imooc.com/learn/111'

/**
 * Extracts the chapter/video outline from a course page's HTML.
 *
 * Every element matching `.chapter` yields one entry: its title is the text
 * of its `strong` descendants, and its videos are read from the
 * `.J-media-item` element inside each `li` child of `.video`.
 *
 * @param {string} html - Markup of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>}
 *   Chapters in document order. A video's `id` is the part of its href that
 *   follows `video/`; it is `undefined` when the item has no href or the href
 *   does not contain `video/`.
 */
function fitlerChapters(html) {
    var $ = cheerio.load(html)
    var courseData = []

    $('.chapter').each(function () {
        var chapter = $(this)
        var chapterData = {
            chapterTitle: chapter.find('strong').text(),
            videos: []
        }

        chapter.find('.video').children('li').each(function () {
            var video = $(this).find('.J-media-item')
            // attr() yields undefined when the list item has no link (or the
            // link has no href); guard it so one odd item cannot abort the
            // whole parse with a TypeError.
            var href = video.attr('href')

            chapterData.videos.push({
                title: video.text(),
                id: href ? href.split('video/')[1] : undefined
            })
        })

        courseData.push(chapterData)
    })

    return courseData
}

/**
 * Writes the course outline to the console.
 *
 * For each chapter the title is logged first, followed by one line per video
 * in the form ` [id] title`. Every logged string ends with an extra newline.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{id: *, title: *}>}>} courseData
 *   Chapters to print, in order.
 */
function printCourseInfo(courseData) {
    // Logs a single video entry, indented under its chapter.
    function logVideo(entry) {
        console.log(' [' + entry.id + '] ' + entry.title + '\n')
    }

    // Logs a chapter heading and then each of its videos.
    function logChapter(chapter) {
        console.log(chapter.chapterTitle + '\n')
        chapter.videos.forEach(logVideo)
    }

    courseData.forEach(logChapter)
}

// Fetch the course page, extract its chapter outline and print it.
http.get(url, function (res) {
    // A redirect or error page is not the course markup; report it instead of
    // parsing it and silently printing nothing.
    if (res.statusCode !== 200) {
        console.log('获取课程数据出错!', 'HTTP ' + res.statusCode)
        // Discard the body so the underlying socket is released.
        res.resume()
        return
    }

    // Decode as a UTF-8 stream: appending raw Buffer chunks to a string would
    // corrupt any multi-byte character that is split across two chunks.
    res.setEncoding('utf8')

    var html = ''

    res.on('data', function (data) {
        html += data
    })

    res.on('end', function () {
        var courseData = fitlerChapters(html)

        printCourseInfo(courseData)
    })
}).on('error', function (err) {
    // Keep the original message and add the cause so failures can be diagnosed.
    console.log('获取课程数据出错!', err.message)
})
0人推荐
随时随地看视频
慕课网APP

热门评论

大佬用什么编程软件去弄Node.js的?

查看全部评论