转载请注明转载自:忆雨林枫

Python 3 爬虫学习(一):爬取视频

运行环境:Python 3.6.2

# -*- coding: utf-8 -*-
import urllib.request
import re
def geturllist(page):
    """Download every MP4 linked from one budejie.com video listing page.

    Fetches ``http://www.budejie.com/video/<page>``, collects the value of
    each ``data-mp4="..."`` attribute found in the HTML, and saves every
    video as ``mp4/<last URL path segment>.mp4`` relative to the current
    working directory.

    Args:
        page: Integer page number of the listing (formatted with ``%d``).

    Returns:
        None. The downloaded files are the only result.

    Note:
        Network, decoding and file-system errors are not caught here; they
        propagate to the caller.
    """
    # Imported locally so this function does not depend on a file-level
    # ``import os`` being present.
    import os

    req = urllib.request.Request('http://www.budejie.com/video/%d' % page)
    # A browser-like User-Agent is sent with the listing request.
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36')

    # Close the HTTP response as soon as the body has been read instead of
    # leaving the connection open until garbage collection.
    with urllib.request.urlopen(req) as res:
        html = res.read()

    reg = r'data-mp4="(.*?)">'
    urllist = re.findall(reg, html.decode('utf-8'))

    # urlretrieve does not create missing directories, so make sure the
    # target directory exists before the first download.
    os.makedirs('mp4', exist_ok=True)

    for url in urllist:
        urllib.request.urlretrieve(url, 'mp4/%s.mp4' % url.split('/')[-1])
# Crawl listing pages 1 through 10, downloading the videos found on each.
for page_number in range(1, 11):
    geturllist(page_number)