爬取豆瓣电影排行榜，保存到 JSON 文件中

发布时间: 2023-09-30 19:51:23 | 作者: sgj191024
import urllib.request

# Douban chart endpoint; the query string asks for 20 entries starting at
# offset 0 (start=0, limit=20).
url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&start=0&limit=20'
# Send a browser-like User-Agent instead of urllib's default one
# (presumably the site rejects the default agent -- TODO confirm).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62',
}
request = urllib.request.Request(url=url, headers=headers)
# The context manager closes the HTTP response as soon as the body is read.
with urllib.request.urlopen(request) as res:
    content = res.read().decode('utf-8')

# Write the raw response text to disk; `with` guarantees the file is flushed
# and closed even if the write fails (the handle was previously left open).
with open('movie.json', 'w', encoding='utf-8') as file:
    file.write(content)

获取豆瓣电影排行榜第 1-10 页，每页保存为一个单独的 JSON 文件

import urllib.request
import urllib.parse
def getMovieTest(page):
    """Download one page of the Douban chart and save it to a JSON file.

    The page number is converted to a ``start`` offset of
    ``(page - 1) * 20`` with a fixed ``limit`` of 20, the request URL is
    printed, and the decoded response body is written unchanged to
    ``moviessqw<page>.json`` in the current working directory.

    Args:
        page: 1-based page number to fetch.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        OSError: If the output file cannot be written.
    """
    data = {
        'start': (page - 1) * 20,
        'limit': 20,
    }
    reqData = urllib.parse.urlencode(data)
    url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&' + reqData
    print(url)
    # Send a browser-like User-Agent instead of urllib's default one
    # (presumably the site rejects the default agent -- TODO confirm).
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62',
    }
    request = urllib.request.Request(url=url, headers=headers)
    # Close the HTTP response deterministically once the body has been read.
    with urllib.request.urlopen(request) as res:
        content = res.read().decode('utf-8')
    # `with` closes the file even if the write raises, unlike a manual close().
    with open('moviessqw' + str(page) + '.json', 'w', encoding='utf-8') as file:
        file.write(content)
# Fetch pages 1 through 10 in order; each call writes its own output file.
for page_number in range(1, 11):
    getMovieTest(page_number)