Python crawler 入门

发布时间: 2023-11-10 15:26:07 作者: DogLeftover
  • 将百度页面保存到本地
from urllib.request import urlopen
# Download the Baidu home page and save its HTML to the local file "a.html".
url = "http://www.baidu.com/"
# Use the response as a context manager so the underlying connection is
# closed even if reading or decoding the body raises.
with urlopen(url) as r:
    b = r.read().decode('utf-8')
# Write the decoded page text back out as UTF-8.
with open("a.html", mode="w", encoding="utf-8") as f:
    f.write(b)
print("OK")
  • 安装库
pip install requests
  • 获取网页源码
import requests as req

# Fetch the Sogou search results page for the query and print its HTML source.
url = "https://sogou.com/web?query=刘德华"
# Send a desktop-browser User-Agent header instead of the library default
# (presumably so the site serves the normal page -- verify if behavior differs).
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76"
    ),
}
# The with-block closes the response even if printing fails; the timeout
# keeps the script from hanging forever when the server does not answer.
with req.get(url, headers=head, timeout=10) as res:
    print(res.text)