自动登录抽屉并点赞 (Automatically log in to Chouti/dig.chouti.com and upvote posts)
# -*- coding: utf-8 -*-
import hashlib

import scrapy
from scrapy.selector import Selector, HtmlXPathSelector  # selectors for tag lookup
from ..items import ChoutiItem  # project item class (not used in this spider yet)
from scrapy.http import Request  # build follow-up requests
from scrapy.http.cookies import CookieJar  # capture session cookies


class ChoutiSpider(scrapy.Spider):
    """Log in to dig.chouti.com, then upvote every post on every listing page."""

    name = 'chouti'
    # Follow-up ("recursive") requests are only allowed within these domains.
    allowed_domains = ['chouti.com']
    # The URL requested when the spider starts.
    start_urls = ['https://dig.chouti.com/']

    # Session cookies captured from the first response; sent with every later request.
    cookie_dict = None
    # md5 digests of pagination URLs already scheduled, to avoid re-requesting pages.
    has_request_url_set = set()

    def parse(self, response):
        """Capture the anonymous session cookies, then POST the login form."""
        cookie_obj = CookieJar()
        cookie_obj.extract_cookies(response, response.request)
        # NOTE(review): _cookies is a private attribute of the stdlib CookieJar;
        # scrapy's meta={'cookiejar': ...} is the supported way to do this.
        self.cookie_dict = cookie_obj._cookies
        # Login request: credentials in the body + captured cookies + form headers.
        yield Request(
            url='https://dig.chouti.com/login',
            method="POST",
            body="phone=8617744503421&password=Huang123&oneMonth=1",
            headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
            cookies=self.cookie_dict,
            callback=self.check_login,
        )

    def check_login(self, response):
        """Login succeeded; fetch the front page with the session cookies."""
        yield Request(
            url='https://dig.chouti.com/',
            method='GET',
            cookies=self.cookie_dict,
            callback=self.good,
        )

    def good(self, response):
        """Upvote every post on this page, then follow the pagination links.

        Vote endpoint: https://dig.chouti.com/link/vote?linksId=<id>
        """
        selector = Selector(response=response)  # build once, reuse for both queries
        link_ids = selector.xpath('//div[@share-linkid]/@share-linkid').extract()
        for link_id in link_ids:  # renamed from `id` to avoid shadowing the builtin
            print(link_id)
            url = 'https://dig.chouti.com/link/vote?linksId=%s' % link_id
            yield Request(
                url=url,
                method="POST",
                cookies=self.cookie_dict,
                callback=self.show,
            )

        # Schedule each pagination link exactly once (dedupe via the md5 set).
        page_urls = selector.xpath('//div[@id="dig_lcpage"]//a/@href').extract()
        for page_url in page_urls:
            url = "https://dig.chouti.com%s" % page_url
            md5_url = self.md5(url)
            if md5_url not in self.has_request_url_set:  # guard replaces empty pass-branch
                print(page_url)
                self.has_request_url_set.add(md5_url)
                yield Request(url=url, method="POST", callback=self.good)

    def show(self, response):
        """Print the vote response body to confirm the upvote succeeded."""
        print(response.text)

    @staticmethod
    def md5(url):
        """Return the hex md5 digest of *url* (used only for request dedupe)."""
        # hashlib import hoisted to module level instead of re-importing per call.
        m = hashlib.md5()
        m.update(bytes(url, encoding='utf-8'))
        return m.hexdigest()
# Note: set DEPTH_LIMIT in settings.py to cap the "recursion" (link-follow) depth.
DEPTH_LIMIT = 4
import scrapy
from scrapy.http.response.html import HtmlResponse
from scrapy.http import Request
from scrapy.http.cookies import CookieJar


class ChoutiSpider(scrapy.Spider):
    """Log in to chouti.com using scrapy's built-in cookiejar (meta flag)."""

    name = "chouti"
    allowed_domains = ["chouti.com"]
    start_urls = (
        'http://www.chouti.com/',
    )

    def start_requests(self):
        # Kick off with the front page; the cookiejar meta flag tells scrapy to
        # track the session cookies for this request chain automatically.
        yield Request(
            url='http://dig.chouti.com/',
            callback=self.login,
            meta={'cookiejar': True},
        )

    def login(self, response):
        # Show the cookies the server handed out, then submit the login form.
        print(response.headers.getlist('Set-Cookie'))
        yield Request(
            url='http://dig.chouti.com/login',
            method='POST',
            headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'},
            body='phone=8613121758648&password=woshiniba&oneMonth=1',
            callback=self.check_login,
            meta={'cookiejar': True},
        )

    def check_login(self, response):
        # Dump the login response so success/failure is visible in the console.
        print(response.text)