import logging

import user_agent
import requests

logger = logging.getLogger(__name__)


class UA_midd(object):
    """Request middleware that sets a generated User-Agent and a Referer header."""

    def process_request(self, request, spider):
        """Stamp ``User-Agent`` and ``Referer`` headers onto *request*.

        Args:
            request: Outgoing request; must expose ``headers`` and ``url``.
            spider: Spider issuing the request (unused).
        """
        request.headers['User-Agent'] = user_agent.generate_user_agent()
        # The request's own URL is used as its Referer.
        referer = request.url
        if referer:
            request.headers['Referer'] = referer


class Proxy_midd(object):
    """Request middleware that routes requests through a proxy from a pool.

    A proxy address is fetched from ``self.url`` and reused until
    ``self.count`` reaches 20; the counter advances by 1 per proxied request,
    by 5 per request made while no proxy is known, and by 20 on a timeout.
    """

    # Seconds to wait for the proxy pool before giving up on this attempt.
    POOL_TIMEOUT = 10

    def __init__(self):
        self.ip = ''  # current proxy as "host:port"; empty when none is known
        self.url = 'http://188.131.212.24:5010/get/'  # proxy-pool endpoint
        self.count = 0  # usage counter; 0 or >= 20 triggers a new fetch

    def process_request(self, request, spider):
        """Attach the current proxy to *request*, refreshing it when due.

        Args:
            request: Outgoing request; ``request.meta['proxy']`` is set when
                a proxy is available.
            spider: Spider issuing the request (unused).
        """
        if self.count == 0 or self.count >= 20:
            fetched = self._fetch_proxy()
            # NOTE(review): the original snippet lost its indentation; the
            # counter is reset only when a new proxy was actually obtained,
            # so a failed fetch is retried on the next request.
            if fetched:
                self.ip = fetched
                self.count = 1

        if self.ip:
            request.meta['proxy'] = 'http://' + self.ip
            self.count += 1
        else:
            # No proxy known: advance faster so the pool is queried again soon.
            self.count += 5

    def process_exception(self, request, exception, spider):
        """Mark the current proxy as exhausted on timeout and hand back *request*.

        Args:
            request: The request that failed.
            exception: The exception raised while processing it.
            spider: Spider issuing the request (unused).

        Returns:
            The same *request* object, unchanged.
        """
        # The original tested ``request`` here, so the branch could never fire.
        # NOTE(review): this matches the builtin TimeoutError only; confirm
        # which timeout exception type the framework actually raises.
        if isinstance(exception, TimeoutError):
            self.count += 20
        return request

    def _fetch_proxy(self):
        """Return a proxy address from the pool, or '' when none is available."""
        try:
            body = requests.get(url=self.url, timeout=self.POOL_TIMEOUT).content.decode()
        except (requests.RequestException, UnicodeDecodeError) as err:
            logger.warning("proxy pool request failed: %s", err)
            return ''
        body = body.strip()
        # A response containing 'no' is treated as "no proxy available",
        # as in the original check.
        if not body or 'no' in body:
            return ''
        return body
单纯处理 UA 和代理 IP 的功能
# If you maintain cookies through a cookie pool, fetch unexpired cookies from it.
# Note: cookies set in a middleware must be a dict.
import json
import logging

import requests

logger = logging.getLogger(__name__)


class cookies_mid(object):
    """Request middleware that replaces a request's cookies with ones from a pool."""

    # Seconds to wait for the cookie pool before giving up on this attempt.
    POOL_TIMEOUT = 10

    def __init__(self):
        # Placeholder: replace with the URL of the cookie pool you maintain.
        self.cookies_url = '你维护的cookies池'

    def process_request(self, request, spider):
        """Assign cookies fetched from the pool to *request*.

        The request's existing cookies are left untouched when the pool
        yields nothing, instead of being overwritten with ``None``.

        Args:
            request: Outgoing request; ``request.cookies`` is set on success.
            spider: Spider issuing the request (unused).
        """
        cookies = self.get_cookies()
        if cookies:
            request.cookies = cookies

    def get_cookies(self):
        """Fetch cookies from ``self.cookies_url`` and decode them from JSON.

        Returns:
            The decoded JSON value, or ``None`` when the pool is unreachable,
            returns an empty body, or returns invalid JSON.
        """
        try:
            body = requests.get(self.cookies_url, timeout=self.POOL_TIMEOUT).content.decode()
        except (requests.RequestException, UnicodeDecodeError) as err:
            logger.warning("cookie pool request failed: %s", err)
            return None
        if not body:
            return None
        try:
            return json.loads(body)
        except ValueError as err:  # json.JSONDecodeError subclasses ValueError
            logger.warning("cookie pool returned invalid JSON: %s", err)
            return None
cookies更换
有关资料