网页爬虫 - 为什么用 Python 模拟登录 App Annie 一直返回 503 状态码
问题描述
# -*- encoding: utf-8 -*-
import sys

import requests
import xlwt
from bs4 import BeautifulSoup

# Python 2 only: reload(sys) re-exposes sys.setdefaultencoding so the
# process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

referer = 'https://www.appannie.com/account/login/?_ref=header'
user_agent = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')

# Browser-like request headers, passed explicitly to the GET requests below.
header = {
    'User-Agent': user_agent,
    'Referer': referer,
    'Host': 'www.appannie.com',
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain,*/*',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'X-NewRelic-ID': 'VwcPUFJXGwEBUlJSDgc=',
    'X-Requested-With': 'XMLHttpRequest',
}


def main():
    """Log in to App Annie and print the parsed Google Play top-chart page.

    Fetches the login page to obtain the ``csrftoken`` cookie, posts the
    login form with that token, then requests the top-chart page for a fixed
    date and prints the resulting BeautifulSoup document.

    Raises:
        Exception: if the login POST returns a non-2xx status code.
    """
    url = 'https://www.appannie.com/account/login/'
    # content = requests.get(url,headers = header).content
    # soup = BeautifulSoup(content,'lxml')
    # key = soup.select()
    s = requests.Session()
    s.get(url, headers=header)
    key = s.cookies['csrftoken']
    data = {
        'csrfmiddlewaretoken': key,
        'next': '/dashboard/home/',
        'username': '1195615991@qq.com',
        'password': 'xxxxx',
    }
    # NOTE(review): this POST passes no headers=, so the User-Agent / Referer
    # defined in `header` are sent only with the GET requests, not with the
    # login itself. Possibly related to the 503 being asked about - confirm.
    req = s.post(url, data=data)
    # Floor division keeps the "is it 2xx?" check an integer comparison.
    if 2 != req.status_code // 100:
        raise Exception('Error while logging in, code: %d' % (req.status_code))
    cookies = req.cookies
    n = '2017-04-11'
    url_1 = 'https://www.appannie.com/apps/google-play/top-chart/?country=US&category=game&device=&date={}'.format(n)
    req_1 = s.get(url_1, headers=header, cookies=cookies).content
    #print req_1
    soup = BeautifulSoup(req_1, 'lxml')
    print(soup)
    # ids = soup.find_all('span')
    # for id in ids :
    #     name = id.get('title')
    #     print name


if __name__ == '__main__':
    main()
问题解答
回答1:两个关键点:1. headers 中的 user-agent;2. csrfmiddlewaretoken 参数。
# coding: utf-8
import requests

url = 'https://www.appannie.com/account/login'

session = requests.Session()
# Set the User-Agent on the session itself so that every request made through
# it (the GET and the POST below) carries the browser-like value.
session.headers['user-agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'

# The initial GET stores the csrftoken cookie in the session's cookie jar.
session.get(url)
token = session.cookies.get('csrftoken')

# Login form fields; the CSRF token read from the cookie is sent back as
# csrfmiddlewaretoken.
data = {
    'csrfmiddlewaretoken': token,
    'next': '/dashboard/home/',
    'username': 'XXXX',
    'password': 'XXXX',
}

r = session.post(url, data)
print(r.status_code)
相关文章:
1. 修改mysql配置文件的默认字符集重启后依然不生效
2. mongodb - windows7下mongod无法正常启动
3. angular.js - 关于指令link 中的创建变量问题
4. mysql - 请教一个Java做数据库缓存的问题
5. php - 类似Apple官网顶部3级导航该如何设计数据库?
6. javascript - ueditor引入报错问题
7. javascript - 豆瓣的这个自适应是怎么做的?
8. 请问一下各位老鸟 我一直在学习独孤九贱 现在是在tp5 今天发现 这个系列视频没有实战
9. python sqlite3 长语句插入出错
10. 作为新手,未定义索引username,求解,谢谢

网公网安备