爬虫模拟登录豆瓣
· 157 words · 1 minute read
网络爬虫——模拟登录豆瓣
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 01 13:44:51 2017
@author: 马晶义
"""
import requests
import re
# Browser-like request headers sent with every HTTP call in this script.
#
# Accept-Encoding deliberately lists only "gzip" and "deflate": those are the
# codings requests/urllib3 can always decompress.  Advertising "sdch" or "br"
# lets the server answer with a body the client cannot decode (brotli needs
# an optional extra package, sdch is not supported at all), which would turn
# ``response.text`` into garbage and make the regex scraping below fail.
headers = {
    "Host": "www.douban.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
    "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.6",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
}
def login():
    """Log in to Douban and return the session used for the login.

    The login page is fetched first.  If it contains a captcha image, the
    image is saved to ``login_captcha_douban.jpg`` in the current directory,
    the solution is read from standard input, and the solution together
    with the page's hidden ``captcha-id`` value is added to the login form.
    The form is then POSTed to the login URL.

    The account name and password are the placeholder values hard-coded in
    the form below; replace them with real credentials before running.

    Returns:
        The ``requests`` session that issued the login requests.  The
        response to the login POST is not inspected, so the caller cannot
        tell from the return value whether the login succeeded.
    """
    # ``raw_input`` only exists on Python 2; fall back to ``input`` on
    # Python 3 so the prompt works under both interpreters.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    login_url = 'https://www.douban.com/accounts/login'
    form = {
        'source': 'None',
        'form_email': 'xxxxxx',
        'form_password': 'xxxxx',
        'login': u'登录',
    }

    session = requests.session()
    page = session.get(login_url, headers=headers).text

    captcha_img_pattern = r'(?<=<img id="captcha_image" src=\").*?(?=\")'
    image_match = re.search(captcha_img_pattern, page, re.S | re.M | re.I)
    if image_match is not None:
        captcha_image_url = image_match.group()
        print(captcha_image_url)

        # Fetch the image through the same session so the request carries
        # the cookies set by the login page; ``.content`` keeps raw bytes.
        captcha_image = session.get(captcha_image_url, headers=headers).content
        # ``with`` closes the file even if the write fails.
        with open('login_captcha_douban.jpg', 'wb') as image_file:
            image_file.write(captcha_image)

        # The user opens the saved image and types what it shows.
        form['captcha-solution'] = read_line('captcha_solution:')

        # The captcha id is a hidden input on the login page.
        captcha_id_pattern = r'(?<=<input type="hidden" name="captcha-id" value=\").*?(?=\"/>)'
        id_match = re.search(captcha_id_pattern, page, re.S | re.M | re.I)
        if id_match is None:
            # Best effort: report the problem and still attempt the login.
            print('captcha_id error')
        else:
            form['captcha-id'] = id_match.group()

    session.post(login_url, headers=headers, data=form)
    # Debug output.  NOTE: the form includes the password in plain text.
    print(form)
    print(session.cookies.items())
    return session
def main(profile_url='https://www.douban.com/people/146365045/'):
    """Log in, print a page's title and save the page to ``use.html``.

    Args:
        profile_url: URL fetched with the logged-in session.  Defaults to
            the profile page of user ``146365045``.
    """
    session = login()
    profile_html = session.get(profile_url, headers=headers).text

    # Print the content of the page's <title> element.
    title = re.search(r'(?<=<title>).*?(?=</title>)', profile_html, re.M | re.I | re.S)
    if title is None:
        # No <title> in the response; report it instead of raising
        # AttributeError on ``None.group()``.
        print('title not found')
    else:
        print(title.group())

    # Save the fetched page as UTF-8 bytes; ``with`` guarantees the file is
    # closed even if the write fails.
    with open('use.html', 'wb') as page_file:
        page_file.write(profile_html.encode('utf-8'))
# Script entry point: runs main() whenever this file is executed or imported.
main()