继续浏览精彩内容
慕课网APP
程序员的梦工厂
打开
继续
感谢您的支持,我会继续努力的
赞赏金额会直接到老师账户
将二维码发送给自己后长按识别
微信支付
支付宝支付

基于Python的HTTPS协议模拟登录+爬取页面

holdtom
关注TA
已关注
手记 1842
粉丝 240
获赞 991


    之前写的一直没成功,原因是没有使用HTTPS相关的函数。这次仔细研究了一下,有两个需要注意的点:一个是POST模拟登录的时候,header中的cookie值,不同的网站应该会有不同的要求;另一个是GET页面的时候,需要在请求头的Cookie中带上POST得到的response中的set-cookie值,这样才能复用登录成功后的会话。

    写完POST和GET页面后,顺便写了个简单的命令行实现。

import httplib, urllib

import urllib2

import cookielib

import sys

# Server that both the login request and the page request connect to.
host = 'buuuuuuu.knight.com'

# Scratch file: the downloaded page is stored here and ParseHtml() reads it back.
file_text = "build_change.txt"

# Extracted sections, keyed by their 0-based position in the page.
resultTable = {}

def Login(username, password, csrf='Gy2O70iSjOTbWhWgBLvf4HDuf4jUe4RP'):
    """POST the login form to ``host`` over HTTPS and return the session cookie.

    The CSRF token is sent twice: as the ``csrfmiddlewaretoken`` form field
    and as the ``csrftoken`` request cookie.

    Args:
        username: Account name placed in the ``username`` form field.
        password: Password placed in the ``password`` form field.
        csrf: CSRF token to present to the server.

    Returns:
        The value of the response's ``set-cookie`` header, or None when the
        response carries no such header.
    """
    login_path = '/login/'
    # Derive Referer/Origin from ``host`` so they cannot drift from the
    # server the connection is actually opened to.
    origin = 'https://%s' % host

    body = urllib.urlencode({
        'username': username,
        'password': password,
        'next': '',
        'csrfmiddlewaretoken': csrf,
    })
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Connection': 'keep-alive',
        'Cookie': 'csrftoken=%s' % csrf,
        'Referer': origin + login_path,
        'Origin': origin,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    }

    conn = httplib.HTTPSConnection(host, 443)
    try:
        conn.request("POST", login_path, body, headers)
        response = conn.getresponse()
        # The two spaces after the colon reproduce the historical output.
        print('Login:  %s %s' % (response.status, response.reason))
        return response.getheader("set-cookie")
    finally:
        # Release the socket even when the request or response fails.
        conn.close()

 

def GetHtml(_url, cookie):
    """GET ``_url`` from ``host`` over HTTPS and save the body to ``file_text``.

    Args:
        _url: Request path sent on the GET line.
        cookie: Value for the ``Cookie`` request header (the caller passes
            what Login() returned). When empty or None the header is
            omitted instead of sending a meaningless value.
    """
    get_headers = {
        # Must name the server we actually connect to.
        'Host': host,
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    }
    if cookie:
        get_headers['Cookie'] = cookie

    conn = httplib.HTTPSConnection(host)
    try:
        conn.request("GET", _url, None, get_headers)
        res2 = conn.getresponse()
        print("Get %s: %s %s" % (_url, res2.status, res2.reason))
        data = res2.read()
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()

    # Write to the same path ParseHtml() reads from.
    with open(file_text, "w") as fp:
        fp.write(data)

 

def ParseHtml():
    """Collect every ``class="change-body"`` section of ``file_text``.

    The file is scanned line by line. When a line contains
    ``class="change-body"``, a section is started with the text between the
    first and second ``>`` on that line; every following line is appended
    up to and including the first one that contains ``</div>``. Sections
    are stored in ``resultTable`` under consecutive integer keys from 0.
    """
    _pos = 0
    with open(file_text, "r") as fp:
        # One shared iterator: the inner loop consumes the section's lines
        # and the outer loop resumes right after them.
        lines = iter(fp)
        for content in lines:
            if 'class="change-body"' not in content:
                continue
            pieces = content.split(">")
            # Guard against a marker line without any '>' (the tag may be
            # closed on a later line).
            section = [pieces[1] if len(pieces) > 1 else ""]
            # NOTE(review): a "</div>" on the marker line itself is not
            # treated as the end of the section; only later lines are checked.
            for line in lines:
                section.append(line)
                if "</div>" in line:
                    break
            resultTable[_pos] = "".join(section)
            _pos += 1
    print("Parse html success.")

def GenerateResultTxt():
    """Write the sections held in ``resultTable`` to build_change_result.txt.

    Each section is preceded by a dashed separator line. Sections are
    written in ascending key order so the report follows the order in which
    they were stored, rather than the dictionary's unspecified order.
    """
    with open("build_change_result.txt", "w") as f:
        for m in sorted(resultTable):
            f.write("-------------------------------------------------------------------------------------------\n")
            f.write(resultTable[m])
    print("Generate result success : build_change_result.txt .")

def Help():
    """Print the command-line usage summary, one line per option or example."""
    usage = (
        '-h    :    help',
        '-u    :    username(must)',
        '-p    :    password(must)',
        '-c    :    csrftoken(optional)',
        '-s    :    sandbox build id(must)',
        'For example:',
        '[1]  python BuildChange.py -h',
        # Examples 2 and 3 used to misspell the script name as "BuildChang.py".
        '[2]  python BuildChange.py -u u -p p -s s1 s2',
        '[3]  python BuildChange.py -u u -p p -c c -s s1 s2',
    )
    for line in usage:
        print(line)

 

def ParseParam(com):
    """Parse the command line and run login, download, parse and report.

    Recognised flags: ``-h``, ``-u <username>``, ``-p <password>``,
    ``-c <csrftoken>`` (optional) and ``-s <id1> <id2>``. Flags are only
    parsed when ``com`` has 8 entries (without ``-c``) or 10 entries (with
    ``-c``). Any other shape, or a missing mandatory value, prints a
    parameter error.

    Args:
        com: Argument vector laid out like ``sys.argv`` (``com[0]`` is the
            script name).
    """
    length = len(com)

    # Asking for help is not an error: show the usage text and stop.
    if length >= 2 and com[1] == '-h':
        Help()
        return

    username = ""
    password = ""
    csrf = ""
    sid1 = ""
    sid2 = ""

    if length == 8 or length == 10:
        # An explicit index lets us really step over the values a flag
        # consumed, so a value is never re-read as if it were a flag.
        i = 1
        while i < length:
            flag = com[i]
            if flag == '-u' and i < (length - 1):
                username = com[i + 1]
                i += 2
            elif flag == '-p' and i < (length - 1):
                password = com[i + 1]
                i += 2
            elif flag == '-c' and i < (length - 1):
                csrf = com[i + 1]
                i += 2
            elif flag == '-s' and i < (length - 2):
                sid1 = com[i + 1]
                sid2 = com[i + 2]
                i += 3
            else:
                i += 1

    if username == "" or password == "" or sid1 == "" or sid2 == "":
        print('[Error] Parameter error!')
        print('[Error] You can use \"python BuildChange.py -h\" to see how can use this script. ')
        return

    # Without -c, Login() falls back to its built-in default token.
    if csrf == "":
        cookie = Login(username, password)
    else:
        cookie = Login(username, password, csrf)

    # NOTE(review): the doubled slashes are sent to the server exactly as
    # written; confirm that this is the path the server expects.
    _url = "//changelog//between//%s//and//%s/" % (sid1, sid2)
    GetHtml(_url, cookie)
    ParseHtml()
    GenerateResultTxt()

# Example invocation:
# C:\Python27\python.exe C:\Users\knight\Desktop\build\BuildChange.py -u xux -p KKKKKKKK -s 1859409 1858525

# Run the command-line workflow only when this file is executed as a script.
if __name__ == "__main__":

 # Hand over the raw argument vector; ParseParam does all validation itself.
 ParseParam(sys.argv)

 

©著作权归作者所有:来自51CTO博客作者风刃的原创作品,如需转载,请注明出处,否则将追究法律责任


打开App,阅读手记
0人推荐
发表评论
随时随地看视频慕课网APP