Miydream
2020-07-10 07:34
import re

import requests


class HandleLaGou(object):
    """Fetch pages from lagou.com through a single cookie-preserving session."""

    def __init__(self):
        # Use a session so that cookie information is kept across requests.
        self.lagou_session = requests.session()
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'
        }
        # Replaced by a list of matched city names once handle_city() has run.
        self.city_list = ""

    def handle_city(self):
        """Fetch the all-cities page, print it, and store the city names found.

        The raw response text is printed so the returned page can be inspected.
        Every match of the city-link pattern is stored in ``self.city_list``;
        the list is empty when the page contains no such links.
        """
        city_search = re.compile(r'zhaopin/">(.*?)</a>')
        city_url = "https://www.lagou.com/jobs/allCity.html"
        city_result = self.handle_request(method="GET", url=city_url)
        # Apply the pattern; previously it was compiled but never used.
        self.city_list = city_search.findall(city_result)
        print(city_result)

    def handle_request(self, method, url, data=None, info=None, timeout=10):
        """Send a request through the shared session and return the body text.

        Args:
            method: HTTP verb, either ``'GET'`` or ``'POST'``.
            url: Address to request.
            data: Form payload sent with ``'POST'`` requests; ignored for ``'GET'``.
            info: Currently unused; kept so existing callers keep working.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The response body decoded as text.

        Raises:
            ValueError: If ``method`` is neither ``'GET'`` nor ``'POST'``.
        """
        if method == 'GET':
            response = self.lagou_session.get(
                url=url, headers=self.header, timeout=timeout
            )
        elif method == 'POST':
            response = self.lagou_session.post(
                url=url, headers=self.header, data=data, timeout=timeout
            )
        else:
            # Fail loudly instead of implicitly returning None.
            raise ValueError("unsupported HTTP method: %r" % (method,))
        return response.text


if __name__ == '__main__':
    lagou = HandleLaGou()
    lagou.handle_city()
第一个人的问题,你试试把fiddler关掉
给大家一个解决方法吧,拉勾加入了一些新的规则。
在header里面需要添加cookie中的两个值才可以得到正确的页面:
'cookie':'user_trace_token=; __lg_stoken__='
我也获取不到,获取的信息如下:
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
<meta name="viewport"
content="width=device-width, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=no" />
<title>������</title>
<style>
html,
body {
margin: 0;
width: 100%;
height: 100%;
}
@keyframes loadingAnimation {
0% {
transform: translate3d(0, 0, 0);
}
50% {
transform: translate3d(0, -10px, 0);
}
}
.loading-info {
text-align: center;
height: 100%;
position: relative;
background: #fff;
top: 50%;
margin-top: -37px;
}
.loading-info .animation-word {
width: 100%;
}
.loading-info .animation-word p {
margin-top: 10px;
color: #9fa3b0;
}
.animation-word .component-l,
.animation-word .component-a,
.animation-word .component-g,
.animation-word .component-o,
.animation-word .component-u {
display: inline-block;
width: 40px;
height: 42px;
line-height: 42px;
font-family: Helvetica Neue, Helvetica, Arial, Hiragino Sans GB, Hiragino Sans GB W3, Microsoft YaHei UI, Microsoft YaHei, WenQuanYi Micro Hei, sans-serif;
font-weight: bolder;
font-size: 40px;
color: #eceef2;
vertical-align: top;
-webkit-animation-fill-mode: both;
animation-fill-mode: both;
-webkit-animation: loadingAnimation 0.6s infinite linear alternate;
-moz-animation: loadingAnimation 0.6s infinite linear alternate;
animation: loadingAnimation 0.6s infinite linear alternate;
}
.animation-word .component-a {
-webkit-animation-delay: 0.1s;
-moz-animation-delay: 0.1s;
animation-delay: 0.1s;
}
.animation-word .component-g {
-webkit-animation-delay: 0.2s;
-moz-animation-delay: 0.2s;
animation-delay: 0.2s;
}
.animation-word .component-o {
-webkit-animation-delay: 0.3s;
-moz-animation-delay: 0.3s;
animation-delay: 0.3s;
}
.animation-word .component-u {
-webkit-animation-delay: 0.4s;
-moz-animation-delay: 0.4s;
animation-delay: 0.4s;
}
</style>
</head>
<body>
<div class="loading-info">
<div class="info-inner">
<div class="animation-word">
<span class="component-l">L</span>
<span class="component-a">a</span>
<span class="component-g">g</span>
<span class="component-o">o</span>
<span class="component-u">u</span>
<p class="gray">正在加载中...</p>
</div>
</div>
</div>
<script>
var securityPageName = "securityCheck";
!function () {
function e(c) {
var l, m, n, o, p, q, r, e = function () {
var a = location.hostname;
return "localhost" === a || /^(\d+\.){3}\d+$/.test(a) ? a : "." + a.split(".").slice(-2).join(".")
}(),
f = function (a, b) {
var f = document.createElement("script");
f.setAttribute("type", "text/javascript"), f.setAttribute("charset", "UTF-8"), f.onload = f.onreadystatechange = function () {
d && "loaded" != this.readyState && "complete" != this.readyState || b()
}, f.setAttribute("src", a), "IFRAME" != c.tagName ? c.appendChild(f) : c.contentDocument ? c.contentDocument.body ? c.contentDocument.body.appendChild(f) : c.contentDocument.documentElement.appendChild(f) : c.document && (c.document.body ? c.document.body.appendChild(f) : c.document.documentElement.appendChild(f))
},
g = function (a) {
var b = new RegExp("(^|&)" + a + "=([^&]*)(&|$)"),
c = window.location.search.substr(1).match(b);
return null != c ? unescape(c[2]) : null
},
h = {
get: function (a) {
var b, c = new RegExp("(^| )" + a + "=([^;]*)(;|$)");
return (b = document.cookie.match(c)) ? unescape(b[2]) : null
},
set: function (a, b, c, d, e) {
var g, f = a + "=" + encodeURIComponent(b);
c && (g = new Date(c).toGMTString(), f += ";expires=" + g), f = d ? f + ";domain=" + d : f, f = e ? f + ";path=" + e : f, document.cookie = f
}
},
i = function (a) {
if (a) {
window.location.replace(a)
}
},
j = function (a, c) {
c || a.indexOf("security-check.html") > -1 ? i(c) : i(a);
};
window.location.href, l = g("seed") || "", m = g("ts"), n = g("name"),
o = g("callbackUrl"),
p = g("srcReferer") || "", "null" !== n && l && n && o, l && m && n && (f("dist/" + n + ".js", function () {
var n, a = (new Date).getTime() + 1728e5,
d = "",
f = {},
g = window.gt || c.contentWindow.gt;
try {
d = (new g()).a();
} catch (k) { console.log(k) }
if (d) {
(h.set("__lg_stoken__", d, a, e, "/"), j(p, o))
}
}))
}
function j(a) {
if (!f && !g && document.addEventListener) return document.addEventListener("DOMContentLoaded", a, !1);
if (!(h.push(a) > 1))
if (f) ! function () {
try {
document.documentElement.doScroll("left"), i()
} catch (a) {
setTimeout(arguments.callee, 0)
}
}();
else if (g) var b = setInterval(function () {
/^(loaded|complete)$/.test(document.readyState) && (clearInterval(b), i())
}, 0)
}
var d, f, g, h, i, a = 0,
b = (new Date).getTime(),
c = window.navigator.userAgent;
c.indexOf("MSIE ") > -1 && (d = !0),
f = !(!window.attachEvent || window.opera),
g = /webkit\/(\d+)/i.test(navigator.userAgent) && RegExp.$1 < 525,
h = [],
i = function () {
for (var a = 0; a < h.length; a++) h[a]()
};
j(function () {
var b, a = window.navigator.userAgent.toLowerCase();
return "micromessenger" !== a.match(/micromessenger/i) || "wkwebview" == a.match(/wkwebview/i) ?
(e(document.getElementsByTagName("head").item(0)), void 0)
:
(b = document.createElement("iframe"), b.style.height = 0, b.style.width = 0, b.style.margin = 0, b.style.padding = 0, b.style.border = "0 none", b.name = "tokenframe", b.src = "about:blank", b.attachEvent ? b.attachEvent("onload", function () {
e(b)
}) : b.onload = function () {
e(b)
}, (document.body || document.documentElement).appendChild(b), void 0)
})
}();
</script>
<script type="text/javascript" crossorigin="anonymous" src="https://www.lagou.com/upload/oss.js?v=1010"></script></body>
</html>
[Finished in 0.9s]
Python爬虫实战数据可视化分析
4276 学习 · 29 问题
相似问题
回答 1
回答 3