#coding:utf8
from bs4 import BeautifulSoup as bs
import urllib2
import re
import pymysql
# Fetch one Baidu Baike page, print every anchor whose href starts with
# "http://baike.baidu", and store each (link text, href) pair in the MySQL
# table `url` (columns `urlname`, `urlhref`).
url = "http://baike.baidu.com/view/20965.htm"
resp = urllib2.urlopen(url).read().decode("utf-8")
soup = bs(resp, "html.parser")
links = soup.find_all("a", href=re.compile(r"^http://baike.baidu"))

# MySQL quotes identifiers with backticks. Single quotes create string
# literals, so "insert into 'url'(...)" is rejected with error 1064.
sql = "insert into `url`(`urlname`,`urlhref`) values(%s,%s)"

# Open a single connection for the whole run instead of one per link.
# A failed connection is reported but does not stop the links from being
# printed; the inserts are simply skipped.
conn = None
cursor = None
try:
    conn = pymysql.connect(
        host="127.0.0.1",
        port=3306,
        user="root",
        passwd="root",
        db="test",
        charset="utf8")
    cursor = conn.cursor()
except Exception as e:
    print(e)

try:
    for link in links:
        text = link.get_text()
        href = link["href"]
        print(text + ": " + href)
        print("----------------------------")
        if cursor is None:
            continue
        try:
            # Values are passed as parameters so the driver escapes them.
            cursor.execute(sql, (text, href))
            conn.commit()
        except Exception as e:
            # Best effort: report the failed row and carry on with the next.
            print(e)
finally:
    # Only close what was actually opened; the original closed
    # unconditionally and would raise NameError after a failed connect.
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()
What is free software? : http://baike.baidu.com/redirect/5e79r1K4R-2NomzcGBBrbrhZlzPTF6qJ-9jFYpyAZVCkUn8WxubcnZcF8UNczadKXEZLleFlvgmO2VuBgV5GFsx1We0EldQ-Wn8
----------------------------
(1064, u"You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ''url'('urlname','urlhref') values('What is free software?\xa0','http://baike.baidu' at line 1")
GNU General Public License : http://baike.baidu.com/redirect/b225K4w1tT1KqEZN29N--HgZRFhWwPsGeFgE0SekI62-tSjC3z-upHovI9zUdmYcMi8dBxcoW49LoCGdjljImC6P
----------------------------
(1064, u"You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ''url'('urlname','urlhref') values('GNU General Public License\xa0','http://baike.b' at line 1")
Explaining BSD : http://baike.baidu.com/redirect/ef97prp0tq724owayibkn13MQAjSUk8mg15fMOwOKFTg_1bDygSpMw1P9SUU3gsb9LLnIE685k5DFdqA2-JxNykOfop8UCRiWHTLu3L8I2itYKHxbHmgVerJILjiY3U
----------------------------
(1064, u"You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ''url'('urlname','urlhref') values('Explaining BSD\xa0','http://baike.baidu.com/red' at line 1")
[Finished in 1.3s]
Suggestion: try changing charset="utf8" to charset="utf8mb4".