class UrlManager(object):
    """Track which URLs still need to be crawled and which were handed out.

    Two sets are maintained:

    * ``new_urls`` -- URLs waiting to be fetched.
    * ``old_urls`` -- URLs already returned by :meth:`get_new_url`.

    A URL is only ever queued once: it is rejected if it is present in
    either set.
    """

    def __init__(self):
        # NOTE: this must be spelled exactly ``__init__``. The previous
        # spelling (``__init____``) was an ordinary method that Python never
        # invoked on construction, so the two sets below were never created
        # and the first call to add_new_url() raised AttributeError.
        self.new_urls = set()
        self.old_urls = set()

    def add_new_url(self, url):
        """Queue a single URL unless it is None or has been seen before."""
        if url is None:
            return
        if url not in self.new_urls and url not in self.old_urls:
            self.new_urls.add(url)

    def add_new_urls(self, urls):
        """Queue every URL in *urls*; None or an empty collection is a no-op."""
        if not urls:
            return
        for url in urls:
            self.add_new_url(url)

    def has_new_url(self):
        """Return True when at least one URL is waiting to be fetched."""
        return bool(self.new_urls)

    def get_new_url(self):
        """Remove an arbitrary pending URL, record it as handed out, return it.

        Raises:
            KeyError: if no URL is pending (``set.pop`` on an empty set).
                Call :meth:`has_new_url` first to avoid this.
        """
        new_url = self.new_urls.pop()
        self.old_urls.add(new_url)
        return new_url
Traceback (most recent call last):
File "E:\java\imooc\baike_spider\spider_main.py", line 38, in <module>
obj_spider.craw(root_url)
File "E:\java\imooc\baike_spider\spider_main.py", line 13, in craw
self.urls.add_new_url(root_url)
File "E:\java\imooc\baike_spider\url_manager.py", line 9, in add_new_url
if url not in self.new_urls and url not in self.old_urls:
AttributeError: 'UrlManager' object has no attribute 'new_urls'
清波
相关分类