def _get_new_data(self, page_url, soup): res_data = {} # Url res_data['url'] = page_url print 'now page_url add in res_data !' ''' <div class="antialiased sans-serif text-lg _2c text-center bold truncate">All Pins</div> (Pinterest) <img id="i-f88v" src="https://s-media-cache-ak0.pinimg.com/236x/0a/ad/cc/0aadcce6c2daba0e0869e6fc6ee9649d.jpg" class="pinImg fullBleed loaded" alt="short curly bob hairstyle"> (Pinterest) #mg_node = soup.find('img', id=re.compile(r"^i-.+")).find(" ", src=re.compile(r"https://s-media-cache-ak0.pinimg.com/\d.+?")) ''' img_node = soup.find(re.compile(r"https://s-media-cache-ak0.pinimg.com/\d.+?")) # Add the img in res_data res_data["img"] = img_node print 'now img_node add in res_data !' ''' <p class="pinDescription">great piece for the living room or bedroom. I love the...</p> <div class="pinMetaWrapper"> ''' summary_node = soup.find('div', class_="pinMetaWrapper") # Add the summary in res_data res_data["summary"] = summary_node.get_text() print 'now summary_node add in res_data !' return res_data