叮当猫咪
# What I ended up doing was create an lxml-like go-between class. It only
# implements the features I needed, but I am sharing it anyway as a starting
# point for anyone else trying to do the same thing.
import collections.abc

try:
    # Not used by the classes below; kept so code that mixes these wrappers
    # with real lxml elements still finds the module imported.
    import lxml.etree  # noqa: F401
except ImportError:
    # The wrappers only talk to Selenium, so a missing lxml is not fatal.
    lxml = None


def _find_webdriver(seleniumTag):
    """Follow ``.parent`` links from *seleniumTag* until an object exposing
    ``execute_script`` is found, and return that object.

    Raises:
        AttributeError: if the chain ends without reaching such an object.
    """
    node = seleniumTag
    while not hasattr(node, 'execute_script'):
        parent = getattr(node, 'parent', None)
        if parent is None:
            raise AttributeError(
                'no object with execute_script() found above %r' % (seleniumTag,))
        node = parent
    return node


class AttribWrapper(collections.abc.MutableMapping):
    """
    Make something that acts like a dict to use as a mock
    lxml.etree.element attrib value.

    Every operation goes to the browser; nothing is cached on the Python side.

    see also:
        https://docs.python.org/3/reference/datamodel.html?emulating-container-types#emulating-container-types
    """

    def __init__(self, seleniumTag):
        """
        :param seleniumTag: the element whose attributes are exposed; it (or
            one of its ``.parent`` ancestors) must provide ``execute_script``.
        """
        self.seleniumTag = seleniumTag
        self.webdriver = _find_webdriver(seleniumTag)

    def hasAttribute(self, attrName):
        """
        determine if a tag has an attribute

        Returns False when ``get_attribute`` raises or yields None.
        """
        try:
            val = self.seleniumTag.get_attribute(attrName)
        except Exception:
            # Deliberately best-effort: any driver failure counts as "absent".
            return False
        return val is not None

    def getAttribute(self, attrName, default=None):
        """
        get a tag's attribute or default if not present

        *default* is returned both when ``get_attribute`` raises and when it
        reports the attribute as missing by returning None.
        """
        try:
            val = self.seleniumTag.get_attribute(attrName)
        except Exception:
            # Deliberately best-effort: fall back to the default.
            return default
        return default if val is None else val

    def setAttribute(self, attrName, attrValue):
        """
        set a tag's attribute
        """
        self.webdriver.execute_script(
            "arguments[0].setAttribute(arguments[1],arguments[2]);",
            self.seleniumTag, attrName, attrValue)

    def __getitem__(self, key):
        """Return the value of attribute *key*; raise KeyError(key) if the
        element has no attribute node of that name."""
        if key not in self.keys():
            raise KeyError(key)
        return self.getAttribute(key)

    def __setitem__(self, key, value):
        self.setAttribute(key, value)

    def __delitem__(self, key):
        """Remove attribute *key*; raise KeyError(key) if it is not present."""
        if key not in self.keys():
            raise KeyError(key)
        # The name travels as a script argument, never spliced into the
        # JavaScript source, so quotes in it cannot break the script.
        self.webdriver.execute_script(
            'arguments[0].attributes.removeNamedItem(arguments[1]);',
            self.seleniumTag, key)

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return self.webdriver.execute_script(
            'return arguments[0].attributes.length', self.seleniumTag)

    def keys(self):
        """Return the element's attribute names as a list, in DOM order.

        All names are collected in a single script call.
        """
        return self.webdriver.execute_script(
            'var a=arguments[0].attributes,r=[];'
            'for(var i=0;i<a.length;i++){r.push(a[i].nodeName);}'
            'return r;',
            self.seleniumTag)

    def __repr__(self):
        # One ("name":"value") entry per attribute, each on its own line.
        entries = ['("%s":"%s")' % (name, self.getAttribute(name))
                   for name in self.keys()]
        return "{%s}" % (', \n'.join(entries))


class LikeEtreeElement:
    """Wrap a Selenium element so it can be used somewhat like an
    lxml.etree element (``tag``, ``attrib``, child indexing/iteration)."""

    def __init__(self, seleniumTag):
        """
        :param seleniumTag: the element to wrap; it (or one of its ``.parent``
            ancestors) must provide ``execute_script``.
        """
        self.seleniumTag = seleniumTag
        self.attrib = AttribWrapper(seleniumTag)
        # AttribWrapper has already located the driver; reuse it.
        self.webdriver = self.attrib.webdriver

    @property
    def tag(self):
        """The element's tag name, as reported by ``tag_name``."""
        return self.seleniumTag.tag_name

    def getchildren(self):
        """
        get all child elements

        (NOTE: this will dip into the html every single time
        just in case things have changed.)

        The children are returned exactly as the element's ``find_elements``
        yields them; they are not wrapped in LikeEtreeElement.
        """
        # './*' is relative to this element. A leading '/' would make the
        # XPath absolute and select the document root instead of the children.
        return self.seleniumTag.find_elements('xpath', './*')

    @property
    def innerHTML(self):
        """The element's current innerHTML, read from the browser."""
        return self.webdriver.execute_script(
            'return arguments[0].innerHTML', self.seleniumTag)

    @innerHTML.setter
    def innerHTML(self, value):
        # Send the markup as a script argument so no manual escaping of
        # quotes, backslashes or newlines is needed.
        self.webdriver.execute_script(
            'arguments[0].innerHTML=arguments[1]', self.seleniumTag, str(value))

    def __getitem__(self, idx):
        return self.getchildren()[idx]

    def __iter__(self):
        return iter(self.getchildren())

    def __len__(self):
        return len(self.getchildren())