# -*- coding:UTF-8 -*-
"""Drive Internet Explorer through COM (pywin32) and read or edit its DOM.

The module exposes a single helper class, ``COM_IE``, that attaches to an
already open Internet Explorer window showing a given URL (or starts a new
one), waits for the page to load and offers small helpers to look up DOM
nodes by tag name and attribute value.
"""
import sys
import time
from time import sleep

import win32com.client
from win32com.client import DispatchEx

stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr
if sys.version_info[0] < 3:
    # HACK (Python 2 only): make UTF-8 the implicit str<->unicode codec so
    # that str() on attribute values containing non-ASCII text does not
    # raise.  reload(sys) resets the standard streams, hence the
    # save/restore around it.
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")
    sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr


class COM_IE(object):
    """Thin wrapper around an ``InternetExplorer.Application`` COM object.

    Attributes:
        url: URL passed to the constructor.
        ie: the COM browser object in use.
        document: ``body.innerHTML`` captured by the last ``WaitIE`` call.
        text: ``body.innerText`` captured by the last ``WaitIE`` call.
        charset: document charset captured by the last ``WaitIE`` call.
        Visible: visibility flag used for newly created windows; assigning
            it also updates the attached browser window.
    """

    # CLSID of the ShellWindows collection (enumerates open shell/IE windows).
    SHELL_WINDOWS_CLSID = '{9BA05972-F6A8-11CF-A442-00A0C90A8F39}'
    # ReadyState value that WaitIE treats as "page fully loaded".
    READYSTATE_COMPLETE = 4
    # Attributes collected by NodesByAttr when no attribute name is given.
    DEFAULT_NODE_ATTRS = (
        "id", "nodeName", "nodeType", "nodeValue", "className",
        "innerHTML", "innerText", "href", "name", "title", "type", "value",
    )

    # Fallback for instances whose __init__ did not run.
    _visible = 1

    def __init__(self, url=None):
        """Attach to an IE window already showing *url*, or open a new one."""
        self.url = url
        self.ie = None
        self.Visible = 1
        self.ie = self.openIE(url)
        self.document = ""
        self.text = ""
        self.charset = None

    @property
    def Visible(self):
        """Visibility flag (1 = shown, 0 = hidden).

        Originally the class had both an instance attribute and a method
        named ``Visible``; the attribute shadowed the method, so calling
        ``obj.Visible()`` raised ``TypeError``.  A property keeps attribute
        reads and writes working and makes writes take effect on the window.
        """
        return self._visible

    @Visible.setter
    def Visible(self, value):
        self._visible = value
        browser = getattr(self, "ie", None)
        # No browser is attached yet while __init__ is still running.
        if browser is not None:
            browser.Visible = value

    def ExistIE(self, url):
        """Return the open shell window whose LocationURL equals *url*.

        Returns:
            The matching window object, or None when there is none.
        """
        windows = DispatchEx(self.SHELL_WINDOWS_CLSID)
        if len(windows) == 0:
            return None
        for window in windows:
            if window.LocationURL == url:
                return window
        return None

    def NewIE(self, url):
        """Start a new Internet Explorer instance and navigate it to *url*."""
        browser = DispatchEx("InternetExplorer.Application")
        browser.Visible = self.Visible
        browser.Navigate(url)
        return browser

    def openIE(self, url):
        """Return an existing window showing *url*, else a freshly opened one."""
        browser = self.ExistIE(url)
        # Identity test: never route a None check through the COM proxy's ==.
        if browser is None:
            browser = self.NewIE(url)
        return browser

    def WaitIE(self, timeout=None, poll_interval=1):
        """Block until the page is loaded, then cache charset, HTML and text.

        Args:
            timeout: maximum number of seconds to wait; None (the default)
                waits indefinitely, as the original implementation did.
            poll_interval: seconds to sleep between ReadyState checks.

        Raises:
            RuntimeError: if *timeout* elapses before the page is loaded.
        """
        deadline = None if timeout is None else time.time() + timeout
        while self.ie.ReadyState != self.READYSTATE_COMPLETE:
            if deadline is not None and time.time() >= deadline:
                raise RuntimeError(
                    "page did not finish loading within %s seconds" % timeout)
            sleep(poll_interval)
        doc = self.ie.Document
        self.charset = doc.charset
        self.document = doc.body.innerHTML
        self.text = doc.body.innerText

    def GetBody(self):
        """Wait for the page to load and return the document's body node."""
        self.WaitIE()
        return self.ie.Document.body

    def GetNodes(self, parentNode, tag):
        """Return a list of the *tag* elements found under *parentNode*.

        Example::

            coldiv = ie.GetNodes(body, "div")
        """
        return [node for node in parentNode.getElementsByTagName(tag)]

    def NodeByAttr(self, Nodes, nodeattr, nodeval):
        """Return the first node whose *nodeattr* equals *nodeval*, else None.

        The attribute value is converted with ``str()`` before comparison.

        Example::

            div_id_editor = ie.NodeByAttr(coldiv, "id", "editor_ifr")
        """
        for node in Nodes:
            if str(node.getAttribute(nodeattr)) == nodeval:
                return node
        return None

    def _find_node(self, body, node_type, node_attr, node_attr_val):
        """Return the first *node_type* element under *body* matching the attribute.

        Raises:
            AttributeError: if no element matches.  The type is the one the
                original code raised implicitly when it dereferenced None.
        """
        candidates = self.GetNodes(body, node_type)
        node = self.NodeByAttr(candidates, node_attr, node_attr_val)
        if node is None:
            raise AttributeError(
                "no <%s> node with %s=%r" % (node_type, node_attr, node_attr_val))
        return node

    def SetNodeHtml(self, body, node_type, node_attr, node_attr_val,
                    node_inner_html):
        """Set ``innerHTML`` of the first matching node.

        Raises:
            AttributeError: if no node matches.
        """
        node = self._find_node(body, node_type, node_attr, node_attr_val)
        node.innerHTML = node_inner_html

    def SetNodeVal(self, body, node_type, node_attr, node_attr_val, node_value):
        """Set ``value`` of the first matching node.

        Raises:
            AttributeError: if no node matches.
        """
        node = self._find_node(body, node_type, node_attr, node_attr_val)
        node.value = node_value

    def NodeClick(self, body, node_type, node_attr, node_attr_val):
        """Call ``click()`` on the first matching node.

        Raises:
            AttributeError: if no node matches.
        """
        self._find_node(body, node_type, node_attr, node_attr_val).click()

    def GetNodeHtml(self, body, node_type, node_attr, node_attr_val):
        """Return ``innerHTML`` of the first matching node.

        Raises:
            AttributeError: if no node matches.
        """
        return self._find_node(body, node_type, node_attr, node_attr_val).innerHTML

    def GetNodeVal(self, body, node_type, node_attr, node_attr_val):
        """Return ``value`` of the first matching node.

        Raises:
            AttributeError: if no node matches.
        """
        return self._find_node(body, node_type, node_attr, node_attr_val).value

    # multiple nodes
    def NodesByAttr(self, Nodes, nodeattr=None, nodeval=None):
        """Collect attribute values from every node in *Nodes*.

        Args:
            Nodes: iterable of DOM nodes.
            nodeattr: attribute to read.  When falsy, every attribute listed
                in ``DEFAULT_NODE_ATTRS`` is read instead.
            nodeval: when truthy (and *nodeattr* is given), only nodes whose
                ``str()``-converted attribute equals it are included.

        Returns:
            A list with one ``{attribute: value}`` dict per selected node.
        """
        if not nodeattr:
            return [
                {attr: node.getAttribute(attr) for attr in self.DEFAULT_NODE_ATTRS}
                for node in Nodes
            ]
        value_list = []
        for node in Nodes:
            value = node.getAttribute(nodeattr)
            if not nodeval or str(value) == nodeval:
                value_list.append({nodeattr: value})
        return value_list

    # multiple nodes
    def GetNodesVal(self, body, node_type, node_attr=None, node_val=None):
        """Run ``NodesByAttr`` over every *node_type* element under *body*."""
        tags = self.GetNodes(body, node_type)
        return self.NodesByAttr(tags, node_attr, node_val)

    def Quit(self):
        """Close the browser window."""
        self.ie.Quit()


def main():
    """Demo: list the text and target of every link on a sample page."""
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input      # Python 3

    url = "http://blog.csdn.net/agoago_2009/"
    browser = COM_IE(url)
    try:
        body = browser.GetBody()
        # To collect only the href of each link instead:
        #     a_list = browser.GetNodesVal(body, "a", "href")
        a_list = browser.GetNodesVal(body, "a")
        for a in a_list:
            print("%s %s" % (a.get("innerText"), a.get("href")))
        # Example of filling in and submitting a form:
        #     browser.SetNodeVal(body, "input", "id", "inputSearch", "COM")
        #     browser.NodeClick(body, "input", "id", "btnSubmit")
        #     browser.WaitIE()
        #     print(browser.document.strip()[:100])
        #     print(browser.charset)
        #     print(browser.text.strip()[:100])
        read_line('quit')
    finally:
        # Close the browser even if the demo fails part-way through.
        browser.Quit()


if __name__ == "__main__":
    main()
COM-IE-(2)
转载:本文章为转载内容,我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题,欢迎原作者联系我们进行内容更正或删除文章。
上一篇:站点技术---301重定向
下一篇:自己定义定时器(Timer)
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章
-
COM笔记 (2)COM中事件驱动技术探讨 zz
COM中事件驱动技术探讨 邹飞 版本 v1.0 2004年7月 目
连接点 事件驱动 #include ico c++ -
【com编程】IE浏览器(右键扩展功能总结版)
序言:大家是不是莫名其妙,我怎么什么都搀和上两脚。搞起这个高深的COM编程来了。呵呵……这是帮同学做的一个小东西,由于以前拿这个比赛过,今天由于业务需要又用上了.
右键 javascript 绝对路径 情感分析 microsoft