猿问

从第一个搜索结果返回 URL

我有一个包含大约 25,000 个公司关键字的 Excel 工作簿,我想从中获取公司网站 URL。

我希望运行一个 VBA 脚本,该脚本可以将这些关键字作为 Google 搜索来运行,并将第一个结果的 URL 拉入电子表格中。

我发现了一个类似的线程
这样做的结果是偶然的;某些关键字会在下一列中返回 URL,其他关键字则保留空白。
它还似乎在第一个搜索结果中提取了 Google 优化子链接的 URL,而不是主网站 URL:Google 搜索结果示例

然后我在这里找到了下面的代码,我在包含 1,000 个关键字的示例列表上运行了该代码。该博客的作者规定该代码适用于 Mozilla Firefox。

我测试了他也编写的 IE 代码,但这并没有达到相同的结果(它添加了由搜索结果中的描述性文本组成的超链接,而不是原始 URL)。

Firefox 代码一直运行到第 714,然后返回错误消息

运行时错误 91:未设置对象变量或 with 块变量

显示成功结果和宏停止的行的电子表格布局

Sub GoogleURL ()


    Dim url As String, lastRow As Long

    Dim XMLHTTP As Object

    Dim html As Object

    Dim objResultDiv As Object

    Dim objH As Object


    lastRow = Range(“A” & Rows.Count).End(xlUp).Row


    For i = 2 To lastRow


        url = “https://www.google.co.uk/search?q=” & Cells(i, 1) & “&rnd=” & WorksheetFunction.RandBetween(1, 10000)


        Set XMLHTTP = CreateObject(“MSXML2.serverXMLHTTP”)


        XMLHTTP.Open “GET”, url, False


        XMLHTTP.setRequestHeader “Content-Type”, “text/xml”


        XMLHTTP.setRequestHeader “User-Agent”, “Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0”


        XMLHTTP.send


        Set html = CreateObject(“htmlfile”)


        html.body.innerHTML = XMLHTTP.ResponseText


        Set objResultDiv = html.getelementbyid(“rso”)


        Set objH = objResultDiv.getelementsbytagname(“h3”)(0)


        Cells(i, 2).Value = objH.innerText


        Set html = CreateObject(“htmlfile”)


        html.body.innerHTML = XMLHTTP.ResponseText


        Set objResultDiv = html.getelementbyid(“rso”)


        Set objH = objResultDiv.getelementsbytagname(“cite”)(0)


        Cells(i, 3).Value = objH.innerText


        DoEvents


    Next


End Sub


侃侃尔雅
浏览 115回答 1
1回答

九州编程

由于Firefox是微软支持范围内的第三方浏览器,我可以帮你查看IE浏览器的VBA代码。您的要求是将描述和链接存储在单独的列中。我尝试根据您的要求修改该示例代码。这是该示例的修改后的代码。Option ExplicitConst TargetItemsQty = 1 ' results for each keywordSub GWebSearchIECtl()    Dim objSheet As Worksheet    Dim objIE As Object    Dim x As Long    Dim y As Long    Dim strSearch As String    Dim lngFound As Long    Dim st As String    Dim colGItems As Object    Dim varGItem As Variant    Dim strHLink As String    Dim strDescr As String    Dim strNextURL As String    Set objSheet = Sheets("Sheet1")    Set objIE = CreateObject("InternetExplorer.Application")    objIE.Visible = True ' for debug or captcha request cases    y = 1 ' start searching for the keyword in the first row    With objSheet        .Select        .Range(.Columns("B:B"), .Columns("B:B").End(xlToRight)).Delete ' clear previous results        .Range(.Columns("C:C"), .Columns("C:C").End(xlToRight)).Delete ' clear previous results        .Range("A1").Select        Do Until .Cells(y, 1) = ""            x = 2 ' start writing results from column B            .Cells(y, 1).Select            strSearch = .Cells(y, 1) ' current keyword            With objIE                lngFound = 0                .navigate "https://www.google.com/search?q=" & EncodeUriComponent(strSearch) ' go to first search results page                Do                    Do While .Busy Or Not .READYSTATE = 4: DoEvents: Loop ' wait IE                    Do Until .document.READYSTATE = "complete": DoEvents: Loop ' wait document                    Do While TypeName(.document.getelementbyid("res")) = "Null": DoEvents: Loop ' wait [#res] element                    Set colGItems = .document.getelementbyid("res").getElementsByClassName("g") ' collection of search result [.g] items                    For Each varGItem In colGItems ' process each item in collection                        If varGItem.getelementsbytagname("a").Length > 0 And varGItem.getElementsByClassName("st").Length > 0 Then ' must have hyperlink and description                            strHLink = varGItem.getelementsbytagname("a")(0).href ' get first hyperlink [a] found in current item                            strDescr = GetInnerText(varGItem.getElementsByClassName("st")(0).innerHTML) ' get first description [span.st] found in current item                            lngFound = lngFound + 1                            'Debug.Print (strHLink)                            'Debug.Print (strDescr)                            With objSheet ' put result into cell                                 .Cells(y, x).Value = strDescr                                 .Hyperlinks.Add .Cells(y, x + 1), strHLink                                .Cells(y, x).WrapText = True                                x = x + 1 ' next column                            End With                            If lngFound = TargetItemsQty Then Exit Do ' continue with next keyword - necessary quantity of the results for current keyword found                        End If                        DoEvents                    Next                    If TypeName(.document.getelementbyid("pnnext")) = "Null" Then Exit Do ' continue with next keyword - no [a#pnnext.pn] next page button exists                    strNextURL = .document.getelementbyid("pnnext").href ' get next page url                    .navigate strNextURL ' go to next search results page                Loop            End With            y = y + 1 ' next row        Loop    End With    objIE.Quit    ' google web search page contains the elements:    ' [div#res] - main search results block    ' [div.g] - each result item block within [div#res]    ' [a] - hyperlink ancor(s) within each [div.g]    ' [span.st] - description(s) within each [div.g]    ' [a#pnnext.pn] - hyperlink ancor to the next search results pageEnd SubFunction EncodeUriComponent(strText As String) As String    Static objHtmlfile As Object    If objHtmlfile Is Nothing Then        Set objHtmlfile = CreateObject("htmlfile")        objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"    End If    EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)End FunctionFunction GetInnerText(strText As String) As String    Static objHtmlfile As Object    If objHtmlfile Is Nothing Then        Set objHtmlfile = CreateObject("htmlfile")        objHtmlfile.Open        objHtmlfile.Write "<body></body>"    End If    objHtmlfile.body.innerHTML = strText    GetInnerText = objHtmlfile.body.innerTextEnd Function
随时随地看视频慕课网APP

相关分类

Html5
我要回答