为了账号安全,请及时绑定邮箱和手机立即绑定

从第一个搜索结果返回 URL

从第一个搜索结果返回 URL

侃侃尔雅 2023-09-04 16:04:49
我有一个包含大约 25,000 个公司关键字的 Excel 工作簿,我想从中获取公司网站 URL。我希望运行一个 VBA 脚本,该脚本可以将这些关键字作为 Google 搜索来运行,并将第一个结果的 URL 拉入电子表格中。我发现了一个类似的线程。这样做的结果是偶然的;某些关键字会在下一列中返回 URL,其他关键字则保留空白。它还似乎在第一个搜索结果中提取了 Google 优化子链接的 URL,而不是主网站 URL:Google 搜索结果示例然后我在这里找到了下面的代码,我在包含 1,000 个关键字的示例列表上运行了该代码。该博客的作者规定该代码适用于 Mozilla Firefox。我测试了他也编写的 IE 代码,但这并没有达到相同的结果(它添加了由搜索结果中的描述性文本组成的超链接,而不是原始 URL)。Firefox 代码一直运行到第 714行,然后返回错误消息“运行时错误 91:未设置对象变量或 with 块变量”显示成功结果和宏停止的行的电子表格布局Sub GoogleURL ()    Dim url As String, lastRow As Long    Dim XMLHTTP As Object    Dim html As Object    Dim objResultDiv As Object    Dim objH As Object    lastRow = Range(“A” & Rows.Count).End(xlUp).Row    For i = 2 To lastRow        url = “https://www.google.co.uk/search?q=” & Cells(i, 1) & “&rnd=” & WorksheetFunction.RandBetween(1, 10000)        Set XMLHTTP = CreateObject(“MSXML2.serverXMLHTTP”)        XMLHTTP.Open “GET”, url, False        XMLHTTP.setRequestHeader “Content-Type”, “text/xml”        XMLHTTP.setRequestHeader “User-Agent”, “Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0”        XMLHTTP.send        Set html = CreateObject(“htmlfile”)        html.body.innerHTML = XMLHTTP.ResponseText        Set objResultDiv = html.getelementbyid(“rso”)        Set objH = objResultDiv.getelementsbytagname(“h3”)(0)        Cells(i, 2).Value = objH.innerText        Set html = CreateObject(“htmlfile”)        html.body.innerHTML = XMLHTTP.ResponseText        Set objResultDiv = html.getelementbyid(“rso”)        Set objH = objResultDiv.getelementsbytagname(“cite”)(0)        Cells(i, 3).Value = objH.innerText        DoEvents    NextEnd Sub
查看完整描述

1 回答

?
九州编程

TA贡献1785条经验 获得超4个赞

由于Firefox是微软支持范围内的第三方浏览器,我可以帮你查看IE浏览器的VBA代码。

您的要求是将描述和链接存储在单独的列中。

我尝试根据您的要求修改该示例代码。

这是该示例的修改后的代码。

Option Explicit

Const TargetItemsQty = 1 ' results for each keyword


Sub GWebSearchIECtl()


    Dim objSheet As Worksheet

    Dim objIE As Object

    Dim x As Long

    Dim y As Long

    Dim strSearch As String

    Dim lngFound As Long

    Dim st As String

    Dim colGItems As Object

    Dim varGItem As Variant

    Dim strHLink As String

    Dim strDescr As String

    Dim strNextURL As String


    Set objSheet = Sheets("Sheet1")

    Set objIE = CreateObject("InternetExplorer.Application")

    objIE.Visible = True ' for debug or captcha request cases

    y = 1 ' start searching for the keyword in the first row

    With objSheet

        .Select

        .Range(.Columns("B:B"), .Columns("B:B").End(xlToRight)).Delete ' clear previous results

        .Range(.Columns("C:C"), .Columns("C:C").End(xlToRight)).Delete ' clear previous results

        .Range("A1").Select

        Do Until .Cells(y, 1) = ""

            x = 2 ' start writing results from column B

            .Cells(y, 1).Select

            strSearch = .Cells(y, 1) ' current keyword

            With objIE

                lngFound = 0

                .navigate "https://www.google.com/search?q=" & EncodeUriComponent(strSearch) ' go to first search results page

                Do

                    Do While .Busy Or Not .READYSTATE = 4: DoEvents: Loop ' wait IE

                    Do Until .document.READYSTATE = "complete": DoEvents: Loop ' wait document

                    Do While TypeName(.document.getelementbyid("res")) = "Null": DoEvents: Loop ' wait [#res] element

                    Set colGItems = .document.getelementbyid("res").getElementsByClassName("g") ' collection of search result [.g] items

                    For Each varGItem In colGItems ' process each item in collection

                        If varGItem.getelementsbytagname("a").Length > 0 And varGItem.getElementsByClassName("st").Length > 0 Then ' must have hyperlink and description

                            strHLink = varGItem.getelementsbytagname("a")(0).href ' get first hyperlink [a] found in current item

                            strDescr = GetInnerText(varGItem.getElementsByClassName("st")(0).innerHTML) ' get first description [span.st] found in current item

                            lngFound = lngFound + 1

                            'Debug.Print (strHLink)

                            'Debug.Print (strDescr)

                            With objSheet ' put result into cell

                                 .Cells(y, x).Value = strDescr

                                 .Hyperlinks.Add .Cells(y, x + 1), strHLink

                                .Cells(y, x).WrapText = True

                                x = x + 1 ' next column

                            End With

                            If lngFound = TargetItemsQty Then Exit Do ' continue with next keyword - necessary quantity of the results for current keyword found

                        End If

                        DoEvents

                    Next

                    If TypeName(.document.getelementbyid("pnnext")) = "Null" Then Exit Do ' continue with next keyword - no [a#pnnext.pn] next page button exists

                    strNextURL = .document.getelementbyid("pnnext").href ' get next page url

                    .navigate strNextURL ' go to next search results page

                Loop

            End With

            y = y + 1 ' next row

        Loop

    End With

    objIE.Quit


    ' google web search page contains the elements:

    ' [div#res] - main search results block

    ' [div.g] - each result item block within [div#res]

    ' [a] - hyperlink ancor(s) within each [div.g]

    ' [span.st] - description(s) within each [div.g]

    ' [a#pnnext.pn] - hyperlink ancor to the next search results page


End Sub


Function EncodeUriComponent(strText As String) As String

    Static objHtmlfile As Object


    If objHtmlfile Is Nothing Then

        Set objHtmlfile = CreateObject("htmlfile")

        objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"

    End If

    EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)

End Function


Function GetInnerText(strText As String) As String

    Static objHtmlfile As Object


    If objHtmlfile Is Nothing Then

        Set objHtmlfile = CreateObject("htmlfile")

        objHtmlfile.Open

        objHtmlfile.Write "<body></body>"

    End If

    objHtmlfile.body.innerHTML = strText

    GetInnerText = objHtmlfile.body.innerText

End Function


查看完整回答
反对 回复 2023-09-04
  • 1 回答
  • 0 关注
  • 100 浏览

添加回答

举报

0/150
提交
取消
意见反馈 帮助中心 APP下载
官方微信