提取电子邮件 HTML 元素

我目前先用了下面这几行代码


' Collection of search-result cards; each card has the CSS class "mod-Treffer".
Dim post As Object
Set post = html.querySelectorAll(".mod-Treffer")

For i = 0 To post.Length - 1
    With post.Item(i)
        ' Listing title: first <h2> inside the card.
        Debug.Print .getElementsByTagName("h2")(0).innerText
        ' Phone number: second <p> inside the card's first <address>.
        Debug.Print .getElementsByTagName("Address")(0).getElementsByTagName("p")(1).innerText

        'I am stuck with extracting the email
        'HERE
    End With
Next i

此外,有时 post 对象中没有电子邮件信息,因此我只需要在找到电子邮件时才提取。


这就是到目前为止的代码


    Const sURL As String = "https://www.gelbeseiten.de/Suche/Ambulante%20Pflegedienste/Bundesweit"

Dim http As MSXML2.XMLHTTP60, html As HTMLDocument


Set http = New MSXML2.XMLHTTP60

Set html = New MSHTML.HTMLDocument


With http

    .Open "Get", sURL, False

    .send

    html.body.innerHTML = .responseText

End With


Dim post As Object


Set post = html.querySelectorAll(".mod-Treffer")


Dim i As Long, r As Long

Range("A1").Resize(1, 3).Value = Array("Title", "Phone", "Email")

r = 2


For i = 0 To post.Length - 1

Cells(r, 1).Value = post.Item(i).getElementsByTagName("h2")(0).innerText

Cells(r, 2).Value = post.Item(i).getElementsByTagName("Address")(0).getElementsByTagName("p")(1).innerText



Next i

这是电子邮件部分的快照

https://img1.sycdn.imooc.com/65376df10001c83d11630417.jpg

慕容3067478
浏览 187回答 3
3回答

隔江千里

原问题:在这种情况下,我会使用带 contains 运算符的 attribute = value 选择器,通过字符串 mailto 来定位 href 属性。添加 CSS 选择器:

[href*=mailto]

如果使用 querySelectorAll("[href*=mailto]"),您可以测试 .Length 属性是否大于 0;或者使用 querySelector 并测试 If Not querySelector("[href*=mailto]") Is Nothing。如果把结果赋给一个变量:

Dim ele As Object
Set ele = html.querySelector("[href*=mailto]")
If Not ele Is Nothing Then
    Debug.Print ele.href  'do something with the href to parse out email
End If

更新的问题:对于更新后的问题,我会把 nodeList 中当前节点的 outerHTML 转移到一个代理 HTMLDocument 变量中,这样就可以再次利用 querySelector 方法。我会按类名定位电子邮件。

Option Explicit

' Downloads the search-results page at URL and writes one row per ".mod-Treffer"
' card to the active sheet: title (column 1), phone (column 2), email (column 3).
' Phone and email cells are left untouched when the card has no such element.
Public Sub GetListingInfo()
    Const URL As String = "https://www.gelbeseiten.de/Suche/Ambulante%20Pflegedienste/Bundesweit"
    Dim http As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument

    Set http = New MSXML2.XMLHTTP60
    Set html = New MSHTML.HTMLDocument

    ' Synchronous request (third argument False): responseText is complete after .send.
    With http
        .Open "Get", URL, False
        .send
        html.body.innerHTML = .responseText
    End With

    Dim post As Object, html2 As MSHTML.HTMLDocument

    Set post = html.querySelectorAll(".mod-Treffer")
    ' Surrogate document: each card's outerHTML is loaded into it so that
    ' querySelector can be run against a single card.
    Set html2 = New MSHTML.HTMLDocument

    Dim i As Long, emailNode As Object, phoneNode As Object
    Dim emailAddress As String, queryPos As Long

    With ActiveSheet
        .Range("A1").Resize(1, 3).Value = Array("Title", "Phone", "Email")
        For i = 0 To post.Length - 1
            html2.body.innerHTML = post.Item(i).outerHTML
            .Cells(i + 2, 1).Value = html2.querySelector("h2").innerText

            ' querySelector returns Nothing when there is no match, so test before dereferencing.
            Set phoneNode = html2.querySelector(".mod-AdresseKompakt__phoneNumber")
            If Not phoneNode Is Nothing Then .Cells(i + 2, 2).Value = phoneNode.innerText

            Set emailNode = html2.querySelector(".contains-icon-email")
            If Not emailNode Is Nothing Then
                ' href looks like "mailto:name@host?subject=...": drop the scheme
                ' and everything from the first "?" so only the address remains.
                emailAddress = Replace$(emailNode.href, "mailto:", vbNullString)
                queryPos = InStr(emailAddress, "?")
                If queryPos > 0 Then emailAddress = Left$(emailAddress, queryPos - 1)
                .Cells(i + 2, 3).Value = emailAddress
            End If
        Next i
    End With
End Sub

德玛西亚99

<article class="mod mod-Treffer" data-teilnehmerid="122085958708">
    <div data-wipe="{&quot;listener&quot;: &quot;click&quot;, &quot;name&quot;: &quot;Trefferliste Eintrag zur Detailseite&quot;, &quot;id&quot;: &quot;122085958708&quot;, &quot;synchron&quot;: true}" data-realid="2aeca1d2-2bc5-4070-ac4d-e16b10badca5" data-tnid="122085958708" target="_self">
        <div class="mod-hervorhebung">
            <p class="mod-hervorhebung--partnerHervorhebung" data-hervorhebungsstufe="3">Silber Partner</p>
        </div>
        <picture class="trefferlisten_logo">
            <source media="(min-width: 768px)" srcset="https://ies.v4all.de/0122/GS/0122/5/8335/49428335_310x190.png">
            <img alt="" data-lazy-src="https://ies.v4all.de/0122/GS/0122/5/8335/49428335_310x190.png" src="https://ies.v4all.de/0122/GS/0122/5/8335/49428335_310x190.png">
        </picture>
        <h2 data-wipe-name="Titel">A &amp; S Billing Pflege-Service GmbH</h2>
        <p class="d-inline-block mod-Treffer--besteBranche">Ambulante Pflegedienste</p>
        <div class="mod mod-Stars mod-Stars--" title="2.9/5" data-float="2,9">
            <span class="mod-Stars__text" style="width: 58.000001907348632812500%;">2.9</span>
        </div>
        <span>2.9</span>
        <span>(8)</span>
        <address class="mod mod-AdresseKompakt">
            <p data-wipe-name="Adresse">
                Kirchenberg&nbsp;2‑4,
                <span class="nobr">
                    90482
                    Nürnberg
                </span>
                (Mögeldorf)
            </p>
            <p class="mod-AdresseKompakt__phoneNumber" data-hochgestellt-position="end" data-wipe-name="Kontaktdaten">(0911) 60 00 99 77</p>
        </address>
    </div>
    <div class="aktionsleiste_kompakt">
        <div class="mod-gsSlider mod-gsSlider--noneOnWhite">
            <span class="mod-gsSlider__arrow mod-gsSlider__arrow--arrow" data-direction="left" data-show="false" data-wipe="{&quot;listener&quot;:&quot;click&quot;,&quot;name&quot;:&quot;Trefferliste: Aktionleiste-button-links&quot;}"></span>
            <span class="mod-gsSlider__arrow mod-gsSlider__arrow--arrow" data-direction="right" data-show="false" data-wipe="{&quot;listener&quot;:&quot;click&quot;,&quot;name&quot;:&quot;Trefferliste: Aktionleiste-button-rechts&quot;}"></span>
            <div class="mod-gsSlider__slider" data-initialized="true">
                <a class="contains-icon-homepage gs-btn" target="_blank" rel=" noopener" href="http://www.as-billing.de" data-wipe="{&quot;listener&quot;:&quot;click&quot;, &quot;name&quot;:&quot;Trefferliste Webseite-Button&quot;, &quot;id&quot;:&quot;122085958708&quot;}" data-isneededpromise="false">Webseite</a>
                <a class="contains-icon-email gs-btn" href="mailto:info@as-billing.de?subject=Anfrage%20%C3%BCber%20Gelbe%20Seiten" data-wipe="{&quot;listener&quot;:&quot;click&quot;, &quot;name&quot;:&quot;Trefferliste Email-Button&quot;, &quot;id&quot;:&quot;122085958708&quot;}" data-isneededpromise="false">E-Mail</a>
                <span class="contains-icon-route_finden gs-btn" data-wipe="{&quot;listener&quot;:&quot;click&quot;, &quot;name&quot;:&quot;Trefferliste Navigation-Button&quot;, &quot;id&quot;:&quot;122085958708&quot;}" data-parameters="{&quot;partner&quot;: &quot;googlemaps&quot;, &quot;searchquery&quot;: &quot;A%20%26%20S%20Billing%20Pflege-Service%20GmbH%20Kirchenberg%202-4%2090482%20N%C3%BCrnberg&quot;}" data-target="_blank">Route</span>
                <span class="contains-icon-details gs-btn" data-wipe="{&quot;listener&quot;:&quot;click&quot;, &quot;name&quot;:&quot;Trefferliste Actionbutton Mehr Details&quot;, &quot;id&quot;:&quot;122085958708&quot;}" data-parameters="{&quot;partner&quot;: &quot;gs&quot;, &quot;realId&quot;: &quot;2aeca1d2-2bc5-4070-ac4d-e16b10badca5&quot;, &quot;tnId&quot;: &quot;122085958708&quot;}">Mehr Details</span>
            </div>
        </div>
    </div>
</article>

莫回无

我可以用下面这几行代码来实现:

    If InStr(post.Item(i).getElementsByTagName("a")(1).href, "mailto:") Then
        Debug.Print Split(Split(post.Item(i).getElementsByTagName("a")(1).href, "mailto:")(1), "?")(0)
    End If

但我欢迎任何其他改进建议,也希望了解更多。

* 经过测试,如果在元素中找不到电子邮件,我会遇到错误。如何避免错误呢?我可以用 On Error Resume Next,但我希望处理该错误而不是跳过它。

** 编辑:我可以使用这个结构解决第二点

    Dim emailObj As Object
    Set emailObj = post.Item(i).getElementsByTagName("a")(1)
    If Not emailObj Is Nothing Then
        If InStr(post.Item(i).getElementsByTagName("a")(1).href, "mailto:") Then
            Debug.Print Split(Split(post.Item(i).getElementsByTagName("a")(1).href, "mailto:")(1), "?")(0)
        End If

该代码可以工作,但有时电子邮件无法正确抓取,这是因为这一行

    Set emailObj = post.Item(i).getElementsByTagName("a")(1)

有时电子邮件链接并不是索引为 1 的那个 a 元素。所以我的最后一个问题:无论索引是多少,如何获取电子邮件数据?在循环中,我尝试了下面这一行,但没有成功

    Set aNodeList = post.Item(i).querySelectorAll(".contains-icon-email")(0)
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Html5