-
利用VBA爬取豆瓣Top250到Excel文件,并给出源代码
-
-
Jackll2302说:
Jackll2302 说:
利用VBA爬取豆瓣Top250到Excel文件,并给出源代码
Sub Get_Douban_Top250() Dim http As Object Dim html As Object Dim i As Long Dim tr As Object Dim td As Object Dim a As Object Dim txt As String Dim ws As Worksheet Set ws = Sheets("Sheet1") Set http = CreateObject("MSXML2.XMLHTTP") http.Open "GET", "https://movie.douban.com/top250", False http.send Set html = CreateObject("htmlfile") html.body.innerHTML = http.responsetext For Each tr In html.getelementsbytagname("tr") i = i + 1 If i > 1 Then For Each td In tr.getelementsbytagname("td") For Each a In td.getelementsbytagname("a") If a.classname = "title" Then txt = txt & "| " & a.innertext & " |" ws.Range("A" & i).Value = a.innertext End If If a.classname = "rating_num" Then txt = txt & " 评分:" & a.innertext ws.Range("B" & i).Value = a.innertext End If Next Next ws.Range("C" & i).Value = txt txt = "" End If Next MsgBox "豆瓣Top250已经成功爬取到Excel文件中!" End Sub
-
Jackll2302说:
利用VBA爬取豆瓣Top250到Excel文件中,注意使用模拟浏览器语句,并给出源代码
Sub DoubanTop250()
Dim http As Object, html As Object, i As Integer
Set http = CreateObject("Microsoft.XMLHTTP")
i = 1
Do Until i > 250
http.Open "GET", "https://movie.douban.com/top250?start=" & 25 * (i - 1) & "&filter=", False
http.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = http.responseText
Dim tb As Object
Set tb = html.getelementsbytagname("table")(0)
Dim tr As Object
Set tr = tb.getelementsbytagname("tr")
Dim td As Object
Set td = tr(0).getelementsbytagname("td")
Cells(i, 1).Value = td(1).getelementsbytagname("a")(0).innerText
Cells(i, 2).Value = td(2).getelementsbytagname("div")(0).innerText
i = i + 1
Loop
End Sub