|
楼主 |
发表于 2017-1-20 14:47
|
显示全部楼层
经过kaohsing老师的指点,修改了代码,进度明显加快了。经测试,抓50页2000条仅用77秒
- Function strText(ByVal url As String) As String
- On Error GoTo UNDEFIND '若异常则返回“访问异常”
- DoEvents
- With CreateObject("MSXML2.XMLHTTP")
- .Open "GET", url, False
- .Send
- strText = Right(ByteToStr(.responsebody, "gbk"), Len(ByteToStr(.responsebody, "gbk")) - InStr(ByteToStr(.responsebody, "gbk"), "</a>]</em> <a href=""") - Len("</a>]</em> <a href=""") + 1)
- 'Debug.Print ByteToStr(.responsebody, "gbk")
- GoTo FUNRETURN '若正正常则返回数据
- End With
- UNDEFIND: strText = "访问异常"
- FUNRETURN:
- End Function
- Public Sub URL_REPLY()
- t = Timer
- Dim r As Long
- Dim sh As Worksheet
- Set sh = ActiveSheet
- Dim eachurl
- Dim maxrow
- Dim arr
- response = strText("http://club.excelhome.net/forum-2-1.html")
- If response = "访问异常" Then
- MsgBox "访问异常"
- Exit Sub
- End If
- On Error Resume Next
- ReDim arr(1 To 4, 1 To 1)
- For pg = 1 To 50
- 'For pg = 1 To 2500
- response = strText("http://club.excelhome.net/forum-2-" & pg & ".html")
- 'Debug.Print response
- If response = "访问异常" Then
- MsgBox "访问异常"
- Exit For
- End If
- eachurl = Split(response, "</a>]</em> <a href=""")
- j = 0
- For i = 1 + UBound(arr, 2) To UBound(eachurl) + UBound(arr, 2)
- ReDim Preserve arr(1 To 4, 1 To i)
- arr(1, i) = "http://club.excelhome.net/" & Split(eachurl(j), """")(0)
- arr(2, i) = Split(Split(eachurl(j), "class=""s xst"">")(1), "</a>")(0)
- arr(3, i) = Split(Split(eachurl(j), "</a><em>")(1), "</em>")(0)
- arr(4, i) = Split(Split(eachurl(j), "html"" class=""xi2"">")(1), "</a><em>")(0)
- j = j + 1
- Next
- Application.StatusBar = "正在抓取第" & pg & "页内容"
- Next
- arr(1, 1) = "URL"
- arr(2, 1) = "标题"
- arr(3, 1) = "查看"
- arr(4, 1) = "回复"
- Range("A1").Resize(UBound(arr, 2), 4) = Application.Transpose(arr)
- MsgBox "完成,用时" & Timer - t & "秒"
- End Sub
- Function ByteToStr(arrByte, strCharset As String) As String
- With CreateObject("Adodb.Stream")
- .Type = 1 'adTypeBinary
- .Open
- .Write arrByte
- .Position = 0
- .Type = 2 'adTypeText
- .Charset = strCharset
- ByteToStr = .Readtext
- .Close
- End With
- End Function
复制代码 |
|