|
楼主 |
发表于 2017-1-20 18:09
|
显示全部楼层
我也是刚学网抓,看源码看得我眼花缭乱的,有几条抓不到,马上下班的了,先提交上来,有时间再写吧
- Function strResponse(ByVal url As String) As String
- On Error GoTo UNDEFIND '若异常则返回“访问异常”
- url = "http://" & Replace(url, "http://", "")
- Dim strText As String
- DoEvents
- With CreateObject("MSXML2.XMLHTTP")
- .Open "GET", url, False
- .Send
- strResponse = ByteToStr(.responsebody, "gb2312")
- GoTo FUNRETURN '若正正常则返回数据
- 'Debug.Print strResponse
- End With
- UNDEFIND: strResponse = "访问异常"
- FUNRETURN:
- End Function
- Function strTA(ByVal str As String, strStart As String, strStop As String)
- strTA = Split(Split(str, strStart)(1), strStop)(0)
- End Function
- Function strTaformURL(ByVal url As String, strStart As String, Optional strStop As String = "</html>")
- strTaformURL = strTA(strResponse(url), strStart, strStop)
- End Function
- Public Sub URL_REPLY()
- Dim r As Long
- Dim sh As Worksheet
- Dim eachTitle 'Title数组
- Set sh = ActiveSheet
- pgcount = strTaformURL("http://club.excelhome.net/thread-1322442-1-1.html", "<span title=""共 ", " 页"">")
- 'Debug.Print pgcount
- For pg = 1 To pgcount
- response = strResponse("http://club.excelhome.net/thread-1322442-1-1.html")
- eachFloor = Split(response, "</em>楼</a>")
- maxrow = sh.UsedRange.Rows.Count
- For i = 1 + maxrow To UBound(eachFloor) + maxrow '跳过下标为0的第一部分
- If InStr(eachFloor(i - maxrow), "<div class=""quote""><blockquote><font size=""2"">") = 0 Then '不是回复贴
- strFloor = Split(Split(Split(Split(eachFloor(i - maxrow), "<div class=""t_fsz"">")(1), "<table cellspacing=""0"" cellpadding=""0""><tr><td")(1), "</td></tr></table>")(0), """>")(1)
- sh.Range("A" & i).Value = strFloor
- Else '是回复贴
- rawFloor = strTA(eachFloor(i - maxrow), "</font></a></font><br />", "</blockquote></div><br />")
- replyFloor = strTA(eachFloor(i - maxrow), "</blockquote></div><br />", "</td></tr></table>")
- sh.Range("A" & i).Value = replyFloor
- sh.Range("B" & i).Value = rawFloor
- End If
- Next
- Next
- End Sub
- Function ByteToStr(arrByte, strCharset As String) As String
- With CreateObject("Adodb.Stream")
- .Type = 1 'adTypeBinary
- .Open
- .Write arrByte
- .Position = 0
- .Type = 2 'adTypeText
- .Charset = strCharset
- ByteToStr = .Readtext
- .Close
- End With
- End Function
复制代码 |
|