|
Option Explicit
Public TP%
Public arrNML
Public Sub TAOBAO()
On Error Resume Next
Dim rawDT$, smpic$, tmp
Dim i&, j&, k&, n&, m&, tt&, tts&, row&
Dim ttPage%
Dim http As Object, ie As InternetExplorer
Dim arrTMP, arrNID, arrEVE, arrSMPIC, arrBGPIC, arrPICT, arrPIC, arrCONTT(1 To 2, 1 To 5)
Dim pic() As Byte
Dim tm
Dim fso
Set http = CreateObject("MSXML2.XMLHTTP")
Set ie = CreateObject("InternetExplorer.Application")
With http
Application.StatusBar = "打开首页"
.Open "POST", "http://hk.taobao.com/", True
.Send
Do Until .readyState = 4
DoEvents
Loop
rawDT = .responseText
arrTMP = Split(rawDT, "</a> <a href=" & Chr(34) & "http://s.taobao.com/search?spm=1")
ReDim arrNML(1 To 2, 1 To UBound(arrTMP))
For i = 0 To UBound(arrTMP) - 1
arrNML(1, i + 1) = Split(arrTMP(i), Chr(34) & ">")(UBound(Split(arrTMP(i), Chr(34) & ">")))
arrNID = Split(rawDT, Chr(34) & ">" & arrNML(1, i + 1))
arrNML(2, i + 1) = "http://s.taobao.com/search?spm=1" & Split(arrNID(0), "<a href=" & Chr(34) & "http://s.taobao.com/search?spm=1")(UBound(Split(arrNID(0), "<a href=" & Chr(34) & "http://s.taobao.com/search?spm=1")))
Next
PROD.Show
'获取总页数
Application.StatusBar = "获取宝贝总页数"
.Open "POST", arrNML(2, TP), True
.Send
Do Until .readyState = 4
DoEvents
Loop
tmp = Split(.responseText, "</span> <a href=")(0)
ttPage = Split(tmp, "</strong>/")(UBound(Split(tmp, "</strong>/")))
Set fso = CreateObject("Scripting.FileSystemObject")
If Worksheets("TB").PICTX.text = "ONE" Then
If fso.Folderexists(ThisWorkbook.path & "\" & arrNML(1, TP)) = False Then fso.CreateFolder ThisWorkbook.path & "\" & arrNML(1, TP)
Else
If fso.Folderexists(ThisWorkbook.path & "\" & arrNML(1, TP) & "B") = False Then fso.CreateFolder ThisWorkbook.path & "\" & arrNML(1, TP) & "B"
End If
Set fso = Nothing
'取大图代码
tt = 1: tts = 1
For i = 1 To ttPage
'进入某类总界面
Application.StatusBar = "打开第" & i & "页宝贝"
.Open "POST", arrNML(2, TP) & "&tab=all&bcoffset=-4&s=" & 44 * (i - 1), True
.Send
Do Until .readyState = 4
DoEvents
Loop
'获取单个产品的链接
n = 0
arrTMP = Split(.responseText, Chr(34) & " target=" & Chr(34) & "_blank" & Chr(34) & " title=") '" target="_blank" title=
ReDim arrEVE(1 To UBound(arrTMP) / 2 + 1)
For j = 1 To UBound(arrTMP) Step 2
n = n + 1
arrEVE(n) = Split(arrTMP(j), "<a href=" & Chr(34))(UBound(Split(arrTMP(j), "<a href=" & Chr(34))))
Next
If Worksheets("TB").PICTX.text = "ONE" Then
'取小图同时变大图
arrTMP = Split(.responseText, "data-ks-lazyload=")
ReDim arrSMPIC(1 To UBound(arrTMP))
ReDim arrBGPIC(1 To UBound(arrTMP))
n = 0
For j = 1 To UBound(arrTMP)
' Application.StatusBar = "正在提取第 " & tts & " 个小图"
' arrSMPIC(j) = Replace(Split(arrTMP(j), " src=")(0), Chr(34), "")
' .Open "GET", arrSMPIC(j), True
' .Send
' Do Until .readyState = 4
' DoEvents
' Loop
'
' pic = .responseBody
' Call WriteBinary(ThisWorkbook.path & "\TAOBAO\small_" & tts & ".jpg", pic)
Application.StatusBar = "正在提取第 " & tts & " 张大图"
arrBGPIC(j) = Split(Replace(Split(arrTMP(j), " src=")(0), Chr(34), ""), "jpg_")(0) & "jpg"
.Open "GET", arrBGPIC(j), True
.Send
tm = Timer
Do Until .readyState = 4 Or Timer - tm > 12
DoEvents
Loop
If .status = 200 Then
pic = .responseBody
Call WriteBinary(ThisWorkbook.path & "\" & arrNML(1, TP) & "\big_" & tts & ".jpg", pic)
End If
tts = tts + 1
Next
ElseIf Worksheets("TB").PICTX.text = "ALL" Then
'使用IE方法打开单个产品,以获取产品代码,最终取得图片
ReDim arrPIC(1 To n, 0 To 10)
For j = 1 To n
Application.StatusBar = "正在打开第 " & tt & " 个宝贝"
With ie
.Visible = False
.navigate arrEVE(j)
tm = Timer
Do Until .readyState = 4 Or Timer - tm > 20
DoEvents
Loop
If .readyState = 4 Then
rawDT = .document.Body.innerHTML
Else
rawDT = ""
End If
'此段抓取相关数据,但页面内容在变,可能抓取失败
' With CreateObject("new:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}") '得到的字符串放入剪贴板,记事本观察数据
' .SetText rawDT '数据正常显示,可以提取了
' .PutInClipboard
' End With
'
' arrCONTT(1, 1) = Split(Split(rawDT, "tb-metatit" & Chr(34) & ">")(1), "</dt>")(0)
' arrCONTT(1, 2) = Split(Split(rawDT, "tb-metatit" & Chr(34) & ">")(2), "</dt>")(0)
' arrCONTT(1, 3) = Split(Split(rawDT, "tm-label" & Chr(34) & ">")(1), "</p>")(0)
' arrCONTT(1, 4) = Split(Split(rawDT, "tm-label" & Chr(34) & ">")(2), "</p>")(0)
' arrCONTT(1, 5) = "链接"
'
' arrCONTT(2, 1) = Split(Split(rawDT, "tm-price" & Chr(34) & ">")(1), "</span>")(0)
' arrCONTT(2, 2) = Split(Split(rawDT, "tm-price" & Chr(34) & ">")(2), "</span>")(0)
' arrCONTT(2, 3) = Split(Split(rawDT, "tm-count" & Chr(34) & ">")(1), "</p>")(0)
' arrCONTT(2, 4) = Split(Split(rawDT, "tm-count" & Chr(34) & ">")(2), "</p>")(0)
' arrCONTT(2, 5) = arrEVE(j)
'
' row = Worksheets("TB").Range("E65536").End(3).row + 1
' If arrCONTT(1, 1) <> "" Then
' For k = 1 To 5
' Cells(row, k) = arrCONTT(2, k)
' Next
' End If
End With
If rawDT <> "" Then
'取出全部大图所在的链接
arrTMP = Split(rawDT, Chr(34) & " data-hasZoom")
arrPIC(j, 0) = Split(arrTMP(0), "src=" & Chr(34))(UBound(Split(arrTMP(0), "src=" & Chr(34)))) '主图
arrPICT = Split(arrTMP(1), "<a href=" & Chr(34) & "#" & Chr(34) & "><img src=" & Chr(34))
For k = 1 To UBound(arrPICT)
arrPIC(j, k) = Split(arrPICT(k), "jpg_")(0) & "jpg"
Next
'打开图片链接
For k = 1 To UBound(arrPIC, 2)
If arrPIC(j, k) <> "" Then
Application.StatusBar = "正在提取第 " & tt & "_" & k & " 张图片"
.Open "GET", arrPIC(j, k), True
.Send
tm = Timer
Do Until .readyState = 4 Or Timer - tm > 15
DoEvents
Loop
If .status = 200 Then
pic = .responseBody
Call WriteBinary(ThisWorkbook.path & "\" & arrNML(1, TP) & "B\" & i & "_" & tt & "_" & k & ".jpg", pic)
End If
Else
Exit For
End If
Next
tt = tt + 1
End If
Next 这就是你的代码 Option Explicit
Private Sub CONFIRMBT_Click()
TP = Split(TYPECB.text, " ")(0)
PROD.Hide
End Sub
Private Sub TYPECB_Change()
End Sub
Private Sub UserForm_Activate()
Dim i&
TYPECB.ListRows = UBound(arrNML, 2)
For i = 1 To UBound(arrNML, 2)
TYPECB.AddItem (i & " " & arrNML(1, i))
Next
End Sub
|
|