|
老师好,
我的一段抓取国外电商平台(亚马逊 amazon.com)的代码,
- Sub 亚马逊商品数据查询()
- Dim SURL$, i%, Lrow&
- Dim oHttp, tt$
- Lrow = Sheet1.[J502].End(xlUp).Row '统计包含ASIN的单元格
- For i = 3 To Lrow '定义第一条ASIN的行数
-
- DoEvents
-
- SURL = "https://www.amazon.com/dp/" & Cells(i, 10).Value
-
- 'Set oHttp = CreateObject("WinHttp.WinHttpRequest.5.1")
- Set oHttp = CreateObject("MSXML2.XMLHTTP")
- With oHttp
- .Open "GET", SURL, False
- .send
- ' tt = .responseBody
- tt = .responseText
- 'tt = StrConv(.responsebody, vbUnicode)
- End With
-
-
- If InStr(tt, "<a id=""bylineInfo"" class") Then '抓取品牌
- Sheet1.Cells(i, 13) = Split(Split(Split(tt, "<a id=""bylineInfo"" class")(1), "</a>")(0), ">")(1)
- Else
- Sheet1.Cells(i, 13) = "-"
- End If
-
- If InStr(tt, "<span id=""productTitle"" class=""a-size-large"">") Then '抓取标题
- Sheet1.Cells(i, 14) = Trim(Replace(Split(Split(tt, "<span id=""productTitle"" class=""a-size-large"">")(1), "</span>")(0), vbLf, ""))
- Else
- Sheet1.Cells(i, 14) = "-"
- End If
-
- If InStr(tt, "<span id=""acrCustomerReviewText"" class=""a-size-base"">") Then '抓取评论
- Sheet1.Cells(i, 15) = Split(Split(Split(tt, "<span id=""acrCustomerReviewText"" class=""a-size-base"">")(1), "</span>")(0), " ")(0)
- Else
- Sheet1.Cells(i, 15) = 0
- End If
-
- If InStr(tt, "<span id=""acrPopover""") Then '抓取星级
- Sheet1.Cells(i, 16) = Split(Split(Split(Split(tt, "<span id=""acrPopover""")(1), """>")(0), "title=""")(1), " ")(0)
- Else
- Sheet1.Cells(i, 16) = 0
- End If
-
- If InStr(tt, "<a id=""askATFLink""") Then '抓取问答
- Sheet1.Cells(i, 17) = Split(Trim(Replace(Split(Split(Split(tt, "<a id=""askATFLink""")(1), "</span>")(0), "<span class=""a-size-base"">")(1), vbLf, "")), " ")(0)
- Else
- Sheet1.Cells(i, 17) = 0
- End If
-
- If InStr(tt, "<span id=""priceblock_ourprice""") Then '抓取价格
- Sheet1.Cells(i, 18) = Split(Split(Split(tt, "<span id=""priceblock_ourprice""")(1), "</span>")(0), ">")(1)
- Else
- Sheet1.Cells(i, 18) = 0
- End If
- If InStr(tt, "<li id=""SalesRank"">") Then '抓取类目
- Sheet1.Cells(i, 19) = Replace(Split(Split(Split(Split(tt, "<li id=""SalesRank"">")(1), "</a>")(0), " in ")(1), "(")(0), "&", "&")
- Else
- Sheet1.Cells(i, 19) = "-"
- End If
-
- If InStr(tt, "<li id=""SalesRank"">") Then '抓取BSR1
- Sheet1.Cells(i, 20) = Split(Split(Split(tt, "<li id=""SalesRank"">")(1), " in ")(0), "#")(1)
- Else
- Sheet1.Cells(i, 20) = 0
- End If
-
- If InStr(tt, "<li id=""SalesRank"">") Then '抓取节点
- Sheet1.Cells(i, 21) = Split(Split(Split(Split(Split(tt, "<li id=""SalesRank"">")(1), "</a></span>")(0), "</style>")(1), "<span class=""zg_hrsr_ladder""")(1), """>")(1)
- Else
- Sheet1.Cells(i, 21) = "-"
- End If
- If InStr(tt, "<li id=""SalesRank"">") Then '抓取BSR2
- Sheet1.Cells(i, 22) = Split(Split(Split(Split(Split(tt, "<li id=""SalesRank"">")(1), "</a></span>")(0), "</style>")(1), "</span>")(0), "<span class=""zg_hrsr_rank"">#")(1)
- Else
- Sheet1.Cells(i, 22) = 0
- End If
-
- Next i
-
- End Sub
复制代码
原来都可以正常抓取商品信息(标题,品牌,评论,星级...),但是最近抓取出现错误,商品本来是英文界面和数据,但是抓取的时候变成了中文内容,导致原代码运行错误,(这种现象在我们正常浏览商品页面的时候也会出现,但是只需要修改网站的语言设置就可以防止中文内容),请问:
能不能在代码中指定浏览器比如谷歌浏览器抓取数据(我想到的是不是代码打开了360或者ie网页,这两种网页都可能出现中文界面)
或者是不是其他的问题导致的?
还有,这段代码我想加一下 setRequestHeader 以避开网站的反爬体系,也希望得到老师的帮助
非常感谢老师的帮助
谢谢谢
|
|