|
[广告] Excel易用宝 - 提升Excel的操作效率 · Excel / WPS表格插件 ★ 免费下载 ★ ★ 使用帮助★
VBA也能实现多线程?这个似乎有点不可思议。现在就给大家介绍一种可以实现多线程的方法。比如我们以爬虫为例,现在我们要通过多线程爬取安居客的房源信息。你会发现多线程的速度会快很多。
Private tparr, arr(13) '声明这两个变量为了用于错误处理
Public Function s() As Object '这一句必须写用于引用sqlcel函数
Set s = Application.COMAddIns("SqlCelAddIn").Object
End Function
'**********多线程爬取(速度更快)
Sub AnjukeCrawMultiThread()
Range("a1:n1") = Array("城市", "区", "镇", "地址", "小区", "建造日期", "房型", "楼层", "面积", "特点", "总价", "单价", "业主", "描述")
Dim pg As Integer
Dim arr() '定义一个Variant类型的数组
For pg = 1 To 10
ReDim Preserve arr(pg - 1)
arr(pg - 1) = Array("getNodes", pg) '数组的每个元素仍是数组,子数组的第一个元素为需要引用的过程/函数名,其它元素依次为传入的参数
Next pg
s.multithread arr, True '调用多线程函数
End Sub
'*********单线程爬取(容易控制)
Sub AnjukeCrawSingleThread()
Range("a1:n1") = Array("城市", "区", "镇", "地址", "小区", "建造日期", "房型", "楼层", "面积", "特点", "总价", "单价", "业主", "描述")
Dim pg As Integer
For pg = 1 To 5
Call getNodes(pg)
Next pg
End Sub
'通过标签绝对地址爬虫是sqlcel函数的标志
Sub getNodes(pg As Integer)
Dim doc As Variant, bfstr As String, lastR As Long, i As Integer
Dim tpnode As String, tpstr As String, url As String
url = "https://shanghai.anjuke.com/sale/pudong/p"
bfstr = "/html[1]/body[1]/div[1]/div[2]/div[4]/ul[1]/" '通过标签路径获取标签
Set doc = s.getdoc(url & pg)
Erase arr()
For i = 1 To 60
arr(0) = "上海"
tpstr = s.getnode(doc, bfstr & "li[" & i & "]/div[2]/div[3]")
tparr = Split(tpstr, "-")
arr(1) = tparr(0)
tryArr1
arr(2) = tparr(1)
tryArr3
arr(4) = Split(tparr(0), " ")(0)
arr(5) = s.getnode(doc, bfstr & "li[" & i & "]/div[2]/div[2]/span[4]")
arr(6) = s.getnode(doc, bfstr & "li[" & i & "]/div[2]/div[2]/span[1]")
arr(7) = s.getnode(doc, bfstr & "li[" & i & "]/div[2]/div[2]/span[3]")
arr(8) = s.getnode(doc, bfstr & "li[" & i & "]/div[2]/div[2]/span[2]")
arr(9) = s.getnode(doc, bfstr & "li[" & i & "]/div[2]/div[4]")
arr(10) = s.getnode(doc, bfstr & "li[" & i & "]/div[3]/span[1]")
arr(11) = s.getnode(doc, bfstr & "li[" & i & "]/div[3]/span[2]")
arr(12) = s.getnode(doc, bfstr & "li[" & i & "]/div[2]/div[2]/span[5]")
tryArr12
arr(13) = s.getnode(doc, bfstr & "li[" & i & "]/div[2]/div[1]/a[1]")
lastR = Cells(1048576, 1).End(xlUp).Row + 1
Range("A" & lastR & ":N" & lastR).Value = arr
Next i
lastR = Cells(1048576, 1).End(xlUp).Row + 1
Range("A" & lastR).Select
DoEvents
End Sub
'错误处理,可实现类似Try-Catch的错误处理方法
Sub tryArr1()
On Error GoTo line '这一句必须,用于捕捉异常
arr(1) = Split(tparr(0), ";")(2)
Exit Sub '这一句必须,表示没有异常退出过程
line: '这一句必须表示出现异常后需执行的语句
arr(1) = ""
End Sub
Sub tryArr12()
On Error GoTo line
arr(12) = Split(arr(12), ";")(1)
Exit Sub
line:
arr(12) = ""
End Sub
Sub tryArr3()
On Error GoTo line
arr(3) = tparr(2)
Exit Sub
line:
arr(3) = ""
End Sub
需要注意的是使用该方法需要引用一个excel插件sqlcelfuncs,该插件免费且无需联网可实现许多VBA做不到或者做起来很麻烦的事情。具体可参照
https://sqlcel.com/sqlcelfuncs/
|
评分
-
1
查看全部评分
-
|