|
本帖最后由 孤独骑士 于 2015-10-4 00:22 编辑
- Sub 提取报名表()
- On Error Resume Next
- Application.ScreenUpdating = False
- Dim arr(), x%
- Range("a1").Resize(1, 4) = Split("姓名,性别,就读学校,报名所在地", ",")
- For x = 0 To 63
- s = getXmlHttpText("https://gaokao.chsi.com.cn/zzbm/mdgs/detail.action?oid=476733931&lx=1&start=" & application.min(x*30,1890))
- s = regReplace(s, "\s+", "")
- Set m = regMatch(s, "</tr><tr><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td>")
- ReDim arr(1 To m.Count, 1 To 4)
- For i = 1 To UBound(arr)
- For j = 1 To UBound(arr, 2)
- arr(i, j) = m(i - 1).submatches(j - 1)
- Next
- Next
- Range("a" & x * 30 + 2).Resize(UBound(arr), 4) = arr
- Erase arr
- Next x
- Application.ScreenUpdating = True
- End Sub
- Function getXmlHttpText(url)
- Dim strText ' As String
- With CreateObject("MSXML2.XMLHTTP") 'CreateObject("WinHttp.WinHttpRequest.5.1")
- .Open "GET", url, False
- .Send
- getXmlHttpText = .responsetext
- End With
- End Function
- Function regReplace(s, pstrt, rstr)
- Dim regex As Object
- Dim temp, n
- n = Len(s)
- Set regex = CreateObject("VBScript.RegExp")
- With regex
- .Global = True
- .Pattern = pstrt
- temp = .Replace(s, rstr)
- End With
- regReplace = temp
- Set regex = Nothing
- End Function
- Function regMatch(s, pString)
- Dim regex As Object
- Dim temp, n
- Set regex = CreateObject("VBScript.RegExp")
- With regex
- .Global = True
- .IgnoreCase = True
- .Pattern = pString
- Set matchs = .Execute(s) '返回一个集合
- End With
- Set regex = Nothing
- Set regMatch = matchs
- Set matchs = Nothing
- End Function
复制代码
一共1920条记录,全取出来了。不过要花点时间运行 |
评分
-
1
查看全部评分
-
|