|
楼主 |
发表于 2022-9-2 14:11
|
显示全部楼层
代码如下:
- Public Aurl As String, URLX As String, URLZ As String, brr, m As Long
- Sub 提取省份()
- Dim i, j, R, arr, reg As Object, mh, str As String, bds As String, URL As String
- ' Application.ScreenUpdating = False
- With Sheet1
- .UsedRange.Offset(1).ClearContents
- .Cells.Font.Color = vbBlack
- .Cells.Font.Bold = False
- End With
- Set X = CreateObject("MSXML2.XMLHTTP")
- Aurl = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/"
- With X
- .Open "GET", Aurl, False
- .send
- Do Until .readyState = 4 And .Status = 200
- DoEvents
- Loop
- str = .responsetext
- ' str = StrConv(.responseBody, vbUnicode, &H804) '防乱码
- ' Debug.Print str
- End With
- Set reg = CreateObject("vbscript.regexp")
- reg.Global = True
- reg.Pattern = "<a href=""(.+?html)"">([一-龢]+)<br /></a></td>"
- If reg.test(str) Then
- Set mh = reg.Execute(str)
- ReDim arr(1 To mh.Count, 1 To 3)
- For i = 0 To mh.Count - 1
- arr(i + 1, 1) = mh(i).submatches(1) '省级名称
- arr(i + 1, 2) = Aurl & mh(i).submatches(0) '省URL
- arr(i + 1, 3) = Split(mh(i).submatches(0), ".")(0) & "0000000000" '省级区划代码
- Next i
- End If
- Set reg = Nothing
- Sheet3.Range("A2").Resize(UBound(arr), UBound(arr, 2)) = arr
- End Sub
- Sub 行政区划()
- Dim i, j, arr, reg As Object, mh, str As String, bds As String, URL As String
- arr = Sheet3.Range("A1").CurrentRegion
- Aurl = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/"
- Set reg = CreateObject("vbscript.regexp")
- reg.Global = True
- reg.Pattern = "citytr"">[\s\S]+?<td><a href=""(.+?html)"">([0-9]{12})[\s\S]+?([一-龢]+)</a></td>"
- Set X = CreateObject("MSXML2.XMLHTTP")
- For i = 2 To UBound(arr)
- If arr(i, 4) <> "已提取" Then
- m = 1
- ReDim brr(1 To 100000, 1 To 7)
- brr(m, 1) = "=row()-1"
- brr(m, 2) = arr(i, 3) '省级区划代码
- brr(m, 3) = arr(i, 1) '省级名称
- With X
- .Open "GET", arr(i, 2), False
- .send
- Do Until .readyState = 4 And .Status = 200
- DoEvents
- Loop
- str = .responsetext
- ' Debug.Print str
- If reg.test(str) Then
- Set mh = reg.Execute(str)
- For j = 0 To mh.Count - 1
- m = m + 1
- brr(m, 1) = "=row()-1"
- brr(m, 4) = mh(j).submatches(2) '地市级名称
- brr(m, 2) = mh(j).submatches(1) '地市级区划代码
- URL = Aurl & mh(j).submatches(0)
- Call 地市级(URL)
- Next j
- End If
- End With
- R = Sheet1.Cells(Rows.Count, 2).End(3).Row + 1
- Sheet1.Range("A" & R).Resize(UBound(brr), UBound(brr, 2)) = brr
- Sheet3.Range("D" & i).Value = "已提取"
- ThisWorkbook.Save
- End If
- Next i
- Set X = Nothing
- ' Application.ScreenUpdating = True
- End Sub
- Function 地市级(URL As String)
- Dim i, j, reg As Object, mh, bds As String, str As String, Murl1 As String
- Set a = CreateObject("MSXML2.XMLHTTP")
- bds = "countytr""><td><a href=""(.+?html)"">([0-9]{12}).+?([一-龢]+)</a></td>"
- With a
- .Open "GET", URL, False
- .send
- Do Until .readyState = 4 And .Status = 200
- DoEvents
- Loop
- str = .responsetext
- ' Debug.Print str
- ' str = StrConv(.responsebody, vbUnicode, &H804) '防乱码
- End With
- Set a = Nothing
- Set reg = CreateObject("vbscript.regexp")
- reg.Global = True
- reg.Pattern = bds
- Murl1 = Split(URL, Aurl)(1)
- Murl1 = Split(Murl1, "/")(0)
- If reg.test(str) Then
- Set mh = reg.Execute(str)
- With Sheet1
- For i = 0 To mh.Count - 1
- m = m + 1
- brr(m, 1) = "=row()-1"
- brr(m, 2) = mh(i).submatches(1) '县级区划代码
- brr(m, 5) = mh(i).submatches(2) '县级名称
- URLX = Aurl & Murl1 & "/" & mh(i).submatches(0)
- Call 县市区(URLX)
- Next i
- End With
- End If
- Set reg = Nothing
- End Function
- Function 县市区(URL As String)
- Dim i, j, reg As Object, mh1, bds As String, str As String, Murl2 As String
- Set a = CreateObject("MSXML2.XMLHTTP")
- bds = "towntr""><td><a href=""(.+?html)"">([0-9]{12}).+?([一-龢]+)</a></td>"
- With a
- .Open "GET", URL, False
- .send
- Do Until .readyState = 4 And .Status = 200
- DoEvents
- Loop
- str = .responsetext
- ' Debug.Print str
- ' str = StrConv(.responsebody, vbUnicode, &H804) '防乱码
- End With
- Set a = Nothing
- Set reg1 = CreateObject("vbscript.regexp")
- reg1.Global = True
- reg1.Pattern = bds
- If reg1.test(str) Then
- Set mh1 = reg1.Execute(str)
- For i = 0 To mh1.Count - 1
- m = m + 1
- brr(m, 1) = "=row()-1"
- brr(m, 2) = mh1(i).submatches(1) '乡级区划代码
- brr(m, 6) = mh1(i).submatches(2) '乡级名称
- Murl2 = Split(URL, "/")(UBound(Split(URL, "/")))
- Murl2 = Replace(URL, Murl2, "")
- URLZ = Murl2 & mh1(i).submatches(0)
- Call 乡镇(URLZ)
- Next i
- End If
- Set reg1 = Nothing
- End Function
- Function 乡镇(URL As String)
- Dim i, j, srr, reg2 As Object, mh2, bds As String, str As String
- Set a = CreateObject("MSXML2.XMLHTTP")
- bds = "villagetr""><td>([0-9]{12}).+?([一-龢]+)</td></tr>"
- With a
- .Open "GET", URL, False
- .send
- Do Until .readyState = 4 And .Status = 200
- DoEvents
- Loop
- str = .responsetext
- ' Debug.Print str
- ' str = StrConv(.responsebody, vbUnicode, &H804) '防乱码
- End With
- Set a = Nothing
- Set reg2 = CreateObject("vbscript.regexp")
- reg2.Global = True
- reg2.Pattern = bds
- If reg2.test(str) Then
- Set mh2 = reg2.Execute(str)
- ReDim srr(1 To mh2.Count, 1 To 7)
- For i = 0 To mh2.Count - 1
- m = m + 1
- brr(m, 1) = "=row()-1"
- brr(m, 7) = mh2(i).submatches(1) '村级名称
- brr(m, 2) = mh2(i).submatches(0) '村级区划代码
- Next i
- End If
- Set reg2 = Nothing
- End Function
复制代码
|
|