|
本帖最后由 duquancai 于 2019-5-18 10:30 编辑
周末有点时间,我就用WordVba写个代码,把《绝魔之地狱之门》下载到当前Word文档
Dim xmlhttp As Object, doc As Document
Sub main()
Dim host_url$
Set xmlhttp = CreateObject("msxml2.xmlhttp")
host_url = "https://www.ddshubao.com"
Set doc = ActiveDocument
doc.Content.Text = Empty
doc.Content.Text = "绝魔之地狱之门" + vbCr
Call get_url(host_url)
MsgBox "下载完毕!"
End Sub
Sub get_url(host_url)
Dim strText$, list_url, i&
xmlhttp.Open "GET", host_url & "/book/1221/", False
xmlhttp.send
strText = xmlhttp.responseText
list_url = Split(Split(Split(strText, "allchapter")(1), "show-more")(0), "<a href=""")
For i = 1 To UBound(list_url)
Call get_Article(Split(host_url & list_url(i), """>")(0), host_url)
Next
End Sub
Sub get_Article(url, host_url)
Dim strText$, tem_str$, temp$, s_url$
xmlhttp.Open "GET", url, False
xmlhttp.send
strText = xmlhttp.responseText
temp = Split(Split(strText, "readbox")(1), "<!--/reader-->")(0)
Call parse_Article(temp)
if_str = Split(temp, "下一页</a>")
If UBound(if_str) = 2 Then
s_url = Split(Split(Split(if_str(0), "小说详情</a>")(1), "<a href=""")(1), """")(0)
xmlhttp.Open "GET", host_url + s_url, False
xmlhttp.send
temp = Split(Split(xmlhttp.responseText, "readbox")(1), "<!--/reader-->")(0)
Call parse_Article(temp)
End If
End Sub
Sub parse_Article(tem_str)
Dim temp$, re As Object
Set re = CreateObject("VBScript.Regexp")
re.Global = True: re.Pattern = "<?br\s*/>|(?:<p[^>]*[^<]*</p>)"
temp = Split(Split(tem_str, "<h1>")(1), "</h1>")(0) + vbCr
temp = temp + Split(Split(tem_str, "content"">")(1), "</div>")(0)
temp = Replace(temp, " ", " ")
temp = re.Replace(temp, vbCr)
doc.Bookmarks("\EndOfDoc").Range.InsertAfter temp
End Sub
|
|