|
楼主 |
发表于 2016-1-8 20:49
|
显示全部楼层
测试成功，主要代码如下：
- using System;
- using System.Collections.Generic;
- using System.ComponentModel;
- using System.Data;
- using System.Drawing;
- using System.Linq;
- using System.Text;
- using System.Windows.Forms;
- using System.IO;
- using System.Net;
namespace ExcelHelpTaskPane
{
    /// <summary>
    /// Form whose button downloads the Baidu MP3 chart page, cuts out the
    /// table that follows the "歌曲TOP500" heading and copies its rows into
    /// the worksheet, positioned relative to the active cell.
    /// </summary>
    public partial class Form5 : Form
    {
        // Address of the page to scrape.
        private const string ChartUrl = "http://list.mp3.baidu.com/topso/mp3topsong.html?id=1#top2";

        // Text in the page source after which the wanted table is searched for.
        private const string ChartMarker = "歌曲TOP500";

        private const string TableCloseTag = "</table>";

        // Column offsets (1-based, relative to the active cell) that receive the
        // first six TD cells of each row. Columns 3 and 6 are not written.
        private static readonly int[] TargetColumns = { 1, 2, 4, 5, 7, 8 };

        /// <summary>
        /// Creates the form and builds its designer-generated controls.
        /// </summary>
        public Form5()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Downloads the HTML source of the page at <paramref name="Url"/>.
        /// </summary>
        /// <param name="Url">Address of the page to fetch.</param>
        /// <returns>
        /// The response body decoded as GB2312, or an empty string when anything
        /// goes wrong (the failure is reported in a message box, not thrown).
        /// </returns>
        private string GetWebContent(string Url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                request.Timeout = 30000; // milliseconds
                request.Headers.Set("Pragma", "no-cache");

                // Dispose response, stream and reader so the underlying
                // connection is released even when reading fails.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding("GB2312")))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Best-effort by design: report and fall back to an empty
                // result, but include the reason instead of discarding it.
                MessageBox.Show("出错：" + ex.Message);
                return "";
            }
        }

        /// <summary>
        /// Cuts the first <c>&lt;table&gt;…&lt;/table&gt;</c> that follows
        /// <see cref="ChartMarker"/> inside the page body out of <paramref name="html"/>.
        /// </summary>
        /// <param name="html">Full page source; may be null or empty.</param>
        /// <returns>
        /// The table markup including its closing tag, or <c>null</c> when the
        /// body, the marker or the table cannot be located.
        /// </returns>
        private static string ExtractChartTable(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return null;
            }

            // Tag names are matched case-insensitively; the marker text is
            // matched ordinally so the search does not depend on the UI culture.
            int bodyStart = html.IndexOf("<body", StringComparison.OrdinalIgnoreCase);
            if (bodyStart < 0)
            {
                return null;
            }

            int markerStart = html.IndexOf(ChartMarker, bodyStart, StringComparison.Ordinal);
            if (markerStart < 0)
            {
                return null;
            }

            int tableStart = html.IndexOf("<table", markerStart, StringComparison.OrdinalIgnoreCase);
            if (tableStart < 0)
            {
                return null;
            }

            // NOTE(review): the first closing tag wins, so a table nested inside
            // the chart table would truncate the extracted markup.
            int tableEnd = html.IndexOf(TableCloseTag, tableStart, StringComparison.OrdinalIgnoreCase);
            if (tableEnd < 0)
            {
                return null;
            }

            return html.Substring(tableStart, tableEnd - tableStart + TableCloseTag.Length);
        }

        /// <summary>
        /// Click handler: fetches the chart page, parses the chart table and
        /// writes one worksheet row per table row that has at least six TD cells.
        /// Any failure is shown in a message box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            try
            {
                Microsoft.Office.Interop.Excel.Range cel = Globals.ThisAddIn.Application.ActiveCell;
                if (cel == null)
                {
                    MessageBox.Show("请先选中一个单元格");
                    return;
                }

                string tableHtml = ExtractChartTable(GetWebContent(ChartUrl));
                if (tableHtml == null)
                {
                    MessageBox.Show("未能在网页源码中找到" + ChartMarker + "对应的表格");
                    return;
                }

                // An off-screen WebBrowser is used purely as an HTML parser;
                // dispose it once the rows have been copied.
                using (WebBrowser browser = new WebBrowser())
                {
                    browser.Navigate("about:blank");
                    if (browser.Document == null)
                    {
                        MessageBox.Show("无法创建用于解析的HTML文档");
                        return;
                    }

                    HtmlDocument document = browser.Document.OpenNew(true);
                    document.Write(tableHtml);

                    int row = 0;
                    foreach (HtmlElement tr in document.GetElementsByTagName("TR"))
                    {
                        // Look the cells up once per row instead of once per column.
                        HtmlElementCollection cells = tr.GetElementsByTagName("TD");
                        if (cells.Count < TargetColumns.Length)
                        {
                            // Rows without six TD cells (e.g. TH header rows)
                            // are skipped rather than aborting the whole import.
                            continue;
                        }

                        row++;
                        for (int i = 0; i < TargetColumns.Length; i++)
                        {
                            cel.Cells[row, TargetColumns[i]].Value = cells[i].InnerText;
                        }
                    }
                }
            }
            catch (Exception exception)
            {
                MessageBox.Show(exception.Message);
            }
        }
    }
}
复制代码 |
|