还有就是怎么样转换成汉字呀?谢谢!
/// <summary>
/// Reads the HTML file at <c>d:\11.html</c> using the "gb2312" encoding, finds
/// anchor tags with a regular expression, and writes each match's link text
/// followed by its href value to the console.
/// </summary>
/// <remarks>
/// NOTE(review): on .NET Core / .NET 5+ the "gb2312" code page is presumably only
/// available after registering <c>CodePagesEncodingProvider</c>; confirm against
/// the target runtime.
/// </remarks>
public static void TestToDe()
{
    string strHtml;

    // Dispose the reader deterministically so the file handle is released
    // even if reading throws.
    using (StreamReader sr = new StreamReader("d:\\11.html", Encoding.GetEncoding("gb2312")))
    {
        strHtml = sr.ReadToEnd();
    }

    // "url"   : the href value, either double-quoted, single-quoted, or unquoted
    //           (the unquoted form stops at '>', '^' or whitespace).
    // "title" : the text between the end of the opening tag and the next '<'.
    // The "[^/]*>" part skips any remaining attributes, provided they contain no '/'.
    string p = @"<a\s+[^>]*href\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^>^\s]+))[^/]*>(?<title>[^<>]*)<[/]?";

    Regex reg = new Regex(p, RegexOptions.IgnoreCase | RegexOptions.Compiled);
    MatchCollection ms = reg.Matches(strHtml);
    foreach (Match m in ms)
    {
        // Title on the first line, URL on the second, then two blank lines.
        Console.WriteLine("{0}\n{1}\n\n", m.Groups["title"].Value, m.Groups["url"].Value);
    }
}
附上代码,希望大家帮忙解决,我自己也再找找!累哟!