/// <summary>
/// Reads the HTML file at d:\11.html as GB2312 text, extracts anchor tags with a
/// regular expression, and prints each match's "title" and "url" groups to the console.
/// </summary>
public static void TestToDe()
{
    string strHtml;

    // Dispose the reader deterministically so the file handle is released
    // as soon as the content has been read.
    using (StreamReader sr = new StreamReader("d:\\11.html", Encoding.GetEncoding("gb2312")))
    {
        strHtml = sr.ReadToEnd();
    }

    // "url" captures the href value (double-quoted, single-quoted, or unquoted);
    // "title" captures the run of text (no '<' or '>') that precedes the next '<'.
    string p = @"<a\s+[^>]*href\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^>^\s]+))[^/]*>(?<title>[^<>]*)<[/]?";

    Regex reg = new Regex(p, RegexOptions.IgnoreCase | RegexOptions.Compiled);
    foreach (Match m in reg.Matches(strHtml))
    {
        Console.WriteLine("{0}\n{1}\n\n", m.Groups["title"].Value, m.Groups["url"].Value);
    }
}
附上代码,希望大家帮忙解决,我自己也再找找!累哟!
现在问题有点明朗了,就是如何把Unicode转化成gb2312。
但是我只知道怎么转换一个字符串,如果是读取html文件,就还有点问题。
附上代码!
// Read the whole HTML file, decoding its bytes as GB2312.
string strHtml;
using (StreamReader sr = new StreamReader("d:\\11.html", Encoding.GetEncoding("gb2312")))
{
    // The using block releases the file handle once the text has been read.
    strHtml = sr.ReadToEnd();
}

// NOTE(review): strHtml is already a decoded .NET (UTF-16) string at this point.
// Encoding it to UTF-8 bytes and then decoding those bytes as GB2312 does not
// "convert" the text; it reinterprets multi-byte UTF-8 sequences as GB2312 and
// garbles every non-ASCII character. If GB2312 bytes are what is needed, use
// Encoding.GetEncoding("GB2312").GetBytes(strHtml) instead. Confirm the intent.
string a = System.Text.Encoding.GetEncoding("GB2312").GetString(System.Text.Encoding.UTF8.GetBytes(strHtml));