自己提的问题自己结贴。
性能几乎全部损耗在排序上:由于 Hashtable 本身不能排序,只能借助其他数据结构来排序。
不过还好,
一本 60000 字的书十几秒就能统计完。
using System;
using System.Collections; // non-generic collections (Hashtable, ArrayList)
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using System.Windows.Forms;
namespace WindowsApplication1
{
/// <summary>
/// Form that counts how often each word occurs in a text file and lists the
/// words in ascending order of occurrence count.
/// </summary>
public partial class Form1 : Form
{
    // Everything matched by this pattern is treated as a separator between
    // words: comma, period, space, line feed, carriage return, question mark,
    // semicolon, colon, exclamation mark, parentheses, the straight double
    // quote (octal escape \042), the curly double quotes, hyphen, any
    // character in U+4E00..U+9FA5, and ASCII digits.
    private const string SeparatorPattern =
        @"\,|\.|\ |\n|\r|\?|\;|\:|\!|\(|\)|\042|\“|\”|\-|[\u4e00-\u9fa5]|[0-9]";

    // Built once and reused instead of being re-created on every click.
    private static readonly Regex SeparatorRegex = new Regex(SeparatorPattern);

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Reads the file whose path is in <c>textBox1</c> (decoded as GB2312),
    /// counts its words and writes one "word TAB count" line per distinct
    /// word into <c>textBox2</c>, least frequent word first.
    /// </summary>
    /// <param name="sender">Control that raised the event (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void cmdStart_Click(object sender, EventArgs e)
    {
        textBox2.Text = "";

        string path = textBox1.Text;
        string text;
        try
        {
            // The using block closes the reader even if ReadToEnd throws.
            using (StreamReader reader = new StreamReader(path, System.Text.Encoding.GetEncoding("GB2312")))
            {
                text = reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // Report the path that was actually tried and the real cause;
            // the old fixed hint about C:/test.txt was wrong for any other path.
            MessageBox.Show("无法读取文件 \"" + path + "\": " + ex.Message);
            return; // Nothing to count; the output box is already cleared.
        }

        Dictionary<string, int> counts = CountWords(text);
        List<KeyValuePair<string, int>> ordered = SortByCount(counts);
        // Assign the result once; appending to Text per word re-copies the
        // whole string and updates the control on every iteration.
        textBox2.Text = FormatCounts(ordered);
    }

    /// <summary>
    /// Splits <paramref name="text"/> on the separator pattern and counts the
    /// occurrences of every non-empty piece. Comparison is case-sensitive.
    /// </summary>
    /// <param name="text">Text to analyse; may be empty.</param>
    /// <returns>Map from word to the number of times it occurs.</returns>
    private static Dictionary<string, int> CountWords(string text)
    {
        Dictionary<string, int> counts = new Dictionary<string, int>();
        foreach (string word in SeparatorRegex.Split(text))
        {
            // Adjacent separators produce empty pieces; skip them.
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            int seen;
            counts.TryGetValue(word, out seen); // seen stays 0 for a new word
            counts[word] = seen + 1;
        }
        return counts;
    }

    /// <summary>
    /// Returns the entries of <paramref name="counts"/> ordered by ascending
    /// count; words with equal counts are ordered by ordinal string
    /// comparison so the output is deterministic.
    /// </summary>
    /// <param name="counts">Word counts to order.</param>
    /// <returns>A new list holding the ordered entries.</returns>
    private static List<KeyValuePair<string, int>> SortByCount(Dictionary<string, int> counts)
    {
        List<KeyValuePair<string, int>> entries = new List<KeyValuePair<string, int>>(counts);
        entries.Sort(delegate(KeyValuePair<string, int> left, KeyValuePair<string, int> right)
        {
            int byCount = left.Value.CompareTo(right.Value);
            return byCount != 0 ? byCount : string.CompareOrdinal(left.Key, right.Key);
        });
        return entries;
    }

    /// <summary>
    /// Renders each entry as "word TAB count" followed by CR LF.
    /// </summary>
    /// <param name="entries">Entries in the order they should be shown.</param>
    /// <returns>The concatenated report text; empty when there are no entries.</returns>
    private static string FormatCounts(List<KeyValuePair<string, int>> entries)
    {
        System.Text.StringBuilder report = new System.Text.StringBuilder();
        foreach (KeyValuePair<string, int> entry in entries)
        {
            report.Append(entry.Key).Append("\t").Append(entry.Value).Append("\r\n");
        }
        return report.ToString();
    }
}
}
(本帖最后由 simpson 于 2008-1-12 00:13 编辑)