一个相似文本检测的问题 大神进来帮忙看看程序
程序有时报错有时正常 英文好像有点检测不了还有 我想加入 显示相同语句这一功能 大神能不能帮我加个检测相同语句的方法啊
程序代码:
package simil; import java.awt.*; import java.awt.event.*; import *; import java.util.*; import java.util.List; import java.util.regex.*; import javax.swing.JOptionPane; public class FileWindows extends Frame implements ActionListener { float similarity; String SFname = ""; String TFname = ""; FileDialog file_open; DirPanel dirPanel; MainPanel mainPanel; Button taButton; FileWindows(){ super("文档检测系统"); setLocation(300, 50); setSize(600,500); dirPanel = new DirPanel(); mainPanel = new MainPanel(); taButton = new Button("开始检测"); taButton.addActionListener(this); dirPanel.SFbutton.addActionListener(this); dirPanel.TFbutton.addActionListener(this); add(dirPanel,BorderLayout.NORTH); add(mainPanel,BorderLayout.CENTER); add(taButton,BorderLayout.SOUTH); setResizable(false); setBackground(Color.GRAY); setVisible(true); validate(); addWindowListener(new WindowAdapter(){ public void windowClosing(WindowEvent e) { setVisible(false); System.exit(0); } }); file_open = new FileDialog(this,"打开文件对话框",FileDialog.LOAD); file_open.addWindowListener(new WindowAdapter(){ public void windowClosing(WindowEvent e) { file_open.setVisible(false); } }); } public void actionPerformed(ActionEvent e) { if(e.getSource()==dirPanel.SFbutton){ file_open.setVisible(true); SFname = file_open.getDirectory()+file_open.getFile(); dirPanel.SFdir.setText(SFname); } else if(e.getSource()==dirPanel.TFbutton){ file_open.setVisible(true); TFname = file_open.getDirectory(); dirPanel.TFdir.setText(file_open.getDirectory()); } else if(e.getSource()==taButton){ try{ File sf = new File(dirPanel.SFdir.getText()); File tf = new File(dirPanel.TFdir.getText()); File[] tFiles = tf.listFiles(); mainPanel.ta1.setText(""); mainPanel.ta2.setText(""); File temp; for(int i=0;i<tFiles.length;i++){ parse(sf,tFiles[i]); } for(int i=0;i<tFiles.length-1;i++){ for(int j=i+1;j<tFiles.length;j++){ if(parse(sf,tFiles[i])<parse(sf, tFiles[j])){ temp = tFiles[i]; tFiles[i] = tFiles[j]; tFiles[j] = temp; } } } for(int i=0;i<tFiles.length;i++){ mainPanel.ta1.append("\n"+"检测原文档 与 "+tFiles[i].getName()+" 的相似度:"+parse(sf,tFiles[i])+"%"); same(sf,tFiles[i]); } }catch(NullPointerException e1){ // ta.append("请选择文档"); JOptionPane.showMessageDialog(this, "请选择文档","提示对话框",JOptionPane.ERROR_MESSAGE); // e1.printStackTrace(); } } } public double parse(File sf,File tf) { int TRUE = 0; BufferedReader br = null; String s =""; String doc1 = ""; String doc2 = ""; try { br = new BufferedReader(new FileReader(sf)); while((s = br.readLine())!=null){ doc1 =doc2 + s; } br = new BufferedReader(new FileReader(tf)); while((s = br.readLine())!=null){ doc2 =doc2 + s; } } catch (FileNotFoundException e) { e.printStackTrace(); }catch (IOException e) { e.printStackTrace(); }finally{ if(br!=null){ try { br.close(); br = null; } catch (IOException e) { e.printStackTrace(); } } } if (doc1 != null && doc1.trim().length() > 0 && doc2 != null && doc2.trim().length() > 0) { Map<Integer, int[]> AlgorithmMap = new HashMap<Integer, int[]>(); for (int i = 0; i < doc1.length(); i++) { char d1 = doc1.charAt(i); if(isHanZi(d1)){ int charIndex = getGB2312Id(d1); if(charIndex != -1){ int[] fq = AlgorithmMap.get(charIndex); if(fq != null && fq.length == 2){ fq[0]++; }else { fq = new int[2]; fq[0] = 1; fq[1] = 0; AlgorithmMap.put(charIndex, fq); } } } } for (int i = 0; i < doc2.length(); i++) { char d2 = doc2.charAt(i); if(isHanZi(d2)){ int charIndex = getGB2312Id(d2); if(charIndex != -1){ int[] fq = AlgorithmMap.get(charIndex); if(fq != null && fq.length == 2){ fq[1]++; }else { fq = new int[2]; fq[0] = 0; fq[1] = 1; AlgorithmMap.put(charIndex, fq); } } } } Iterator<Integer> iterator = AlgorithmMap.keySet().iterator(); double sqdoc1 = 0; double sqdoc2 = 0; double denominator = 0; while(iterator.hasNext()){ int[] c = AlgorithmMap.get(iterator.next()); denominator += c[0]*c[1]; sqdoc1 += c[0]*c[0]; sqdoc2 += c[1]*c[1]; } return (denominator / Math.sqrt(sqdoc1*sqdoc2))*100; } else { throw new NullPointerException( " the Document is null or have not chars!!"); } } public static boolean isHanZi(char ch) { return (ch >= 0x4E00 && ch <= 0x9FA5); } public static short getGB2312Id(char ch) { try { byte[] buffer = Character.toString(ch).getBytes("GB2312"); if (buffer.length != 2) { return -1; } int b0 = (int) (buffer[0] & 0x0FF) - 161; int b1 = (int) (buffer[1] & 0x0FF) - 161; return (short) (b0 * 94 + b1); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } return -1; } }