| 网站首页 | 业界新闻 | 小组 | 威客 | 人才 | 下载频道 | 博客 | 代码贴 | 在线编程 | 编程论坛
欢迎加入我们,一同切磋技术
用户名:   
 
密 码:  
共有 514 人关注过本帖
标题:一个相似文本检测的问题 大神进来帮忙看看程序
只看楼主 加入收藏
xxyuan
Rank: 1
等 级:新手上路
帖 子:9
专家分:3
注 册:2012-9-14
结帖率:50%
收藏
 问题点数:0 回复次数:1 
一个相似文本检测的问题 大神进来帮忙看看程序
程序有时报错有时正常  英文好像有点检测不了
还有 我想加入  显示相同语句这一功能 大神能不能帮我加个检测相同语句的方法啊
程序代码:
package simil;

import java.awt.*;
import java.awt.event.*;
import *;
import java.util.*;
import java.util.List;
import java.util.regex.*;
import javax.swing.JOptionPane;

public class FileWindows extends Frame implements ActionListener {

    float similarity;
    String SFname = "";
    String TFname = "";
    FileDialog file_open;
    DirPanel dirPanel;
    MainPanel mainPanel;
    Button taButton;

    FileWindows(){
        super("文档检测系统");
        setLocation(300, 50);
        setSize(600,500);
        dirPanel = new DirPanel();
        mainPanel = new MainPanel();
        taButton = new Button("开始检测");
        
        taButton.addActionListener(this);
        dirPanel.SFbutton.addActionListener(this);
        dirPanel.TFbutton.addActionListener(this);
        add(dirPanel,BorderLayout.NORTH);
        add(mainPanel,BorderLayout.CENTER);
        add(taButton,BorderLayout.SOUTH);
        setResizable(false);
        setBackground(Color.GRAY);
        setVisible(true);
        validate();
        
        addWindowListener(new WindowAdapter(){

            public void windowClosing(WindowEvent e) {
                setVisible(false);
                System.exit(0);
            }
        });
        file_open = new FileDialog(this,"打开文件对话框",FileDialog.LOAD);
        file_open.addWindowListener(new WindowAdapter(){

            public void windowClosing(WindowEvent e) {
                file_open.setVisible(false);
            }
        });
        
    }

public void actionPerformed(ActionEvent e) {
    
    if(e.getSource()==dirPanel.SFbutton){
        file_open.setVisible(true);
        SFname = file_open.getDirectory()+file_open.getFile();
        dirPanel.SFdir.setText(SFname);

}
    else if(e.getSource()==dirPanel.TFbutton){
        file_open.setVisible(true);
        TFname = file_open.getDirectory();
        dirPanel.TFdir.setText(file_open.getDirectory());
    }
    else if(e.getSource()==taButton){
        try{
            File sf = new File(dirPanel.SFdir.getText());
            File tf = new File(dirPanel.TFdir.getText());
            File[] tFiles = tf.listFiles();
        
            mainPanel.ta1.setText("");
            mainPanel.ta2.setText("");
        
        File temp;
        for(int i=0;i<tFiles.length;i++){
            parse(sf,tFiles[i]);

        }
        for(int i=0;i<tFiles.length-1;i++){
            for(int j=i+1;j<tFiles.length;j++){
                if(parse(sf,tFiles[i])<parse(sf, tFiles[j])){
                    temp = tFiles[i];
                    tFiles[i] = tFiles[j];
                    tFiles[j] = temp;
                }
            }
        }
        
        
        for(int i=0;i<tFiles.length;i++){
            
                mainPanel.ta1.append("\n"+"检测原文档 与 "+tFiles[i].getName()+" 的相似度:"+parse(sf,tFiles[i])+"%");
                same(sf,tFiles[i]);
                
    }    
        }catch(NullPointerException e1){
//            ta.append("请选择文档");
            JOptionPane.showMessageDialog(this, "请选择文档","提示对话框",JOptionPane.ERROR_MESSAGE);
//            e1.printStackTrace();
        }
    
    }
}


public double parse(File sf,File tf) {
    int TRUE = 0;
    BufferedReader br = null;
    String s ="";
    String doc1 = "";
    String doc2 = "";
    
    try {
        br = new BufferedReader(new FileReader(sf));
        while((s = br.readLine())!=null){
            doc1 =doc2 + s;
        }
            br = new BufferedReader(new FileReader(tf));
            while((s = br.readLine())!=null){
                doc2 =doc2 + s;
            }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }catch (IOException e) {
        e.printStackTrace();
    }finally{
        if(br!=null){
            try {
                br.close();
                br = null;
            } catch (IOException e) {
                e.printStackTrace();
            }
        } 
    
    }
    
            
        if (doc1 != null && doc1.trim().length() > 0 && doc2 != null
                    && doc2.trim().length() > 0) {
            
                
                Map<Integer, int[]> AlgorithmMap = new HashMap<Integer, int[]>();
                for (int i = 0; i < doc1.length(); i++) {
                    char d1 = doc1.charAt(i);
                    if(isHanZi(d1)){
                        int charIndex = getGB2312Id(d1);
                        if(charIndex != -1){
                            int[] fq = AlgorithmMap.get(charIndex);
                            if(fq != null && fq.length == 2){
                                fq[0]++;
                            }else {
                                fq = new int[2];
                                fq[0] = 1;
                                fq[1] = 0;
                                AlgorithmMap.put(charIndex, fq);
                            }
                        }
                    }
                }

                for (int i = 0; i < doc2.length(); i++) {
                    char d2 = doc2.charAt(i);
                    if(isHanZi(d2)){
                        int charIndex = getGB2312Id(d2);
                        if(charIndex != -1){
                            int[] fq = AlgorithmMap.get(charIndex);
                            if(fq != null && fq.length == 2){
                                fq[1]++;
                            }else {
                                fq = new int[2];
                                fq[0] = 0;
                                fq[1] = 1;
                                AlgorithmMap.put(charIndex, fq);
                            }
                        }
                    }
                }
                
                Iterator<Integer> iterator = AlgorithmMap.keySet().iterator();
                double sqdoc1 = 0;
                double sqdoc2 = 0;
                double denominator = 0; 
                while(iterator.hasNext()){
                    int[] c = AlgorithmMap.get(iterator.next());
                    denominator += c[0]*c[1];
                    sqdoc1 += c[0]*c[0];
                    sqdoc2 += c[1]*c[1];
                }
                
                return (denominator / Math.sqrt(sqdoc1*sqdoc2))*100;
            } else {
                throw new NullPointerException(
                        " the Document is null or have not chars!!");
            }
        
        }

        public static boolean isHanZi(char ch) {
            return (ch >= 0x4E00 && ch <= 0x9FA5);

        }

        public static short getGB2312Id(char ch) {
            try {
                byte[] buffer = Character.toString(ch).getBytes("GB2312");
                if (buffer.length != 2) {
                    return -1;
                }
                int b0 = (int) (buffer[0] & 0x0FF) - 161; 
                int b1 = (int) (buffer[1] & 0x0FF) - 161; 
                return (short) (b0 * 94 + b1);
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            }
            return -1;
    
}

}
搜索更多相关主题的帖子: 英文 检测 
2013-09-10 10:34
xxyuan
Rank: 1
等 级:新手上路
帖 子:9
专家分:3
注 册:2012-9-14
收藏
得分:0 
主要的代码就这段   其他的界面代码没贴出来
大神麻烦看看啊
2013-09-10 10:37
快速回复:一个相似文本检测的问题 大神进来帮忙看看程序
数据加载中...
 
   



关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 0.024655 second(s), 7 queries.
Copyright©2004-2025, BCCN.NET, All Rights Reserved