[求助]文本检索功能实现 - JAVA论坛

得分:20

回复楼主 txing521

package com.lch.test;

import java.awt.Font;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.util.ArrayList;
import java.util.List;

import javax.swing.JButton;
import javax.swing.JComboBox;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
/**
 * Main application window for the text-search demo.
 *
 * The frame is 600x400, centred on screen, titled "文本挖掘", and hosts a
 * single {@link FindPanel}. Closing the window exits the JVM.
 */
public class FindFrame extends JFrame
{
    /**
     * Configures the window (title, size, position, close behaviour) and
     * adds the search panel as its only child.
     */
    public FindFrame()
    {
        this.setTitle("文本挖掘");
        this.setSize(600, 400);
        this.setLocationRelativeTo(null);
        this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        this.add(new FindPanel());
    }

    /**
     * Program entry point: builds the indexes, then opens the search window.
     *
     * @param args command-line arguments, forwarded unchanged to
     *             {@link TextFileIndexer#s(String[])}
     * @throws Exception if index creation fails
     */
    public static void main(String[] args) throws Exception
    {
        // s() is static, so no TextFileIndexer instance is needed.
        TextFileIndexer.s(args);

        // Swing components must be created and shown on the event dispatch
        // thread; the fully-qualified name avoids needing a new import.
        javax.swing.SwingUtilities.invokeLater(new Runnable() {
            public void run()
            {
                FindFrame frame = new FindFrame();
                frame.setVisible(true);
            }
        });
    }
}
/**
 * Search form: a keyword field, a combo box selecting which text file to
 * search, a search button, and a text area that shows the results.
 *
 * The combo box entry at position {@code k} (0-based) is searched through the
 * Lucene index directory {@code d:\index<k+1>}; {@link TextFileIndexer}
 * creates one such directory per file, in the same order.
 *
 * Components are positioned absolutely (null layout).
 */
class FindPanel extends JPanel
{
    private JLabel findLabel, pathLabel;
    private JTextField findField, showPathField;
    private JComboBox pathBox;
    private JTextArea resultArea;
    private JButton findButton;

    /**
     * Builds and lays out all child components and wires the search button.
     */
    public FindPanel()
    {
        this.setLayout(null);
        findLabel=new JLabel("请输入关键字");
        findLabel.setFont(new Font("仿宋", Font.BOLD, 20));
        findLabel.setSize(200, 60);
        findLabel.setLocation(10, 10);

        findField=new JTextField();
        findField.setSize(200, 25);
        findField.setLocation(10, 55);

        pathLabel=new JLabel("在这里寻找");
        pathLabel.setFont(new Font("仿宋", Font.BOLD, 20));
        pathLabel.setSize(200, 60);
        pathLabel.setLocation(10, 65);

        String[] pathString={ "d:\\s\\1.txt", "d:\\s\\2.txt", "d:\\s\\3.txt",
                "d:\\s\\4.txt", "d:\\s\\5.txt", "d:\\s\\6.txt", "d:\\s\\7.txt",
                "d:\\s\\8.txt"};
        pathBox=new JComboBox(pathString);
        pathBox.setSize(200, 25);
        pathBox.setLocation(10, 110);

        findButton=new JButton("搜索");
        findButton.setFont(new Font("仿宋", Font.BOLD, 15));
        findButton.setSize(80, 30);
        findButton.setLocation(65, 145);

        resultArea=new JTextArea();
        resultArea.setEnabled(false);
        resultArea.setFont(new Font("宋体", Font.PLAIN, 20));

        JScrollPane area=new JScrollPane(resultArea);
        area.setSize(350, 330);
        area.setLocation(220, 10);

        showPathField=new JTextField();
        showPathField.setEditable(false);
        showPathField.setSize(600, 25);
        showPathField.setLocation(0, 340);

        this.add(findLabel);
        this.add(findField);
        this.add(pathLabel);
        this.add(pathBox);
        this.add(findButton);
        this.add(area);
        this.add(showPathField);

        findButton.addActionListener(new ActionListener(){
            public void actionPerformed(ActionEvent arg0)
            {
                resultArea.setText(null);  // clear the previous results

                // Read the widgets here, on the event dispatch thread, and
                // hand plain values to the worker thread.
                final String keyword = findField.getText();
                final int indexNumber = pathBox.getSelectedIndex() + 1;

                // Run the search off the EDT so the UI stays responsive.
                new Thread()
                {
                    public void run()
                    {
                        try {
                            search(keyword, indexNumber);
                        } catch (Exception e) {
                            e.printStackTrace();
                        }
                    }
                }.start();
            }
        });
    }

    /**
     * Searches the index that corresponds to the currently selected combo box
     * entry for the text currently in the keyword field, and shows the stored
     * "body" and "path" of every hit in the result area.
     *
     * Does nothing when the keyword field is empty or blank.
     *
     * @throws Exception if the index cannot be opened or searched
     */
    public void listfile() throws Exception
    {
        search(findField.getText(), pathBox.getSelectedIndex() + 1);
    }

    /**
     * Runs {@code queryString} against the "body" field of the index stored
     * in {@code d:\index<selectInt>} and publishes the formatted result.
     *
     * @param queryString Lucene query text; null or blank means "no search"
     * @param selectInt   1-based number appended to {@code d:\index}
     * @throws Exception if the index cannot be opened or searched
     */
    private void search(String queryString, int selectInt) throws Exception
    {
        if (queryString == null || queryString.trim().length() == 0) {
            return;
        }
        System.out.println(selectInt);

        // Note: this path must match the directory the indexer wrote to.
        IndexSearcher searcher = new IndexSearcher("d:\\index" + selectInt);
        try {
            System.out.println("正在搜索目录d:\\index" + selectInt);

            // The indexer stores the file content in a field named "body",
            // so the query parser must target that same field name.
            Analyzer analyzer = new StandardAnalyzer();
            Query query;
            try {
                query = new QueryParser("body", analyzer).parse(queryString);
            } catch (ParseException e) {
                System.out.println("异常");
                // Without a parsed query there is nothing to search for;
                // previously a null query was passed on to the searcher.
                return;
            }

            Hits hits = searcher.search(query);
            if (hits.length() == 0) {
                System.out.println("0个结果!");
                return;
            }
            System.out.println("找到:" + hits.length() + " 个结果!");

            // Documents are read while the searcher is still open.
            StringBuilder sb = new StringBuilder();
            sb.append("找到:" + hits.length() + " 个结果!"+"\n");
            for (int i = 0; i < hits.length(); i++) {
                Document document = hits.doc(i);
                // "body" is the stored file content, "path" the stored file path.
                sb.append(document.get("body"));
                sb.append("\n");
                sb.append(document.get("path"));
                sb.append("\n");
            }
            showResult(sb.toString());
        } finally {
            // Release the index files even when parsing or searching fails.
            searcher.close();
        }
    }

    /**
     * Puts {@code text} into the result area on the event dispatch thread.
     * When already on that thread the text is set immediately.
     *
     * @param text the text to display
     */
    private void showResult(final String text)
    {
        if (javax.swing.SwingUtilities.isEventDispatchThread()) {
            resultArea.setText(text);
            return;
        }
        javax.swing.SwingUtilities.invokeLater(new Runnable() {
            public void run()
            {
                resultArea.setText(text);
            }
        });
    }
}
***********************************************************************************
***********************************************************************************
***********************************************************************************
package com.lch.test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

/**
*
*/

/**
* @author lijieli_2
*
*/
/**
 * Builds one Lucene index per text file.
 *
 * The files {@code d:\s\1.txt} to {@code d:\s\8.txt} are indexed into the
 * directories {@code d:\index1} to {@code d:\index8} respectively, each index
 * holding a single document with a stored "path" field and a stored,
 * tokenized "body" field.
 */
public class TextFileIndexer {

    /**
     * Creates (overwriting any existing) index directory for every input
     * file and adds that file's content to it.
     *
     * @param args unused
     * @throws Exception if an index cannot be created or written
     */
    public static void s(String[] args) throws Exception {
        System.out.println("File正在被索引.");

        // To index other files, change these paths (and, if needed, the
        // method used to read them).
        String[] paths = { "d:\\s\\1.txt", "d:\\s\\2.txt", "d:\\s\\3.txt",
                "d:\\s\\4.txt", "d:\\s\\5.txt", "d:\\s\\6.txt", "d:\\s\\7.txt",
                "d:\\s\\8.txt", };
        int i = 1;
        for (String path : paths) {
            // Location of the index for this file.
            File indexDir = new File("d:\\index" + i);
            System.out.println("创建索引目录为d:\\index" + i );
            i++;

            Analyzer luceneAnalyzer = new StandardAnalyzer();
            // 'true' asks the writer to create a new index in indexDir.
            IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer,
                    true);
            try {
                String temp = readTxt(path);

                Document document = new Document();
                // The path is stored for display only; it is not searchable.
                Field pathField = new Field("path", path, Field.Store.YES,
                        Field.Index.NO);
                Field bodyField = new Field("body", temp, Field.Store.YES,
                        Field.Index.TOKENIZED,
                        Field.TermVector.WITH_POSITIONS_OFFSETS);
                document.add(pathField);
                document.add(bodyField);
                indexWriter.addDocument(document);
                indexWriter.optimize();
            } finally {
                // Always release the writer, even when indexing fails.
                indexWriter.close();
            }
        }
    }

    /**
     * Reads a text file using the platform default charset and returns its
     * lines joined by a carriage return, with leading and trailing
     * whitespace trimmed.
     *
     * An {@link IOException} is printed to standard error and not rethrown;
     * whatever was read before the failure (possibly the empty string) is
     * returned.
     *
     * @param path path of the file to read
     * @return the trimmed file content
     */
    public static String readTxt(String path) {
        StringBuffer content = new StringBuffer("");
        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(path));
            String line;
            while ((line = br.readLine()) != null) {
                // NOTE(review): lines are joined with "\r", not "\n" -
                // confirm this is intended before changing it, since it
                // alters the stored "body" text.
                content.append(line + "\r");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Closing the BufferedReader also closes the wrapped FileReader.
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return content.toString().trim();
    }
}

***********************************************************************************
***********************************************************************************
思路：1. 建立索引的时候就按文件分开建立（每个文件对应一个索引目录），我这么写的方法效率可能有点低。
2. 其他的几个问题，不值得我去做。
3. 要不是对这个问题感兴趣，不会回答你的问题。今天才注册！