URLDemo.MyThread cannot be resolved to a type 怎么解决 - JAVA论坛

问题点数：0 回复次数：1

URLDemo.MyThread cannot be resolved to a type 怎么解决

package com.exe1;
/*
* 读取当当网下首页图书的数据，并进行分析
* 爬取深度为2
* 爬取数据存储到D:\Java文件保存库\爬虫信息文件，目录下
* */
import *;
import *;
import java.util.*;
import java.util.regex.*;

public class URLDemo2 {
    private static final int MAX_THREAD = 0;
    //提取的数据放到该目录下
    private static String savepath = "D:\\Java文件保存库\\爬虫信息文件";
    //等待爬取的url
    private static List<String> allwaiturl = new ArrayList<>();
    //爬取过的url
    private static Set<String> alloverurl = new HashSet<>();
    //记录所有url的深度进行爬取判断
    private static Map<String,Integer> allurldepth = new HashMap<>();
    //爬取的深度
    private static int maxdepth = 2;
    //生命对象，帮助进行线程的等待操作
    private static Object obj = new Object();
    //记录总线程数5条
    private static int count = 0;

    public static void main(String args[]) {
        //确定爬取的网页地址，此处为当当网首页上的图书分类进去的网页
        //网址为    http://book.
        //String strurl = "http://search.
        String strurl = "http://book.

        //workurl(strurl,1);
        addurl(strurl,0);
        for(int i=0;i<MAX_THREAD;i++){
              new URLDemo().new MyThread().start();
            }
    }

    /*
     * 网页数据爬取
     * @param strurl
     * @param depth
     * */
    public static void workurl(String strurl,int depth) {
        //判断当前url是否爬取过
        if(!(alloverurl.contains(strurl)||depth>maxdepth)) {
            //检测线程是否执行
            System.out.println("当前执行："+Thread.currentThread().getName()+" 爬取线程处理爬取："+strurl);
            //建立url爬取核心对象
            try {
                URL url = new URL(strurl);
                //通过俩建立与网页的连接
                URLConnection conn = url.openConnection();
                //通过链接取得网页返回的数据
                InputStream is = conn.getInputStream();

                //提取text类型的数据
                if(conn.getContentType().startsWith("text")) {

                }
                System.out.println(conn.getContentEncoding());
                //一般按行读取网页数据，并进行内容分析
                //因此用BufferedRead 和 InputStreamReader 把字节流转换为字符流的缓冲流
                //进行转换时，需要处理编码格式问题
                BufferedReader br = new BufferedReader(new InputStreamReader(is, "GB2312"));

                //按行读取并打印
                String line = null;
                //正则表达式的匹配规则提取该网页的链接
                Pattern p = ("<a.*href=.+</a>");
                //建立一个输出流，用于保存文件，文件名为执行时间，以防重复
                PrintWriter pw=new PrintWriter(new File(savepath+System.currentTimeMillis()+".txt"));

                while((line = br.readLine())!=null) {
                    //System.out.println(line);
                    //编写正则，匹配超链接地址
                    pw.println(line);
                    Matcher m = p.matcher(line);
                    while(m.find()) {
                        String href = m.group();
                        //找到超链接地址并截取字符串
                        //有无引号
                        href = href.substring(href.indexOf("href="));
                        if(href.charAt(5)=='\"'){
                            href = href.substring(6);
                        }else {
                            href = href.substring(5);
                        }
                        //截取到引号或者空格或者到">"结束
                        try {
                            href = href.substring(0, href.indexOf("\""));
                        }catch(Exception e) {
                            try {
                                href = href.substring(0, href.indexOf(" "));
                            }catch(Exception e1) {
                                href = href.substring(0, href.indexOf(">"));
                            }
                        }
                        if(href.startsWith("http:")||href.startsWith("https:"));
                        /*
                         * 输出该网页存在的链接
                         * System.out.println(href);
                         * 将url地址放到队列中
                          allwaiturl.add(href);
                        allurldepth(href,depth);
                         * */
                        //调用addurl方法
                        addurl(href,depth);
                    }

                }
                 pw.close();
                 br.close();
            }catch(Exception e) {
                //TODO Auto-generated catch block
                //e.printstackTrace()
            }
            //将当前url归列到alloverurl中
            alloverurl.add(strurl);
            System.out.println(strurl+"网页爬取完成，已爬取数量："+alloverurl.size()+"，剩余爬取数量："+allwaiturl.size());

        }
        /*
         * 用递归的方法继续爬取其他链接
         * String nexturl = allwaiturl.get(0);
         * allwaiturl.remove(0);
         * workurl(nexturl,allurldepth.get(nexturl));
         * */
        if(allwaiturl.size()>0) {
            synchronized(obj) {
                obj.notify();
            }
        }else {
            System.out.println("爬取结束........");
        }

        }

        /*
         * 将获取的url放入等待队列中，同时判断是否已经放过
         * @param href
         * @param depth
         * */
        public static synchronized void addurl(String href, int depth) {
            //将url放到队列中
            allwaiturl.add(href);
            //判断url是否放过
            if(!allurldepth.containsKey(href)) {
                allurldepth.put(href, depth+1);
            }
        }

        /*
         * 移除爬取完成的url，获取下一个未爬取的url
         * @return
         * */
        public static synchronized String geturl() {
            String nexturl = allwaiturl.get(0);
            allwaiturl.remove(0);
            return nexturl;
        }

        /*
         * 线程分配任务
         * */
        public class MyThread extends Thread{
            public void run() {
                //设定一个死循环，让线程一直存在
                while(true) {
                    //判断是否新连接，有则获取
                    if(allwaiturl.size()>0) {
                        //获取url进行处理
                        String url = geturl();
                        //调用workurl方法爬取
                        workurl(url,allurldepth.get(url));
                    }else {
                        System.out.println("当前线程准备就绪，等待连接爬取："+this.getName());
                        count++;
                        //建立一个对象，让线程进入等待状态，即外套()
                        synchronized(obj) {
                            try {
                                obj.wait();
                            }catch(Exception e) {

                            }
                        }
                        count--;
                    }
                }
            }

        }

    }

搜索更多相关主题的帖子: new　static　href　URL　String