// A simple HTML parser written in Java -- is this approach OK?
import java.io.*; import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;
import java.net.*;
public class ParseHtml{
public static void main(String args[])
{
System.out.println("Get image src from html page");
// for(int i=26510;i>0;i--)
// processPage("http://64.124.76.23/BBSView.asp?SubID=photo&MsgID="+i);
//processPage("http://64.124.76.23/BBSView.asp?SubID=photo&MsgID=26509");
processPage("http://www.topwind.net/reika/wallpaper/page01.html");
}
public static void processPage2(String surl)
{
boolean bsuccess=false;
int trycount = 0;
int MAX_TRY_TIME = 5;
while(trycount<MAX_TRY_TIME)
{
trycount++;
try
{
Reader r = getReaderFromUrl(surl);
if(r!=null)
{
ParserDelegator parser = new ParserDelegator();
HTMLEditorKit.ParserCallback callback = new Callback();
parser.parse(r, callback, true);
r.close();
//If no exception occurr until here, it may be successful
break;
}
}
catch(Exception e)
{
e.printStackTrace();
}
}
if(bsuccess)
{
System.out.println("Process succeed:"+surl+" , within "+trycount+" times");
}
else
{
System.out.println("Process failure:"+surl);
}
}
public static void processPage(String surl)
{
boolean bsuccess=false;
int trycount = 0;
int MAX_TRY_TIME = 5;
while(trycount<MAX_TRY_TIME)
{
trycount++;
try
{
Reader r = getReaderFromUrl(surl);
if(r!=null)
{
bsuccess=parseHtml(r);
if(bsuccess)
break;
}
}
catch(Exception e)
{
e.printStackTrace();
}
}
if(bsuccess)
{
System.out.println("Process succeed:"+surl+" , within "+trycount+" times");
}
else
{
System.out.println("Process failure:"+surl);
}
}
public static InputStreamReader getReaderFromUrl(String surl)
{
try
{
URL link = new URL(surl);
URLConnection uconn=link.openConnection();
InputStream httpis=uconn.getInputStream();
return new InputStreamReader(httpis);
}
catch(Exception e)
{
e.printStackTrace();
}
return null;
}
public static boolean parseHtml(Reader r)
{
try
{
ParserDelegator parser = new ParserDelegator();
HTMLEditorKit.ParserCallback callback = new Callback();
parser.parse(r, callback, true);
r.close();
return true;
}
catch(Exception e)
{
e.printStackTrace();
}
return false;
}
}
/**
 * Parser callback that downloads the images referenced by {@code <img>} tags.
 *
 * <p>Only absolute {@code http:} URLs ending in {@code .jpg} or {@code .gif}
 * are considered, and only when the server reports a Content-Length above
 * {@link #MIN_IMAGE_BYTES}. Files are saved under the last path segment of
 * the URL, relative to the current working directory.
 */
class Callback extends HTMLEditorKit.ParserCallback {
    /** Page URL prefix; whatever follows it is stored as the file-name prefix. */
    private static final String BBS_PAGE_PREFIX =
        "http://64.124.76.23/BBSView.asp?SubID=photo&MsgID=";
    /** Images whose reported length is not above this many bytes are skipped. */
    private static final int MIN_IMAGE_BYTES = 16240;
    /** Size of the copy buffer used while downloading. */
    private static final int BUFFER_SIZE = 1024;

    private String sPageLink;
    private String sFileNamePrefix = "";

    /**
     * Records the URL of the page being parsed and derives the file-name
     * prefix from it.
     *
     * @param s page URL; when {@code null} the current prefix is left as is
     */
    public void setPageLink(String s) {
        sPageLink = s;
        if (s == null) {
            return;
        }
        if (s.startsWith(BBS_PAGE_PREFIX)) {
            sFileNamePrefix = s.substring(BBS_PAGE_PREFIX.length());
        } else {
            sFileNamePrefix = "";
        }
    }

    /** Returns the prefix derived by {@link #setPageLink(String)}. */
    private String getFilePrefix() {
        return sFileNamePrefix;
    }

    /**
     * Called by the parser for tags without an end tag; downloads the image
     * when the tag is an {@code <img>} with an acceptable {@code src}.
     *
     * @param t   the tag encountered
     * @param a   the tag's attributes
     * @param pos position of the tag in the document (unused)
     */
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        if (!HTML.Tag.IMG.equals(t) || a == null) {
            return;
        }
        Object src = a.getAttribute(HTML.Attribute.SRC);
        // An <img> without src is common in real pages; skip it quietly
        // instead of provoking (and logging) a NullPointerException.
        if (!(src instanceof String)) {
            return;
        }
        String imgsrc = (String) src;
        if (getFileName(imgsrc) != null) {
            saveFile(imgsrc);
        }
    }

    /**
     * Derives the local file name for an image URL.
     *
     * @param url image URL taken from the page
     * @return the text after the last {@code '/'}, or {@code null} when the
     *         URL is null, does not start with {@code http:}, contains the
     *         marker {@code =0'>\}, does not end in {@code .jpg}/{@code .gif}
     *         (case-insensitive), or yields a name containing {@code '\'}
     *         or {@code ':'}
     */
    private String getFileName(String url) {
        if (url == null) {
            return null;
        }
        // Fixed locale: with a Turkish default locale "gif" upper-cases to
        // "GİF" and would never match the extension checks below.
        String upper = url.toUpperCase(java.util.Locale.ENGLISH);
        if (!upper.startsWith("HTTP:")) {
            return null;
        }
        if (upper.indexOf("=0'>\\") >= 0) {
            return null;
        }
        if (!(upper.endsWith(".JPG") || upper.endsWith(".GIF"))) {
            return null;
        }
        String name = url.substring(url.lastIndexOf('/') + 1);
        // The name comes from an untrusted page and is used as a local path;
        // '\' and ':' could steer it out of the working directory on Windows.
        if (name.indexOf('\\') >= 0 || name.indexOf(':') >= 0) {
            return null;
        }
        return name;
    }

    /**
     * Downloads {@code fileurl} over HTTP into {@code savepath}.
     *
     * <p>Still open from the original author's notes: set a connection
     * timeout, and retry failed downloads.
     *
     * @param fileurl  URL of the file to fetch
     * @param savepath path of the local file to write (overwritten if present)
     * @return {@code true} if the whole body was written; {@code false} on a
     *         non-HTTP URL, a status other than 200, a short read or any
     *         exception. A partially written file is removed on failure.
     */
    public boolean downLoadFile2(String fileurl, String savepath) {
        InputStream is = null;
        FileOutputStream fos = null;
        File savefile = null;
        boolean fileOpened = false;
        boolean ok = false;
        try {
            URLConnection conn = new URL(fileurl).openConnection();
            if (!(conn instanceof HttpURLConnection)) {
                System.out.println("Connect to " + fileurl + " failed,not an HTTP connection");
                return false;
            }
            HttpURLConnection httpconn = (HttpURLConnection) conn;
            httpconn.connect();
            int httpStatusCode = httpconn.getResponseCode();
            if (httpStatusCode != HttpURLConnection.HTTP_OK) {
                System.out.println("Connect to " + fileurl + " failed,return code:" + httpStatusCode);
                return false;
            }
            // Some servers send no Content-Length (the original author saw -1
            // for http://www.csdn.net/expert/topic/204/204361.shtm); in that
            // case read until end of stream.
            int filelen = httpconn.getContentLength();
            is = httpconn.getInputStream();
            savefile = new File(savepath);
            fos = new FileOutputStream(savefile);
            fileOpened = true;

            byte[] tmpbuf = new byte[BUFFER_SIZE];
            long readcount = 0;
            int readnum;
            while ((filelen < 0 || readcount < filelen)
                    && (readnum = is.read(tmpbuf)) != -1) {
                fos.write(tmpbuf, 0, readnum);
                readcount += readnum;
            }
            if (filelen >= 0 && readcount < filelen) {
                System.out.println("download error");
                return false;
            }
            fos.flush();
            fos.close(); // a failing close means the file may be incomplete
            ok = true;
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            // Release both streams on every path; closing twice is harmless.
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException ignored) {
                    // nothing more can be done; the file is deleted below
                }
            }
            if (is != null) {
                try {
                    is.close();
                } catch (IOException ignored) {
                    // the download result is already decided
                }
            }
            // Only remove a file this call created or truncated itself.
            if (fileOpened && !ok) {
                savefile.delete();
            }
        }
    }

    /**
     * Downloads the image at {@code url} if its name is acceptable and the
     * server reports a Content-Length above {@link #MIN_IMAGE_BYTES}.
     * Images with an unknown length are skipped.
     *
     * @param url image URL taken from the page
     */
    private void saveFile(String url) {
        String stmpname = getFileName(url);
        if (stmpname == null) {
            return; // rejected URL: do not even contact the server
        }
        int len;
        URLConnection uconn = null;
        try {
            uconn = new URL(url).openConnection();
            len = uconn.getContentLength();
        } catch (Exception e) {
            e.printStackTrace();
            return;
        } finally {
            // This connection is only used to read the length; release it
            // because the body is never consumed here.
            if (uconn instanceof HttpURLConnection) {
                ((HttpURLConnection) uconn).disconnect();
            }
        }
        if (len > MIN_IMAGE_BYTES) {
            downLoadFile2(url, stmpname);
        }
    }
}