请教一下读取网页内容的问题
我在做一个音乐播放器,想通过程序输入歌曲的名字,然后使用百度的搜索mp3的功能返回给我一个歌曲的url和歌词的url,然后去看了下开源的YOYOPLAYER,里面的读取网页得到歌词的内容着实看不懂(就是如何使用HttpClient懂一点,但是具体怎么得到url我就不懂),把相关代码贴上来,谁来帮我讲解一下,或是给个参考例子给我看下,万分感谢
/**
 * Looks up a lyric for {@code key} through a Baidu web search and returns its text.
 *
 * <p>How the URL is obtained, step by step:
 * <ol>
 *   <li>Issue a GET to {@code http://www.baidu.com/s?wd=...} where the query is
 *       {@code "filetype:lrc " + key}, URL-encoded as GBK.</li>
 *   <li>Read the whole result page into a string (decoded as GBK by {@code getString}).</li>
 *   <li>Run a regular expression over that HTML. The expression uses a look-behind
 *       ({@code (?<=...)}) and a look-ahead ({@code (?=...)}) so that the match itself is
 *       only the text <em>between</em> {@code LRC/Lyric - <a href="} and
 *       {@code " target="_blank">HTML版</a>} -- i.e. the value of the link's
 *       {@code href} attribute.</li>
 *   <li>Download that link with {@code Util.getURLContent} and, if the page contains a
 *       single-line {@code <body>...</body>} section, keep only what is inside it.</li>
 * </ol>
 *
 * @param key search keywords (e.g. song title and artist)
 * @return the downloaded lyric text, or {@code null} when the result page contains no
 *         matching link
 * @throws Exception if the HTTP request or the follow-up download fails
 */
private static String getBaidu_Lyric(String key) throws Exception {
    HttpClient http = new HttpClient();
    http.getParams().setContentCharset("GBK");
    GetMethod get = new GetMethod("http://www.baidu.com/s?wd=" + URLEncoder.encode("filetype:lrc " + key, "GBK"));
    // Send the same headers a desktop Firefox would send.
    get.addRequestHeader("Host", "www.baidu.com");
    get.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11");
    get.addRequestHeader("Accept", "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5");
    get.addRequestHeader("Accept-Language", "zh-cn,zh;q=0.5");
    get.addRequestHeader("Keep-Alive", "300");
    get.addRequestHeader("Referer", "http://www.baidu.com/");
    get.addRequestHeader("Connection", "keep-alive");

    String temp;
    try {
        http.executeMethod(get);
        temp = getString(get.getResponseBodyAsStream());
    } finally {
        // Always hand the connection back, even when the request or the read fails.
        get.releaseConnection();
    }

    // The match is the href of the "HTML版" link of the first LRC result.
    // NOTE(review): this is tied to Baidu's result-page markup (earlier revisions of this
    // code matched different anchor text); adjust the pattern if it stops matching.
    Matcher m = java.util.regex.Pattern
            .compile("(?<=LRC/Lyric - <a href=\").*?(?=\" target=\"_blank\">HTML版</a>)")
            .matcher(temp);
    String content = null;
    if (m.find()) {
        String str = m.group();
        content = Util.getURLContent(str);
        if (content != null) {
            // Without DOTALL, '.' does not cross line breaks, so this only narrows the
            // result when <body>...</body> sits on one line; otherwise the full page is kept.
            m = java.util.regex.Pattern.compile("(?<=<body>).*?(?=</body>)").matcher(content);
            if (m.find()) {
                content = m.group();
            }
        }
    }
    System.out.println(content);
    return content;
}
/**
 * Reads the whole stream as GBK-encoded text and closes it.
 *
 * @param is stream to read; may be {@code null}
 * @return the decoded text, or an empty string if {@code is} is {@code null} or reading fails
 */
private static String getString(InputStream is) {
    return getString(is, "GBK");
}

/**
 * Reads the whole stream as text in the given charset and closes it.
 *
 * <p>Failures are reported on standard output and turned into an empty result rather than
 * propagated, so callers never see an exception from this method.
 *
 * @param is          stream to read; may be {@code null}
 * @param charsetName name of the charset used to decode the bytes (e.g. {@code "GBK"})
 * @return the decoded text, or an empty string if {@code is} is {@code null}, the charset is
 *         unsupported, or reading fails
 */
private static String getString(InputStream is, String charsetName) {
    InputStreamReader r = null;
    try {
        r = new InputStreamReader(is, charsetName);
        StringBuilder sb = new StringBuilder();
        char[] buffer = new char[4096];
        int length;
        while ((length = r.read(buffer)) != -1) {
            // Append the chars directly; no intermediate String per chunk.
            sb.append(buffer, 0, length);
        }
        return sb.toString();
    } catch (Exception ex) {
        System.out.println("发生在getString: " + ex);
        return "";
    } finally {
        try {
            if (r != null) {
                // Closing the reader also closes the wrapped stream.
                r.close();
            } else if (is != null) {
                // The reader was never created (e.g. unsupported charset): close the raw stream.
                is.close();
            }
        } catch (Exception ex) {
            System.out.println("发生在finally中: " + ex);
        }
    }
}