占楼编辑。
循环也挺麻烦的。下载数据库什么的最方便了。
循环也挺麻烦的。下载数据库什么的最方便了。
*!* Crawls the division-code pages starting from one page and writes every
*!* code/name pair that is found to aa.txt.
*!* "test" (the HTTP object) and "mn" (the output file handle) are created
*!* here and are used by func1 and func2 below.
*!* NOTE(review): the host part of the start URL ("www.stats.") looks
*!* truncated - confirm the real address before running.
test = CREATEOBJECT("msxml2.Serverxmlhttp")
mn = FCREATE("aa.txt")
IF mn < 0
    *!* FCREATE() returns -1 when the file cannot be created, so there
    *!* would be nothing to write to.
    MESSAGEBOX("Unable to create aa.txt", 16)
    RETURN
ENDIF
func1("http://www.stats./tjbz/cxfldm/2011/41/4101.html")
FCLOSE(mn)

*!* Downloads one page and processes it.
*!*   website - absolute URL of the page to download.
*!* A page without any [href='] is handed to func2 together with the HTML
*!* that was already downloaded, so it is not fetched a second time.
*!* Otherwise each linked row is written as "code name" followed by an
*!* empty line, and the linked page is processed recursively.
FUNCTION func1(website)
    LOCAL pagehtml, tempstr, websites, daima, mingcheng
    test.OPEN("get", website, 0)
    test.SEND
    pagehtml = test.responsetext
    tempstr = STREXTRACT(pagehtml, [href='])
    IF EMPTY(tempstr)
        func2(website, pagehtml)
        RETURN
    ENDIF
    DO WHILE .T.
        *!* Relative link of the row, resolved against the current page URL.
        websites = STREXTRACT(tempstr, "", "'>")
        websites = LEFT(website, RATC('/', website)) + websites
        *!* Text of the first anchor (code), then of the second anchor (name).
        tempstr = STREXTRACT(tempstr, "'>")
        daima = STREXTRACT(tempstr, "", "<")
        tempstr = STREXTRACT(tempstr, "'>")
        mingcheng = STREXTRACT(tempstr, "", "<")
        *!* Advance to the next link before recursing; empty means last row.
        tempstr = STREXTRACT(tempstr, [href='])
        FPUTS(mn, daima + SPACE(1) + mingcheng)
        FPUTS(mn, "")
        func1(websites)
        IF EMPTY(tempstr)
            EXIT
        ENDIF
    ENDDO
ENDFUNC

*!* Writes the rows of a page that has no links.
*!*   website - absolute URL of the page; downloaded only when html is
*!*             not supplied.
*!*   html    - optional; page source that the caller already downloaded.
*!* Each row marked with [villagetr'><td>] is written as three
*!* space-separated cell values, and one empty line closes the page.
FUNCTION func2(website, html)
    LOCAL tempstr, daima, mingcheng, daima4
    IF VARTYPE(html) == "C"
        tempstr = html
    ELSE
        test.OPEN("get", website, 0)
        test.SEND
        tempstr = test.responsetext
    ENDIF
    DO WHILE .T.
        tempstr = STREXTRACT(tempstr, [villagetr'><td>])
        IF EMPTY(tempstr)
            EXIT
        ENDIF
        daima = STREXTRACT(tempstr, "", "<")
        tempstr = STREXTRACT(tempstr, "<td>")
        daima4 = STREXTRACT(tempstr, "", "<")
        tempstr = STREXTRACT(tempstr, "<td>")
        mingcheng = STREXTRACT(tempstr, "", "<")
        FPUTS(mn, daima + SPACE(1) + daima4 + SPACE(1) + mingcheng)
    ENDDO
    FPUTS(mn, "")
ENDFUNC
*!* The program consists of one main module and three sub-modules.
*!* Main module ******************************************************
*!* "test" (the HTTP object) and "mn" (the output file handle) are created
*!* here and are used by the functions below.
*!* NOTE(review): the host part of the start URL ("www.stats.") looks
*!* truncated - confirm the real address before running.
test = CREATEOBJECT("msxml2.Serverxmlhttp")
mn = FCREATE("aa.txt")
IF mn < 0
    *!* FCREATE() returns -1 when the file cannot be created, so there
    *!* would be nothing to write to.
    MESSAGEBOX("Unable to create aa.txt", 16)
    RETURN
ENDIF
xiazai("http://www.stats./tjbz/cxfldm/2011/41/4101.html")
FCLOSE(mn)
*!* End of main module ***********************************************

*!* HTML download block **********************************************
*!* Downloads one page and dispatches it to the matching parser.
*!*   website - absolute URL of the page to download.
*!* A page without any [href='] goes to func2 (whole page source);
*!* otherwise the text after the first [href='] goes to func1.
FUNCTION xiazai(website)
    LOCAL pagehtml, tempstr
    test.OPEN("get", website, 0)
    test.SEND
    pagehtml = test.responsetext
    tempstr = STREXTRACT(pagehtml, [href='])
    IF EMPTY(tempstr)
        func2(pagehtml)
    ELSE
        func1(tempstr, website)
    ENDIF
ENDFUNC
*!* End of HTML download block ***************************************

*!* Parser for pages that contain links, i.e. 1) counties/districts/cities
*!* and 2) sub-district offices and townships ************************
*!*   tempstr - page text starting right after the first [href='].
*!*   website - URL of the page, used to resolve the relative links.
*!* Each linked row is written as "code name" followed by an empty line,
*!* and the linked page is then processed through xiazai.
*!* The parameters are already local to the function, so they are not
*!* repeated in the LOCAL list.
FUNCTION func1(tempstr, website)
    LOCAL websites, daima, mingcheng
    DO WHILE .T.
        *!* Relative link of the row, resolved against the current page URL.
        websites = STREXTRACT(tempstr, "", "'>")
        websites = LEFT(website, RATC('/', website)) + websites
        *!* Text of the first anchor (code), then of the second anchor (name).
        tempstr = STREXTRACT(tempstr, "'>")
        daima = STREXTRACT(tempstr, "", "<")
        tempstr = STREXTRACT(tempstr, "'>")
        mingcheng = STREXTRACT(tempstr, "", "<")
        *!* Advance to the next link before recursing; empty means last row.
        tempstr = STREXTRACT(tempstr, [href='])
        FPUTS(mn, daima + SPACE(1) + mingcheng)
        FPUTS(mn, "")
        xiazai(websites)
        IF EMPTY(tempstr)
            EXIT
        ENDIF
    ENDDO
ENDFUNC
*!* End of parser for pages that contain links ***********************

*!* Parser for pages without links, i.e. residents' committees and
*!* villages *********************************************************
*!*   tempstr - full source of the page.
*!* Each row marked with [villagetr'><td>] is written as three
*!* space-separated cell values, and one empty line closes the page.
*!* The parameter is already local to the function, so it is not
*!* repeated in the LOCAL list.
FUNCTION func2(tempstr)
    LOCAL daima, mingcheng, daima4
    DO WHILE .T.
        tempstr = STREXTRACT(tempstr, [villagetr'><td>])
        IF EMPTY(tempstr)
            EXIT
        ENDIF
        daima = STREXTRACT(tempstr, "", "<")
        tempstr = STREXTRACT(tempstr, "<td>")
        daima4 = STREXTRACT(tempstr, "", "<")
        tempstr = STREXTRACT(tempstr, "<td>")
        mingcheng = STREXTRACT(tempstr, "", "<")
        FPUTS(mn, daima + SPACE(1) + daima4 + SPACE(1) + mingcheng)
    ENDDO
    FPUTS(mn, "")
ENDFUNC
*!* End of parser for pages without links ****************************
*!* Builds the cursor TQHDM level by level: the rows of level CC that carry
*!* a URL are downloaded and their table rows are inserted as level CC+1.
*!* The loop stops when a level has no rows. The elapsed seconds are shown
*!* and the cursor is browsed at the end.
*!* NOTE(review): the base URL ("http://www.stats.") looks truncated -
*!* confirm the real address (here and in CWY) before running.
CLEAR
T1 = SECONDS()
CREATE CURSOR TQHDM (层次 N(1), 代码 C(12), 分类 C(3), 名称 C(40), 网址 C(150))
LOCAL DM, MC
DM = [4101.html]
MC = [郑州市]
URL = [http://www.stats.] + DM
CC = 2
INSERT INTO TQHDM (层次, 代码, 分类, 名称, 网址) VALUES (CC, LEFT(DM, 4), [], MC, URL)
DO WHILE .T.
    *!* Only the URL column is read from TEMP, so only it is selected.
    SELECT 网址 FROM TQHDM WHERE 层次 = CC INTO CURSOR TEMP
    IF EOF()
        EXIT
    ENDIF
    SCAN FOR NOT EMPTY(网址)
        *!* The offset (+1, -2, -5) decides how much of the URL tail is
        *!* passed on; the second argument is the row class of the level.
        CSTR = CWY(SUBSTR(ALLTRIM(网址), RAT([/], 网址) + ICASE(CC = 2, 1, CC = 3, -2, CC = 4, -5)), ;
            ICASE(CC = 2, 'countytr', CC = 3, 'towntr', CC = 4, 'villagetr'))
        NROW = ALINES(AA, CSTR, [</td></tr>])
        FOR I = 1 TO NROW
            =ALINES(BB, AA(I), [</td><td>])
            DM = RIGHT(BB(1), 12)
            *!* Rows inserted as level 5 have three cells; the others have
            *!* two, the second one holding "link>name".
            FL = IIF(CC + 1 = 5, BB(2), [])
            MC = IIF(CC + 1 = 5, BB(3), SUBSTR(BB(2), AT([>], BB(2)) + 1))
            URL = IIF([.html] $ AA(I), ;
                SUBSTR(ALLTRIM(网址), 1, RAT([/], 网址)) + SUBSTR(BB(2), 1, AT([>], BB(2)) - 1), [])
            INSERT INTO TQHDM (层次, 代码, 分类, 名称, 网址) VALUES (CC + 1, DM, FL, MC, URL)
        ENDFOR
    ENDSCAN
    CC = CC + 1
ENDDO
MESSAGEBOX(SECONDS() - T1)
SELECT TQHDM
BROWSE
*----------------------------------
*!* Downloads one page and returns its table body with the markup removed.
*!*   WY - URL tail appended to the base address.
*!*   BZ - row class name (e.g. 'countytr') whose opening tag is removed.
*!* Returns the text between [名称</td></tr>] and [</table></TD></TR>]
*!* without the row opening tags, [</a>], [<a href=] and single quotes.
*!* The parameters and work variables are local so they cannot overwrite
*!* variables of the caller.
*!* NOTE(review): STRCONV(..., 2) is applied to the raw response body -
*!* confirm that it yields the intended text for the page encoding.
FUNCTION CWY
    LPARAMETERS WY, BZ
    LOCAL LCURL, OHTML, CSTRA
    LCURL = [http://www.stats.] + WY
    OHTML = CREATEOBJECT("MICROSOFT.XMLHTTP")
    OHTML.OPEN([GET], LCURL, .F.)
    OHTML.SEND
    CSTRA = STREXTRACT(STRCONV(OHTML.RESPONSEBODY, 2), [名称</td></tr>], [</table></TD></TR>])
    *!* Plain concatenation replaces the former macro substitution of BZ.
    CSTRA = STRTRAN(CSTRA, [<tr class='] + BZ + ['><td>], [])
    CSTRA = STRTRAN(CSTRA, [</a>], [])
    CSTRA = STRTRAN(CSTRA, [<a href=], [])
    CSTRA = STRTRAN(CSTRA, ['], [])
    RETURN CSTRA
ENDFUNC
*!* Please help test this.
*!* Main module ******************************************************
*!* Crawls the pages starting from one URL, writes the result to aa.txt,
*!* reports the elapsed time in milliseconds and opens the file.
*!* "test" (the HTTP object) and "mn" (the output file handle) are created
*!* here and are used by the functions below.
*!* NOTE(review): the host part of the start URL ("www.stats.") looks
*!* truncated - confirm the real address before running.
starttime = SECONDS() * 1000
test = CREATEOBJECT("MICROSOFT.XMLHTTP")
mn = FCREATE("aa.txt")
IF mn < 0
    *!* FCREATE() returns -1 when the file cannot be created, so there
    *!* would be nothing to write to.
    MESSAGEBOX("Unable to create aa.txt", 16)
    RETURN
ENDIF
xiazai("http://www.stats./tjbz/cxfldm/2011/41/4101.html")
FCLOSE(mn)
endtime = SECONDS() * 1000
TIMES = endtime - starttime
IF TIMES < 0
    *!* SECONDS() counts from midnight, so a run that crosses midnight
    *!* gives a negative difference; add one day in milliseconds.
    TIMES = TIMES + 86400000
ENDIF
MESSAGEBOX("你所花费的时间为" + ALLTRIM(STR(TIMES, 20)) + "毫秒", 64, "测试")
MODIFY FILE aa.txt
*!* End of main module ***********************************************

*!* HTML download block **********************************************
*!* Downloads one page and dispatches it to the matching parser.
*!*   website - absolute URL of the page to download.
*!* A page without any [href='] goes to func2 (whole page source);
*!* otherwise the text after the first [href='] goes to func1.
FUNCTION xiazai(website)
    LOCAL pagehtml, tempstr
    test.OPEN("get", website, 0)
    test.SEND
    pagehtml = test.responsetext
    tempstr = STREXTRACT(pagehtml, [href='])
    IF EMPTY(tempstr)
        func2(pagehtml)
    ELSE
        func1(tempstr, website)
    ENDIF
ENDFUNC
*!* End of HTML download block ***************************************

*!* Parser for pages that contain links, i.e. 1) counties/districts/cities
*!* and 2) sub-district offices and townships ************************
*!*   tempstr - page text starting right after the first [href='].
*!*   website - URL of the page, used to resolve the relative links.
*!* Each linked row is written as "code name" followed by an empty line,
*!* and the linked page is then processed through xiazai.
*!* The parameters are already local to the function, so they are not
*!* repeated in the LOCAL list.
FUNCTION func1(tempstr, website)
    LOCAL websites, daima, mingcheng
    DO WHILE .T.
        *!* Relative link of the row, resolved against the current page URL.
        websites = STREXTRACT(tempstr, "", "'>")
        websites = LEFT(website, RATC('/', website)) + websites
        *!* Text of the first anchor (code), then of the second anchor (name).
        tempstr = STREXTRACT(tempstr, "'>")
        daima = STREXTRACT(tempstr, "", "<")
        tempstr = STREXTRACT(tempstr, "'>")
        mingcheng = STREXTRACT(tempstr, "", "<")
        *!* Advance to the next link before recursing; empty means last row.
        tempstr = STREXTRACT(tempstr, [href='])
        FPUTS(mn, daima + SPACE(1) + mingcheng)
        FPUTS(mn, "")
        xiazai(websites)
        IF EMPTY(tempstr)
            EXIT
        ENDIF
    ENDDO
ENDFUNC
*!* End of parser for pages that contain links ***********************

*!* Parser for pages without links, i.e. residents' committees and
*!* villages *********************************************************
*!*   tempstr - full source of the page.
*!* Each row marked with [villagetr'><td>] is written as three
*!* space-separated cell values, and one empty line closes the page.
*!* The parameter is already local to the function, so it is not
*!* repeated in the LOCAL list.
FUNCTION func2(tempstr)
    LOCAL daima, mingcheng, daima4
    DO WHILE .T.
        tempstr = STREXTRACT(tempstr, [villagetr'><td>])
        IF EMPTY(tempstr)
            EXIT
        ENDIF
        daima = STREXTRACT(tempstr, "", "<")
        tempstr = STREXTRACT(tempstr, "<td>")
        daima4 = STREXTRACT(tempstr, "", "<")
        tempstr = STREXTRACT(tempstr, "<td>")
        mingcheng = STREXTRACT(tempstr, "", "<")
        FPUTS(mn, daima + SPACE(1) + daima4 + SPACE(1) + mingcheng)
    ENDDO
    FPUTS(mn, "")
ENDFUNC
*!* End of parser for pages without links ****************************