我的思路是先找到一个URL,然后找它的子URL,再找子URL下面的子URL,这样逐级往下抓取。
程序代码:
* Walks a three-level tree of HTML pages and dumps the table rows it finds
* into the text file aa.txt (tab-separated fields, CRLF line endings).
*
*   Level 1: rows marked <tr class='countytr'> on the root page.
*   Level 2: rows marked <tr class='towntr'> on each page linked from level 1.
*   Level 3: rows marked <tr class='villagetr'> on each page linked from level 2.
*
* Every output line starts with a dotted position key ("3", "3.2", "3.2.7")
* built from the 1-based row index at each level, followed by the cell texts.
* Rows without a link are written from their <td> cells and not descended into.
*
* NOTE(review): the root URL literal looks truncated in this copy of the
* script; it is kept exactly as found - confirm the intended address.
Clear

* Field separator, line terminator and output file used by every write below.
lcTab = Chr(9)
lcEol = Chr(13) + Chr(10)
lcOutFile = "aa.txt"

lcRootUrl = [http://www.stats.]
loHttp = Createobject("MICROSOFT.XMLHTTP")
loHttp.open("get", lcRootUrl, .F.)    && third argument .F. = synchronous request
loHttp.send()
lcRootPage = loHttp.responseBody

* Start from an empty output file; all later writes append to it.
Strtofile("", lcOutFile)

* Relative links are resolved against everything up to the last "/" of the
* URL of the page they were found on.
lcRootDir = Left(lcRootUrl, Rat('/', lcRootUrl))

* Each row is passed through Strconv(..., 2) before use.
* NOTE(review): setting 2 is documented as "double-byte to single-byte";
* confirm this is the conversion the page encoding actually needs.
lnCounty = 1
lcCountyRow = Strconv(Strextract(lcRootPage, [<tr class='countytr'>], [</tr>], lnCounty), 2)
Do While !Empty(lcCountyRow)
    lcCountyKey = Transform(lnCounty)

    If 'a href' $ lcCountyRow
        * Linked row: the two cell texts sit between "'>" and "</".
        lcLine = lcCountyKey + lcTab + Strextract(lcCountyRow, "'>", "</", 1) + lcTab + Strextract(lcCountyRow, "'>", "</", 2) + lcEol
        Strtofile(lcLine, lcOutFile, .T.)

        * The first single-quoted value in the row is used as the child link.
        lcTownUrl = lcRootDir + Strextract(lcCountyRow, "'", "'")
        loHttp.open("get", lcTownUrl, .F.)
        loHttp.send()
        lcTownPage = loHttp.responseBody
        lcTownDir = Left(lcTownUrl, Rat('/', lcTownUrl))

        lnTown = 1
        lcTownRow = Strconv(Strextract(lcTownPage, [<tr class='towntr'>], [</tr>], lnTown), 2)
        Do While !Empty(lcTownRow)
            lcTownKey = lcCountyKey + "." + Transform(lnTown)

            If 'a href' $ lcTownRow
                lcLine = lcTownKey + lcTab + Strextract(lcTownRow, "'>", "</", 1) + lcTab + Strextract(lcTownRow, "'>", "</", 2) + lcEol
                Strtofile(lcLine, lcOutFile, .T.)

                lcVillageUrl = lcTownDir + Strextract(lcTownRow, "'", "'")
                loHttp.open("get", lcVillageUrl, .F.)
                loHttp.send()
                lcVillagePage = loHttp.responseBody

                * Deepest level: rows are written from three plain <td> cells.
                lnVillage = 1
                lcVillageRow = Strconv(Strextract(lcVillagePage, [<tr class='villagetr'>], [</tr>], lnVillage), 2)
                Do While !Empty(lcVillageRow)
                    lcLine = lcTownKey + "." + Transform(lnVillage) + lcTab + Strextract(lcVillageRow, "<td>", "</td>", 1) + lcTab + Strextract(lcVillageRow, "<td>", "</td>", 2) + lcTab + Strextract(lcVillageRow, "<td>", "</td>", 3) + lcEol
                    Strtofile(lcLine, lcOutFile, .T.)

                    lnVillage = lnVillage + 1
                    lcVillageRow = Strconv(Strextract(lcVillagePage, [<tr class='villagetr'>], [</tr>], lnVillage), 2)
                Enddo
            Else
                * Row without a link: take the two plain <td> cells, no descent.
                lcLine = lcTownKey + lcTab + Strextract(lcTownRow, "<td>", "</td>", 1) + lcTab + Strextract(lcTownRow, "<td>", "</td>", 2) + lcEol
                Strtofile(lcLine, lcOutFile, .T.)
            Endif

            lnTown = lnTown + 1
            lcTownRow = Strconv(Strextract(lcTownPage, [<tr class='towntr'>], [</tr>], lnTown), 2)
        Enddo
    Else
        * Row without a link: take the two plain <td> cells, no descent.
        lcLine = lcCountyKey + lcTab + Strextract(lcCountyRow, "<td>", "</td>", 1) + lcTab + Strextract(lcCountyRow, "<td>", "</td>", 2) + lcEol
        Strtofile(lcLine, lcOutFile, .T.)
    Endif

    * An empty extraction (no further matching row) ends the loop.
    lnCounty = lnCounty + 1
    lcCountyRow = Strconv(Strextract(lcRootPage, [<tr class='countytr'>], [</tr>], lnCounty), 2)
Enddo