估计是从 OCR 取得的数据。彻底的解决办法是做“相似数据库”,通过不断完善这个库的数据来反复过滤——反正是机器做,做多少次都不累人。
授人以渔,不授人以鱼。
* Demo: list groups of records whose name AND province both look alike.
* Builds a throw-away cursor of sample rows, then compares every record
* with each record that follows it and prints the matches on screen.
* A record can be printed more than once, because every record acts as
* the reference for its own pass over the rows that follow it.
CLEAR

CREATE CURSOR TMP (姓名 C(10), 省份 C(10))
INSERT INTO TMP (姓名, 省份) VALUES ("张小三", "北京")
INSERT INTO TMP (姓名, 省份) VALUES ("张小四", "北京市")
INSERT INTO TMP (姓名, 省份) VALUES ("王小二", "天津")
INSERT INTO TMP (姓名, 省份) VALUES ("王小四", "河北")
INSERT INTO TMP (姓名, 省份) VALUES ("张三", "河北省")
INSERT INTO TMP (姓名, 省份) VALUES ("王小三", "天津市")
INSERT INTO TMP (姓名, 省份) VALUES ("赵四", "上海")
INSERT INTO TMP (姓名, 省份) VALUES ("陈小小", "云南")
INSERT INTO TMP (姓名, 省份) VALUES ("陈一二", "广西")
INSERT INTO TMP (姓名, 省份) VALUES ("陈小二", "云南省")
INSERT INTO TMP (姓名, 省份) VALUES ("李五", "中国 北京")
INSERT INTO TMP (姓名, 省份) VALUES ("张小五", "北京市")

* DIFFERENCE() scores two strings from 0 to 4, where 4 is the closest match.
* NOTE(review): DIFFERENCE() is SOUNDEX-based - confirm that it really
* discriminates Chinese (double-byte) text the way this demo expects.
nBestScore = 4

* Make the work area explicit instead of relying on CREATE CURSOR having
* left TMP selected.
SELECT TMP

FOR nRef = 1 TO RECCOUNT()
    * Capture the reference record's values before the pointer moves on.
    GO nRef
    cRefName = 姓名
    cRefProvince = 省份
    lHeaderShown = .F.

    * Only the records after the reference one are compared with it.
    SKIP
    SCAN REST
        IF DIFFERENCE(cRefName, 姓名) = nBestScore ;
                AND DIFFERENCE(cRefProvince, 省份) = nBestScore
            * Print the reference record once, after a blank separator
            * line, the first time a match turns up for it.
            IF !lHeaderShown
                ?
                ? cRefName, cRefProvince
                lHeaderShown = .T.
            ENDIF
            ? 姓名, 省份
        ENDIF
    ENDSCAN
ENDFOR
* Demo: list groups of records whose name AND province both look alike,
* marking each matched record so it is not listed as a match again.
* Builds a throw-away cursor of sample rows, then compares every record
* with the not-yet-marked records that follow it and prints the matches.
CLEAR

* 标志 is the "already listed as a match" marker and starts out .F. everywhere.
CREATE CURSOR TMP (姓名 C(10), 省份 C(10), 标志 L)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("张小三", "北京", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("张小四", "北京市", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("王小二", "天津", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("王小四", "河北", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("张三", "河北省", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("王小三", "天津市", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("赵四", "上海", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("陈小小", "云南", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("陈一二", "广西", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("陈小二", "云南省", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("李五", "中国 北京", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("张小五", "北京市", .F.)

* DIFFERENCE() scores two strings from 0 to 4, where 4 is the closest match.
* NOTE(review): DIFFERENCE() is SOUNDEX-based - confirm that it really
* discriminates Chinese (double-byte) text the way this demo expects.
nBestScore = 4

* Make the work area explicit instead of relying on CREATE CURSOR having
* left TMP selected.
SELECT TMP

FOR nRef = 1 TO RECCOUNT()
    * Capture the reference record's values before the pointer moves on.
    * The reference record itself is not checked against 标志, so a record
    * that was already listed can still start a pass of its own.
    GO nRef
    cRefName = 姓名
    cRefProvince = 省份
    lHeaderShown = .F.

    * Only later records that have not been marked yet are candidates.
    SKIP
    SCAN REST FOR !标志
        IF DIFFERENCE(cRefName, 姓名) = nBestScore ;
                AND DIFFERENCE(cRefProvince, 省份) = nBestScore
            * Print the reference record once, after a blank separator
            * line, the first time a match turns up for it.
            IF !lHeaderShown
                ?
                ? cRefName, cRefProvince
                lHeaderShown = .T.
            ENDIF
            * Mark the match so later passes leave it out.
            REPLACE 标志 WITH .T.
            ? 姓名, 省份
        ENDIF
    ENDSCAN
ENDFOR