初为人父,诸事纷繁。好久没来了,不知道楼主的问题解决得怎么样了。
这里是按照包含优先、先占优先的原则制定的去重方案,代码如下。并附上你给的数据的去重结果,看看是否满意,或是否存在漏洞。
使用代码时注意修改输入输出的文件名(在宏里修改)。
这里是按照包含优先、先占优先的原则制定的去重方案,代码如下。并附上你给的数据的去重结果,看看是否满意,或是否存在漏洞。
使用代码时注意修改输入输出的文件名(在宏里修改)。
程序代码:
#include<stdio.h> #include<stdlib.h> #include<malloc.h> #include<string.h> #define INPUT_FILE_PATH "in.txt" #define OUTPUT_FILE_PATH "out.txt" #define LINE_BUFFER_SIZE 0x1000000 #define CAPACITY_GROWTH 0x1000000 #define SPLIT " ,\t\r\n" typedef struct { int start; int end; int line; int index; }NODE; int get_data(NODE ** dataptr, FILE * file_in) { NODE * p, * tp; int capacity, len; int i, j, t; char *str, * sp; str = (char *)malloc(LINE_BUFFER_SIZE); p = (NODE *)malloc(sizeof(NODE) * CAPACITY_GROWTH); capacity = CAPACITY_GROWTH; len = 0; for(i = 0; fgets(str, LINE_BUFFER_SIZE, file_in); i++) { for(j = 0, sp = strtok(str, SPLIT); sp; j++) { if(len == capacity) { if((tp = realloc(p, capacity += CAPACITY_GROWTH * sizeof(NODE))) == NULL) { puts("Memory low"); free(p); free(str); return -1; } p = tp; } p[len].start = strtol(sp, NULL, 0); if((sp = strtok(NULL, SPLIT)) == NULL) { printf("Data miss at %d line.\n", i + 1); printf("index %d\n", j + 1); free(p); free(str); return -1; } p[len].end = strtol(sp, NULL, 0); if(p[len].start > p[len].end) { t = p[len].start; p[len].start = p[len].end; p[len].end = t; } p[len].line = i; p[len].index = j; len++; sp = strtok(NULL, SPLIT); } } *dataptr = p; free(str); return len; } int cmp_segment(const void * a, const void * b) { int d; if(d = ((NODE *)a)->start - ((NODE *)b)->start) return d; if(d = ((NODE *)a)->end - ((NODE *)b)->end) return d; if(d = ((NODE *)a)->line - ((NODE *)b)->line) return d; return ((NODE *)a)->index - ((NODE *)b)->index; } int cmp_position(const void * a, const void * b) { int d; if(d = ((NODE *)a)->line - ((NODE *)b)->line) return d; return ((NODE *)a)->index - ((NODE *)b)->index; } void work(NODE * data, int len) { int start, end, i, j; qsort(data, len, sizeof(NODE), cmp_segment); for(j = 0, i = 1; i < len; i++) for(;;) { while(j >= 0 && data[j].start == 0) j--; if(j < 0) { j = i; break; } if(data[i].end <= data[j].end) { data[i].start = data[i].end = 0; break; } if(data[i].start <= data[j].start) { 
data[j].start = data[j].end = 0; continue; } if(data[i].start <= data[j].end) { data[i].start = data[j].end + 1; j = i; break; } j = i; break; } qsort(data, len, sizeof(NODE), cmp_position); } int main() { FILE * fin, * fout; NODE * data; int len, i; if((fin = fopen(INPUT_FILE_PATH, "r")) == NULL) { printf("cannot open file.\n"); return 0; } len = get_data(&data, fin); fclose(fin); if(len < 0) return 0; work(data, len); if((fout = fopen(OUTPUT_FILE_PATH, "w")) == NULL) { printf("cannot create file.\n"); free(data); return 0; } fprintf(fout, "%d,%d", data[0].start, data[0].end); for(i = 1; i < len; i++) { if(data[i].line != data[i - 1].line) { fprintf(fout, "\n%d,%d", data[i].start, data[i].end); } else { fprintf(fout, ",%d,%d", data[i].start, data[i].end); } } fclose(fout); free(data); return 0; }贴结果的前几行出来,完整数据在附件里
66999065,66999928,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,66999929,67208755,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,67208756,67213982
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
8377885,8384365,0,0,0,0,0,0,8384366,8403806,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8403807,8404227
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
16767166,16770126,0,0,0,0,0,0,0,0,0,0,0,0,16786272,16786573
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,16770127,16786271,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
25071847,25124232,0,0,0,0,0,0,0,0,25170620,25170815
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,25124233,25170619,0,0,0,0,0,0
33546704,33546991,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,33546992,33585644,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,33585645,33586131
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,67208756,67213982
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
8377885,8384365,0,0,0,0,0,0,8384366,8403806,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8403807,8404227
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
16767166,16770126,0,0,0,0,0,0,0,0,0,0,0,0,16786272,16786573
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,16770127,16786271,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
25071847,25124232,0,0,0,0,0,0,0,0,25170620,25170815
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,25124233,25170619,0,0,0,0,0,0
33546704,33546991,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,33546992,33585644,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,33585645,33586131
out.rar
(103.17 KB)
重剑无锋,大巧不工