这段代码是什么意思?
/**
 * Split a sentence of mixed single-byte (ASCII) and double-byte text into
 * words and pass each word to segWord().
 *
 * Bytes below 128 are treated as ASCII; a byte >= 128 followed by another
 * byte >= 128 is treated as one double-byte character (presumably a
 * GB2312/GBK style encoding -- TODO confirm against the callers).
 *
 * A word ends when:
 *   - a separator is met (ASCII whitespace, anything isAsciiSymbol() reports
 *     as TRUE, the full-width space A1 A1 when bSpace is TRUE, or a
 *     double-byte character whose lead byte is below 176), or
 *   - the text switches between ASCII and double-byte characters, or
 *   - the end of the text is reached.
 *
 * Words longer than MAX_SWORD_LEN bytes are truncated to the bytes that fit;
 * a double-byte character is never split by the truncation.
 *
 * @param strText  NUL-terminated input text; NULL is treated as empty.
 * @param bSpace   When TRUE, a space/tab (or a full-width space) that follows
 *                 a double-byte character ends the current word. When not
 *                 TRUE, such whitespace is skipped so that a Chinese word may
 *                 continue across it. CR/LF after a double-byte character is
 *                 always skipped.
 * @return Always 0.
 */
int segSentence (char *strText ,BOOL bSpace ) {
    size_t iTextLen;
    size_t i;
    int iWordLen = 0;                           /* bytes currently in strWord */
    BOOL bSep = FALSE;                          /* current char is a separator / ends a word */
    BOOL bChinese = FALSE;                      /* type of the last effective char:
                                                   FALSE = ASCII, TRUE = double-byte */
    BOOL bFound = FALSE;
    unsigned char strWord[MAX_SWORD_LEN + 1];   /* word being accumulated */
    unsigned char strChar[3];                   /* current double-byte character */
    unsigned char cChar;                        /* current byte */

    if (NULL == strText) {
        return 0;
    }

    iTextLen = strlen(strText);
    memset(strWord, 0, sizeof(strWord));
    strChar[0] = '\0';

    for (i = 0; i < iTextLen; i++) {
        cChar = (unsigned char)strText[i];

        if (128 > cChar) {
            /* ---- single-byte (ASCII) character ---- */
            if (' ' == cChar || '\t' == cChar || '\r' == cChar || '\n' == cChar) {
                if (TRUE == bChinese) {
                    /* Whitespace right after a double-byte character. */
                    if (TRUE == bSpace && (' ' == cChar || '\t' == cChar)) {
                        bSep = TRUE;
                    } else {
                        /* A Chinese word may be wrapped or spaced out:
                           skip the whitespace and keep accumulating. */
                        continue;
                    }
                } else {
                    bSep = TRUE;
                }
            } else {
                bSep = isAsciiSymbol(cChar);
            }

            /* A separator, or a switch from double-byte to ASCII, ends the
               pending word. */
            if ((TRUE == bSep || TRUE == bChinese) && 0 < iWordLen) {
                strWord[iWordLen] = '\0';
                segWord(strWord, iWordLen, bChinese);
                iWordLen = 0;
            }

            if (FALSE == bSep) {
                /* Letters of the same word are joined together. Bytes that do
                   not fit are dropped so strWord can never overflow. */
                if (iWordLen < MAX_SWORD_LEN) {
                    strWord[iWordLen++] = cChar;
                }
            }
            bChinese = FALSE;
        } else {
            /* ---- byte >= 128: lead byte of a double-byte character ---- */

            /* ASCII word directly followed by a double-byte character: the
               ASCII word ends here.
               NOTE(review): this is the only flush that consults
               isEnglishStop(); the other flush sites emit the word
               unconditionally. Kept as in the original. */
            if (FALSE == bChinese && FALSE == bSep && 0 < iWordLen) {
                strWord[iWordLen] = '\0';
                bFound = isEnglishStop(strWord);
                if (FALSE == bFound) {
                    segWord(strWord, iWordLen, bChinese);
                }
                iWordLen = 0;
            }

            if (i + 1 >= iTextLen) {
                /* Lone high byte at the very end of the text: nothing more
                   to read. */
                break;
            }
            if (128 > (unsigned char)strText[i + 1]) {
                /* Lone high byte followed by ASCII: ignore this byte. */
                continue;
            }

            strChar[0] = (unsigned char)strText[i];
            strChar[1] = (unsigned char)strText[i + 1];
            strChar[2] = '\0';
            bChinese = TRUE;
            i++;    /* consume the trail byte; the loop adds the other 1 */

            if (0xa1 == strChar[0] && 0xa1 == strChar[1]) {
                /* Full-width (Chinese) space, hex A1 A1. */
                if (TRUE == bSpace) {
                    bSep = TRUE;
                } else {
                    continue;
                }
            } else if (strChar[0] < 176) {
                /* Chinese punctuation and other non-Hanzi characters. */
                bSep = TRUE;
            } else {
                bSep = FALSE;
            }

            if (TRUE == bSep && 0 < iWordLen) {
                strWord[iWordLen] = '\0';
                segWord(strWord, iWordLen, bChinese);
                iWordLen = 0;
            }

            if (FALSE == bSep) {
                /* Append both bytes or neither, so the buffer never overflows
                   and a truncated word never ends on half a character. */
                if (iWordLen + 2 <= MAX_SWORD_LEN) {
                    strWord[iWordLen++] = strChar[0];
                    strWord[iWordLen++] = strChar[1];
                }
            }
        }
    }

    /* Emit whatever is still pending at the end of the text. */
    if (0 < iWordLen) {
        strWord[iWordLen] = '\0';
        segWord(strWord, iWordLen, bChinese);
        iWordLen = 0;
    }
    return 0;
}