这段代码是什么意思？ - C语言论坛

问题点数：0 回复次数：0

这段代码是什么意思？

/*
 * Split a byte string into words and pass each word to segWord().
 *
 * Bytes below 128 are treated as single-byte (ASCII) characters; a pair of
 * consecutive bytes >= 128 is treated as one double-byte character.  The
 * constants used below (0xA1 0xA1 as the full-width space, lead byte < 176
 * as punctuation) look like GB2312/GBK -- NOTE(review): confirm the encoding
 * against the callers.
 *
 * A pending word is a run of consecutive ASCII characters or a run of
 * consecutive double-byte characters; it is emitted whenever a separator is
 * met or the character class changes.
 *
 * strText  NUL-terminated input text.  NULL is treated as empty input.
 * bSpace   When TRUE, a space/tab (or a full-width space) that follows a
 *          double-byte character ends the current word.  Otherwise such
 *          whitespace is skipped, so a double-byte word may continue across
 *          spaces and line breaks.
 *
 * Words longer than MAX_SWORD_LEN bytes are truncated: bytes that do not fit
 * are dropped, and a double-byte character is never split in half.
 *
 * Always returns 0.
 */
int segSentence (char *strText ,BOOL bSpace )
{
    size_t          iTextLen;
    size_t          i;
    int             iWordLen = 0;       /* bytes currently stored in strWord */
    BOOL            bSep     = FALSE;   /* current character is a separator / ends a word */
    BOOL            bChinese = FALSE;   /* class of the previous significant character:
                                           FALSE = single-byte, TRUE = double-byte */
    BOOL            bFound   = FALSE;
    unsigned char   strWord[MAX_SWORD_LEN + 1];    /* word being accumulated */
    unsigned char   cChar;
    unsigned char   cLead;
    unsigned char   cTrail;

    if (NULL == strText)
        return 0;

    iTextLen = strlen(strText);
    memset(strWord, 0, sizeof(strWord));

    for (i = 0; i < iTextLen; i++) {
        cChar = (unsigned char)strText[i];

        if (cChar < 128) {
            /* ---- single-byte character ---- */
            if (' ' == cChar || '\t' == cChar || '\r' == cChar || '\n' == cChar) {
                if (TRUE == bChinese) {
                    /* Whitespace after a double-byte character only separates
                       when the caller asked for it and it is a space or tab. */
                    if (TRUE == bSpace && (' ' == cChar || '\t' == cChar))
                        bSep = TRUE;
                    else
                        continue;
                } else {
                    bSep = TRUE;
                }
            } else {
                bSep = isAsciiSymbol(cChar);
            }

            /* Emit the pending word on a separator, or when switching from
               double-byte to single-byte text. */
            if ((TRUE == bSep || TRUE == bChinese) && 0 < iWordLen) {
                strWord[iWordLen] = '\0';
                segWord(strWord, iWordLen, bChinese);
                iWordLen = 0;
            }

            /* Append only while there is room; the last slot of strWord is
               reserved for the terminating NUL. */
            if (FALSE == bSep && iWordLen < MAX_SWORD_LEN) {
                strWord[iWordLen] = cChar;
                iWordLen++;
            }
            bChinese = FALSE;
        } else {
            /* ---- byte >= 128: expected to start a double-byte character ---- */

            /* A pending single-byte word ends here; it is emitted unless
               isEnglishStop() reports it. */
            if (FALSE == bChinese && FALSE == bSep && 0 < iWordLen) {
                strWord[iWordLen] = '\0';
                bFound = isEnglishStop(strWord);
                if (FALSE == bFound) {
                    segWord(strWord, iWordLen, bChinese);
                }
                iWordLen = 0;
            }

            /* A lone high byte at the very end of the text is ignored. */
            if (i + 1 >= iTextLen)
                break;

            /* A lone high byte followed by a single-byte character is skipped. */
            if ((unsigned char)strText[i + 1] < 128)
                continue;

            cLead    = cChar;
            cTrail   = (unsigned char)strText[i + 1];
            bChinese = TRUE;
            i++;                        /* consume the trail byte as well */

            if (0xa1 == cLead && 0xa1 == cTrail) {
                /* Full-width space (A1 A1). */
                if (TRUE == bSpace)
                    bSep = TRUE;
                else
                    continue;
            } else if (cLead < 176) {
                /* Lead bytes below 0xB0: punctuation and other non-ideographs. */
                bSep = TRUE;
            } else {
                bSep = FALSE;
            }

            if (TRUE == bSep && 0 < iWordLen) {
                strWord[iWordLen] = '\0';
                segWord(strWord, iWordLen, bChinese);
                iWordLen = 0;
            }

            /* Store both bytes or neither, so a truncated word never ends
               with half a character. */
            if (FALSE == bSep && iWordLen + 2 <= MAX_SWORD_LEN) {
                strWord[iWordLen++] = cLead;
                strWord[iWordLen++] = cTrail;
            }
        }
    }

    /* Emit whatever is still pending at end of input. */
    if (0 < iWordLen) {
        strWord[iWordLen] = '\0';
        segWord(strWord, iWordLen, bChinese);
        iWordLen = 0;
    }
    return 0;
}

搜索更多相关主题的帖子: 英文　 Chinese　false