| 网站首页 | 业界新闻 | 小组 | 威客 | 人才 | 下载频道 | 博客 | 代码贴 | 在线编程 | 编程论坛
欢迎加入我们,一同切磋技术
用户名:   
 
密 码:  
共有 491 人关注过本帖
标题:这段代码是什么意思?
只看楼主 加入收藏
xuzhichao
Rank: 1
等 级:新手上路
帖 子:1
专家分:0
注 册:2011-10-25
收藏
 问题点数:0 回复次数:0 
这段代码是什么意思?
/*
 * Split a NUL-terminated text buffer into words and pass each word to
 * segWord().
 *
 * The text is scanned byte by byte:
 *   - A byte below 128 is handled as a single-byte (ASCII) character.
 *     Whitespace and anything isAsciiSymbol() reports as a separator end
 *     the current word; other bytes are appended to it.
 *   - A byte >= 128 followed by another byte >= 128 is handled as one
 *     two-byte character (presumably GB2312/GBK -- TODO confirm against the
 *     callers). The pair 0xA1 0xA1 is the full-width space, and any lead
 *     byte below 176 is treated as a separator (punctuation and other
 *     non-ideograph characters).
 *   - A lone byte >= 128 (followed by a byte < 128, or at end of text) is
 *     skipped.
 *
 * A switch between single-byte and two-byte characters also ends the
 * current word. An ASCII word that ends because a two-byte character
 * follows is dropped when isEnglishStop() reports it as a stop word.
 *
 * Words longer than MAX_SWORD_LEN bytes are truncated: once the word buffer
 * is full, further characters of that word are discarded instead of being
 * written past the end of the buffer. A two-byte character is never split,
 * so it is discarded as a whole when fewer than two bytes of room remain.
 *
 * Parameters:
 *   strText  text to segment; NULL is accepted and treated as empty.
 *   bSpace   when TRUE, a space/tab (or a full-width space) that follows a
 *            two-byte character ends the current word; when FALSE such
 *            whitespace is skipped so a word may continue across it.
 *
 * Returns: always 0.
 */
int segSentence (char *strText ,BOOL bSpace )
{
    int             iTextLen = 0;
    int             iWordLen = 0;       /* bytes currently stored in strWord */
    int             i        = 0;
    int             iNexti   = 0;
    BOOL            bSep     = FALSE;   /* current character is a separator / ends a word */
    BOOL            bChinese = FALSE;   /* type of the previous significant character:
                                           FALSE = single-byte, TRUE = two-byte */
    BOOL            bFound   = FALSE;
    unsigned char   strWord[MAX_SWORD_LEN + 1];     /* word being collected */
    unsigned char   strChar[3];                     /* current two-byte character */
    unsigned char   cChar;                          /* current byte */

    if ( NULL == strText )
        return 0;

    iTextLen = (int)strlen(strText);
    memset(strWord, 0, sizeof(strWord));
    strChar[0] = '\0';

    for ( i = 0 ; i < iTextLen ; i ++ ) {
        cChar = (unsigned char)strText[i];
        if ( 128 > cChar ) {                /* single-byte character */
            if ( ' ' == cChar || '\t' == cChar || '\r' == cChar || '\n' == cChar ) {
                if ( TRUE == bChinese ) {   /* previous significant character was two-byte */
                    if ( TRUE == bSpace && (' ' == cChar || '\t' == cChar) )
                        bSep = TRUE ;
                    else
                        continue ;          /* a two-byte word may span line breaks or spaces */
                } else {
                    bSep = TRUE ;
                }
            } else {
                bSep = isAsciiSymbol(cChar);    /* is this byte a separator? */
            }

            /* A separator, or a switch from two-byte to single-byte, ends the word. */
            if ( (TRUE == bSep || TRUE == bChinese) && 0 < iWordLen ) {
                strWord[iWordLen] = '\0';
                segWord(strWord, iWordLen, bChinese);
                iWordLen = 0 ;
            }
            if ( FALSE == bSep ) {
                /* Append only while there is room; this keeps
                   iWordLen <= MAX_SWORD_LEN so the terminator always fits. */
                if ( iWordLen < MAX_SWORD_LEN ) {
                    strWord[iWordLen] = cChar;
                    iWordLen ++ ;
                }
            }
            bChinese = FALSE ;
        } else {                            /* byte >= 128 */
            /* Single-byte word followed by a high byte: the word ends here. */
            if ( FALSE == bChinese && FALSE == bSep && 0 < iWordLen ) {
                strWord[iWordLen] = '\0';
                bFound = isEnglishStop(strWord);
                if ( FALSE == bFound ) {
                    segWord(strWord, iWordLen, bChinese);
                }
                iWordLen = 0 ;
            }

            iNexti = i + 1 ;
            if ( iNexti < iTextLen ) {
                if ( 128 > ((unsigned char)strText[iNexti]) ) {
                    continue ;              /* lone high byte: ignore it */
                }
            } else {
                break ;                     /* text ends with a lone high byte */
            }

            strChar[0] = (unsigned char)strText[i];
            strChar[1] = (unsigned char)strText[iNexti];
            strChar[2] = '\0';
            bChinese   = TRUE ;
            i ++ ;                          /* consume the trail byte as well */

            if ( strChar[0] == 0xa1 && strChar[1] == 0xa1 ) {   /* full-width space */
                if ( TRUE == bSpace )
                    bSep = TRUE ;
                else
                    continue ;              /* skip it; the word may continue */
            } else if ( strChar[0] < 176 ) {    /* punctuation / non-ideograph */
                bSep = TRUE ;
            } else {
                bSep = FALSE ;
            }

            if ( TRUE == bSep && 0 < iWordLen ) {
                strWord[iWordLen] = '\0';
                segWord(strWord, iWordLen, bChinese);
                iWordLen = 0 ;
            }
            if ( FALSE == bSep ) {
                /* Store the character only if both bytes fit, so a two-byte
                   character is never cut in half by truncation. */
                if ( iWordLen + 2 <= MAX_SWORD_LEN ) {
                    strWord[iWordLen++] = strChar[0];
                    strWord[iWordLen++] = strChar[1];
                }
            }
        }
    }

    if ( 0 < iWordLen ) {                   /* flush the trailing word */
        strWord[iWordLen] = '\0';
        segWord(strWord, iWordLen, bChinese);
        iWordLen = 0 ;
    }
    return 0;
}
搜索更多相关主题的帖子: 英文 Chinese false 
2011-10-25 11:52
快速回复:这段代码是什么意思?
数据加载中...
 
   



关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 0.022178 second(s), 9 queries.
Copyright©2004-2025, BCCN.NET, All Rights Reserved