| 网站首页 | 业界新闻 | 小组 | 威客 | 人才 | 下载频道 | 博客 | 代码贴 | 在线编程 | 编程论坛
欢迎加入我们,一同切磋技术
用户名:   
 
密 码:  
共有 625 人关注过本帖
标题:还是字符编码问题
只看楼主 加入收藏
C_Y_ME
Rank: 1
等 级:新手上路
帖 子:10
专家分:0
注 册:2013-5-22
结帖率:66.67%
收藏
已结贴  问题点数:20 回复次数:4 
还是字符编码问题
程序代码:
#include<stdio.h>
#include<string.h>
#include<malloc.h>
#define uint8_t  unsigned __int8
#define uint16_t unsigned __int16
#define uint32_t unsigned __int32
#define uint64_t unsigned __int64 
#define int8_t  __int8
#define int16_t __int16
#define int32_t __int32

/*
 * Encode a buffer of 16-bit Unicode code units as UTF-8.
 *
 * in      - code units to encode (each unit is encoded on its own;
 *           surrogate pairs are NOT combined into 4-byte sequences)
 * insize  - size of the input buffer in BYTES, not in code units
 * out     - receives a malloc'd, NUL-terminated UTF-8 buffer that the
 *           caller must free()
 *
 * Returns 0 on success. Returns -1 on invalid arguments or allocation
 * failure; in that case *out is set to NULL (when out itself is valid).
 */
int unicode_to_utf8(uint16_t *in, int insize, uint8_t **out)
{
    int i;
    int charscount;
    size_t capacity;
    uint8_t *result;
    uint8_t *tmp;

    if (out == NULL)
        return -1;
    *out = NULL;
    if (insize < 0)
        return -1;

    charscount = insize / (int)sizeof(uint16_t);
    if (in == NULL && charscount > 0)
        return -1;

    /* Worst case: every code unit needs 3 bytes, plus the terminator. */
    capacity = (size_t)charscount * 3 + 1;
    result = (uint8_t *)malloc(capacity);
    if (result == NULL)
        return -1;
    memset(result, 0, capacity);

    tmp = result;
    for (i = 0; i < charscount; i++)
    {
        uint16_t unicode = in[i];

        if (unicode <= 0x007f)
        {
            /* 0xxxxxxx */
            *tmp++ = (uint8_t)unicode;
        }
        else if (unicode <= 0x07ff)
        {
            /* 110xxxxx 10xxxxxx */
            *tmp++ = (uint8_t)(0xc0 | (unicode >> 6));
            *tmp++ = (uint8_t)(0x80 | (unicode & 0x3f));
        }
        else
        {
            /* 1110xxxx 10xxxxxx 10xxxxxx
             * The middle byte must keep only 6 payload bits; masking
             * with 0xff leaks bits 12-13 into the continuation marker. */
            *tmp++ = (uint8_t)(0xe0 | (unicode >> 12));
            *tmp++ = (uint8_t)(0x80 | ((unicode >> 6) & 0x3f));
            *tmp++ = (uint8_t)(0x80 | (unicode & 0x3f));
        }
    }
    *tmp = '\0';
    *out = result;
    return 0;
}

/*
 * Decode a NUL-terminated UTF-8 string into 16-bit Unicode code units.
 *
 * in       - NUL-terminated UTF-8 input
 * out      - receives a malloc'd array of code units, terminated by a
 *            0x0000 unit; the caller must free() it
 * outsize  - receives the size of the result in BYTES, including the
 *            two-byte terminator
 *
 * Only 1-, 2- and 3-byte sequences (U+0000..U+FFFF) are decoded. Any
 * other byte (4-byte lead bytes, stray continuation bytes, or a lead
 * byte whose continuation bytes are missing) is skipped.
 *
 * Code units are stored as native uint16_t values, so on a little-endian
 * host the in-memory byte order is low byte first.
 *
 * Returns 0 on success, -1 on invalid arguments or allocation failure
 * (in which case *out is NULL and *outsize is 0 when those pointers are
 * valid).
 */
int utf8_to_unicode(uint8_t *in, uint16_t **out, int *outsize)
{
    uint8_t *p = in;
    uint16_t *result;
    uint16_t *dst;
    size_t inlen;
    size_t capacity;

    if (out == NULL || outsize == NULL)
        return -1;
    *out = NULL;
    *outsize = 0;
    if (in == NULL)
        return -1;

    /* Each input byte yields at most one code unit; +1 for terminator. */
    inlen = strlen((const char *)in);
    capacity = (inlen + 1) * sizeof(uint16_t);
    result = (uint16_t *)malloc(capacity);
    if (result == NULL)
        return -1;
    memset(result, 0, capacity);

    dst = result;
    while (*p)
    {
        if (*p <= 0x7f)
        {
            /* 0xxxxxxx */
            *dst++ = *p;
            p += 1;
        }
        else if ((*p & 0xe0) == 0xc0)
        {
            /* 110xxxxx 10xxxxxx; a NUL fails the continuation test, so
             * a truncated sequence never reads past the terminator. */
            if ((p[1] & 0xc0) != 0x80)
            {
                p += 1;
                continue;
            }
            *dst++ = (uint16_t)(((p[0] & 0x1f) << 6) | (p[1] & 0x3f));
            p += 2;
        }
        else if ((*p & 0xf0) == 0xe0)
        {
            /* 1110xxxx 10xxxxxx 10xxxxxx; p[2] is only examined after
             * p[1] has been confirmed to be a (non-NUL) continuation. */
            if ((p[1] & 0xc0) != 0x80 || (p[2] & 0xc0) != 0x80)
            {
                p += 1;
                continue;
            }
            *dst++ = (uint16_t)(((p[0] & 0x0f) << 12)
                              | ((p[1] & 0x3f) << 6)
                              |  (p[2] & 0x3f));
            p += 3;
        }
        else
        {
            /* Unsupported or stray byte: skip it. */
            p += 1;
        }
    }

    *dst = 0;
    *out = result;
    *outsize = (int)((size_t)(dst - result + 1) * sizeof(uint16_t));
    return 0;
}
/*
 * Print every byte of a NUL-terminated buffer as two uppercase hex
 * digits (no separators), followed by a newline.
 */
void dump_utf8(uint8_t *utf8)
{
    uint8_t *cursor;

    for (cursor = utf8; *cursor != 0; ++cursor)
        printf("%02X", *cursor);

    putchar('\n');
}
/*
 * Print the first `size` bytes of a 16-bit code-unit buffer, in memory
 * order, as two uppercase hex digits each, followed by a newline.
 * Note that `size` counts bytes, not code units.
 */
void dump_unicode(uint16_t *utf16, int size)
{
    uint8_t *bytes = (uint8_t *)utf16;
    int idx = 0;

    while (idx < size)
    {
        printf("%02X", bytes[idx]);
        idx++;
    }
    putchar('\n');
}

/*void main()
{
    FILE *fp;
    int i;
    char str[100];
    fp=fopen("zhang.txt","wt");
    printf("请输入字符:\n");
    gets(str);
    for(i=0;str[i]&&i<100;i++)    //    for(i=0;i<100;i++) 有bug
        fputc(str[i],fp);
    fclose(fp);
/*    fp=fopen("zhang.txt","r");
    fgets(str,100,fp);
    printf("%s\n",str);
    fclose(fp);*/
/*    printf("%s\n",str);
    printf("%d\n",str);
    printf("%x\n",str);
    printf("%o\n",str);
    fclose(fp);
}
*/
/*
 * Round-trip demo: dump a sample UTF-16 buffer, convert it to UTF-8,
 * convert that back to 16-bit code units, and dump each stage as hex.
 */
int main()
{
    /* The initializer in the original paste was an empty character
     * constant (''), which does not compile. These explicit code units
     * (U+4E2D, U+6587, 'A') are sample data covering the 3-byte and
     * 1-byte UTF-8 forms; replace them with the text you want to test.
     * No terminator is stored because the size is passed explicitly. */
    uint16_t unicode[] = { 0x4E2D, 0x6587, 0x0041 };
    uint8_t *utf8 = NULL;
    int unisize = 0;
    uint16_t *uni = NULL;

    printf("original unicode: \n");
    dump_unicode(unicode, sizeof(unicode));

    printf("converted to utf8: \n");
    if (unicode_to_utf8(unicode, sizeof(unicode), &utf8) != 0 || utf8 == NULL)
    {
        return 1;
    }
    dump_utf8(utf8);

    printf("converted to unicode: \n");
    if (utf8_to_unicode(utf8, &uni, &unisize) != 0 || uni == NULL)
    {
        free(utf8);
        return 1;
    }
    dump_unicode(uni, unisize);

    free(utf8);
    free(uni);
    return 0;
}




帮忙看看,指点一下
2013-07-21 09:34
C_Y_ME
Rank: 1
等 级:新手上路
帖 子:10
专家分:0
注 册:2013-5-22
收藏
得分:0 
程序代码:
#include<stdio.h>
#include<string.h>
#include<malloc.h>

#define uint8_t  unsigned __int8
#define uint16_t unsigned __int16
#define uint32_t unsigned __int32
#define uint64_t unsigned __int64  
#define int8_t  __int8
#define int16_t __int16
#define int32_t __int32 

/*
 * Encode a buffer of 16-bit Unicode code units as UTF-8.
 *
 * in      - code units to encode (each unit is encoded on its own;
 *           surrogate pairs are NOT combined into 4-byte sequences)
 * insize  - size of the input buffer in BYTES, not in code units
 * out     - receives a malloc'd, NUL-terminated UTF-8 buffer that the
 *           caller must free()
 *
 * Returns 0 on success. Returns -1 on invalid arguments or allocation
 * failure; in that case *out is set to NULL (when out itself is valid).
 */
int unicode_to_utf8(uint16_t *in, int insize, uint8_t **out)
{
    int i;
    int charscount;
    size_t capacity;
    uint8_t *result;
    uint8_t *tmp;

    if (out == NULL)
        return -1;
    *out = NULL;
    if (insize < 0)
        return -1;

    charscount = insize / (int)sizeof(uint16_t);
    if (in == NULL && charscount > 0)
        return -1;

    /* Worst case: every code unit needs 3 bytes, plus the terminator. */
    capacity = (size_t)charscount * 3 + 1;
    result = (uint8_t *)malloc(capacity);
    if (result == NULL)
        return -1;
    memset(result, 0, capacity);

    tmp = result;
    for (i = 0; i < charscount; i++)
    {
        uint16_t unicode = in[i];

        if (unicode <= 0x007f)
        {
            /* 0xxxxxxx */
            *tmp++ = (uint8_t)unicode;
        }
        else if (unicode <= 0x07ff)
        {
            /* 110xxxxx 10xxxxxx */
            *tmp++ = (uint8_t)(0xc0 | (unicode >> 6));
            *tmp++ = (uint8_t)(0x80 | (unicode & 0x3f));
        }
        else
        {
            /* 1110xxxx 10xxxxxx 10xxxxxx
             * The middle byte must keep only 6 payload bits; masking
             * with 0xff leaks bits 12-13 into the continuation marker. */
            *tmp++ = (uint8_t)(0xe0 | (unicode >> 12));
            *tmp++ = (uint8_t)(0x80 | ((unicode >> 6) & 0x3f));
            *tmp++ = (uint8_t)(0x80 | (unicode & 0x3f));
        }
    }
    *tmp = '\0';
    *out = result;
    return 0;
}

/*
 * Write a NUL-terminated byte buffer to stdout as a run of two-digit
 * uppercase hex values, then end the line.
 */
void dump_utf8(uint8_t *utf8)
{
    int pos = 0;

    for (;;)
    {
        uint8_t byte = utf8[pos];

        if (byte == 0)
            break;
        printf("%02X", byte);
        pos++;
    }
    putchar('\n');
}
/*
 * Hex-dump `size` bytes of a 16-bit code-unit buffer to stdout in the
 * order they sit in memory, two uppercase hex digits per byte, then end
 * the line. `size` is a byte count, not a code-unit count.
 */
void dump_unicode(uint16_t *utf16, int size)
{
    uint8_t *byte = (uint8_t *)utf16;
    int remaining;

    for (remaining = size; remaining > 0; remaining--)
        printf("%02X", *byte++);

    putchar('\n');
}

/*
 * Demo: dump a sample UTF-16 buffer as hex, convert it to UTF-8, and
 * dump the UTF-8 bytes as hex.
 */
int main()
{
    /* The initializer in the original paste was {''}, an empty character
     * constant, which does not compile. These explicit code units
     * (U+4E2D, U+6587, 'A') are sample data; replace them with the text
     * you want to test. No terminator is stored because the size is
     * passed explicitly. */
    uint16_t unicode[] = { 0x4E2D, 0x6587, 0x0041 };
    uint8_t *utf8 = NULL;

    printf("转换为 unicode: \n");
    dump_unicode(unicode, sizeof(unicode));

    printf("转换为 to utf8: \n");
    if (unicode_to_utf8(unicode, sizeof(unicode), &utf8) != 0 || utf8 == NULL)
    {
        return 1;
    }
    dump_utf8(utf8);

    free(utf8);
    return 0;
}




求大神指点
2013-07-21 09:44
liu0919
Rank: 2
等 级:论坛游民
帖 子:124
专家分:35
注 册:2013-5-13
收藏
得分:7 
方法
2013-07-21 09:45
love云彩
Rank: 19Rank: 19Rank: 19Rank: 19Rank: 19Rank: 19
来 自:青藏高原
等 级:贵宾
威 望:53
帖 子:3663
专家分:11416
注 册:2012-11-17
收藏
得分:7 
包含头文件<stdlib.h>

思考赐予新生,时间在于定义
2013-07-21 10:02
C_Y_ME
Rank: 1
等 级:新手上路
帖 子:10
专家分:0
注 册:2013-5-22
收藏
得分:0 
回复 3楼 liu0919
什么意思
2013-07-21 10:45
快速回复:还是字符编码问题
数据加载中...
 
   



关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 0.021363 second(s), 7 queries.
Copyright©2004-2025, BCCN.NET, All Rights Reserved