还是汉字编码问题,已经差不了,但是还有小问题。
有几个问题:①用UE打开把一个汉字用十六进制打开,比如:我D2 CE,这个算什么编码?是unicode吗?
②用什么方式验证一个汉字的utf8是否正确?没有字库。
③看看这个程序吧,自己感觉已经对了,但是有人说有问题,没说什么问题,请看看。
谢谢了!是这两天刚刚接触这些编码,有什么贻笑大方的话请见谅。
程序代码:
#include<stdio.h> #include<string.h> #include<malloc.h> #define uint8_t unsigned _int8 #define uint16_t unsigned _int16 void printutf8(uint8_t *utf8) { uint8_t *p = utf8; while(*p) { printf("%02X", *p); p++; } printf("\n"); } void printuni(uint16_t *utf16, int size) { uint8_t *p = (uint8_t *)utf16; int i = 0; for (i = 0; i < size; i++) { printf("%02X", *p); p++; } printf("\n"); } void unicode_to_utf8(uint16_t *in, int insize, uint8_t **out) { int i; int charscount; int outsize=0; uint8_t *result; uint8_t *tmp; charscount = insize / sizeof(uint16_t); result = (uint8_t *)malloc(charscount * 3 + 1); memset(result, 0, charscount * 3 + 1); //将result中后charscount*3+1个字节,用0替换并返回result tmp = result; for (i = 0; i < charscount; i++) { uint16_t unicode = in[i]; if (unicode <= 0x0000007f) *tmp = (uint8_t)unicode; else if (unicode >= 0x00000080 && unicode <= 0x000007ff) { *tmp = 0x000000c0 | (unicode >> 6); tmp += 1; *tmp = 0x00000080 | (unicode & (0x000000ff >> 2)); tmp += 1; } else if (unicode >= 0x00000800 && unicode <= 0x0000ffff) { *tmp = 0x000000e0 | (unicode >> 12); tmp += 1; *tmp = 0x00000080 | (unicode >> 6 & 0x000000ff); tmp += 1; *tmp = 0x00000080 | (unicode & (0x000000ff >> 2)); tmp += 1; } } *out = result; } void main() { uint16_t unicode[] = {'中'}; uint8_t *utf8 = NULL; uint16_t *uni = NULL; printf("字符“我”转换为 unicode: \n"); printuni(unicode, sizeof(unicode)); printf("再将unicode转换为 utf8: \n"); unicode_to_utf8(unicode, sizeof(unicode), &utf8); printutf8(utf8); }