190 lines
5.5 KiB
C
190 lines
5.5 KiB
C
|
/* Copyright (C) 2000-2002, 2004-2005 Free Software Foundation, Inc.
|
||
|
This file is part of the GNU LIBICONV Library.
|
||
|
|
||
|
The GNU LIBICONV Library is free software; you can redistribute it
|
||
|
and/or modify it under the terms of the GNU Lesser General Public
|
||
|
License as published by the Free Software Foundation; either version 2.1
|
||
|
of the License, or (at your option) any later version.
|
||
|
|
||
|
The GNU LIBICONV Library is distributed in the hope that it will be
|
||
|
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
Lesser General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Lesser General Public
|
||
|
License along with the GNU LIBICONV Library; see the file COPYING.LIB.
|
||
|
If not, see <https://www.gnu.org/licenses/>. */
|
||
|
|
||
|
/* Create a table from CHARSET to Unicode. */
|
||
|
|
||
|
#include "config.h"
|
||
|
|
||
|
#include <stddef.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#include <iconv.h>
|
||
|
#include <errno.h>
|
||
|
|
||
|
#include "binary-io.h"
|
||
|
|
||
|
/* If nonzero, ignore conversions outside Unicode plane 0. */
|
||
|
/* Set once by main() from the charset name; read by ucs4_decode(), which
   returns NULL for any value that needs more than 4 hex digits. */
static int bmp_only;
|
||
|
|
||
|
/* Formats the first BUFLEN bytes of BUF as "0x" followed by two uppercase
   hexadecimal digits per byte, e.g. { 0xA1, 0x40 } -> "0xA140".
   BUFLEN must be in the range 1..4; any other value aborts the program.
   Returns a pointer to a static buffer that the next call overwrites. */
static const char* hexbuf (unsigned char buf[], unsigned int buflen)
{
  static char msg[50];
  char* cursor = msg;
  unsigned int idx;

  if (buflen < 1 || buflen > 4)
    abort();

  cursor += sprintf(cursor,"0x");
  for (idx = 0; idx < buflen; idx++)
    cursor += sprintf(cursor,"%02X",buf[idx]);

  return msg;
}
|
||
|
|
||
|
static int try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned int* out)
|
||
|
{
|
||
|
const char* inbuf = (const char*) buf;
|
||
|
size_t inbytesleft = buflen;
|
||
|
char* outbuf = (char*) out;
|
||
|
size_t outbytesleft = 3*sizeof(unsigned int);
|
||
|
size_t result;
|
||
|
iconv(cd,NULL,NULL,NULL,NULL);
|
||
|
result = iconv(cd,(ICONV_CONST char**)&inbuf,&inbytesleft,&outbuf,&outbytesleft);
|
||
|
if (result != (size_t)(-1))
|
||
|
result = iconv(cd,NULL,NULL,&outbuf,&outbytesleft);
|
||
|
if (result == (size_t)(-1)) {
|
||
|
if (errno == EILSEQ) {
|
||
|
return -1;
|
||
|
} else if (errno == EINVAL) {
|
||
|
return 0;
|
||
|
} else {
|
||
|
int saved_errno = errno;
|
||
|
fprintf(stderr,"%s: iconv error: ",hexbuf(buf,buflen));
|
||
|
errno = saved_errno;
|
||
|
perror("");
|
||
|
exit(1);
|
||
|
}
|
||
|
} else if (result > 0) /* ignore conversions with transliteration */ {
|
||
|
return -1;
|
||
|
} else {
|
||
|
if (inbytesleft != 0) {
|
||
|
fprintf(stderr,"%s: inbytes = %ld, outbytes = %ld\n",hexbuf(buf,buflen),(long)(buflen-inbytesleft),(long)(3*sizeof(unsigned int)-outbytesleft));
|
||
|
exit(1);
|
||
|
}
|
||
|
return (3*sizeof(unsigned int)-outbytesleft)/sizeof(unsigned int);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */
|
||
|
static const char* ucs4_decode (const unsigned int* out, unsigned int outlen)
|
||
|
{
|
||
|
static char hexbuf[21];
|
||
|
char* p = hexbuf;
|
||
|
while (outlen > 0) {
|
||
|
if (p > hexbuf)
|
||
|
*p++ = ' ';
|
||
|
sprintf (p, "0x%04X", out[0]);
|
||
|
out += 1; outlen -= 1;
|
||
|
if (bmp_only && strlen(p) > 6)
|
||
|
return NULL;
|
||
|
p += strlen(p);
|
||
|
}
|
||
|
return hexbuf;
|
||
|
}
|
||
|
|
||
|
int main (int argc, char* argv[])
|
||
|
{
|
||
|
const char* charset;
|
||
|
iconv_t cd;
|
||
|
int search_depth;
|
||
|
|
||
|
if (argc != 2) {
|
||
|
fprintf(stderr,"Usage: table-from charset\n");
|
||
|
exit(1);
|
||
|
}
|
||
|
charset = argv[1];
|
||
|
|
||
|
#if O_BINARY
|
||
|
SET_BINARY(fileno(stdout));
|
||
|
#endif
|
||
|
|
||
|
cd = iconv_open("UCS-4-INTERNAL",charset);
|
||
|
if (cd == (iconv_t)(-1)) {
|
||
|
perror("iconv_open");
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
/* When testing UTF-8, stop at 0x10000, otherwise the output file gets too
|
||
|
big. */
|
||
|
bmp_only = (strcmp(charset,"UTF-8") == 0);
|
||
|
search_depth = (strcmp(charset,"UTF-8") == 0 ? 3 : 4);
|
||
|
|
||
|
{
|
||
|
unsigned int out[3];
|
||
|
unsigned char buf[4];
|
||
|
unsigned int i0, i1, i2, i3;
|
||
|
int result;
|
||
|
for (i0 = 0; i0 < 0x100; i0++) {
|
||
|
buf[0] = i0;
|
||
|
result = try(cd,buf,1,out);
|
||
|
if (result < 0) {
|
||
|
} else if (result > 0) {
|
||
|
const char* unicode = ucs4_decode(out,result);
|
||
|
if (unicode != NULL)
|
||
|
printf("0x%02X\t%s\n",i0,unicode);
|
||
|
} else {
|
||
|
for (i1 = 0; i1 < 0x100; i1++) {
|
||
|
buf[1] = i1;
|
||
|
result = try(cd,buf,2,out);
|
||
|
if (result < 0) {
|
||
|
} else if (result > 0) {
|
||
|
const char* unicode = ucs4_decode(out,result);
|
||
|
if (unicode != NULL)
|
||
|
printf("0x%02X%02X\t%s\n",i0,i1,unicode);
|
||
|
} else {
|
||
|
for (i2 = 0; i2 < 0x100; i2++) {
|
||
|
buf[2] = i2;
|
||
|
result = try(cd,buf,3,out);
|
||
|
if (result < 0) {
|
||
|
} else if (result > 0) {
|
||
|
const char* unicode = ucs4_decode(out,result);
|
||
|
if (unicode != NULL)
|
||
|
printf("0x%02X%02X%02X\t%s\n",i0,i1,i2,unicode);
|
||
|
} else if (search_depth > 3) {
|
||
|
for (i3 = 0; i3 < 0x100; i3++) {
|
||
|
buf[3] = i3;
|
||
|
result = try(cd,buf,4,out);
|
||
|
if (result < 0) {
|
||
|
} else if (result > 0) {
|
||
|
const char* unicode = ucs4_decode(out,result);
|
||
|
if (unicode != NULL)
|
||
|
printf("0x%02X%02X%02X%02X\t%s\n",i0,i1,i2,i3,unicode);
|
||
|
} else {
|
||
|
fprintf(stderr,"%s: incomplete byte sequence\n",hexbuf(buf,4));
|
||
|
exit(1);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (iconv_close(cd) < 0) {
|
||
|
perror("iconv_close");
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
if (ferror(stdin) || ferror(stdout) || fclose(stdout)) {
|
||
|
fprintf(stderr,"I/O error\n");
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
exit(0);
|
||
|
}
|