264 lines
8.2 KiB
C
264 lines
8.2 KiB
C
/* Copyright (C) 1999-2003, 2005, 2011-2012, 2016, 2018, 2020 Free Software Foundation, Inc.
   This file is part of the GNU LIBICONV Library.

   The GNU LIBICONV Library is free software; you can redistribute it
   and/or modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either version 2.1
   of the License, or (at your option) any later version.

   The GNU LIBICONV Library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   If not, see <https://www.gnu.org/licenses/>. */
|
|
|
|
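/*
 * Generates a table of small strings, used for transliteration, from a table
 * containing lines of the form
 *   Unicode code point (hexadecimal) <tab> UTF-8 replacement <tab> # comment
 * Lines that start with '#' are skipped.  The input table is read from
 * standard input; the generated C code is written to standard output.
 */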
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdbool.h>
|
|
|
|
/* Dimensions of the working tables.  */
enum
{
  CODEPOINT_LIMIT = 0x110000,        /* valid code points are 0 .. 0x10FFFF */
  DATA_CAPACITY = 0x100000,          /* number of elements in the data array */
  LINE_COUNT = CODEPOINT_LIMIT / 8,  /* 0x22000 lines of 8 code points each */
  TABLE_CAPACITY = 0x2000,           /* maximum number of page tables */
  SUFFIX_SIZE = 4 + 1 + 2 + 1        /* "xxxx" "_" "nn" and the NUL */
};

/* A run of nearby lines.  A run with more than one entry is emitted as one
   translit_page array; a run with exactly one entry becomes a single
   comparison inside the translit_index macro.  */
struct page_table
{
  int minline;   /* first line of the run */
  int maxline;   /* last line of the run */
  int usecount;  /* number of code points in the run that have an entry */
  char *suffix;  /* suffix of the array name; NULL when usecount <= 1 */
};

/* Prints MESSAGE on stderr and terminates the program with status 1.  */
static void
fail (const char *message)
{
  fprintf (stderr, "%s\n", message);
  exit (1);
}

/* Like malloc, but terminates the program when no memory is available.  */
static void *
xmalloc (size_t size)
{
  void *p = malloc (size);
  if (p == NULL)
    fail ("out of memory");
  return p;
}

/* Consumes stdin up to and including the next newline, or up to EOF.  */
static void
skip_rest_of_line (void)
{
  int c;
  do
    c = getc (stdin);
  while (!(c == EOF || c == '\n'));
}

/* Writes the licence header and the title comment of the generated file.  */
static void
print_file_header (void)
{
  static const char *const header[] =
    {
      "/*\n",
      " * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n",
      " * This file is part of the GNU LIBICONV Library.\n",
      " *\n",
      " * The GNU LIBICONV Library is free software; you can redistribute it\n",
      " * and/or modify it under the terms of the GNU Lesser General Public\n",
      " * License as published by the Free Software Foundation; either version 2\n",
      " * of the License, or (at your option) any later version.\n",
      " *\n",
      " * The GNU LIBICONV Library is distributed in the hope that it will be\n",
      " * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n",
      " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n",
      " * Lesser General Public License for more details.\n",
      " *\n",
      " * You should have received a copy of the GNU Lesser General Public\n",
      " * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n",
      " * If not, see <https://www.gnu.org/licenses/>.\n",
      " */\n",
      "\n",
      "/*\n",
      " * Transliteration table\n",
      " */\n",
      "\n"
    };
  size_t k;

  for (k = 0; k < sizeof header / sizeof header[0]; k++)
    fputs (header[k], stdout);
}

/* Reads the transliteration table from stdin.
   DATA (DATA_CAPACITY elements) receives, for every code point that has a
   non-empty replacement, one record: the number of replacement characters
   followed by the replacement characters themselves.
   UNI2INDEX (CODEPOINT_LIMIT elements) receives, for every code point, the
   index of its record in DATA, or -1 if it has none.
   Returns the number of elements stored in DATA.
   Terminates the program with status 1 on malformed input.  */
static int
read_translit_table (unsigned int *data, int *uni2index)
{
  int index = 0;
  int j;

  for (j = 0; j < CODEPOINT_LIMIT; j++)
    uni2index[j] = -1;

  for (;;)
    {
      unsigned int code;
      int c;

      c = getc (stdin);
      if (c == EOF)
        break;
      if (c == '#')
        {
          /* Comment line.  */
          skip_rest_of_line ();
          continue;
        }
      ungetc (c, stdin);

      /* "%x" stores through an 'unsigned int *'.  */
      if (scanf ("%x", &code) != 1)
        fail ("invalid input: expected a hexadecimal code point");
      /* The code point is used as an index into uni2index[].  */
      if (code >= (unsigned int) CODEPOINT_LIMIT)
        fail ("invalid input: code point out of range");
      j = (int) code;
      /* A second record for the same code point would overwrite the length
         of the first one with a count spanning all records in between.  */
      if (uni2index[j] >= 0)
        fail ("invalid input: duplicate code point");

      if (getc (stdin) != '\t')
        fail ("invalid input: expected a tab after the code point");

      /* Read the replacement, up to the next tab.  */
      for (;;)
        {
          unsigned int wc;

          c = getc (stdin);
          if (c == EOF || c == '\n')
            fail ("invalid input: replacement is not terminated by a tab");
          if (c == '\t')
            break;
          if (uni2index[j] < 0)
            {
              /* First character: reserve the slot for the length.  */
              if (index >= DATA_CAPACITY)
                fail ("replacement data table is full");
              uni2index[j] = index;
              data[index++] = 0;
            }
          wc = (unsigned int) c;
          if (c >= 0x80)
            {
              /* Finish reading an UTF-8 character.  The bytes 0xFE and 0xFF
                 never start a sequence.  The value is accumulated in an
                 unsigned variable so that the shifts cannot overflow.  */
              unsigned int count;

              if (c < 0xc0 || c >= 0xfe)
                fail ("invalid input: malformed UTF-8 sequence");
              count = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4
                       : c < 0xfc ? 5 : 6);
              wc &= (1u << (8 - count)) - 1;
              while (--count > 0)
                {
                  int cc = getc (stdin);
                  if (!(cc >= 0x80 && cc < 0xc0))
                    fail ("invalid input: malformed UTF-8 sequence");
                  wc = (wc << 6) | ((unsigned int) cc & 0x3f);
                }
            }
          if (index >= DATA_CAPACITY)
            fail ("replacement data table is full");
          data[index++] = wc;
        }

      /* Now that the replacement is complete, fill in its length.  */
      if (uni2index[j] >= 0)
        data[uni2index[j]] = (unsigned int) (index - uni2index[j] - 1);

      /* Ignore the rest of the line (the comment column).  */
      skip_rest_of_line ();
    }

  return index;
}

/* Writes the translit_data array, consisting of the first COUNT elements of
   DATA.  Values below 32 start a new output line.  */
static void
print_data_table (const unsigned int *data, int count)
{
  int i;

  printf ("static const unsigned int translit_data[%d] = {", count);
  for (i = 0; i < count; i++)
    {
      unsigned int value = data[i];

      if (value < 32)
        printf ("\n %3d,", (int) value);
      else if (value == '\'')
        printf ("'\\'',");
      else if (value == '\\')
        printf ("'\\\\',");
      else if (value < 127)
        printf (" '%c',", (int) value);
      else if (value < 256)
        printf ("0x%02X,", value);
      else
        printf ("0x%04X,", value);
    }
  printf ("\n};\n");
}

/* Writes the translit_page arrays and the translit_index macro that map a
   code point to its index in translit_data, based on UNI2INDEX
   (CODEPOINT_LIMIT elements, -1 meaning "no entry").  */
static void
print_page_tables (const int *uni2index)
{
  /* These tables are several hundred kilobytes large; keep them off the
     stack.  */
  int *line = xmalloc (LINE_COUNT * sizeof *line);
  struct page_table *tables = xmalloc (TABLE_CAPACITY * sizeof *tables);
  int tableno = 0;
  bool any_page = false;
  int i, j, p, j1, j2, t;

  /* Mark each line of 8 code points as empty (-1) or in use (0).  */
  for (j1 = 0; j1 < LINE_COUNT; j1++)
    {
      bool all_invalid = true;
      for (j2 = 0; j2 < 8; j2++)
        if (uni2index[8*j1+j2] >= 0)
          all_invalid = false;
      line[j1] = (all_invalid ? -1 : 0);
    }

  /* Group the lines in use into tables and store in line[] the number of
     the table each line belongs to.  A line joins the most recent table
     when it directly follows a line of that table, or when it lies in the
     same block of 32 lines as that table's last line and at most 8 lines
     after it.  Otherwise it starts a new table.  */
  for (j1 = 0; j1 < LINE_COUNT; j1++)
    if (line[j1] >= 0)
      {
        if (tableno > 0
            && ((j1 > 0 && line[j1-1] == tableno-1)
                || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                    && j1 - tables[tableno-1].maxline <= 8)))
          {
            line[j1] = tableno-1;
            tables[tableno-1].maxline = j1;
          }
        else
          {
            if (tableno >= TABLE_CAPACITY)
              fail ("too many page tables");
            tableno++;
            line[j1] = tableno-1;
            tables[tableno-1].minline = tables[tableno-1].maxline = j1;
          }
      }

  /* Count the entries of each table.  */
  for (t = 0; t < tableno; t++)
    {
      int count = 0;
      for (j = 8*tables[t].minline; j < 8*(tables[t].maxline+1); j++)
        if (uni2index[j] >= 0)
          count++;
      tables[t].usecount = count;
    }

  /* Choose the array name suffix of each table with more than one entry.  */
  for (t = 0, p = -1, i = 0; t < tableno; t++)
    {
      char *s = NULL;

      if (tables[t].usecount > 1)
        {
          int len;

          s = xmalloc (SUFFIX_SIZE);
          if (p == tables[t].minline >> 5)
            {
              i++;
              /* i is the number of tables with the same (tables[t].minline >> 5)
                 that we have seen so far. Since the tables[t].minline values are
                 strongly monotonically increasing, there are at most 32 of them. */
              if (!(i >= 0 && i <= 32)) abort ();
              len = snprintf (s, SUFFIX_SIZE, "%02x_%d", (unsigned int) p, i);
            }
          else
            {
              p = tables[t].minline >> 5;
              i = 0;
              len = snprintf (s, SUFFIX_SIZE, "%02x", (unsigned int) p);
            }
          if (len < 0 || len >= SUFFIX_SIZE) abort ();
        }
      tables[t].suffix = s;
    }

  /* Emit one array per table with more than one entry.  */
  for (t = 0; t < tableno; t++)
    if (tables[t].usecount > 1)
      {
        any_page = true;
        printf ("static const short translit_page%s[%d] = {\n",
                tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
        for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++)
          {
            if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
              printf (" /* 0x%04x */\n", (unsigned int) (8*j1));
            printf (" ");
            for (j2 = 0; j2 < 8; j2++)
              printf (" %4d,", uni2index[8*j1+j2]);
            printf (" /* 0x%02x-0x%02x */\n",
                    (unsigned int) (8*(j1 % 0x20)),
                    (unsigned int) (8*(j1 % 0x20)+7));
          }
        printf ("};\n");
      }
  if (any_page)
    printf ("\n");

  /* Emit the macro: one alternative per table, in increasing order.  */
  printf ("#define translit_index(wc) \\\n (");
  for (j1 = 0; j1 < LINE_COUNT;)
    {
      t = line[j1];
      for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++)
        ;
      if (t >= 0)
        {
          /* A table may contain empty lines; jump to its real end.  */
          if (j1 != tables[t].minline) abort ();
          if (j2 > tables[t].maxline+1) abort ();
          j2 = tables[t].maxline+1;

          if (tables[t].usecount == 0) abort ();
          if (tables[t].usecount == 1)
            {
              /* A single entry: compare against its code point directly.  */
              if (j2 != j1+1) abort ();
              for (j = 8*j1; j < 8*j2; j++)
                if (uni2index[j] >= 0)
                  {
                    printf ("wc == 0x%04x ? %d", (unsigned int) j, uni2index[j]);
                    break;
                  }
            }
          else
            {
              if (j1 == 0)
                printf ("wc < 0x%04x", (unsigned int) (8*j2));
              else
                printf ("wc >= 0x%04x && wc < 0x%04x",
                        (unsigned int) (8*j1), (unsigned int) (8*j2));
              printf (" ? translit_page%s[wc", tables[t].suffix);
              if (tables[t].minline > 0)
                printf ("-0x%04x", (unsigned int) (8*j1));
              printf ("]");
            }
          printf (" : \\\n ");
        }
      j1 = j2;
    }
  printf ("-1)\n");

  for (t = 0; t < tableno; t++)
    free (tables[t].suffix);
  free (tables);
  free (line);
}

/* Reads the transliteration table from stdin and writes the generated C code
   to stdout.  Takes no command-line arguments.  Exits with status 0 on
   success and status 1 on a usage error, malformed input, lack of memory or
   an output error.  */
int main (int argc, char *argv[])
{
  unsigned int *data;
  int *uni2index;
  int index;

  (void) argv;
  if (argc != 1)
    exit (1);

  data = xmalloc (DATA_CAPACITY * sizeof *data);
  uni2index = xmalloc (CODEPOINT_LIMIT * sizeof *uni2index);

  print_file_header ();
  index = read_translit_table (data, uni2index);
  print_data_table (data, index);
  printf ("\n");
  print_page_tables (uni2index);

  free (uni2index);
  free (data);

  /* Report output errors, including those detected only when flushing.  */
  if (ferror (stdout) || fclose (stdout))
    exit (1);
  exit (0);
}
|