/* Copyright (C) 1999-2003, 2005, 2011-2012, 2016, 2018, 2020 Free Software Foundation, Inc.
   This file is part of the GNU LIBICONV Library.

   The GNU LIBICONV Library is free software; you can redistribute it
   and/or modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either version 2.1
   of the License, or (at your option) any later version.

   The GNU LIBICONV Library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   If not, see <https://www.gnu.org/licenses/>. */

/*
 * Generates a table of small strings, used for transliteration, from a table
 * containing lines of the form
 *   Unicode <tab> utf-8 replacement <tab> # comment
 * Lines whose first character is '#' are skipped entirely.
 * The input is read from stdin, the generated C source is written to stdout.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

enum {
  UNICODE_LIMIT  = 0x110000, /* one past the largest accepted code point */
  DATA_CAPACITY  = 0x100000, /* number of entries allocated for data[] */
  CODES_PER_LINE = 8,        /* code points grouped into one "line" */
  LINE_COUNT     = 0x22000,  /* UNICODE_LIMIT / CODES_PER_LINE */
  LINES_PER_PAGE = 0x20,     /* lines sharing the same (line >> 5) value */
  MAX_TABLES     = 0x2000,   /* capacity of the tables[] array */
  SUFFIX_SIZE    = 4+1+2+1   /* up to 4 hex digits, '_', 2 digits, NUL */
};

/* Allocates SIZE bytes; on failure reports "out of memory" and exits with
   status 1, so callers never see NULL. */
static void *xmalloc (size_t size)
{
  void *result = malloc(size);
  if (result == NULL) {
    fprintf(stderr, "out of memory\n");
    exit(1);
  }
  return result;
}

/* Stores VALUE at data[*index] and advances *index, exiting with status 1
   instead of writing past the DATA_CAPACITY entries that were allocated. */
static void append_entry (unsigned int *data, int *index, unsigned int value)
{
  if (*index >= DATA_CAPACITY) {
    fprintf(stderr, "transliteration data exceeds %d entries\n",
            (int) DATA_CAPACITY);
    exit(1);
  }
  data[(*index)++] = value;
}

/* Consumes stdin up to and including the next newline, or up to EOF. */
static void skip_rest_of_line (void)
{
  int c;
  do
    c = getc(stdin);
  while (!(c == EOF || c == '\n'));
}

/*
 * Entry point.  Takes no command-line arguments (exits with status 1 if any
 * are given).  Exits with status 1 on malformed input, on resource
 * exhaustion, or when writing stdout fails; exits with status 0 otherwise.
 */
int main (int argc, char *argv[])
{
  unsigned int *data;   /* length-prefixed replacement strings, back to back */
  int *uni2index;       /* code point -> offset of its entry in data[], or -1 */
  int index;            /* number of entries of data[] in use */

  (void) argv;
  if (argc != 1)
    exit(1);

  data = xmalloc(DATA_CAPACITY * sizeof(*data));
  uni2index = xmalloc(UNICODE_LIMIT * sizeof(*uni2index));

  printf("/*\n");
  printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
  printf(" * This file is part of the GNU LIBICONV Library.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
  printf(" * and/or modify it under the terms of the GNU Lesser General Public\n");
  printf(" * License as published by the Free Software Foundation; either version 2\n");
  printf(" * of the License, or (at your option) any later version.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
  printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
  printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
  printf(" * Lesser General Public License for more details.\n");
  printf(" *\n");
  printf(" * You should have received a copy of the GNU Lesser General Public\n");
  printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
  /* NOTE(review): the text after "see" looks truncated (a URL seems to be
     missing).  The string is kept exactly as found - confirm against the
     upstream source before changing the generated output. */
  printf(" * If not, see .\n");
  printf(" */\n");
  printf("\n");
  printf("/*\n");
  printf(" * Transliteration table\n");
  printf(" */\n");
  printf("\n");

  /* Phase 1: parse stdin into data[] and uni2index[]. */
  {
    int c;
    int j;

    for (j = 0; j < UNICODE_LIMIT; j++)
      uni2index[j] = -1;
    index = 0;
    for (;;) {
      unsigned int code;

      c = getc(stdin);
      if (c == EOF)
        break;
      if (c == '#') {
        skip_rest_of_line();
        continue;
      }
      ungetc(c, stdin);
      /* %x requires an unsigned int; the value indexes uni2index[] below,
         so it must be range-checked before use. */
      if (scanf("%x", &code) != 1)
        exit(1);
      if (code >= UNICODE_LIMIT)
        exit(1);
      j = (int) code;
      c = getc(stdin);
      if (c != '\t')
        exit(1);
      /* Read the replacement, one UTF-8 character per iteration, up to the
         terminating tab. */
      for (;;) {
        c = getc(stdin);
        if (c == EOF || c == '\n')
          exit(1);
        if (c == '\t')
          break;
        if (uni2index[j] < 0) {
          /* First character for this code point: reserve the slot that
             will receive the length once the replacement is complete. */
          uni2index[j] = index;
          append_entry(data, &index, 0);
        }
        if (c >= 0x80) {
          /* Finish reading an UTF-8 character. */
          unsigned int i;

          /* 0x80..0xBF are continuation bytes.  0xFE and 0xFF are not valid
             lead bytes; accepting them would shift bits past the sign bit
             of c in the loop below. */
          if (c < 0xc0 || c >= 0xfe)
            exit(1);
          i = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
          c &= (1 << (8-i)) - 1;
          while (--i > 0) {
            int cc = getc(stdin);
            if (!(cc >= 0x80 && cc < 0xc0))
              exit(1);
            c <<= 6;
            c |= (cc & 0x3f);
          }
        }
        append_entry(data, &index, (unsigned int) c);
      }
      /* Store the number of characters in the reserved length slot. */
      if (uni2index[j] >= 0)
        data[uni2index[j]] = (unsigned int) (index - uni2index[j] - 1);
      skip_rest_of_line();
    }
  }

  /* Phase 2: emit the flat data array. */
  printf("static const unsigned int translit_data[%d] = {",index);
  {
    int i;

    for (i = 0; i < index; i++) {
      unsigned int value = data[i];

      if (value < 32)
        printf("\n %3d,", (int) value);
      else if (value == '\'')
        printf("'\\'',");
      else if (value == '\\')
        printf("'\\\\',");
      else if (value < 127)
        printf(" '%c',", (int) value);
      else if (value < 256)
        printf("0x%02X,", value);
      else
        printf("0x%04X,", value);
    }
    printf("\n};\n");
  }
  printf("\n");

  /* Phase 3: group code points into page tables and emit the lookup. */
  {
    /* These arrays total roughly 750 KB, so they are kept out of the stack.
       line[j1] is the index of the table covering code points
       8*j1 .. 8*j1+7, or -1 if the line was not marked as used. */
    static int line[LINE_COUNT];
    static struct {
      int minline;          /* first line covered by the table */
      int maxline;          /* last line covered by the table */
      int usecount;         /* code points in the range that have an entry */
      const char *suffix;   /* name suffix, or NULL when usecount <= 1 */
    } tables[MAX_TABLES];
    int tableno;
    int i, j, p, j1, j2, t;

    /* Mark each line that contains at least one mapped code point. */
    for (j1 = 0; j1 < LINE_COUNT; j1++) {
      bool all_invalid = true;
      for (j2 = 0; j2 < CODES_PER_LINE; j2++) {
        j = CODES_PER_LINE*j1+j2;
        if (uni2index[j] >= 0)
          all_invalid = false;
      }
      line[j1] = (all_invalid ? -1 : 0);
    }

    /* Assign used lines to tables.  A line extends the latest table when it
       directly follows a line of that table, or when it lies in the same
       32-line page and at most 8 lines after the table's current end. */
    tableno = 0;
    for (j1 = 0; j1 < LINE_COUNT; j1++) {
      if (line[j1] < 0)
        continue;
      if (tableno > 0
          && ((j1 > 0 && line[j1-1] == tableno-1)
              || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                  && j1 - tables[tableno-1].maxline <= 8))) {
        line[j1] = tableno-1;
        tables[tableno-1].maxline = j1;
      } else {
        if (tableno >= MAX_TABLES) {
          fprintf(stderr, "more than %d page tables needed\n",
                  (int) MAX_TABLES);
          exit(1);
        }
        tableno++;
        line[j1] = tableno-1;
        tables[tableno-1].minline = tables[tableno-1].maxline = j1;
      }
    }

    /* Count the mapped code points covered by each table. */
    for (t = 0; t < tableno; t++) {
      tables[t].usecount = 0;
      j1 = CODES_PER_LINE*tables[t].minline;
      j2 = CODES_PER_LINE*(tables[t].maxline+1);
      for (j = j1; j < j2; j++)
        if (uni2index[j] >= 0)
          tables[t].usecount++;
    }

    /* Choose a name suffix for every table that gets emitted as an array
       (tables with a single entry are handled by a comparison instead). */
    for (t = 0, p = -1, i = 0; t < tableno; t++) {
      char *s;

      if (tables[t].usecount <= 1) {
        tables[t].suffix = NULL;
        continue;
      }
      s = xmalloc(SUFFIX_SIZE);
      if (p == tables[t].minline >> 5) {
        i++;
        /* i is the number of tables with the same (tables[t].minline >> 5)
           that we have seen so far. Since the tables[t].minline values are
           strongly monotonically increasing, there are at most 32 of
           them. */
        if (!(i >= 0 && i <= 32))
          abort();
        snprintf(s, SUFFIX_SIZE, "%02x_%d", (unsigned int) p, i);
      } else {
        p = tables[t].minline >> 5;
        i = 0;
        snprintf(s, SUFFIX_SIZE, "%02x", (unsigned int) p);
      }
      tables[t].suffix = s;
    }

    /* Emit one array per table with more than one entry.
       NOTE(review): the arrays are declared `short` while the values are
       offsets into translit_data; nothing here checks that they fit. */
    {
      bool any_page = false;

      for (t = 0; t < tableno; t++) {
        if (tables[t].usecount <= 1)
          continue;
        any_page = true;
        printf("static const short translit_page%s[%d] = {\n",
               tables[t].suffix,
               CODES_PER_LINE*(tables[t].maxline-tables[t].minline+1));
        for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
          if ((j1 % LINES_PER_PAGE) == 0 && j1 > tables[t].minline)
            printf(" /* 0x%04x */\n", (unsigned int) (CODES_PER_LINE*j1));
          printf(" ");
          for (j2 = 0; j2 < CODES_PER_LINE; j2++) {
            j = CODES_PER_LINE*j1+j2;
            printf(" %4d,", uni2index[j]);
          }
          printf(" /* 0x%02x-0x%02x */\n",
                 (unsigned int) (CODES_PER_LINE*(j1 % LINES_PER_PAGE)),
                 (unsigned int) (CODES_PER_LINE*(j1 % LINES_PER_PAGE)+7));
        }
        printf("};\n");
      }
      if (any_page)
        printf("\n");
    }

    /* Emit the translit_index macro: a chain of range tests, one per
       table, ending in -1 for code points without an entry. */
    printf("#define translit_index(wc) \\\n (");
    for (j1 = 0; j1 < LINE_COUNT; j1 = j2) {
      t = line[j1];
      for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++)
        ;
      if (t < 0)
        continue;   /* run of unused lines: nothing to emit */

      /* A table may contain unused gap lines, so jump to its real end. */
      if (j1 != tables[t].minline)
        abort();
      if (j2 > tables[t].maxline+1)
        abort();
      j2 = tables[t].maxline+1;

      if (tables[t].usecount == 0)
        abort();
      if (tables[t].usecount == 1) {
        /* Single entry: compare against the one mapped code point. */
        if (j2 != j1+1)
          abort();
        for (j = CODES_PER_LINE*j1; j < CODES_PER_LINE*j2; j++)
          if (uni2index[j] >= 0) {
            printf("wc == 0x%04x ? %d", (unsigned int) j, uni2index[j]);
            break;
          }
      } else {
        if (j1 == 0) {
          printf("wc < 0x%04x", (unsigned int) (CODES_PER_LINE*j2));
        } else {
          printf("wc >= 0x%04x && wc < 0x%04x",
                 (unsigned int) (CODES_PER_LINE*j1),
                 (unsigned int) (CODES_PER_LINE*j2));
        }
        printf(" ? translit_page%s[wc", tables[t].suffix);
        if (tables[t].minline > 0)
          printf("-0x%04x", (unsigned int) (CODES_PER_LINE*j1));
        printf("]");
      }
      printf(" : \\\n ");
    }
    printf("-1)\n");
  }

  if (ferror(stdout) || fclose(stdout))
    exit(1);
  exit(0);
}