/*
 * Copyright (C) 1999-2008, 2011, 2018, 2020 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, see <https://www.gnu.org/licenses/>.
 */

/* Part 1 of iconv_open.
   Input: const char* tocode, const char* fromcode.
   Output:
     unsigned int from_index;
     int from_wchar;
     unsigned int to_index;
     int to_wchar;
     int transliterate;
     int discard_ilseq;
   Jumps to 'invalid' in case of error.
 */
{
  char name[MAX_WORD_LENGTH+10+1];
  const char* src;
  char* end;
  const struct alias * entry;
  unsigned int room;

  transliterate = 0;
  discard_ilseq = 0;

  /* The alias tables are queried with an upper-cased copy of the name.
     While copying we also verify that the name is pure ASCII (gperf is
     called with option "-7" to achieve a smaller table) and that it fits
     into name[].  A name that is not entirely ASCII, or that is too long,
     is not a valid encoding name.  An empty name is replaced by the
     locale's charset.  */

  /* ---------- Resolve tocode. ---------- */
  to_wchar = 0;
  for (;;) {
    /* Upper-case tocode into name[]; on exit 'end' points at the NUL. */
    src = tocode;
    end = name;
    room = MAX_WORD_LENGTH+10+1;
    for (;;) {
      unsigned char ch = (unsigned char) *src;
      if (ch >= 0x80)
        goto invalid;
      if (ch >= 'a' && ch <= 'z')
        ch = (unsigned char) (ch - ('a'-'A'));
      *end = ch;
      if (ch == '\0')
        break;
      room--;
      if (room == 0)
        goto invalid;
      src++;
      end++;
    }
    /* Peel off any number of trailing "//TRANSLIT" and "//IGNORE"
       suffixes, in any order, recording each one in its flag. */
    for (;;) {
      if (end-name >= 10 && memcmp(end-10,"//TRANSLIT",10) == 0) {
        end -= 10;
        transliterate = 1;
      } else if (end-name >= 8 && memcmp(end-8,"//IGNORE",8) == 0) {
        end -= 8;
        discard_ilseq = 1;
      } else
        break;
      *end = '\0';
    }
    /* Look the name up, unless nothing is left of it. */
    entry = NULL;
    if (name[0] != '\0') {
      entry = aliases_lookup(name,end-name);
      if (entry == NULL) {
        entry = aliases2_lookup(name);
        if (entry == NULL)
          goto invalid;
      }
    }
    /* An empty name and the "locale char" pseudo-encoding both mean:
       start over with the charset of the current locale. */
    if (entry == NULL || entry->encoding_index == ei_local_char) {
      tocode = locale_charset();
      /* Avoid an endless loop that could occur when using an older version
         of localcharset.c. */
      if (tocode[0] == '\0')
        goto invalid;
      continue;
    }
    /* Any encoding other than the wchar_t pseudo-encoding is final. */
    if (entry->encoding_index != ei_local_wchar_t) {
      to_index = entry->encoding_index;
      break;
    }
    /* wchar_t requested.
       On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
       This is also the case on native Woe32 systems and Cygwin >= 1.7,
       where we know that it is UTF-16. */
#if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
    if (sizeof(wchar_t) == 4) {
      to_index = ei_ucs4internal;
      break;
    }
    if (sizeof(wchar_t) == 2) {
# if WORDS_LITTLEENDIAN
      to_index = ei_utf16le;
# else
      to_index = ei_utf16be;
# endif
      break;
    }
#elif __STDC_ISO_10646__
    if (sizeof(wchar_t) == 4) {
      to_index = ei_ucs4internal;
      break;
    }
    if (sizeof(wchar_t) == 2) {
      to_index = ei_ucs2internal;
      break;
    }
    if (sizeof(wchar_t) == 1) {
      to_index = ei_iso8859_1;
      break;
    }
#endif
#if HAVE_MBRTOWC
    /* No direct mapping was chosen above: flag the wchar_t case and
       resolve the locale's charset as the target instead. */
    to_wchar = 1;
    tocode = locale_charset();
    continue;
#endif
    goto invalid;
  }

  /* ---------- Resolve fromcode. ---------- */
  from_wchar = 0;
  for (;;) {
    /* Upper-case fromcode into name[]; on exit 'end' points at the NUL. */
    src = fromcode;
    end = name;
    room = MAX_WORD_LENGTH+10+1;
    for (;;) {
      unsigned char ch = (unsigned char) *src;
      if (ch >= 0x80)
        goto invalid;
      if (ch >= 'a' && ch <= 'z')
        ch = (unsigned char) (ch - ('a'-'A'));
      *end = ch;
      if (ch == '\0')
        break;
      room--;
      if (room == 0)
        goto invalid;
      src++;
      end++;
    }
    /* Trailing "//TRANSLIT" and "//IGNORE" suffixes are accepted on the
       source name too, but here they are only removed; no flag is set. */
    for (;;) {
      if (end-name >= 10 && memcmp(end-10,"//TRANSLIT",10) == 0)
        end -= 10;
      else if (end-name >= 8 && memcmp(end-8,"//IGNORE",8) == 0)
        end -= 8;
      else
        break;
      *end = '\0';
    }
    /* Look the name up, unless nothing is left of it. */
    entry = NULL;
    if (name[0] != '\0') {
      entry = aliases_lookup(name,end-name);
      if (entry == NULL) {
        entry = aliases2_lookup(name);
        if (entry == NULL)
          goto invalid;
      }
    }
    /* An empty name and the "locale char" pseudo-encoding both mean:
       start over with the charset of the current locale. */
    if (entry == NULL || entry->encoding_index == ei_local_char) {
      fromcode = locale_charset();
      /* Avoid an endless loop that could occur when using an older version
         of localcharset.c. */
      if (fromcode[0] == '\0')
        goto invalid;
      continue;
    }
    /* Any encoding other than the wchar_t pseudo-encoding is final. */
    if (entry->encoding_index != ei_local_wchar_t) {
      from_index = entry->encoding_index;
      break;
    }
    /* wchar_t requested.
       On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
       This is also the case on native Woe32 systems and Cygwin >= 1.7,
       where we know that it is UTF-16. */
#if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
    if (sizeof(wchar_t) == 4) {
      from_index = ei_ucs4internal;
      break;
    }
    if (sizeof(wchar_t) == 2) {
# if WORDS_LITTLEENDIAN
      from_index = ei_utf16le;
# else
      from_index = ei_utf16be;
# endif
      break;
    }
#elif __STDC_ISO_10646__
    if (sizeof(wchar_t) == 4) {
      from_index = ei_ucs4internal;
      break;
    }
    if (sizeof(wchar_t) == 2) {
      from_index = ei_ucs2internal;
      break;
    }
    if (sizeof(wchar_t) == 1) {
      from_index = ei_iso8859_1;
      break;
    }
#endif
#if HAVE_WCRTOMB
    /* No direct mapping was chosen above: flag the wchar_t case and
       resolve the locale's charset as the source instead. */
    from_wchar = 1;
    fromcode = locale_charset();
    continue;
#endif
    goto invalid;
  }
}