/* * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc. * This file is part of the GNU LIBICONV Library. * * The GNU LIBICONV Library is free software; you can redistribute it * and/or modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * The GNU LIBICONV Library is distributed in the hope that it will be * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with the GNU LIBICONV Library; see the file COPYING.LIB. * If not, see . */ /* This file defines the conversion loop via Unicode as a pivot encoding. */ /* Attempt to transliterate wc. Return code as in xxx_wctomb. */ static int unicode_transliterate (conv_t cd, ucs4_t wc, unsigned char* outptr, size_t outleft) { if (cd->oflags & HAVE_HANGUL_JAMO) { /* Decompose Hangul into Jamo. Use double-width Jamo (contained in all Korean encodings and ISO-2022-JP-2), not half-width Jamo (contained in Unicode only). */ ucs4_t buf[3]; int ret = johab_hangul_decompose(cd,buf,wc); if (ret != RET_ILUNI) { /* we know 1 <= ret <= 3 */ state_t backup_state = cd->ostate; unsigned char* backup_outptr = outptr; size_t backup_outleft = outleft; int i, sub_outcount; for (i = 0; i < ret; i++) { if (outleft == 0) { sub_outcount = RET_TOOSMALL; goto johab_hangul_failed; } sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft); if (sub_outcount <= RET_ILUNI) goto johab_hangul_failed; if (!(sub_outcount <= outleft)) abort(); outptr += sub_outcount; outleft -= sub_outcount; } return outptr-backup_outptr; johab_hangul_failed: cd->ostate = backup_state; outptr = backup_outptr; outleft = backup_outleft; if (sub_outcount != RET_ILUNI) return RET_TOOSMALL; } } { /* Try to use a variant, but postfix it with U+303E IDEOGRAPHIC VARIATION INDICATOR (cf. Ken Lunde's "CJKV information processing", p. 188). */ int indx = -1; if (wc == 0x3006) indx = 0; else if (wc == 0x30f6) indx = 1; else if (wc >= 0x4e00 && wc < 0xa000) indx = cjk_variants_indx[wc-0x4e00]; if (indx >= 0) { for (;; indx++) { ucs4_t buf[2]; unsigned short variant = cjk_variants[indx]; unsigned short last = variant & 0x8000; variant &= 0x7fff; variant += 0x3000; buf[0] = variant; buf[1] = 0x303e; { state_t backup_state = cd->ostate; unsigned char* backup_outptr = outptr; size_t backup_outleft = outleft; int i, sub_outcount; for (i = 0; i < 2; i++) { if (outleft == 0) { sub_outcount = RET_TOOSMALL; goto variant_failed; } sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft); if (sub_outcount <= RET_ILUNI) goto variant_failed; if (!(sub_outcount <= outleft)) abort(); outptr += sub_outcount; outleft -= sub_outcount; } return outptr-backup_outptr; variant_failed: cd->ostate = backup_state; outptr = backup_outptr; outleft = backup_outleft; if (sub_outcount != RET_ILUNI) return RET_TOOSMALL; } if (last) break; } } } if (wc >= 0x2018 && wc <= 0x201a) { /* Special case for quotation marks 0x2018, 0x2019, 0x201a */ ucs4_t substitute = (cd->oflags & HAVE_QUOTATION_MARKS ? (wc == 0x201a ? 0x2018 : wc) : (cd->oflags & HAVE_ACCENTS ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */ : 0x0027 /* use apostrophe */ ) ); int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft); if (outcount != RET_ILUNI) return outcount; } { /* Use the transliteration table. */ int indx = translit_index(wc); if (indx >= 0) { const unsigned int * cp = &translit_data[indx]; unsigned int num = *cp++; state_t backup_state = cd->ostate; unsigned char* backup_outptr = outptr; size_t backup_outleft = outleft; unsigned int i; int sub_outcount; for (i = 0; i < num; i++) { if (outleft == 0) { sub_outcount = RET_TOOSMALL; goto translit_failed; } sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft); if (sub_outcount == RET_ILUNI) /* Recursive transliteration. */ sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft); if (sub_outcount <= RET_ILUNI) goto translit_failed; if (!(sub_outcount <= outleft)) abort(); outptr += sub_outcount; outleft -= sub_outcount; } return outptr-backup_outptr; translit_failed: cd->ostate = backup_state; outptr = backup_outptr; outleft = backup_outleft; if (sub_outcount != RET_ILUNI) return RET_TOOSMALL; } } return RET_ILUNI; } #ifndef LIBICONV_PLUG struct uc_to_mb_fallback_locals { unsigned char* l_outbuf; size_t l_outbytesleft; int l_errno; }; static void uc_to_mb_write_replacement (const char *buf, size_t buflen, void* callback_arg) { struct uc_to_mb_fallback_locals * plocals = (struct uc_to_mb_fallback_locals *) callback_arg; /* Do nothing if already encountered an error in a previous call. */ if (plocals->l_errno == 0) { /* Attempt to copy the passed buffer to the output buffer. */ if (plocals->l_outbytesleft < buflen) plocals->l_errno = E2BIG; else { memcpy(plocals->l_outbuf, buf, buflen); plocals->l_outbuf += buflen; plocals->l_outbytesleft -= buflen; } } } struct mb_to_uc_fallback_locals { conv_t l_cd; unsigned char* l_outbuf; size_t l_outbytesleft; int l_errno; }; static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen, void* callback_arg) { struct mb_to_uc_fallback_locals * plocals = (struct mb_to_uc_fallback_locals *) callback_arg; /* Do nothing if already encountered an error in a previous call. */ if (plocals->l_errno == 0) { /* Attempt to convert the passed buffer to the target encoding. */ conv_t cd = plocals->l_cd; unsigned char* outptr = plocals->l_outbuf; size_t outleft = plocals->l_outbytesleft; for (; buflen > 0; buf++, buflen--) { ucs4_t wc = *buf; int outcount; if (outleft == 0) { plocals->l_errno = E2BIG; break; } outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); if (outcount != RET_ILUNI) goto outcount_ok; /* Handle Unicode tag characters (range U+E0000..U+E007F). */ if ((wc >> 7) == (0xe0000 >> 7)) goto outcount_zero; /* Try transliteration. */ if (cd->transliterate) { outcount = unicode_transliterate(cd,wc,outptr,outleft); if (outcount != RET_ILUNI) goto outcount_ok; } if (cd->discard_ilseq) { outcount = 0; goto outcount_ok; } #ifndef LIBICONV_PLUG else if (cd->fallbacks.uc_to_mb_fallback != NULL) { struct uc_to_mb_fallback_locals locals; locals.l_outbuf = outptr; locals.l_outbytesleft = outleft; locals.l_errno = 0; cd->fallbacks.uc_to_mb_fallback(wc, uc_to_mb_write_replacement, &locals, cd->fallbacks.data); if (locals.l_errno != 0) { plocals->l_errno = locals.l_errno; break; } outptr = locals.l_outbuf; outleft = locals.l_outbytesleft; outcount = 0; goto outcount_ok; } #endif outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); if (outcount != RET_ILUNI) goto outcount_ok; plocals->l_errno = EILSEQ; break; outcount_ok: if (outcount < 0) { plocals->l_errno = E2BIG; break; } #ifndef LIBICONV_PLUG if (cd->hooks.uc_hook) (*cd->hooks.uc_hook)(wc, cd->hooks.data); #endif if (!(outcount <= outleft)) abort(); outptr += outcount; outleft -= outcount; outcount_zero: ; } plocals->l_outbuf = outptr; plocals->l_outbytesleft = outleft; } } #endif /* !LIBICONV_PLUG */ static size_t unicode_loop_convert (iconv_t icd, const char* * inbuf, size_t *inbytesleft, char* * outbuf, size_t *outbytesleft) { conv_t cd = (conv_t) icd; size_t result = 0; const unsigned char* inptr = (const unsigned char*) *inbuf; size_t inleft = *inbytesleft; unsigned char* outptr = (unsigned char*) *outbuf; size_t outleft = *outbytesleft; while (inleft > 0) { state_t last_istate = cd->istate; ucs4_t wc; int incount; int outcount; incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft); if (incount < 0) { if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) { /* Case 1: invalid input, possibly after a shift sequence */ incount = DECODE_SHIFT_ILSEQ(incount); if (cd->discard_ilseq) { switch (cd->iindex) { case ei_ucs4: case ei_ucs4be: case ei_ucs4le: case ei_utf32: case ei_utf32be: case ei_utf32le: case ei_ucs4internal: case ei_ucs4swapped: incount += 4; break; case ei_ucs2: case ei_ucs2be: case ei_ucs2le: case ei_utf16: case ei_utf16be: case ei_utf16le: case ei_ucs2internal: case ei_ucs2swapped: incount += 2; break; default: incount += 1; break; } goto outcount_zero; } #ifndef LIBICONV_PLUG else if (cd->fallbacks.mb_to_uc_fallback != NULL) { unsigned int incount2; struct mb_to_uc_fallback_locals locals; switch (cd->iindex) { case ei_ucs4: case ei_ucs4be: case ei_ucs4le: case ei_utf32: case ei_utf32be: case ei_utf32le: case ei_ucs4internal: case ei_ucs4swapped: incount2 = 4; break; case ei_ucs2: case ei_ucs2be: case ei_ucs2le: case ei_utf16: case ei_utf16be: case ei_utf16le: case ei_ucs2internal: case ei_ucs2swapped: incount2 = 2; break; default: incount2 = 1; break; } locals.l_cd = cd; locals.l_outbuf = outptr; locals.l_outbytesleft = outleft; locals.l_errno = 0; cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2, mb_to_uc_write_replacement, &locals, cd->fallbacks.data); if (locals.l_errno != 0) { inptr += incount; inleft -= incount; errno = locals.l_errno; result = -1; break; } incount += incount2; outptr = locals.l_outbuf; outleft = locals.l_outbytesleft; result += 1; goto outcount_zero; } #endif inptr += incount; inleft -= incount; errno = EILSEQ; result = -1; break; } if (incount == RET_TOOFEW(0)) { /* Case 2: not enough bytes available to detect anything */ errno = EINVAL; result = -1; break; } /* Case 3: k bytes read, but only a shift sequence */ incount = DECODE_TOOFEW(incount); } else { /* Case 4: k bytes read, making up a wide character */ if (outleft == 0) { cd->istate = last_istate; errno = E2BIG; result = -1; break; } outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); if (outcount != RET_ILUNI) goto outcount_ok; /* Handle Unicode tag characters (range U+E0000..U+E007F). */ if ((wc >> 7) == (0xe0000 >> 7)) goto outcount_zero; /* Try transliteration. */ result++; if (cd->transliterate) { outcount = unicode_transliterate(cd,wc,outptr,outleft); if (outcount != RET_ILUNI) goto outcount_ok; } if (cd->discard_ilseq) { outcount = 0; goto outcount_ok; } #ifndef LIBICONV_PLUG else if (cd->fallbacks.uc_to_mb_fallback != NULL) { struct uc_to_mb_fallback_locals locals; locals.l_outbuf = outptr; locals.l_outbytesleft = outleft; locals.l_errno = 0; cd->fallbacks.uc_to_mb_fallback(wc, uc_to_mb_write_replacement, &locals, cd->fallbacks.data); if (locals.l_errno != 0) { cd->istate = last_istate; errno = locals.l_errno; return -1; } outptr = locals.l_outbuf; outleft = locals.l_outbytesleft; outcount = 0; goto outcount_ok; } #endif outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); if (outcount != RET_ILUNI) goto outcount_ok; cd->istate = last_istate; errno = EILSEQ; result = -1; break; outcount_ok: if (outcount < 0) { cd->istate = last_istate; errno = E2BIG; result = -1; break; } #ifndef LIBICONV_PLUG if (cd->hooks.uc_hook) (*cd->hooks.uc_hook)(wc, cd->hooks.data); #endif if (!(outcount <= outleft)) abort(); outptr += outcount; outleft -= outcount; } outcount_zero: if (!(incount <= inleft)) abort(); inptr += incount; inleft -= incount; } *inbuf = (const char*) inptr; *inbytesleft = inleft; *outbuf = (char*) outptr; *outbytesleft = outleft; return result; } static size_t unicode_loop_reset (iconv_t icd, char* * outbuf, size_t *outbytesleft) { conv_t cd = (conv_t) icd; if (outbuf == NULL || *outbuf == NULL) { /* Reset the states. */ memset(&cd->istate,'\0',sizeof(state_t)); memset(&cd->ostate,'\0',sizeof(state_t)); return 0; } else { size_t result = 0; if (cd->ifuncs.xxx_flushwc) { state_t last_istate = cd->istate; ucs4_t wc; if (cd->ifuncs.xxx_flushwc(cd, &wc)) { unsigned char* outptr = (unsigned char*) *outbuf; size_t outleft = *outbytesleft; int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); if (outcount != RET_ILUNI) goto outcount_ok; /* Handle Unicode tag characters (range U+E0000..U+E007F). */ if ((wc >> 7) == (0xe0000 >> 7)) goto outcount_zero; /* Try transliteration. */ result++; if (cd->transliterate) { outcount = unicode_transliterate(cd,wc,outptr,outleft); if (outcount != RET_ILUNI) goto outcount_ok; } if (cd->discard_ilseq) { outcount = 0; goto outcount_ok; } #ifndef LIBICONV_PLUG else if (cd->fallbacks.uc_to_mb_fallback != NULL) { struct uc_to_mb_fallback_locals locals; locals.l_outbuf = outptr; locals.l_outbytesleft = outleft; locals.l_errno = 0; cd->fallbacks.uc_to_mb_fallback(wc, uc_to_mb_write_replacement, &locals, cd->fallbacks.data); if (locals.l_errno != 0) { cd->istate = last_istate; errno = locals.l_errno; return -1; } outptr = locals.l_outbuf; outleft = locals.l_outbytesleft; outcount = 0; goto outcount_ok; } #endif outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); if (outcount != RET_ILUNI) goto outcount_ok; cd->istate = last_istate; errno = EILSEQ; return -1; outcount_ok: if (outcount < 0) { cd->istate = last_istate; errno = E2BIG; return -1; } #ifndef LIBICONV_PLUG if (cd->hooks.uc_hook) (*cd->hooks.uc_hook)(wc, cd->hooks.data); #endif if (!(outcount <= outleft)) abort(); outptr += outcount; outleft -= outcount; outcount_zero: *outbuf = (char*) outptr; *outbytesleft = outleft; } } if (cd->ofuncs.xxx_reset) { unsigned char* outptr = (unsigned char*) *outbuf; size_t outleft = *outbytesleft; int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft); if (outcount < 0) { errno = E2BIG; return -1; } if (!(outcount <= outleft)) abort(); *outbuf = (char*) (outptr + outcount); *outbytesleft = outleft - outcount; } memset(&cd->istate,'\0',sizeof(state_t)); memset(&cd->ostate,'\0',sizeof(state_t)); return result; } }