|
Libparserutils
|
00001 /* 00002 * This file is part of LibParserUtils. 00003 * Licensed under the MIT License, 00004 * http://www.opensource.org/licenses/mit-license.php 00005 * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org> 00006 */ 00007 00008 #include <errno.h> 00009 #include <stdbool.h> 00010 #include <stdlib.h> 00011 #include <string.h> 00012 00013 #ifndef WITHOUT_ICONV_FILTER 00014 #include <iconv.h> 00015 #endif 00016 00017 #include <parserutils/charset/mibenum.h> 00018 #include <parserutils/charset/codec.h> 00019 00020 #include "input/filter.h" 00021 #include "utils/utils.h" 00022 00024 struct parserutils_filter { 00025 #ifndef WITHOUT_ICONV_FILTER 00026 iconv_t cd; 00027 uint16_t int_enc; 00028 #else 00029 parserutils_charset_codec *read_codec; 00030 parserutils_charset_codec *write_codec; 00032 uint32_t pivot_buf[64]; 00034 bool leftover; 00035 uint8_t *pivot_left; 00036 size_t pivot_len; 00037 #endif 00038 00039 struct { 00040 uint16_t encoding; 00041 } settings; 00042 }; 00043 00044 static parserutils_error filter_set_defaults(parserutils_filter *input); 00045 static parserutils_error filter_set_encoding(parserutils_filter *input, 00046 const char *enc); 00047 00058 parserutils_error parserutils__filter_create(const char *int_enc, 00059 parserutils_filter **filter) 00060 { 00061 parserutils_filter *f; 00062 parserutils_error error; 00063 00064 if (int_enc == NULL || filter == NULL) 00065 return PARSERUTILS_BADPARM; 00066 00067 f = malloc(sizeof(parserutils_filter)); 00068 if (f == NULL) 00069 return PARSERUTILS_NOMEM; 00070 00071 #ifndef WITHOUT_ICONV_FILTER 00072 f->cd = (iconv_t) -1; 00073 f->int_enc = parserutils_charset_mibenum_from_name( 00074 int_enc, strlen(int_enc)); 00075 if (f->int_enc == 0) { 00076 free(f); 00077 return PARSERUTILS_BADENCODING; 00078 } 00079 #else 00080 f->leftover = false; 00081 f->pivot_left = NULL; 00082 f->pivot_len = 0; 00083 #endif 00084 00085 error = filter_set_defaults(f); 00086 if (error != PARSERUTILS_OK) { 00087 free(f); 00088 return error; 00089 } 00090 00091 #ifdef WITHOUT_ICONV_FILTER 00092 error = parserutils_charset_codec_create(int_enc, &f->write_codec); 00093 if (error != PARSERUTILS_OK) { 00094 if (f->read_codec != NULL) { 00095 parserutils_charset_codec_destroy(f->read_codec); 00096 f->read_codec = NULL; 00097 } 00098 free(f); 00099 return error; 00100 } 00101 #endif 00102 00103 *filter = f; 00104 00105 return PARSERUTILS_OK; 00106 } 00107 00114 parserutils_error parserutils__filter_destroy(parserutils_filter *input) 00115 { 00116 if (input == NULL) 00117 return PARSERUTILS_BADPARM; 00118 00119 #ifndef WITHOUT_ICONV_FILTER 00120 if (input->cd != (iconv_t) -1) { 00121 iconv_close(input->cd); 00122 input->cd = (iconv_t) -1; 00123 } 00124 #else 00125 if (input->read_codec != NULL) { 00126 parserutils_charset_codec_destroy(input->read_codec); 00127 input->read_codec = NULL; 00128 } 00129 00130 if (input->write_codec != NULL) { 00131 parserutils_charset_codec_destroy(input->write_codec); 00132 input->write_codec = NULL; 00133 } 00134 #endif 00135 00136 free(input); 00137 00138 return PARSERUTILS_OK; 00139 } 00140 00149 parserutils_error parserutils__filter_setopt(parserutils_filter *input, 00150 parserutils_filter_opttype type, 00151 parserutils_filter_optparams *params) 00152 { 00153 parserutils_error error = PARSERUTILS_OK; 00154 00155 if (input == NULL || params == NULL) 00156 return PARSERUTILS_BADPARM; 00157 00158 switch (type) { 00159 case PARSERUTILS_FILTER_SET_ENCODING: 00160 error = filter_set_encoding(input, params->encoding.name); 00161 break; 00162 } 00163 00164 return error; 00165 } 00166 00179 parserutils_error parserutils__filter_process_chunk(parserutils_filter *input, 00180 const uint8_t **data, size_t *len, 00181 uint8_t **output, size_t *outlen) 00182 { 00183 if (input == NULL || data == NULL || *data == NULL || len == NULL || 00184 output == NULL || *output == NULL || outlen == NULL) 00185 return PARSERUTILS_BADPARM; 00186 00187 #ifndef WITHOUT_ICONV_FILTER 00188 if (iconv(input->cd, (void *) data, len, 00189 (char **) output, outlen) == (size_t) -1) { 00190 switch (errno) { 00191 case E2BIG: 00192 return PARSERUTILS_NOMEM; 00193 case EILSEQ: 00194 if (*outlen < 3) 00195 return PARSERUTILS_NOMEM; 00196 00197 (*output)[0] = 0xef; 00198 (*output)[1] = 0xbf; 00199 (*output)[2] = 0xbd; 00200 00201 *output += 3; 00202 *outlen -= 3; 00203 00204 (*data)++; 00205 (*len)--; 00206 00207 while (*len > 0) { 00208 size_t ret; 00209 00210 ret = iconv(input->cd, (void *) data, len, 00211 (char **) output, outlen); 00212 if (ret != (size_t) -1 || errno != EILSEQ) 00213 break; 00214 00215 if (*outlen < 3) 00216 return PARSERUTILS_NOMEM; 00217 00218 (*output)[0] = 0xef; 00219 (*output)[1] = 0xbf; 00220 (*output)[2] = 0xbd; 00221 00222 *output += 3; 00223 *outlen -= 3; 00224 00225 (*data)++; 00226 (*len)--; 00227 } 00228 00229 return errno == E2BIG ? PARSERUTILS_NOMEM 00230 : PARSERUTILS_OK; 00231 } 00232 } 00233 00234 return PARSERUTILS_OK; 00235 #else 00236 if (input->leftover) { 00237 parserutils_error write_error; 00238 00239 /* Some data left to be written from last call */ 00240 00241 /* Attempt to flush the remaining data. */ 00242 write_error = parserutils_charset_codec_encode( 00243 input->write_codec, 00244 (const uint8_t **) &input->pivot_left, 00245 &input->pivot_len, 00246 output, outlen); 00247 00248 if (write_error != PARSERUTILS_OK) 00249 return write_error; 00250 00251 00252 /* And clear leftover */ 00253 input->pivot_left = NULL; 00254 input->pivot_len = 0; 00255 input->leftover = false; 00256 } 00257 00258 while (*len > 0) { 00259 parserutils_error read_error, write_error; 00260 size_t pivot_len = sizeof(input->pivot_buf); 00261 uint8_t *pivot = (uint8_t *) input->pivot_buf; 00262 00263 read_error = parserutils_charset_codec_decode(input->read_codec, 00264 data, len, 00265 (uint8_t **) &pivot, &pivot_len); 00266 00267 pivot = (uint8_t *) input->pivot_buf; 00268 pivot_len = sizeof(input->pivot_buf) - pivot_len; 00269 00270 if (pivot_len > 0) { 00271 write_error = parserutils_charset_codec_encode( 00272 input->write_codec, 00273 (const uint8_t **) &pivot, 00274 &pivot_len, 00275 output, outlen); 00276 00277 if (write_error != PARSERUTILS_OK) { 00278 input->leftover = true; 00279 input->pivot_left = pivot; 00280 input->pivot_len = pivot_len; 00281 00282 return write_error; 00283 } 00284 } 00285 00286 if (read_error != PARSERUTILS_OK && 00287 read_error != PARSERUTILS_NOMEM) 00288 return read_error; 00289 } 00290 00291 return PARSERUTILS_OK; 00292 #endif 00293 } 00294 00301 parserutils_error parserutils__filter_reset(parserutils_filter *input) 00302 { 00303 parserutils_error error = PARSERUTILS_OK; 00304 00305 if (input == NULL) 00306 return PARSERUTILS_BADPARM; 00307 00308 #ifndef WITHOUT_ICONV_FILTER 00309 iconv(input->cd, NULL, 0, NULL, 0); 00310 #else 00311 /* Clear pivot buffer leftovers */ 00312 input->pivot_left = NULL; 00313 input->pivot_len = 0; 00314 input->leftover = false; 00315 00316 /* Reset read codec */ 00317 error = parserutils_charset_codec_reset(input->read_codec); 00318 if (error != PARSERUTILS_OK) 00319 return error; 00320 00321 /* Reset write codec */ 00322 error = parserutils_charset_codec_reset(input->write_codec); 00323 if (error != PARSERUTILS_OK) 00324 return error; 00325 #endif 00326 00327 return error; 00328 } 00329 00336 parserutils_error filter_set_defaults(parserutils_filter *input) 00337 { 00338 parserutils_error error; 00339 00340 if (input == NULL) 00341 return PARSERUTILS_BADPARM; 00342 00343 #ifdef WITHOUT_ICONV_FILTER 00344 input->read_codec = NULL; 00345 input->write_codec = NULL; 00346 #endif 00347 00348 input->settings.encoding = 0; 00349 error = filter_set_encoding(input, "UTF-8"); 00350 if (error != PARSERUTILS_OK) 00351 return error; 00352 00353 return PARSERUTILS_OK; 00354 } 00355 00363 parserutils_error filter_set_encoding(parserutils_filter *input, 00364 const char *enc) 00365 { 00366 parserutils_error error = PARSERUTILS_OK; 00367 uint16_t mibenum; 00368 00369 if (input == NULL || enc == NULL) 00370 return PARSERUTILS_BADPARM; 00371 00372 mibenum = parserutils_charset_mibenum_from_name(enc, strlen(enc)); 00373 if (mibenum == 0) 00374 return PARSERUTILS_BADENCODING; 00375 00376 /* Exit early if we're already using this encoding */ 00377 if (input->settings.encoding == mibenum) 00378 return PARSERUTILS_OK; 00379 00380 #ifndef WITHOUT_ICONV_FILTER 00381 if (input->cd != (iconv_t) -1) { 00382 iconv_close(input->cd); 00383 input->cd = (iconv_t) -1; 00384 } 00385 00386 input->cd = iconv_open( 00387 parserutils_charset_mibenum_to_name(input->int_enc), 00388 parserutils_charset_mibenum_to_name(mibenum)); 00389 if (input->cd == (iconv_t) -1) { 00390 return (errno == EINVAL) ? PARSERUTILS_BADENCODING 00391 : PARSERUTILS_NOMEM; 00392 } 00393 #else 00394 if (input->read_codec != NULL) { 00395 parserutils_charset_codec_destroy(input->read_codec); 00396 input->read_codec = NULL; 00397 } 00398 00399 error = parserutils_charset_codec_create(enc, &input->read_codec); 00400 if (error != PARSERUTILS_OK) 00401 return error; 00402 #endif 00403 00404 input->settings.encoding = mibenum; 00405 00406 return error; 00407 00408 }
1.7.3