|
Libparserutils
|
00001 /* 00002 * This file is part of LibParserUtils. 00003 * Licensed under the MIT License, 00004 * http://www.opensource.org/licenses/mit-license.php 00005 * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org> 00006 */ 00007 00008 #ifndef parserutils_input_inputstream_h_ 00009 #define parserutils_input_inputstream_h_ 00010 00011 #ifdef __cplusplus 00012 extern "C" 00013 { 00014 #endif 00015 00016 #include <stdbool.h> 00017 #ifndef NDEBUG 00018 #include <stdio.h> 00019 #endif 00020 #include <stdlib.h> 00021 #include <inttypes.h> 00022 00023 #include <parserutils/errors.h> 00024 #include <parserutils/functypes.h> 00025 #include <parserutils/types.h> 00026 #include <parserutils/charset/utf8.h> 00027 #include <parserutils/utils/buffer.h> 00028 00032 typedef parserutils_error (*parserutils_charset_detect_func)( 00033 const uint8_t *data, size_t len, 00034 uint16_t *mibenum, uint32_t *source); 00035 00039 typedef struct parserutils_inputstream 00040 { 00041 parserutils_buffer *utf8; 00043 uint32_t cursor; 00045 bool had_eof; 00046 } parserutils_inputstream; 00047 00048 /* Create an input stream */ 00049 parserutils_error parserutils_inputstream_create(const char *enc, 00050 uint32_t encsrc, parserutils_charset_detect_func csdetect, 00051 parserutils_inputstream **stream); 00052 /* Destroy an input stream */ 00053 parserutils_error parserutils_inputstream_destroy( 00054 parserutils_inputstream *stream); 00055 00056 /* Append data to an input stream */ 00057 parserutils_error parserutils_inputstream_append( 00058 parserutils_inputstream *stream, 00059 const uint8_t *data, size_t len); 00060 /* Insert data into stream at current location */ 00061 parserutils_error parserutils_inputstream_insert( 00062 parserutils_inputstream *stream, 00063 const uint8_t *data, size_t len); 00064 00065 /* Slow form of css_inputstream_peek. */ 00066 parserutils_error parserutils_inputstream_peek_slow( 00067 parserutils_inputstream *stream, 00068 size_t offset, const uint8_t **ptr, size_t *length); 00069 00091 static inline parserutils_error parserutils_inputstream_peek( 00092 parserutils_inputstream *stream, size_t offset, 00093 const uint8_t **ptr, size_t *length) 00094 { 00095 parserutils_error error = PARSERUTILS_OK; 00096 const parserutils_buffer *utf8; 00097 const uint8_t *utf8_data; 00098 size_t len, off, utf8_len; 00099 00100 if (stream == NULL || ptr == NULL || length == NULL) 00101 return PARSERUTILS_BADPARM; 00102 00103 #ifndef NDEBUG 00104 #ifdef VERBOSE_INPUTSTREAM 00105 fprintf(stdout, "Peek: len: %zu cur: %u off: %zu\n", 00106 stream->utf8->length, stream->cursor, offset); 00107 #endif 00108 #ifdef RANDOMISE_INPUTSTREAM 00109 parserutils_buffer_randomise(stream->utf8); 00110 #endif 00111 #endif 00112 00113 utf8 = stream->utf8; 00114 utf8_data = utf8->data; 00115 utf8_len = utf8->length; 00116 off = stream->cursor + offset; 00117 00118 #define IS_ASCII(x) (((x) & 0x80) == 0) 00119 00120 if (off < utf8_len) { 00121 if (IS_ASCII(utf8_data[off])) { 00122 /* Early exit for ASCII case */ 00123 (*length) = 1; 00124 (*ptr) = (utf8_data + off); 00125 return PARSERUTILS_OK; 00126 } else { 00127 error = parserutils_charset_utf8_char_byte_length( 00128 utf8_data + off, &len); 00129 00130 if (error == PARSERUTILS_OK) { 00131 (*length) = len; 00132 (*ptr) = (utf8_data + off); 00133 return PARSERUTILS_OK; 00134 } else if (error != PARSERUTILS_NEEDDATA) { 00135 return error; 00136 } 00137 } 00138 } 00139 00140 #undef IS_ASCII 00141 00142 return parserutils_inputstream_peek_slow(stream, offset, ptr, length); 00143 } 00144 00151 static inline void parserutils_inputstream_advance( 00152 parserutils_inputstream *stream, size_t bytes) 00153 { 00154 if (stream == NULL) 00155 return; 00156 00157 #if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM) 00158 fprintf(stdout, "Advance: len: %zu cur: %u bytes: %zu\n", 00159 stream->utf8->length, stream->cursor, bytes); 00160 #endif 00161 00162 if (bytes > stream->utf8->length - stream->cursor) 00163 bytes = stream->utf8->length - stream->cursor; 00164 00165 if (stream->cursor == stream->utf8->length) 00166 return; 00167 00168 stream->cursor += bytes; 00169 } 00170 00171 /* Read the document charset */ 00172 const char *parserutils_inputstream_read_charset( 00173 parserutils_inputstream *stream, uint32_t *source); 00174 /* Change the document charset */ 00175 parserutils_error parserutils_inputstream_change_charset( 00176 parserutils_inputstream *stream, 00177 const char *enc, uint32_t source); 00178 00179 #ifdef __cplusplus 00180 } 00181 #endif 00182 00183 #endif 00184
1.7.3