|
Libparserutils
|
00001 /* 00002 * This file is part of LibParserUtils. 00003 * Licensed under the MIT License, 00004 * http://www.opensource.org/licenses/mit-license.php 00005 * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org> 00006 */ 00007 00008 #include <ctype.h> 00009 #include <stdbool.h> 00010 #include <stddef.h> 00011 #include <stdio.h> 00012 #include <stdlib.h> 00013 #include <string.h> 00014 00015 #include <assert.h> 00016 00017 #include "charset/aliases.h" 00018 #include "utils/utils.h" 00019 00020 /* Bring in the aliases tables */ 00021 #include "aliases.inc" 00022 00023 typedef struct { 00024 size_t slen; 00025 const char *s; 00026 } lengthed_string; 00027 00028 00029 #define IS_PUNCT_OR_SPACE(x) \ 00030 (!(((x) >= 'A' && (x) <= 'Z') || \ 00031 ((x) >= 'a' && (x) <= 'z') || \ 00032 ((x) >= '0' && (x) <= '9'))) 00033 00034 00035 static int parserutils_charset_alias_match(const void *a, const void *b) 00036 { 00037 lengthed_string *s = (lengthed_string *)a; 00038 parserutils_charset_aliases_alias *alias = (parserutils_charset_aliases_alias*)b; 00039 size_t key_left = s->slen; 00040 size_t alias_left = alias->name_len; 00041 const char *s_alias = alias->name; 00042 const char *s_key = s->s; 00043 int cmpret; 00044 00045 while ((key_left > 0) && (alias_left > 0)) { 00046 while ((key_left > 0) && IS_PUNCT_OR_SPACE(*s_key)) { 00047 key_left--; s_key++; 00048 } 00049 00050 if (key_left == 0) 00051 break; 00052 00053 cmpret = tolower(*s_key) - *s_alias; 00054 00055 if (cmpret != 0) { 00056 return cmpret; 00057 } 00058 00059 key_left--; 00060 s_key++; 00061 alias_left--; 00062 s_alias++; 00063 } 00064 00065 while ((key_left > 0) && IS_PUNCT_OR_SPACE(*s_key)) { 00066 key_left--; s_key++; 00067 } 00068 00069 return key_left - alias_left; 00070 } 00071 00079 parserutils_charset_aliases_canon *parserutils__charset_alias_canonicalise( 00080 const char *alias, size_t len) 00081 { 00082 parserutils_charset_aliases_alias *c; 00083 lengthed_string s; 00084 00085 s.slen = len; 00086 s.s = alias; 00087 00088 c = (parserutils_charset_aliases_alias*)bsearch(&s, 00089 &charset_aliases[0], 00090 charset_aliases_count, 00091 sizeof(parserutils_charset_aliases_alias), 00092 parserutils_charset_alias_match); 00093 00094 if (c == NULL) 00095 return NULL; 00096 00097 return c->canon; 00098 } 00099 00107 uint16_t parserutils_charset_mibenum_from_name(const char *alias, size_t len) 00108 { 00109 parserutils_charset_aliases_canon *c; 00110 00111 if (alias == NULL) 00112 return 0; 00113 00114 c = parserutils__charset_alias_canonicalise(alias, len); 00115 if (c == NULL) 00116 return 0; 00117 00118 return c->mib_enum; 00119 } 00120 00127 const char *parserutils_charset_mibenum_to_name(uint16_t mibenum) 00128 { 00129 int i; 00130 parserutils_charset_aliases_canon *c; 00131 00132 for (i = 0; i < charset_aliases_canon_count; ++i) { 00133 c = &canonical_charset_names[i]; 00134 if (c->mib_enum == mibenum) 00135 return c->name; 00136 } 00137 00138 return NULL; 00139 } 00140 00147 bool parserutils_charset_mibenum_is_unicode(uint16_t mibenum) 00148 { 00149 return MIBENUM_IS_UNICODE(mibenum); 00150 }
1.7.3