/* $Header: /CVSROOT/tinolib/old/html_entities.h,v 1.2 2006/03/18 04:29:47 tino Exp $
 *
 * This is a list of HTML entities.
 * Note that this was stolen from xml2sql-v,
 * another package of me.
 *
 * Public Domain
 * Written by Valentin Hilbig, webmaster@scylla-charybdis.com
 *
 * $Log: html_entities.h,v $
 * Revision 1.2  2006/03/18 04:29:47  tino
 * tino_html_entity_check() added
 *
 * Revision 1.1  2006/02/12 01:37:39  tino
 * html_entities.h added
 */

#ifndef tino_INC_html_entities_h
#define tino_INC_html_entities_h

/* NOTE(review): the header name of this #include was missing in the
 * reviewed copy.  Nothing below needs more than the core language, so
 * <stdlib.h> is a harmless stand-in -- confirm against upstream.
 */
#include <stdlib.h>

/* Table mapping Unicode code points to HTML entity names (without the
 * leading '&' and the trailing ';').  The table is terminated by an
 * entry with unicode==0.
 *
 * Apart from the first four entries the table is kept sorted by code
 * point.
 */
static struct tino_html_entities
  {
    int         unicode;        /* code point, 0 marks the end of the table */
    const char  *entity;        /* entity name, e.g. "amp" for &amp;        */
  } tino_html_entities[] =
  {
    /* Important!
     * Keep these three in this sequence!
     */
    { 38, "amp" },
    { 60, "lt" },
    { 62, "gt" },

    /* HTML3.2 */
    { 34, "quot" },
    { 160, "nbsp" },    { 161, "iexcl" },   { 162, "cent" },    { 163, "pound" },
    { 164, "curren" },  { 165, "yen" },     { 166, "brvbar" },  { 167, "sect" },
    { 168, "uml" },     { 169, "copy" },    { 170, "ordf" },    { 171, "laquo" },
    { 172, "not" },     { 173, "shy" },     { 174, "reg" },     { 175, "macr" },
    { 176, "deg" },     { 177, "plusmn" },  { 178, "sup2" },    { 179, "sup3" },
    { 180, "acute" },   { 181, "micro" },   { 182, "para" },    { 183, "middot" },
    { 184, "cedil" },   { 185, "sup1" },    { 186, "ordm" },    { 187, "raquo" },
    { 188, "frac14" },  { 189, "frac12" },  { 190, "frac34" },  { 191, "iquest" },
    { 192, "Agrave" },  { 193, "Aacute" },  { 194, "Acirc" },   { 195, "Atilde" },
    { 196, "Auml" },    { 197, "Aring" },   { 198, "AElig" },   { 199, "Ccedil" },
    { 200, "Egrave" },  { 201, "Eacute" },  { 202, "Ecirc" },   { 203, "Euml" },
    { 204, "Igrave" },  { 205, "Iacute" },  { 206, "Icirc" },   { 207, "Iuml" },
    { 208, "ETH" },     { 209, "Ntilde" },  { 210, "Ograve" },  { 211, "Oacute" },
    { 212, "Ocirc" },   { 213, "Otilde" },  { 214, "Ouml" },    { 215, "times" },
    { 216, "Oslash" },  { 217, "Ugrave" },  { 218, "Uacute" },  { 219, "Ucirc" },
    { 220, "Uuml" },    { 221, "Yacute" },  { 222, "THORN" },   { 223, "szlig" },
    { 224, "agrave" },  { 225, "aacute" },  { 226, "acirc" },   { 227, "atilde" },
    { 228, "auml" },    { 229, "aring" },   { 230, "aelig" },   { 231, "ccedil" },
    { 232, "egrave" },  { 233, "eacute" },  { 234, "ecirc" },   { 235, "euml" },
    { 236, "igrave" },  { 237, "iacute" },  { 238, "icirc" },   { 239, "iuml" },
    { 240, "eth" },     { 241, "ntilde" },  { 242, "ograve" },  { 243, "oacute" },
    { 244, "ocirc" },   { 245, "otilde" },  { 246, "ouml" },    { 247, "divide" },
    { 248, "oslash" },  { 249, "ugrave" },  { 250, "uacute" },  { 251, "ucirc" },
    { 252, "uuml" },    { 253, "yacute" },  { 254, "thorn" },   { 255, "yuml" },

    /* HTML 4.0 */
    { 338, "OElig" },   { 339, "oelig" },   { 352, "Scaron" },  { 353, "scaron" },
    { 376, "Yuml" },    { 402, "fnof" },    { 710, "circ" },    { 732, "tilde" },
    { 913, "Alpha" },   { 914, "Beta" },    { 915, "Gamma" },   { 916, "Delta" },
    { 917, "Epsilon" }, { 918, "Zeta" },    { 919, "Eta" },     { 920, "Theta" },
    { 921, "Iota" },    { 922, "Kappa" },   { 923, "Lambda" },  { 924, "Mu" },
    { 925, "Nu" },      { 926, "Xi" },      { 927, "Omicron" }, { 928, "Pi" },
    { 929, "Rho" },     { 931, "Sigma" },   { 932, "Tau" },     { 933, "Upsilon" },
    { 934, "Phi" },     { 935, "Chi" },     { 936, "Psi" },     { 937, "Omega" },
    { 945, "alpha" },   { 946, "beta" },    { 947, "gamma" },   { 948, "delta" },
    { 949, "epsilon" }, { 950, "zeta" },    { 951, "eta" },     { 952, "theta" },
    { 953, "iota" },    { 954, "kappa" },   { 955, "lambda" },  { 956, "mu" },
    { 957, "nu" },      { 958, "xi" },      { 959, "omicron" }, { 960, "pi" },
    { 961, "rho" },     { 962, "sigmaf" },  { 963, "sigma" },   { 964, "tau" },
    { 965, "upsilon" }, { 966, "phi" },     { 967, "chi" },     { 968, "psi" },
    { 969, "omega" },   { 977, "thetasym" }, { 978, "upsih" },  { 982, "piv" },
    { 8194, "ensp" },   { 8195, "emsp" },   { 8201, "thinsp" }, { 8204, "zwnj" },
    { 8205, "zwj" },    { 8206, "lrm" },    { 8207, "rlm" },    { 8211, "ndash" },
    { 8212, "mdash" },  { 8216, "lsquo" },  { 8217, "rsquo" },  { 8218, "sbquo" },
    { 8220, "ldquo" },  { 8221, "rdquo" },  { 8222, "bdquo" },  { 8224, "dagger" },
    { 8225, "Dagger" }, { 8226, "bull" },   { 8230, "hellip" }, { 8240, "permil" },
    { 8242, "prime" },  { 8243, "Prime" },  { 8249, "lsaquo" }, { 8250, "rsaquo" },
    { 8254, "oline" },  { 8260, "frasl" },  { 8364, "euro" },   { 8465, "image" },
    { 8472, "weierp" }, { 8476, "real" },   { 8482, "trade" },  { 8501, "alefsym" },
    { 8592, "larr" },   { 8593, "uarr" },   { 8594, "rarr" },   { 8595, "darr" },
    { 8596, "harr" },   { 8629, "crarr" },  { 8656, "lArr" },   { 8657, "uArr" },
    { 8658, "rArr" },   { 8659, "dArr" },   { 8660, "hArr" },   { 8704, "forall" },
    { 8706, "part" },   { 8707, "exist" },  { 8709, "empty" },  { 8711, "nabla" },
    { 8712, "isin" },   { 8713, "notin" },  { 8715, "ni" },     { 8719, "prod" },
    { 8721, "sum" },    { 8722, "minus" },  { 8727, "lowast" }, { 8730, "radic" },
    { 8733, "prop" },   { 8734, "infin" },  { 8736, "ang" },
    /* "and"/"or" are U+2227/U+2228; 8869 belongs to "perp" only */
    { 8743, "and" },    { 8744, "or" },
    { 8745, "cap" },    { 8746, "cup" },    { 8747, "int" },    { 8756, "there4" },
    { 8764, "sim" },    { 8773, "cong" },   { 8776, "asymp" },  { 8800, "ne" },
    { 8801, "equiv" },  { 8804, "le" },     { 8805, "ge" },     { 8834, "sub" },
    { 8835, "sup" },    { 8836, "nsub" },   { 8838, "sube" },   { 8839, "supe" },
    { 8853, "oplus" },  { 8855, "otimes" }, { 8869, "perp" },   { 8901, "sdot" },
    { 8968, "lceil" },  { 8969, "rceil" },  { 8970, "lfloor" }, { 8971, "rfloor" },
    { 9001, "lang" },   { 9002, "rang" },   { 9674, "loz" },    { 9824, "spades" },
    { 9827, "clubs" },  { 9829, "hearts" }, { 9830, "diams" },

    /* Extend here */

    /* END */
    { 0, 0 }
  };

#define TINO_HTML_ENTITY_MAXLEN 12      /* no entity is this long!      */

/* Look for an entity ("&name;", "&#123;" or "&#x7b;") at the start of s.
 *
 * s    points to the text to examine; it must start with '&' to match.
 * len  NULL, or in: maximum number of characters of s to look at,
 *      out (only on success): length of the entity including '&' and ';'.
 *      With len==NULL at most TINO_HTML_ENTITY_MAXLEN characters are
 *      examined.
 *
 * Returns the code point of the entity, or -1 if s does not start with
 * a known, complete entity.  Numeric references above 0xfeff and
 * numeric references without any digit are rejected.
 *
 * This routine is brain dead for now (linear search).
 * However it is the fastest version if you only call this once.
 */
static int
tino_html_entity_check_simple(const char *s, int *len)
{
  const struct tino_html_entities       *p;
  int                                   max;

  max = len ? *len : TINO_HTML_ENTITY_MAXLEN;
  if (max<3)                    /* shortest possible form is "&x;"      */
    return -1;
  if (*s++!='&')
    return -1;
  max--;                        /* the '&' is consumed, s now follows it */

  if (*s=='#')
    {
      int       b, i, o;
      int       u;

      /* numbered entity
       *
       * NOTE(review): the body of this loop was unreadable in the
       * reviewed copy.  It was re-derived from what was still visible
       * (base 10 or 16, start offset o, the 0xfeff limit and
       * *len = i+2) -- confirm against upstream.
       */
      b = 10;
      o = 1;
      if (s[o]=='x')
        {
          b = 16;
          o++;
        }
      u = 0;
      for (i=o; i<max; i++)
        {
          int   c, d;

          c = (unsigned char)s[i];
          if (c==';')
            {
              if (i==o)
                break;          /* "&#;" or "&#x;": no digits           */
              if (len)
                *len = i+2;     /* '&' plus s[0..i]                     */
              return u;
            }
          if (c>='0' && c<='9')
            d = c-'0';
          else if (c>='a' && c<='f')
            d = c-'a'+10;
          else if (c>='A' && c<='F')
            d = c-'A'+10;
          else
            break;              /* includes the terminating NUL         */
          if (d>=b)
            break;              /* hex digit in a decimal reference     */
          u = u*b+d;
          if (u>0xfeff)
            break;              /* out of the accepted range            */
        }
      return -1;
    }

  for (p=tino_html_entities; p->unicode; p++)
    {
      int       i;

      for (i=0; i<max; i++)
        {
          if (!p->entity[i])
            {
              /* Name fully matched, it only counts if ';' follows.
               * Stopping here also avoids running past the end of
               * both strings when s ends right after the name.
               */
              if (s[i]==';')
                {
                  if (len)
                    *len = i+2;
                  return p->unicode;
                }
              break;
            }
          if (s[i]!=p->entity[i])
            break;
        }
    }
  return -1;
}

/* looks for &entity; in string s of max len *len (offset is returned in len)
 * returns the unicode of the entity or -1 if no entity.
 *
 * If s is a 0 terminated string len can be NULL or higher than the length of s
 * (in which case TINO_HTML_ENTITY_MAXLEN is a good value to start).
 * If len==NULL you do not get back the length of the entity!
 *
 * This is the convenience routine.
 * If you call it very seldom or need RAM, use the _simple version.
 * Else this will be replaced by a high performance version,
 * in case it is called frequently.
 *
 * Can be improved by a reverse Boyer Moore, aka. Splay Tree.
 */
static int
tino_html_entity_check(const char *s, int *len)
{
  return tino_html_entity_check_simple(s, len);
}

#endif