X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Fhtml_parser.cpp;h=cb1cda55da2b8143f4cf00eb733959756d417cc8;hb=fe752902775deb62f98786bd18b89a6a0edd0d42;hp=ef8ad2b272f8595640067bcefe6d8d78f6404815;hpb=a258482a3b53b90b0932f4a789ba66e8e1576c05;p=metaproxy-moved-to-github.git diff --git a/src/html_parser.cpp b/src/html_parser.cpp index ef8ad2b..cb1cda5 100644 --- a/src/html_parser.cpp +++ b/src/html_parser.cpp @@ -52,21 +52,19 @@ void mp::HTMLParser::parse(mp::HTMLParserEvent & event, const char *str) const parse_str(event, str); } -//static C functions follow would probably make sense to wrap this in PIMPL? - -static int skipSpace (const char *cp) +static int skipSpace(const char *cp) { int i = 0; - while (cp[i] && strchr (SPACECHR, cp[i])) + while (cp[i] && strchr(SPACECHR, cp[i])) i++; return i; } -static int skipName (const char *cp, char *dst) +static int skipName(const char *cp, char *dst) { int i; int j = 0; - for (i=0; cp[i] && !strchr (SPACECHR "/>=", cp[i]); i++) + for (i = 0; cp[i] && !strchr(SPACECHR "/>=", cp[i]); i++) if (j < TAG_MAX_LEN-1) { dst[j] = tolower(cp[j]); @@ -76,24 +74,24 @@ static int skipName (const char *cp, char *dst) return i; } -static int skipAttribute (const char *cp, char *name, char **value) +static int skipAttribute(const char *cp, char *name, const char **value, int *val_len) { - int i = skipName (cp, name); + int i = skipName(cp, name); *value = NULL; if (!i) - return skipSpace (cp); - i += skipSpace (cp + i); + return skipSpace(cp); + i += skipSpace(cp + i); if (cp[i] == '=') { int v0, v1; i++; - i += skipSpace (cp + i); + i += skipSpace(cp + i); if (cp[i] == '\"' || cp[i] == '\'') { char tr = cp[i]; v0 = ++i; while (cp[i] != tr && cp[i]) - i++; + i++; v1 = i; if (cp[i]) i++; @@ -101,34 +99,33 @@ static int skipAttribute (const char *cp, char *name, char **value) else { v0 = i; - while (cp[i] && !strchr (SPACECHR ">", cp[i])) + while (cp[i] && !strchr(SPACECHR ">", cp[i])) i++; v1 = i; } - *value = (char *) malloc (v1 - v0 + 1); - memcpy (*value, cp + v0, v1-v0); - (*value)[v1-v0] = '\0'; + *value = cp + v0; + *val_len = v1 - v0; } - i += skipSpace (cp + i); + i += skipSpace(cp + i); return i; } -static int tagAttrs (mp::HTMLParserEvent & event, +static int tagAttrs(mp::HTMLParserEvent & event, const char *tagName, const char *cp) { - int i; char attr_name[TAG_MAX_LEN]; - char *attr_value; - i = skipSpace (cp); - while (cp[i] && cp[i] != '>') + const char *attr_value; + int val_len; + int i = skipSpace(cp); + while (cp[i] && cp[i] != '>' && cp[i] != '/') { - int nor = skipAttribute (cp+i, attr_name, &attr_value); + int nor = skipAttribute(cp+i, attr_name, &attr_value, &val_len); i += nor; if (nor) { - DEBUG(printf ("------ attr %s=%s\n", attr_name, attr_value)); - event.attribute(tagName, attr_name, attr_value); + DEBUG(printf ("------ attr %s=%.*s\n", attr_name, val_len, attr_value)); + event.attribute(tagName, attr_name, attr_value, val_len); } else { @@ -139,65 +136,59 @@ static int tagAttrs (mp::HTMLParserEvent & event, return i; } -static int tagStart (mp::HTMLParserEvent & event, +static int tagStart(mp::HTMLParserEvent & event, char *tagName, const char *cp, const char which) { - int i = 0; - i = skipName (cp, tagName); - switch (which) + int i = skipName(cp, tagName); + switch (which) { - case '/' : - DEBUG(printf ("------ tag close %s\n", tagName)); - event.closeTag(tagName); - break; - case '!' : - DEBUG(printf ("------ dtd %s\n", tagName)); - break; - case '?' : - DEBUG(printf ("------ pi %s\n", tagName)); - break; - default : - DEBUG(printf ("------ tag open %s\n", tagName)); - event.openTagStart(tagName); - break; + case '/' : + DEBUG(printf("------ tag close %s\n", tagName)); + event.closeTag(tagName); + break; + case '!' : + DEBUG(printf("------ dtd %s\n", tagName)); + break; + case '?' : + DEBUG(printf("------ pi %s\n", tagName)); + break; + default : + DEBUG(printf("------ tag open %s\n", tagName)); + event.openTagStart(tagName); + break; } return i; } -static int tagEnd (mp::HTMLParserEvent & event, const char *tagName, const char *cp) +static int tagEnd(mp::HTMLParserEvent & event, const char *tagName, const char *cp) { int i = 0; + int close_it = 0; while (cp[i] && cp[i] != '>') + { + if (cp[i] == '/') + close_it = 1; i++; + } if (cp[i] == '>') { - event.anyTagEnd(tagName); + event.anyTagEnd(tagName, close_it); i++; } return i; } -static char* allocFromRange (const char *start, const char *end) -{ - char *value = (char *) malloc (end - start + 1); - assert (value); - memcpy (value, start, end - start); - value[end - start] = '\0'; - return value; -} - -static void tagText (mp::HTMLParserEvent & event, const char *text_start, const char *text_end) +static void tagText(mp::HTMLParserEvent & event, const char *text_start, const char *text_end) { if (text_end - text_start) //got text to flush { - char *temp = allocFromRange(text_start, text_end); - DEBUG(printf ("------ text %s\n", temp)); + DEBUG(printf("------ text %.*s\n", + (int) (text_end - text_start), text_start)); event.text(text_start, text_end-text_start); - free(temp); } } -static void parse_str (mp::HTMLParserEvent & event, const char *cp) +static void parse_str(mp::HTMLParserEvent & event, const char *cp) { const char *text_start = cp; const char *text_end = cp; @@ -206,27 +197,28 @@ static void parse_str (mp::HTMLParserEvent & event, const char *cp) if (cp[0] == '<' && cp[1]) //tag? { char which = cp[1]; - if (which == '/') cp++; - if (!strchr (SPACECHR, cp[1])) //valid tag starts + if (which == '/') + cp++; + if (!strchr(SPACECHR, cp[1])) //valid tag starts { - tagText (event, text_start, text_end); //flush any text + tagText(event, text_start, text_end); //flush any text char tagName[TAG_MAX_LEN]; cp++; if (which == '/') { - cp += tagStart (event, tagName, cp, which); + cp += tagStart(event, tagName, cp, which); } else if (which == '!' || which == '?') //pi or dtd { cp++; - cp += tagStart (event, tagName, cp, which); + cp += tagStart(event, tagName, cp, which); } else { - cp += tagStart (event, tagName, cp, which); - cp += tagAttrs (event, tagName, cp); + cp += tagStart(event, tagName, cp, which); + cp += tagAttrs(event, tagName, cp); } - cp += tagEnd (event, tagName, cp); + cp += tagEnd(event, tagName, cp); text_start = cp; text_end = cp; continue; @@ -236,7 +228,7 @@ static void parse_str (mp::HTMLParserEvent & event, const char *cp) cp++; text_end = cp; } - tagText (event, text_start, text_end); //flush any text + tagText(event, text_start, text_end); //flush any text } /*