X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Fhtml_parser.cpp;h=ddbbbe066764aae0da5386e0e3969f75515aed49;hb=288111986392ffbd733d71cd964f49437b899bdb;hp=694df699df9a2e44b0ce3755b6e168ab0772a391;hpb=3ef4df94516a136b7ee18ec8a45e740ef9e9dc05;p=metaproxy-moved-to-github.git diff --git a/src/html_parser.cpp b/src/html_parser.cpp index 694df69..ddbbbe0 100644 --- a/src/html_parser.cpp +++ b/src/html_parser.cpp @@ -42,6 +42,9 @@ namespace metaproxy_1 { int tagAttrs(HTMLParserEvent &event, const char *name, int len, const char *cp); + int skipAttribute(HTMLParserEvent &event, + const char *cp, int *attr_len, + const char **value, int *val_len, int *tr); Rep(); ~Rep(); int m_verbose; @@ -67,6 +70,12 @@ mp::HTMLParser::~HTMLParser() { } +void mp::HTMLParser::set_verbose(int v) +{ + m_p->m_verbose = v; +} + + void mp::HTMLParser::parse(mp::HTMLParserEvent & event, const char *str) const { m_p->parse_str(event, str); @@ -88,8 +97,10 @@ static int skipName(const char *cp) return i; } -static int skipAttribute(const char *cp, int *attr_len, - const char **value, int *val_len) +int mp::HTMLParser::Rep::skipAttribute(HTMLParserEvent &event, + const char *cp, int *attr_len, + const char **value, int *val_len, + int *tr) { int i = skipName(cp); *attr_len = i; @@ -104,9 +115,9 @@ static int skipAttribute(const char *cp, int *attr_len, i += skipSpace(cp + i); if (cp[i] == '\"' || cp[i] == '\'') { - char tr = cp[i]; + *tr = cp[i]; v0 = ++i; - while (cp[i] != tr && cp[i]) + while (cp[i] != *tr && cp[i]) i++; v1 = i; if (cp[i]) @@ -114,6 +125,7 @@ static int skipAttribute(const char *cp, int *attr_len, } else { + *tr = 0; v0 = i; while (cp[i] && !strchr(SPACECHR ">", cp[i])) i++; @@ -137,14 +149,18 @@ int mp::HTMLParser::Rep::tagAttrs(HTMLParserEvent &event, int attr_len; const char *value; int val_len; - int nor = skipAttribute(cp+i, &attr_len, &value, &val_len); + int tr; + int nor = skipAttribute(event, cp+i, &attr_len, &value, &val_len, &tr); i += nor; if (nor) { + char x[2]; + x[0] = tr; + x[1] = 0; if (m_verbose) printf ("------ attr %.*s=%.*s\n", attr_len, attr_name, val_len, value); - event.attribute(name, len, attr_name, attr_len, value, val_len); + event.attribute(name, len, attr_name, attr_len, value, val_len, x); } else { @@ -203,10 +219,13 @@ int mp::HTMLParser::Rep::tagEnd(HTMLParserEvent &event, { int i = 0; int close_it = 0; - while (cp[i] && cp[i] != '>') + for (; cp[i] && cp[i] != '/' && cp[i] != '>'; i++) + ; + if (i > 0) + event.text(cp, i); + if (cp[i] == '/') { - if (cp[i] == '/') - close_it = 1; + close_it = 1; i++; } if (cp[i] == '>')