X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Fhtml_parser.cpp;h=ddbbbe066764aae0da5386e0e3969f75515aed49;hb=288111986392ffbd733d71cd964f49437b899bdb;hp=47b2e148fedb407a35d89287ae57d3324f1fdba4;hpb=68250b45b381d70615236e9160e683c2ba8bf53a;p=metaproxy-moved-to-github.git diff --git a/src/html_parser.cpp b/src/html_parser.cpp index 47b2e14..ddbbbe0 100644 --- a/src/html_parser.cpp +++ b/src/html_parser.cpp @@ -42,6 +42,9 @@ namespace metaproxy_1 { int tagAttrs(HTMLParserEvent &event, const char *name, int len, const char *cp); + int skipAttribute(HTMLParserEvent &event, + const char *cp, int *attr_len, + const char **value, int *val_len, int *tr); Rep(); ~Rep(); int m_verbose; @@ -94,8 +97,10 @@ static int skipName(const char *cp) return i; } -static int skipAttribute(const char *cp, int *attr_len, - const char **value, int *val_len) +int mp::HTMLParser::Rep::skipAttribute(HTMLParserEvent &event, + const char *cp, int *attr_len, + const char **value, int *val_len, + int *tr) { int i = skipName(cp); *attr_len = i; @@ -110,9 +115,9 @@ static int skipAttribute(const char *cp, int *attr_len, i += skipSpace(cp + i); if (cp[i] == '\"' || cp[i] == '\'') { - char tr = cp[i]; + *tr = cp[i]; v0 = ++i; - while (cp[i] != tr && cp[i]) + while (cp[i] != *tr && cp[i]) i++; v1 = i; if (cp[i]) @@ -120,6 +125,7 @@ static int skipAttribute(const char *cp, int *attr_len, } else { + *tr = 0; v0 = i; while (cp[i] && !strchr(SPACECHR ">", cp[i])) i++; @@ -143,14 +149,18 @@ int mp::HTMLParser::Rep::tagAttrs(HTMLParserEvent &event, int attr_len; const char *value; int val_len; - int nor = skipAttribute(cp+i, &attr_len, &value, &val_len); + int tr; + int nor = skipAttribute(event, cp+i, &attr_len, &value, &val_len, &tr); i += nor; if (nor) { + char x[2]; + x[0] = tr; + x[1] = 0; if (m_verbose) printf ("------ attr %.*s=%.*s\n", attr_len, attr_name, val_len, value); - event.attribute(name, len, attr_name, attr_len, value, val_len); + event.attribute(name, len, attr_name, attr_len, value, val_len, x); } else { @@ -209,10 +219,13 @@ int mp::HTMLParser::Rep::tagEnd(HTMLParserEvent &event, { int i = 0; int close_it = 0; - while (cp[i] && cp[i] != '>') + for (; cp[i] && cp[i] != '/' && cp[i] != '>'; i++) + ; + if (i > 0) + event.text(cp, i); + if (cp[i] == '/') { - if (cp[i] == '/') - close_it = 1; + close_it = 1; i++; } if (cp[i] == '>')