From: Michael Schroeder Date: Thu, 23 Feb 2017 14:28:18 +0000 (+0100) Subject: Refactor xml parsing code X-Git-Tag: 0.6.27~25 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4d5c41221fa745db248cbf3892ebcb40817fa941;p=thirdparty%2Flibsolv.git Refactor xml parsing code Now the parser internals are in solv_xmlparser.c. This makes it easy to support other parser libraries. --- diff --git a/ext/CMakeLists.txt b/ext/CMakeLists.txt index 702dda49..bdc6ee97 100644 --- a/ext/CMakeLists.txt +++ b/ext/CMakeLists.txt @@ -121,6 +121,11 @@ IF (ENABLE_RPMMD OR ENABLE_SUSEREPO) repodata_diskusage.c) ENDIF (ENABLE_RPMMD OR ENABLE_SUSEREPO) +IF (ENABLE_RPMMD OR ENABLE_SUSEREPO OR ENABLE_APPDATA OR ENABLE_COMPS OR ENABLE_HELIXREPO OR ENABLE_MDKREPO) + SET (libsolvext_SRCS ${libsolvext_SRCS} + solv_xmlparser.c) +ENDIF (ENABLE_RPMMD OR ENABLE_SUSEREPO OR ENABLE_APPDATA OR ENABLE_COMPS OR ENABLE_HELIXREPO OR ENABLE_MDKREPO) + SET (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") IF (HAVE_LINKER_VERSION_SCRIPT) SET (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${LINK_FLAGS} -Wl,--version-script=${CMAKE_SOURCE_DIR}/ext/libsolvext.ver") diff --git a/ext/repo_appdata.c b/ext/repo_appdata.c index 2b9844eb..6fcd7077 100644 --- a/ext/repo_appdata.c +++ b/ext/repo_appdata.c @@ -22,12 +22,12 @@ #include #include #include -#include #include #include "pool.h" #include "repo.h" #include "util.h" +#include "solv_xmlparser.h" #include "repo_appdata.h" @@ -53,15 +53,8 @@ enum state { NUMSTATES }; -struct stateswitch { - enum state from; - char *ename; - enum state to; - int docontent; -}; -/* !! must be sorted by first column !! */ -static struct stateswitch stateswitches[] = { +static struct solv_xmlparser_element stateswitches[] = { { STATE_START, "applications", STATE_START, 0 }, { STATE_START, "components", STATE_START, 0 }, { STATE_START, "application", STATE_APPLICATION, 0 }, @@ -86,23 +79,15 @@ static struct stateswitch stateswitches[] = { }; struct parsedata { - int depth; - enum state state; - int statedepth; - char *content; - int lcontent; - int acontent; - int docontent; Pool *pool; Repo *repo; Repodata *data; - - struct stateswitch *swtab[NUMSTATES]; - enum state sbtab[NUMSTATES]; + int ret; Solvable *solvable; Id handle; + int skiplang; char *description; int licnt; int skip_depth; @@ -111,74 +96,30 @@ struct parsedata { int havesummary; const char *filename; Queue *owners; -}; - -static inline const char * -find_attr(const char *txt, const char **atts) -{ - for (; *atts; atts += 2) - if (!strcmp(*atts, txt)) - return atts[1]; - return 0; -} + struct solv_xmlparser xmlp; +}; -static void XMLCALL -startElement(void *userData, const char *name, const char **atts) +static void +startElement(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; Solvable *s = pd->solvable; - struct stateswitch *sw; const char *type; -#if 0 - fprintf(stderr, "start: [%d]%s\n", pd->state, name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth++; - return; - } - - pd->depth++; - if (!pd->swtab[pd->state]) /* no statetable -> no substates */ - { -#if 0 - fprintf(stderr, "into unknown: %s (from: %d)\n", name, pd->state); -#endif - return; - } - for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */ - if (!strcmp(sw->ename, name)) - break; - - if (sw->from != pd->state) - { -#if 0 - fprintf(stderr, "into unknown: %s (from: %d)\n", name, pd->state); -#endif - return; - } - pd->state = sw->to; - pd->docontent = sw->docontent; - pd->statedepth = pd->depth; - pd->lcontent = 0; - *pd->content = 0; - - if (!pd->skip_depth && find_attr("xml:lang", atts)) - pd->skip_depth = pd->depth; - if (pd->skip_depth) + /* ignore all language tags */ + if (pd->skiplang || solv_xmlparser_find_attr("xml:lang", atts)) { - pd->docontent = 0; + pd->skiplang++; return; } - switch(pd->state) + switch(state) { case STATE_APPLICATION: - type = find_attr("type", atts); + type = solv_xmlparser_find_attr("type", atts); if (!type || !*type) type = "desktop"; if (strcmp(type, "desktop") != 0) @@ -206,51 +147,55 @@ startElement(void *userData, const char *name, const char **atts) /* replace whitespace with one space/newline */ /* also strip starting/ending whitespace */ -static void +static char * wsstrip(struct parsedata *pd) { + struct solv_xmlparser *xmlp = &pd->xmlp; int i, j; int ws = 0; - for (i = j = 0; pd->content[i]; i++) + for (i = j = 0; xmlp->content[i]; i++) { - if (pd->content[i] == ' ' || pd->content[i] == '\t' || pd->content[i] == '\n') + if (xmlp->content[i] == ' ' || xmlp->content[i] == '\t' || xmlp->content[i] == '\n') { - ws |= pd->content[i] == '\n' ? 2 : 1; + ws |= xmlp->content[i] == '\n' ? 2 : 1; continue; } if (ws && j) - pd->content[j++] = (ws & 2) ? '\n' : ' '; + xmlp->content[j++] = (ws & 2) ? '\n' : ' '; ws = 0; - pd->content[j++] = pd->content[i]; + xmlp->content[j++] = xmlp->content[i]; } - pd->content[j] = 0; - pd->lcontent = j; + xmlp->content[j] = 0; + xmlp->lcontent = j; + return xmlp->content; } /* indent all lines */ -static void +static char * indent(struct parsedata *pd, int il) { + struct solv_xmlparser *xmlp = &pd->xmlp; int i, l; - for (l = 0; pd->content[l]; ) + for (l = 0; xmlp->content[l]; ) { - if (pd->content[l] == '\n') + if (xmlp->content[l] == '\n') { l++; continue; } - if (pd->lcontent + il + 1 > pd->acontent) + if (xmlp->lcontent + il + 1 > xmlp->acontent) { - pd->acontent = pd->lcontent + il + 256; - pd->content = realloc(pd->content, pd->acontent); + xmlp->acontent = xmlp->lcontent + il + 256; + xmlp->content = realloc(xmlp->content, xmlp->acontent); } - memmove(pd->content + l + il, pd->content + l, pd->lcontent - l + 1); + memmove(xmlp->content + l + il, xmlp->content + l, xmlp->lcontent - l + 1); for (i = 0; i < il; i++) - pd->content[l + i] = ' '; - pd->lcontent += il; - while (pd->content[l] && pd->content[l] != '\n') + xmlp->content[l + i] = ' '; + xmlp->lcontent += il; + while (xmlp->content[l] && xmlp->content[l] != '\n') l++; } + return xmlp->content; } static void @@ -346,47 +291,23 @@ guess_filename_from_id(Pool *pool, const char *id) return r; } -static void XMLCALL -endElement(void *userData, const char *name) +static void +endElement(struct solv_xmlparser *xmlp, int state, char *content) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; Solvable *s = pd->solvable; Id id; -#if 0 - fprintf(stderr, "end: [%d]%s\n", pd->state, name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth--; -#if 0 - fprintf(stderr, "back from unknown %d %d %d\n", pd->state, pd->depth, pd->statedepth); -#endif - return; - } - - pd->depth--; - pd->statedepth--; - - if (pd->skip_depth && pd->depth + 1 >= pd->skip_depth) + if (pd->skiplang) { - if (pd->depth + 1 == pd->skip_depth) - pd->skip_depth = 0; - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; + pd->skiplang--; return; } - pd->skip_depth = 0; - if (!s) - { - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; - return; - } + return; - switch (pd->state) + switch (state) { case STATE_APPLICATION: if (!s->arch) @@ -434,26 +355,26 @@ endElement(void *userData, const char *name) pd->desktop_file = solv_free(pd->desktop_file); break; case STATE_ID: - pd->desktop_file = solv_strdup(pd->content); + pd->desktop_file = solv_strdup(content); break; case STATE_NAME: - s->name = pool_str2id(pd->pool, pool_tmpjoin(pool, "application:", pd->content, 0), 1); + s->name = pool_str2id(pd->pool, pool_tmpjoin(pool, "application:", content, 0), 1); break; case STATE_LICENCE: - repodata_add_poolstr_array(pd->data, pd->handle, SOLVABLE_LICENSE, pd->content); + repodata_add_poolstr_array(pd->data, pd->handle, SOLVABLE_LICENSE, content); break; case STATE_SUMMARY: pd->havesummary = 1; - repodata_set_str(pd->data, pd->handle, SOLVABLE_SUMMARY, pd->content); + repodata_set_str(pd->data, pd->handle, SOLVABLE_SUMMARY, content); break; case STATE_URL: - repodata_set_str(pd->data, pd->handle, SOLVABLE_URL, pd->content); + repodata_set_str(pd->data, pd->handle, SOLVABLE_URL, content); break; case STATE_GROUP: - repodata_add_poolstr_array(pd->data, pd->handle, SOLVABLE_GROUP, pd->content); + repodata_add_poolstr_array(pd->data, pd->handle, SOLVABLE_GROUP, content); break; case STATE_EXTENDS: - repodata_add_poolstr_array(pd->data, pd->handle, SOLVABLE_EXTENDS, pd->content); + repodata_add_poolstr_array(pd->data, pd->handle, SOLVABLE_EXTENDS, content); break; case STATE_DESCRIPTION: if (pd->description) @@ -466,83 +387,60 @@ endElement(void *userData, const char *name) } break; case STATE_P: - wsstrip(pd); - pd->description = solv_dupappend(pd->description, pd->content, "\n\n"); + content = wsstrip(pd); + pd->description = solv_dupappend(pd->description, content, "\n\n"); break; case STATE_UL_LI: wsstrip(pd); - indent(pd, 4); - pd->content[2] = '-'; - pd->description = solv_dupappend(pd->description, pd->content, "\n"); + content = indent(pd, 4); + content[2] = '-'; + pd->description = solv_dupappend(pd->description, content, "\n"); break; case STATE_OL_LI: wsstrip(pd); - indent(pd, 4); + content = indent(pd, 4); if (++pd->licnt >= 10) - pd->content[0] = '0' + (pd->licnt / 10) % 10; - pd->content[1] = '0' + pd->licnt % 10; - pd->content[2] = '.'; - pd->description = solv_dupappend(pd->description, pd->content, "\n"); + content[0] = '0' + (pd->licnt / 10) % 10; + content[1] = '0' + pd->licnt % 10; + content[2] = '.'; + pd->description = solv_dupappend(pd->description, content, "\n"); break; case STATE_UL: case STATE_OL: pd->description = solv_dupappend(pd->description, "\n", 0); break; case STATE_PKGNAME: - id = pool_str2id(pd->pool, pd->content, 1); + id = pool_str2id(pd->pool, content, 1); s->requires = repo_addid_dep(pd->repo, s->requires, id, 0); - id = pool_str2id(pd->pool, pool_tmpjoin(pd->pool, "application-appdata(", pd->content, ")"), 1); + id = pool_str2id(pd->pool, pool_tmpjoin(pd->pool, "application-appdata(", content, ")"), 1); s->provides = repo_addid_dep(pd->repo, s->provides, id, 0); break; case STATE_KEYWORD: - repodata_add_poolstr_array(pd->data, pd->handle, SOLVABLE_KEYWORDS, pd->content); + repodata_add_poolstr_array(pd->data, pd->handle, SOLVABLE_KEYWORDS, content); break; default: break; } - - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; - -#if 0 - fprintf(stderr, "end: [%s] -> %d\n", name, pd->state); -#endif } - -static void XMLCALL -characterData(void *userData, const XML_Char *s, int len) +static void +errorCallback(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column) { - struct parsedata *pd = userData; - int l; - char *c; - if (!pd->docontent) - return; - l = pd->lcontent + len + 1; - if (l > pd->acontent) - { - pd->acontent = l + 256; - pd->content = realloc(pd->content, pd->acontent); + struct parsedata *pd = xmlp->userdata; + pool_debug(pd->pool, SOLV_ERROR, "repo_appdata: %s at line %u:%u\n", errstr, line, column); + pd->ret = -1; + if (pd->solvable) + { + repo_free_solvable(pd->repo, pd->solvable - pd->pool->solvables, 1); + pd->solvable = 0; } - c = pd->content + pd->lcontent; - pd->lcontent += len; - while (len-- > 0) - *c++ = *s++; - *c = 0; } -#define BUFF_SIZE 8192 - static int repo_add_appdata_fn(Repo *repo, FILE *fp, int flags, const char *filename, Queue *owners) { - Pool *pool = repo->pool; - struct parsedata pd; - struct stateswitch *sw; Repodata *data; - char buf[BUFF_SIZE]; - int i, l; - int ret = 0; + struct parsedata pd; data = repo_add_repodata(repo, flags); memset(&pd, 0, sizeof(pd)); @@ -553,47 +451,17 @@ repo_add_appdata_fn(Repo *repo, FILE *fp, int flags, const char *filename, Queue pd.filename = filename; pd.owners = owners; - pd.content = malloc(256); - pd.acontent = 256; - - for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) - { - if (!pd.swtab[sw->from]) - pd.swtab[sw->from] = sw; - pd.sbtab[sw->to] = sw->from; - } - - XML_Parser parser = XML_ParserCreate(NULL); - XML_SetUserData(parser, &pd); - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, characterData); + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement, errorCallback); + solv_xmlparser_parse(&pd.xmlp, fp); + solv_xmlparser_free(&pd.xmlp); - for (;;) - { - l = fread(buf, 1, sizeof(buf), fp); - if (XML_Parse(parser, buf, l, l == 0) == XML_STATUS_ERROR) - { - pool_error(pool, -1, "repo_appdata: %s at line %u:%u\n", XML_ErrorString(XML_GetErrorCode(parser)), (unsigned int)XML_GetCurrentLineNumber(parser), (unsigned int)XML_GetCurrentColumnNumber(parser)); - if (pd.solvable) - { - repo_free_solvable(repo, pd.solvable - pd.pool->solvables, 1); - pd.solvable = 0; - } - ret = -1; - break; - } - if (l == 0) - break; - } - XML_ParserFree(parser); + solv_free(pd.desktop_file); + solv_free(pd.description); if (!(flags & REPO_NO_INTERNALIZE)) repodata_internalize(data); - solv_free(pd.content); - solv_free(pd.desktop_file); - solv_free(pd.description); - return ret; + return pd.ret; } int diff --git a/ext/repo_comps.c b/ext/repo_comps.c index 8f364ddc..61e91728 100644 --- a/ext/repo_comps.c +++ b/ext/repo_comps.c @@ -20,11 +20,11 @@ #include #include #include -#include #include "pool.h" #include "repo.h" #include "util.h" +#include "solv_xmlparser.h" #define DISABLE_SPLIT #include "tools_util.h" #include "repo_comps.h" @@ -62,15 +62,7 @@ enum state { NUMSTATES }; -struct stateswitch { - enum state from; - char *ename; - enum state to; - int docontent; -}; - -/* must be sorted by first column */ -static struct stateswitch stateswitches[] = { +static struct solv_xmlparser_element stateswitches[] = { { STATE_START, "comps", STATE_COMPS, 0 }, { STATE_COMPS, "group", STATE_GROUP, 0 }, { STATE_COMPS, "category", STATE_CATEGORY, 0 }, @@ -84,11 +76,11 @@ static struct stateswitch stateswitches[] = { { STATE_GROUP, "lang_only", STATE_LANG_ONLY, 1 }, { STATE_GROUP, "packagelist", STATE_PACKAGELIST, 0 }, { STATE_PACKAGELIST, "packagereq", STATE_PACKAGEREQ, 1 }, - { STATE_CATEGORY, "id", STATE_CID, 1 }, - { STATE_CATEGORY, "name", STATE_CNAME, 1 }, - { STATE_CATEGORY, "description", STATE_CDESCRIPTION, 1 }, + { STATE_CATEGORY, "id", STATE_ID, 1 }, + { STATE_CATEGORY, "name", STATE_NAME, 1 }, + { STATE_CATEGORY, "description", STATE_DESCRIPTION, 1 }, { STATE_CATEGORY , "grouplist", STATE_GROUPLIST, 0 }, - { STATE_CATEGORY , "display_order", STATE_CDISPLAY_ORDER, 1 }, + { STATE_CATEGORY , "display_order", STATE_DISPLAY_ORDER, 1 }, { STATE_GROUPLIST, "groupid", STATE_GROUPID, 1 }, { NUMSTATES } }; @@ -99,16 +91,8 @@ struct parsedata { Repodata *data; const char *filename; const char *basename; - int depth; - enum state state; - int statedepth; - char *content; - int lcontent; - int acontent; - int docontent; - - struct stateswitch *swtab[NUMSTATES]; - enum state sbtab[NUMSTATES]; + + struct solv_xmlparser xmlp; struct joindata jd; const char *tmplang; @@ -116,100 +100,43 @@ struct parsedata { Id condreq; Solvable *solvable; + const char *kind; Id handle; }; -/* - * find_attr - * find value for xml attribute - * I: txt, name of attribute - * I: atts, list of key/value attributes - * O: pointer to value of matching key, or NULL - * - */ - -static inline const char * -find_attr(const char *txt, const char **atts) -{ - for (; *atts; atts += 2) - { - if (!strcmp(*atts, txt)) - return atts[1]; - } - return 0; -} - - -/* - * XML callback: startElement - */ -static void XMLCALL -startElement(void *userData, const char *name, const char **atts) +static void +startElement(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; Solvable *s = pd->solvable; - struct stateswitch *sw; - -#if 0 - fprintf(stderr, "start: [%d]%s\n", pd->state, name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth++; - return; - } - - pd->depth++; - if (!pd->swtab[pd->state]) /* no statetable -> no substates */ - { -#if 0 - fprintf(stderr, "into unknown: %s (from: %d)\n", name, pd->state); -#endif - return; - } - for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */ - if (!strcmp(sw->ename, name)) - break; - - if (sw->from != pd->state) - { -#if 0 - fprintf(stderr, "into unknown: %s (from: %d)\n", name, pd->state); -#endif - return; - } - pd->state = sw->to; - pd->docontent = sw->docontent; - pd->statedepth = pd->depth; - pd->lcontent = 0; - *pd->content = 0; - switch(pd->state) + switch(state) { case STATE_GROUP: case STATE_CATEGORY: s = pd->solvable = pool_id2solvable(pool, repo_add_solvable(pd->repo)); pd->handle = s - pool->solvables; + pd->kind = state == STATE_GROUP ? "group" : "category"; break; case STATE_NAME: case STATE_CNAME: case STATE_DESCRIPTION: case STATE_CDESCRIPTION: - pd->tmplang = join_dup(&pd->jd, find_attr("xml:lang", atts)); + pd->tmplang = join_dup(&pd->jd, solv_xmlparser_find_attr("xml:lang", atts)); break; case STATE_PACKAGEREQ: { - const char *type = find_attr("type", atts); + const char *type = solv_xmlparser_find_attr("type", atts); pd->condreq = 0; pd->reqtype = SOLVABLE_RECOMMENDS; if (type && !strcmp(type, "conditional")) { - const char *requires = find_attr("requires", atts); + const char *requires = solv_xmlparser_find_attr("requires", atts); if (requires && *requires) pd->condreq = pool_str2id(pool, requires, 1); } @@ -226,29 +153,14 @@ startElement(void *userData, const char *name, const char **atts) } -static void XMLCALL -endElement(void *userData, const char *name) +static void +endElement(struct solv_xmlparser *xmlp, int state, char *content) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Solvable *s = pd->solvable; Id id; -#if 0 - fprintf(stderr, "end: [%d]%s\n", pd->state, name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth--; -#if 0 - fprintf(stderr, "back from unknown %d %d %d\n", pd->state, pd->depth, pd->statedepth); -#endif - return; - } - - pd->depth--; - pd->statedepth--; - - switch (pd->state) + switch (state) { case STATE_GROUP: case STATE_CATEGORY: @@ -262,29 +174,26 @@ endElement(void *userData, const char *name) break; case STATE_ID: - case STATE_CID: - s->name = pool_str2id(pd->pool, join2(&pd->jd, pd->state == STATE_ID ? "group" : "category", ":", pd->content), 1); + s->name = pool_str2id(pd->pool, join2(&pd->jd, pd->kind, ":", content), 1); break; case STATE_NAME: - case STATE_CNAME: - repodata_set_str(pd->data, pd->handle, pool_id2langid(pd->pool, SOLVABLE_SUMMARY, pd->tmplang, 1), pd->content); + repodata_set_str(pd->data, pd->handle, pool_id2langid(pd->pool, SOLVABLE_SUMMARY, pd->tmplang, 1), content); break; case STATE_DESCRIPTION: - case STATE_CDESCRIPTION: - repodata_set_str(pd->data, pd->handle, pool_id2langid(pd->pool, SOLVABLE_DESCRIPTION, pd->tmplang, 1), pd->content); + repodata_set_str(pd->data, pd->handle, pool_id2langid(pd->pool, SOLVABLE_DESCRIPTION, pd->tmplang, 1), content); break; case STATE_PACKAGEREQ: - id = pool_str2id(pd->pool, pd->content, 1); + id = pool_str2id(pd->pool, content, 1); if (pd->condreq) id = pool_rel2id(pd->pool, id, pd->condreq, REL_COND, 1); repo_add_idarray(pd->repo, pd->handle, pd->reqtype, id); break; case STATE_GROUPID: - id = pool_str2id(pd->pool, join2(&pd->jd, "group", ":", pd->content), 1); + id = pool_str2id(pd->pool, join2(&pd->jd, "group", ":", content), 1); s->requires = repo_addid_dep(pd->repo, s->requires, id, 0); break; @@ -293,63 +202,27 @@ endElement(void *userData, const char *name) break; case STATE_DISPLAY_ORDER: - case STATE_CDISPLAY_ORDER: - repodata_set_str(pd->data, pd->handle, SOLVABLE_ORDER, pd->content); - break; - - case STATE_DEFAULT: - break; - - case STATE_LANGONLY: - case STATE_LANG_ONLY: + repodata_set_str(pd->data, pd->handle, SOLVABLE_ORDER, content); break; default: break; } - - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; - -#if 0 - fprintf(stderr, "end: [%s] -> %d\n", name, pd->state); -#endif } - -static void XMLCALL -characterData(void *userData, const XML_Char *s, int len) +static void +errorCallback(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column) { - struct parsedata *pd = userData; - int l; - char *c; - if (!pd->docontent) - return; - l = pd->lcontent + len + 1; - if (l > pd->acontent) - { - pd->content = solv_realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - c = pd->content + pd->lcontent; - pd->lcontent += len; - while (len-- > 0) - *c++ = *s++; - *c = 0; + struct parsedata *pd = xmlp->userdata; + pool_debug(pd->pool, SOLV_ERROR, "repo_comps: %s at line %u:%u\n", errstr, line, column); } -#define BUFF_SIZE 8192 - int repo_add_comps(Repo *repo, FILE *fp, int flags) { Repodata *data; struct parsedata pd; - char buf[BUFF_SIZE]; - int i, l; - struct stateswitch *sw; - XML_Parser parser; data = repo_add_repodata(repo, flags); @@ -357,35 +230,9 @@ repo_add_comps(Repo *repo, FILE *fp, int flags) pd.repo = repo; pd.pool = repo->pool; pd.data = data; - - pd.content = solv_malloc(256); - pd.acontent = 256; - - for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) - { - if (!pd.swtab[sw->from]) - pd.swtab[sw->from] = sw; - pd.sbtab[sw->to] = sw->from; - } - - parser = XML_ParserCreate(NULL); - XML_SetUserData(parser, &pd); - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, characterData); - for (;;) - { - l = fread(buf, 1, sizeof(buf), fp); - if (XML_Parse(parser, buf, l, l == 0) == XML_STATUS_ERROR) - { - pool_debug(pd.pool, SOLV_ERROR, "%s at line %u:%u\n", XML_ErrorString(XML_GetErrorCode(parser)), (unsigned int)XML_GetCurrentLineNumber(parser), (unsigned int)XML_GetCurrentColumnNumber(parser)); - break; - } - if (l == 0) - break; - } - XML_ParserFree(parser); - - solv_free(pd.content); + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement, errorCallback); + solv_xmlparser_parse(&pd.xmlp, fp); + solv_xmlparser_free(&pd.xmlp); join_freemem(&pd.jd); if (!(flags & REPO_NO_INTERNALIZE)) diff --git a/ext/repo_deltainfoxml.c b/ext/repo_deltainfoxml.c index 06df1a39..aa40e0c0 100644 --- a/ext/repo_deltainfoxml.c +++ b/ext/repo_deltainfoxml.c @@ -5,8 +5,6 @@ * for further information */ -#define DO_ARRAY 1 - #define _GNU_SOURCE #include #include @@ -14,11 +12,11 @@ #include #include #include -#include #include "pool.h" #include "repo.h" #include "chksum.h" +#include "solv_xmlparser.h" #include "repo_deltainfoxml.h" /* @@ -52,15 +50,7 @@ enum state { NUMSTATES }; -struct stateswitch { - enum state from; - char *ename; - enum state to; - int docontent; -}; - -/* !! must be sorted by first column !! */ -static struct stateswitch stateswitches[] = { +static struct solv_xmlparser_element stateswitches[] = { /* compatibility with old yum-presto */ { STATE_START, "prestodelta", STATE_START, 0 }, { STATE_START, "deltainfo", STATE_START, 0 }, @@ -93,19 +83,10 @@ struct deltarpm { struct parsedata { int ret; - int depth; - enum state state; - int statedepth; - char *content; - int lcontent; - int acontent; - int docontent; Pool *pool; Repo *repo; Repodata *data; - struct stateswitch *swtab[NUMSTATES]; - enum state sbtab[NUMSTATES]; struct deltarpm delta; Id newpkgevr; Id newpkgname; @@ -113,22 +94,9 @@ struct parsedata { Id *handles; int nhandles; -}; -/* - * find attribute - */ - -static const char * -find_attr(const char *txt, const char **atts) -{ - for (; *atts; atts += 2) - { - if (!strcmp(*atts, txt)) - return atts[1]; - } - return 0; -} + struct solv_xmlparser xmlp; +}; /* @@ -139,7 +107,7 @@ static Id makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts) { const char *e, *v, *r, *v2; - char *c; + char *c, *space; int l; e = v = r = 0; @@ -174,12 +142,7 @@ makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts) l += strlen(v); if (r) l += strlen(r) + 1; - if (l > pd->acontent) - { - pd->content = solv_realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - c = pd->content; + c = space = solv_xmlparser_contentspace(&pd->xmlp, l); if (e) { strcpy(c, e); @@ -198,59 +161,28 @@ makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts) c += strlen(c); } *c = 0; - if (!*pd->content) + if (!*space) return 0; #if 0 - fprintf(stderr, "evr: %s\n", pd->content); + fprintf(stderr, "evr: %s\n", space); #endif - return pool_str2id(pool, pd->content, 1); + return pool_str2id(pool, space, 1); } -static void XMLCALL -startElement(void *userData, const char *name, const char **atts) +static void +startElement(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; - struct stateswitch *sw; const char *str; -#if 0 - fprintf(stderr, "start: [%d]%s\n", pd->state, name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth++; - return; - } - - pd->depth++; - if (!pd->swtab[pd->state]) - return; - for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */ - if (!strcmp(sw->ename, name)) - break; - if (sw->from != pd->state) + switch(state) { -#if 0 - fprintf(stderr, "into unknown: [%d]%s (from: %d)\n", sw->to, name, sw->from); -#endif - return; - } - pd->state = sw->to; - pd->docontent = sw->docontent; - pd->statedepth = pd->depth; - pd->lcontent = 0; - *pd->content = 0; - - switch(pd->state) - { - case STATE_START: - break; case STATE_NEWPACKAGE: - if ((str = find_attr("name", atts)) != 0) + if ((str = solv_xmlparser_find_attr("name", atts)) != 0) pd->newpkgname = pool_str2id(pool, str, 1); pd->newpkgevr = makeevr_atts(pool, pd, atts); - if ((str = find_attr("arch", atts)) != 0) + if ((str = solv_xmlparser_find_attr("arch", atts)) != 0) pd->newpkgarch = pool_str2id(pool, str, 1); break; @@ -259,62 +191,44 @@ startElement(void *userData, const char *name, const char **atts) pd->delta.bevr = solv_extend(pd->delta.bevr, pd->delta.nbevr, 1, sizeof(Id), 7); pd->delta.bevr[pd->delta.nbevr++] = makeevr_atts(pool, pd, atts); break; + case STATE_FILENAME: - if ((str = find_attr("xml:base", atts))) + if ((str = solv_xmlparser_find_attr("xml:base", atts))) pd->delta.locbase = solv_strdup(str); break; + case STATE_LOCATION: - pd->delta.location = solv_strdup(find_attr("href", atts)); - if ((str = find_attr("xml:base", atts))) + pd->delta.location = solv_strdup(solv_xmlparser_find_attr("href", atts)); + if ((str = solv_xmlparser_find_attr("xml:base", atts))) pd->delta.locbase = solv_strdup(str); break; - case STATE_SIZE: - break; + case STATE_CHECKSUM: pd->delta.filechecksum = 0; pd->delta.filechecksumtype = REPOKEY_TYPE_SHA1; - if ((str = find_attr("type", atts)) != 0) + if ((str = solv_xmlparser_find_attr("type", atts)) != 0) { pd->delta.filechecksumtype = solv_chksum_str2type(str); if (!pd->delta.filechecksumtype) pool_debug(pool, SOLV_ERROR, "unknown checksum type: '%s'\n", str); } break; - case STATE_SEQUENCE: - break; + default: break; } } -static void XMLCALL -endElement(void *userData, const char *name) +static void +endElement(struct solv_xmlparser *xmlp, int state, char *content) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; const char *str; -#if 0 - fprintf(stderr, "end: %s\n", name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth--; -#if 0 - fprintf(stderr, "back from unknown %d %d %d\n", pd->state, pd->depth, pd->statedepth); -#endif - return; - } - - pd->depth--; - pd->statedepth--; - switch (pd->state) + switch (state) { - case STATE_START: - break; - case STATE_NEWPACKAGE: - break; case STATE_DELTA: { /* read all data for a deltarpm. commit into attributes */ @@ -356,16 +270,16 @@ endElement(void *userData, const char *name) pd->delta.locbase = solv_free(pd->delta.locbase); break; case STATE_FILENAME: - pd->delta.location = solv_strdup(pd->content); + pd->delta.location = solv_strdup(content); break; case STATE_CHECKSUM: - pd->delta.filechecksum = solv_strdup(pd->content); + pd->delta.filechecksum = solv_strdup(content); break; case STATE_SIZE: - pd->delta.downloadsize = strtoull(pd->content, 0, 10); + pd->delta.downloadsize = strtoull(content, 0, 10); break; case STATE_SEQUENCE: - if ((str = pd->content)) + if ((str = content) != 0) { const char *s1, *s2; s1 = strrchr(str, '-'); @@ -392,80 +306,32 @@ endElement(void *userData, const char *name) default: break; } - - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; } - -static void XMLCALL -characterData(void *userData, const XML_Char *s, int len) +void +errorCallback(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column) { - struct parsedata *pd = userData; - int l; - char *c; - if (!pd->docontent) - return; - l = pd->lcontent + len + 1; - if (l > pd->acontent) - { - pd->content = solv_realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - c = pd->content + pd->lcontent; - pd->lcontent += len; - while (len-- > 0) - *c++ = *s++; - *c = 0; + struct parsedata *pd = xmlp->userdata; + pd->ret = pool_error(pd->pool, -1, "repo_deltainfoxml: %s at line %u:%u", errstr, line, column); } -#define BUFF_SIZE 8192 - int repo_add_deltainfoxml(Repo *repo, FILE *fp, int flags) { Pool *pool = repo->pool; - struct parsedata pd; - char buf[BUFF_SIZE]; - int i, l; - struct stateswitch *sw; Repodata *data; - XML_Parser parser; + struct parsedata pd; + int i; data = repo_add_repodata(repo, flags); memset(&pd, 0, sizeof(pd)); - for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) - { - if (!pd.swtab[sw->from]) - pd.swtab[sw->from] = sw; - pd.sbtab[sw->to] = sw->from; - } pd.pool = pool; pd.repo = repo; pd.data = data; - - pd.content = solv_malloc(256); - pd.acontent = 256; - pd.lcontent = 0; - - parser = XML_ParserCreate(NULL); - XML_SetUserData(parser, &pd); - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, characterData); - for (;;) - { - l = fread(buf, 1, sizeof(buf), fp); - if (XML_Parse(parser, buf, l, l == 0) == XML_STATUS_ERROR) - { - pd.ret = pool_error(pool, -1, "repo_updateinfoxml: %s at line %u:%u", XML_ErrorString(XML_GetErrorCode(parser)), (unsigned int)XML_GetCurrentLineNumber(parser), (unsigned int)XML_GetCurrentColumnNumber(parser)); - break; - } - if (l == 0) - break; - } - XML_ParserFree(parser); - solv_free(pd.content); + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement, errorCallback); + solv_xmlparser_parse(&pd.xmlp, fp); + solv_xmlparser_free(&pd.xmlp); /* now commit all handles */ if (!pd.ret) diff --git a/ext/repo_helix.c b/ext/repo_helix.c index 6358f726..eff89010 100644 --- a/ext/repo_helix.c +++ b/ext/repo_helix.c @@ -26,8 +26,9 @@ #include #include #include -#include +#include "queue.h" +#include "solv_xmlparser.h" #include "repo_helix.h" #include "evr.h" @@ -75,22 +76,10 @@ enum state { STATE_PATCH, STATE_PRODUCT, - STATE_PEPOCH, - STATE_PVERSION, - STATE_PRELEASE, - STATE_PARCH, - NUMSTATES }; -struct stateswitch { - enum state from; - char *ename; - enum state to; - int docontent; -}; - -static struct stateswitch stateswitches[] = { +static struct solv_xmlparser_element stateswitches[] = { { STATE_START, "channel", STATE_CHANNEL, 0 }, { STATE_CHANNEL, "subchannel", STATE_SUBCHANNEL, 0 }, { STATE_SUBCHANNEL, "package", STATE_PACKAGE, 0 }, @@ -104,10 +93,10 @@ static struct stateswitch stateswitches[] = { { STATE_PACKAGE, "name", STATE_NAME, 1 }, { STATE_PACKAGE, "vendor", STATE_VENDOR, 1 }, { STATE_PACKAGE, "buildtime", STATE_BUILDTIME, 1 }, - { STATE_PACKAGE, "epoch", STATE_PEPOCH, 1 }, - { STATE_PACKAGE, "version", STATE_PVERSION, 1 }, - { STATE_PACKAGE, "release", STATE_PRELEASE, 1 }, - { STATE_PACKAGE, "arch", STATE_PARCH, 1 }, + { STATE_PACKAGE, "epoch", STATE_EPOCH, 1 }, + { STATE_PACKAGE, "version", STATE_VERSION, 1 }, + { STATE_PACKAGE, "release", STATE_RELEASE, 1 }, + { STATE_PACKAGE, "arch", STATE_ARCH, 1 }, { STATE_PACKAGE, "history", STATE_HISTORY, 0 }, { STATE_PACKAGE, "provides", STATE_PROVIDES, 0 }, { STATE_PACKAGE, "requires", STATE_REQUIRES, 0 }, @@ -119,6 +108,7 @@ static struct stateswitch stateswitches[] = { { STATE_PACKAGE, "suggests", STATE_SUGGESTS, 0 }, { STATE_PACKAGE, "enhances", STATE_ENHANCES, 0 }, { STATE_PACKAGE, "freshens", STATE_FRESHENS, 0 }, + { STATE_PACKAGE, "deps", STATE_PACKAGE, 0 }, /* ignore deps element */ { STATE_HISTORY, "update", STATE_UPDATE, 0 }, { STATE_UPDATE, "epoch", STATE_EPOCH, 1 }, @@ -144,17 +134,8 @@ static struct stateswitch stateswitches[] = { * parser data */ -typedef struct _parsedata { +struct parsedata { int ret; - /* XML parser data */ - int depth; - enum state state; /* current state */ - int statedepth; - char *content; /* buffer for content of node */ - int lcontent; /* actual length of current content */ - int acontent; /* actual buffer size */ - int docontent; /* handle content */ - /* repo data */ Pool *pool; /* current pool */ Repo *repo; /* current repo */ @@ -163,6 +144,7 @@ typedef struct _parsedata { Offset freshens; /* current freshens vector */ /* package data */ + int srcpackage; /* is srcpackage element */ int epoch; /* epoch (as offset into evrspace) */ int version; /* version (as offset into evrspace) */ int release; /* release (as offset into evrspace) */ @@ -171,9 +153,8 @@ typedef struct _parsedata { int levrspace; /* actual evr length */ char *kind; - struct stateswitch *swtab[NUMSTATES]; - enum state sbtab[NUMSTATES]; -} Parsedata; + struct solv_xmlparser xmlp; +}; /*------------------------------------------------------------------*/ @@ -182,9 +163,9 @@ typedef struct _parsedata { /* create Id from epoch:version-release */ static Id -evr2id(Pool *pool, Parsedata *pd, const char *e, const char *v, const char *r) +evr2id(Pool *pool, struct parsedata *pd, const char *e, const char *v, const char *r) { - char *c; + char *c, *space; int l; /* treat explitcit 0 as NULL */ @@ -211,15 +192,10 @@ evr2id(Pool *pool, Parsedata *pd, const char *e, const char *v, const char *r) if (r) l += strlen(r) + 1; /* -r */ - /* extend content if not sufficient */ - if (l > pd->acontent) - { - pd->content = (char *)realloc(pd->content, l + 256); - pd->acontent = l + 256; - } + /* get content space */ + c = space = solv_xmlparser_contentspace(&pd->xmlp, l); - /* copy e-v-r to content */ - c = pd->content; + /* copy e-v-r */ if (e) { strcpy(c, e); @@ -239,13 +215,13 @@ evr2id(Pool *pool, Parsedata *pd, const char *e, const char *v, const char *r) } *c = 0; /* if nothing inserted, return Id 0 */ - if (!*pd->content) - return ID_NULL; + if (!*space) + return 0; #if 0 - fprintf(stderr, "evr: %s\n", pd->content); + fprintf(stderr, "evr: %s\n", space); #endif /* intern and create */ - return pool_str2id(pool, pd->content, 1); + return pool_str2id(pool, space, 1); } @@ -255,7 +231,7 @@ evr2id(Pool *pool, Parsedata *pd, const char *e, const char *v, const char *r) * odd index is value */ static Id -evr_atts2id(Pool *pool, Parsedata *pd, const char **atts) +evr_atts2id(Pool *pool, struct parsedata *pd, const char **atts) { const char *e, *v, *r; e = v = r = 0; @@ -312,7 +288,7 @@ static struct flagtab flagtab[] = { */ static unsigned int -adddep(Pool *pool, Parsedata *pd, unsigned int olddeps, const char **atts, Id marker) +adddep(Pool *pool, struct parsedata *pd, unsigned int olddeps, const char **atts, Id marker) { Id id, name; const char *n, *f, *k; @@ -342,13 +318,9 @@ adddep(Pool *pool, Parsedata *pd, unsigned int olddeps, const char **atts, Id ma if (k) /* if kind!=package, intern : */ { int l = strlen(k) + 1 + strlen(n) + 1; - if (l > pd->acontent) /* extend buffer if needed */ - { - pd->content = (char *)realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - sprintf(pd->content, "%s:%s", k, n); - name = pool_str2id(pool, pd->content, 1); + char *space = solv_xmlparser_contentspace(&pd->xmlp, l); + sprintf(space, "%s:%s", k, n); + name = pool_str2id(pool, space, 1); } else { @@ -382,76 +354,30 @@ adddep(Pool *pool, Parsedata *pd, unsigned int olddeps, const char **atts, Id ma /*----------------------------------------------------------------*/ -/* - * XML callback - * - * - */ - -static void XMLCALL -startElement(void *userData, const char *name, const char **atts) +static void +startElement(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts) { - Parsedata *pd = (Parsedata *)userData; - struct stateswitch *sw; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; Solvable *s = pd->solvable; - if (pd->depth != pd->statedepth) - { - pd->depth++; - return; - } - - /* ignore deps element */ - if (pd->state == STATE_PACKAGE && !strcmp(name, "deps")) - return; - - pd->depth++; - - /* find node name in stateswitch */ - if (!pd->swtab[pd->state]) - return; - for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) - { - if (!strcmp(sw->ename, name)) - break; - } - - /* check if we're at the right level */ - if (sw->from != pd->state) - { -#if 0 - fprintf(stderr, "into unknown: %s\n", name); -#endif - return; - } - - /* set new state */ - pd->state = sw->to; - - pd->docontent = sw->docontent; - pd->statedepth = pd->depth; - - /* start with empty content */ - /* (will collect data until end element) */ - pd->lcontent = 0; - *pd->content = 0; - - switch (pd->state) + switch (state) { case STATE_NAME: if (pd->kind) /* if kind is set (non package) */ { - strcpy(pd->content, pd->kind); - pd->lcontent = strlen(pd->content); - pd->content[pd->lcontent++] = ':'; /* prefix name with ':' */ - pd->content[pd->lcontent] = 0; + strcpy(xmlp->content, pd->kind); + xmlp->lcontent = strlen(xmlp->content); + xmlp->content[xmlp->lcontent++] = ':'; /* prefix name with ':' */ + xmlp->content[xmlp->lcontent] = 0; } break; case STATE_PACKAGE: /* solvable name */ pd->solvable = pool_id2solvable(pool, repo_add_solvable(pd->repo)); + pd->srcpackage = 0; + pd->kind = NULL; /* default is (src)package */ if (!strcmp(name, "selection")) pd->kind = "selection"; else if (!strcmp(name, "pattern")) @@ -464,8 +390,8 @@ startElement(void *userData, const char *name, const char **atts) pd->kind = "patch"; else if (!strcmp(name, "application")) pd->kind = "application"; - else - pd->kind = NULL; /* default is package */ + else if (!strcmp(name, "srcpackage")) + pd->srcpackage = 1; pd->levrspace = 1; pd->epoch = 0; pd->version = 0; @@ -542,7 +468,8 @@ startElement(void *userData, const char *name, const char **atts) } } -static const char *findKernelFlavor(Parsedata *pd, Solvable *s) +static const char * +findKernelFlavor(struct parsedata *pd, Solvable *s) { Pool *pool = pd->pool; Id pid, *pidp; @@ -589,41 +516,21 @@ static const char *findKernelFlavor(Parsedata *pd, Solvable *s) } -/* - * XML callback - * - * - * create Solvable from collected data - */ - -static void XMLCALL -endElement(void *userData, const char *name) +static void +endElement(struct solv_xmlparser *xmlp, int state, char *content) { - Parsedata *pd = (Parsedata *)userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; Solvable *s = pd->solvable; Id evr; unsigned int t = 0; const char *flavor; - if (pd->depth != pd->statedepth) - { - pd->depth--; - /* printf("back from unknown %d %d %d\n", pd->state, pd->depth, pd->statedepth); */ - return; - } - - /* ignore deps element */ - if (pd->state == STATE_PACKAGE && !strcmp(name, "deps")) - return; - - pd->depth--; - pd->statedepth--; - switch (pd->state) + switch (state) { case STATE_PACKAGE: /* package complete */ - if (name[0] == 's' && name[1] == 'r' && name[2] == 'c' && s->arch != ARCH_SRC && s->arch != ARCH_NOSRC) + if (pd->srcpackage && s->arch != ARCH_SRC && s->arch != ARCH_NOSRC) s->arch = ARCH_SRC; if (!s->arch) /* default to "noarch" */ s->arch = ARCH_NOARCH; @@ -720,13 +627,13 @@ endElement(void *userData, const char *name) } break; case STATE_NAME: - s->name = pool_str2id(pool, pd->content, 1); + s->name = pool_str2id(pool, content, 1); break; case STATE_VENDOR: - s->vendor = pool_str2id(pool, pd->content, 1); + s->vendor = pool_str2id(pool, content, 1); break; case STATE_BUILDTIME: - t = atoi (pd->content); + t = atoi(content); if (t) repodata_set_num(pd->data, s - pool->solvables, SOLVABLE_BUILDTIME, t); break; @@ -746,72 +653,38 @@ endElement(void *userData, const char *name) case STATE_EPOCH: case STATE_VERSION: case STATE_RELEASE: - case STATE_PEPOCH: - case STATE_PVERSION: - case STATE_PRELEASE: /* ensure buffer space */ - if (pd->lcontent + 1 + pd->levrspace > pd->aevrspace) + if (xmlp->lcontent + 1 + pd->levrspace > pd->aevrspace) { - pd->evrspace = (char *)realloc(pd->evrspace, pd->lcontent + 1 + pd->levrspace + 256); - pd->aevrspace = pd->lcontent + 1 + pd->levrspace + 256; + pd->aevrspace = xmlp->lcontent + 1 + pd->levrspace + 256; + pd->evrspace = (char *)realloc(pd->evrspace, pd->aevrspace); } - memcpy(pd->evrspace + pd->levrspace, pd->content, pd->lcontent + 1); - if (pd->state == STATE_EPOCH || pd->state == STATE_PEPOCH) + memcpy(pd->evrspace + pd->levrspace, xmlp->content, xmlp->lcontent + 1); + if (state == STATE_EPOCH) pd->epoch = pd->levrspace; - else if (pd->state == STATE_VERSION || pd->state == STATE_PVERSION) + else if (state == STATE_VERSION) pd->version = pd->levrspace; else pd->release = pd->levrspace; - pd->levrspace += pd->lcontent + 1; + pd->levrspace += xmlp->lcontent + 1; break; case STATE_ARCH: - case STATE_PARCH: - s->arch = pool_str2id(pool, pd->content, 1); + s->arch = pool_str2id(pool, content, 1); break; default: break; } - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; - /* printf("back from known %d %d %d\n", pd->state, pd->depth, pd->statedepth); */ } - -/* - * XML callback - * character data - * - */ - -static void XMLCALL -characterData(void *userData, const XML_Char *s, int len) +static void +errorCallback(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column) { - Parsedata *pd = (Parsedata *)userData; - int l; - char *c; - - /* check if current nodes content is interesting */ - if (!pd->docontent) - return; - - /* adapt content buffer */ - l = pd->lcontent + len + 1; - if (l > pd->acontent) - { - pd->content = (char *)realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - /* append new content to buffer */ - c = pd->content + pd->lcontent; - pd->lcontent += len; - while (len-- > 0) - *c++ = *s++; - *c = 0; + struct parsedata *pd = xmlp->userdata; + pd->ret = pool_error(pd->pool, -1, "%s at line %u", errstr, line); } -/*-------------------------------------------------------------------*/ -#define BUFF_SIZE 8192 +/*-------------------------------------------------------------------*/ /* * read 'helix' type xml from fp @@ -823,60 +696,29 @@ int repo_add_helix(Repo *repo, FILE *fp, int flags) { Pool *pool = repo->pool; - Parsedata pd; + struct parsedata pd; Repodata *data; - char buf[BUFF_SIZE]; - int i, l; - struct stateswitch *sw; unsigned int now; - XML_Parser parser; now = solv_timems(0); data = repo_add_repodata(repo, flags); /* prepare parsedata */ memset(&pd, 0, sizeof(pd)); - for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) - { - if (!pd.swtab[sw->from]) - pd.swtab[sw->from] = sw; - pd.sbtab[sw->to] = sw->from; - } - pd.pool = pool; pd.repo = repo; - - pd.content = (char *)malloc(256); /* must hold all solvable kinds! */ - pd.acontent = 256; - pd.lcontent = 0; - - pd.evrspace = (char *)malloc(256); - pd.aevrspace= 256; - pd.levrspace = 1; pd.data = data; + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement, errorCallback); - /* set up XML parser */ + pd.evrspace = (char *)solv_malloc(256); + pd.aevrspace = 256; + pd.levrspace = 1; - parser = XML_ParserCreate(NULL); - XML_SetUserData(parser, &pd); /* make parserdata available to XML callbacks */ - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, characterData); + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement, errorCallback); + solv_xmlparser_parse(&pd.xmlp, fp); + solv_xmlparser_free(&pd.xmlp); - /* read/parse XML file */ - for (;;) - { - l = fread(buf, 1, sizeof(buf), fp); - if (XML_Parse(parser, buf, l, l == 0) == XML_STATUS_ERROR) - { - pd.ret = pool_error(pool, -1, "%s at line %u", XML_ErrorString(XML_GetErrorCode(parser)), (unsigned int)XML_GetCurrentLineNumber(parser)); - break; - } - if (l == 0) - break; - } - XML_ParserFree(parser); - free(pd.content); - free(pd.evrspace); + solv_free(pd.evrspace); if (!(flags & REPO_NO_INTERNALIZE)) repodata_internalize(data); diff --git a/ext/repo_mdk.c b/ext/repo_mdk.c index 345d4167..418bc61d 100644 --- a/ext/repo_mdk.c +++ b/ext/repo_mdk.c @@ -11,12 +11,12 @@ #include #include #include -#include #include "pool.h" #include "repo.h" #include "util.h" #include "chksum.h" +#include "solv_xmlparser.h" #include "repo_mdk.h" static Offset @@ -246,15 +246,7 @@ enum state { NUMSTATES }; -struct stateswitch { - enum state from; - char *ename; - enum state to; - int docontent; -}; - -/* must be sorted by first column */ -static struct stateswitch stateswitches[] = { +static struct solv_xmlparser_element stateswitches[] = { { STATE_START, "media_info", STATE_MEDIA_INFO, 0 }, { STATE_MEDIA_INFO, "info", STATE_INFO, 1 }, { STATE_MEDIA_INFO, "files", STATE_FILES, 1 }, @@ -265,31 +257,12 @@ struct parsedata { Pool *pool; Repo *repo; Repodata *data; - int depth; - enum state state; - int statedepth; - char *content; - int lcontent; - int acontent; - int docontent; - struct stateswitch *swtab[NUMSTATES]; - enum state sbtab[NUMSTATES]; Solvable *solvable; Hashtable joinhash; Hashval joinhashmask; + struct solv_xmlparser xmlp; }; -static inline const char * -find_attr(const char *txt, const char **atts) -{ - for (; *atts; atts += 2) - { - if (!strcmp(*atts, txt)) - return atts[1]; - } - return 0; -} - static Hashtable joinhash_init(Repo *repo, Hashval *hmp) { @@ -380,56 +353,37 @@ joinhash_lookup(Repo *repo, Hashtable ht, Hashval hm, const char *fn, const char return 0; } -static void XMLCALL -startElement(void *userData, const char *name, const char **atts) +static void +startElement(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; - struct stateswitch *sw; - if (pd->depth != pd->statedepth) - { - pd->depth++; - return; - } - pd->depth++; - if (!pd->swtab[pd->state]) - return; - for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) - if (!strcmp(sw->ename, name)) - break; - if (sw->from != pd->state) - return; - pd->state = sw->to; - pd->docontent = sw->docontent; - pd->statedepth = pd->depth; - pd->lcontent = 0; - *pd->content = 0; - switch (pd->state) + switch (state) { case STATE_INFO: { - const char *fn = find_attr("fn", atts); - const char *distepoch = find_attr("distepoch", atts); + const char *fn = solv_xmlparser_find_attr("fn", atts); + const char *distepoch = solv_xmlparser_find_attr("distepoch", atts); const char *str; pd->solvable = joinhash_lookup(pd->repo, pd->joinhash, pd->joinhashmask, fn, distepoch); if (!pd->solvable) break; - str = find_attr("url", atts); + str = solv_xmlparser_find_attr("url", atts); if (str && *str) repodata_set_str(pd->data, pd->solvable - pool->solvables, SOLVABLE_URL, str); - str = find_attr("license", atts); + str = solv_xmlparser_find_attr("license", atts); if (str && *str) repodata_set_poolstr(pd->data, pd->solvable - pool->solvables, SOLVABLE_LICENSE, str); - str = find_attr("sourcerpm", atts); + str = solv_xmlparser_find_attr("sourcerpm", atts); if (str && *str) repodata_set_sourcepkg(pd->data, pd->solvable - pool->solvables, str); break; } case STATE_FILES: { - const char *fn = find_attr("fn", atts); - const char *distepoch = find_attr("distepoch", atts); + const char *fn = solv_xmlparser_find_attr("fn", atts); + const char *distepoch = solv_xmlparser_find_attr("distepoch", atts); pd->solvable = joinhash_lookup(pd->repo, pd->joinhash, pd->joinhashmask, fn, distepoch); break; } @@ -438,29 +392,22 @@ startElement(void *userData, const char *name, const char **atts) } } -static void XMLCALL -endElement(void *userData, const char *name) +static void +endElement(struct solv_xmlparser *xmlp, int state, char *content) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Solvable *s = pd->solvable; - if (pd->depth != pd->statedepth) - { - pd->depth--; - return; - } - pd->depth--; - pd->statedepth--; - switch (pd->state) + switch (state) { case STATE_INFO: - if (s && *pd->content) - repodata_set_str(pd->data, s - pd->pool->solvables, SOLVABLE_DESCRIPTION, pd->content); + if (s && *content) + repodata_set_str(pd->data, s - pd->pool->solvables, SOLVABLE_DESCRIPTION, content); break; case STATE_FILES: - if (s && *pd->content) + if (s && *content) { char *np, *p, *sl; - for (p = pd->content; p && *p; p = np) + for (p = content; p && *p; p = np) { Id id; np = strchr(p, '\n'); @@ -488,42 +435,21 @@ endElement(void *userData, const char *name) default: break; } - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; } -static void XMLCALL -characterData(void *userData, const XML_Char *s, int len) +static void +errorCallback(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column) { - struct parsedata *pd = userData; - int l; - char *c; - if (!pd->docontent) - return; - l = pd->lcontent + len + 1; - if (l > pd->acontent) - { - pd->content = solv_realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - c = pd->content + pd->lcontent; - pd->lcontent += len; - while (len-- > 0) - *c++ = *s++; - *c = 0; + struct parsedata *pd = xmlp->userdata; + pool_debug(pd->pool, SOLV_ERROR, "%s at line %u:%u\n", errstr, line, column); } -#define BUFF_SIZE 8192 int repo_add_mdk_info(Repo *repo, FILE *fp, int flags) { Repodata *data; struct parsedata pd; - char buf[BUFF_SIZE]; - int i, l; - struct stateswitch *sw; - XML_Parser parser; if (!(flags & REPO_EXTEND_SOLVABLES)) { @@ -537,36 +463,10 @@ repo_add_mdk_info(Repo *repo, FILE *fp, int flags) pd.repo = repo; pd.pool = repo->pool; pd.data = data; - - pd.content = solv_malloc(256); - pd.acontent = 256; - + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement, errorCallback); pd.joinhash = joinhash_init(repo, &pd.joinhashmask); - - for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) - { - if (!pd.swtab[sw->from]) - pd.swtab[sw->from] = sw; - pd.sbtab[sw->to] = sw->from; - } - - parser = XML_ParserCreate(NULL); - XML_SetUserData(parser, &pd); - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, characterData); - for (;;) - { - l = fread(buf, 1, sizeof(buf), fp); - if (XML_Parse(parser, buf, l, l == 0) == XML_STATUS_ERROR) - { - pool_debug(pd.pool, SOLV_ERROR, "%s at line %u:%u\n", XML_ErrorString(XML_GetErrorCode(parser)), (unsigned int)XML_GetCurrentLineNumber(parser), (unsigned int)XML_GetCurrentColumnNumber(parser)); - break; - } - if (l == 0) - break; - } - XML_ParserFree(parser); - solv_free(pd.content); + solv_xmlparser_parse(&pd.xmlp, fp); + solv_xmlparser_free(&pd.xmlp); solv_free(pd.joinhash); if (!(flags & REPO_NO_INTERNALIZE)) repodata_internalize(data); diff --git a/ext/repo_products.c b/ext/repo_products.c index 326f8fd5..7b3d731b 100644 --- a/ext/repo_products.c +++ b/ext/repo_products.c @@ -26,11 +26,11 @@ #include #include #include -#include #include "pool.h" #include "repo.h" #include "util.h" +#include "solv_xmlparser.h" #define DISABLE_SPLIT #include "tools_util.h" #include "repo_content.h" @@ -68,15 +68,7 @@ enum state { NUMSTATES }; -struct stateswitch { - enum state from; - char *ename; - enum state to; - int docontent; -}; - -/* !! must be sorted by first column !! */ -static struct stateswitch stateswitches[] = { +static struct solv_xmlparser_element stateswitches[] = { { STATE_START, "product", STATE_PRODUCT, 0 }, { STATE_PRODUCT, "vendor", STATE_VENDOR, 1 }, { STATE_PRODUCT, "name", STATE_NAME, 1 }, @@ -107,19 +99,11 @@ static struct stateswitch stateswitches[] = { struct parsedata { const char *filename; const char *basename; - int depth; - enum state state; - int statedepth; - char *content; - int lcontent; - int acontent; - int docontent; Pool *pool; Repo *repo; Repodata *data; - struct stateswitch *swtab[NUMSTATES]; - enum state sbtab[NUMSTATES]; + struct solv_xmlparser xmlp; struct joindata jd; const char *tmplang; @@ -139,26 +123,6 @@ struct parsedata { }; -/* - * find_attr - * find value for xml attribute - * I: txt, name of attribute - * I: atts, list of key/value attributes - * O: pointer to value of matching key, or NULL - * - */ - -static inline const char * -find_attr(const char *txt, const char **atts) -{ - for (; *atts; atts += 2) - { - if (!strcmp(*atts, txt)) - return atts[1]; - } - return 0; -} - static time_t datestr2timestamp(const char *date) { @@ -183,58 +147,19 @@ datestr2timestamp(const char *date) return timegm(&tm); } -/* - * XML callback: startElement - */ - -static void XMLCALL -startElement(void *userData, const char *name, const char **atts) +static void +startElement(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; Solvable *s = pd->solvable; - struct stateswitch *sw; - -#if 0 - fprintf(stderr, "start: [%d]%s\n", pd->state, name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth++; - return; - } - - pd->depth++; - if (!pd->swtab[pd->state]) /* no statetable -> no substates */ - { -#if 0 - fprintf(stderr, "into unknown: %s (from: %d)\n", name, pd->state); -#endif - return; - } - for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */ - if (!strcmp(sw->ename, name)) - break; - - if (sw->from != pd->state) - { -#if 0 - fprintf(stderr, "into unknown: %s (from: %d)\n", name, pd->state); -#endif - return; - } - pd->state = sw->to; - pd->docontent = sw->docontent; - pd->statedepth = pd->depth; - pd->lcontent = 0; - *pd->content = 0; - switch(pd->state) + switch(state) { case STATE_PRODUCT: /* parse 'schemeversion' and store in global variable */ { - const char * scheme = find_attr("schemeversion", atts); + const char * scheme = solv_xmlparser_find_attr("schemeversion", atts); pd->productscheme = (scheme && *scheme) ? atoi(scheme) : -1; } if (!s) @@ -247,14 +172,14 @@ startElement(void *userData, const char *name, const char **atts) /* ... */ case STATE_SUMMARY: case STATE_DESCRIPTION: - pd->tmplang = join_dup(&pd->jd, find_attr("lang", atts)); + pd->tmplang = join_dup(&pd->jd, solv_xmlparser_find_attr("lang", atts)); break; case STATE_URL: - pd->urltype = pool_str2id(pd->pool, find_attr("name", atts), 1); + pd->urltype = pool_str2id(pd->pool, solv_xmlparser_find_attr("name", atts), 1); break; case STATE_REGUPDREPO: { - const char *repoid = find_attr("repoid", atts); + const char *repoid = solv_xmlparser_find_attr("repoid", atts); if (repoid && *repoid) { Id h = repodata_new_handle(pd->data); @@ -269,28 +194,13 @@ startElement(void *userData, const char *name, const char **atts) } -static void XMLCALL -endElement(void *userData, const char *name) +static void +endElement(struct solv_xmlparser *xmlp, int state, char *content) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Solvable *s = pd->solvable; -#if 0 - fprintf(stderr, "end: [%d]%s\n", pd->state, name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth--; -#if 0 - fprintf(stderr, "back from unknown %d %d %d\n", pd->state, pd->depth, pd->statedepth); -#endif - return; - } - - pd->depth--; - pd->statedepth--; - - switch (pd->state) + switch (state) { case STATE_PRODUCT: /* product done, finish solvable */ @@ -326,144 +236,74 @@ endElement(void *userData, const char *name) pd->solvable = 0; break; case STATE_VENDOR: - s->vendor = pool_str2id(pd->pool, pd->content, 1); + s->vendor = pool_str2id(pd->pool, content, 1); break; case STATE_NAME: - s->name = pool_str2id(pd->pool, join2(&pd->jd, "product", ":", pd->content), 1); + s->name = pool_str2id(pd->pool, join2(&pd->jd, "product", ":", content), 1); break; case STATE_VERSION: - pd->tmpvers = solv_strdup(pd->content); + pd->tmpvers = solv_strdup(content); break; case STATE_RELEASE: - pd->tmprel = solv_strdup(pd->content); + pd->tmprel = solv_strdup(content); break; case STATE_ARCH: - s->arch = pool_str2id(pd->pool, pd->content, 1); + s->arch = pool_str2id(pd->pool, content, 1); break; case STATE_PRODUCTLINE: - repodata_set_str(pd->data, pd->handle, PRODUCT_PRODUCTLINE, pd->content); + repodata_set_str(pd->data, pd->handle, PRODUCT_PRODUCTLINE, content); break; case STATE_UPDATEREPOKEY: /** obsolete **/ break; case STATE_SUMMARY: - repodata_set_str(pd->data, pd->handle, pool_id2langid(pd->pool, SOLVABLE_SUMMARY, pd->tmplang, 1), pd->content); + repodata_set_str(pd->data, pd->handle, pool_id2langid(pd->pool, SOLVABLE_SUMMARY, pd->tmplang, 1), content); break; case STATE_SHORTSUMMARY: - repodata_set_str(pd->data, pd->handle, PRODUCT_SHORTLABEL, pd->content); + repodata_set_str(pd->data, pd->handle, PRODUCT_SHORTLABEL, content); break; case STATE_DESCRIPTION: - repodata_set_str(pd->data, pd->handle, pool_id2langid(pd->pool, SOLVABLE_DESCRIPTION, pd->tmplang, 1), pd->content); + repodata_set_str(pd->data, pd->handle, pool_id2langid(pd->pool, SOLVABLE_DESCRIPTION, pd->tmplang, 1), content); break; case STATE_URL: if (pd->urltype) { - repodata_add_poolstr_array(pd->data, pd->handle, PRODUCT_URL, pd->content); + repodata_add_poolstr_array(pd->data, pd->handle, PRODUCT_URL, content); repodata_add_idarray(pd->data, pd->handle, PRODUCT_URL_TYPE, pd->urltype); } break; case STATE_TARGET: - repodata_set_str(pd->data, pd->handle, PRODUCT_REGISTER_TARGET, pd->content); + repodata_set_str(pd->data, pd->handle, PRODUCT_REGISTER_TARGET, content); break; case STATE_REGRELEASE: - repodata_set_str(pd->data, pd->handle, PRODUCT_REGISTER_RELEASE, pd->content); + repodata_set_str(pd->data, pd->handle, PRODUCT_REGISTER_RELEASE, content); break; case STATE_REGFLAVOR: - repodata_set_str(pd->data, pd->handle, PRODUCT_REGISTER_FLAVOR, pd->content); + repodata_set_str(pd->data, pd->handle, PRODUCT_REGISTER_FLAVOR, content); break; case STATE_CPEID: - if (*pd->content) - repodata_set_str(pd->data, pd->handle, SOLVABLE_CPEID, pd->content); + if (*content) + repodata_set_str(pd->data, pd->handle, SOLVABLE_CPEID, content); break; case STATE_ENDOFLIFE: /* FATE#320699: Support tri-state product-endoflife (tag absent, present but nodate(0), present + date) */ - repodata_set_num(pd->data, pd->handle, PRODUCT_ENDOFLIFE, (*pd->content ? datestr2timestamp(pd->content) : 0)); + repodata_set_num(pd->data, pd->handle, PRODUCT_ENDOFLIFE, (*content ? datestr2timestamp(content) : 0)); break; default: break; } - - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; - -#if 0 - fprintf(stderr, "end: [%s] -> %d\n", name, pd->state); -#endif } - -static void XMLCALL -characterData(void *userData, const XML_Char *s, int len) -{ - struct parsedata *pd = userData; - int l; - char *c; - if (!pd->docontent) - return; - l = pd->lcontent + len + 1; - if (l > pd->acontent) - { - pd->content = solv_realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - c = pd->content + pd->lcontent; - pd->lcontent += len; - while (len-- > 0) - *c++ = *s++; - *c = 0; -} - -#define BUFF_SIZE 8192 - - -/* - * add single product to repo - * - */ - static void -add_code11_product(struct parsedata *pd, FILE *fp) +errorCallback(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column) { - char buf[BUFF_SIZE]; - int l; - struct stat st; - XML_Parser parser; - - if (!fstat(fileno(fp), &st)) - { - pd->currentproduct = st.st_ino; - pd->ctime = (unsigned int)st.st_ctime; - } - else - { - pd->currentproduct = pd->baseproduct + 1; /* make it != baseproduct if stat fails */ - pool_error(pd->pool, 0, "fstat: %s", strerror(errno)); - pd->ctime = 0; - } - - parser = XML_ParserCreate(NULL); - XML_SetUserData(parser, pd); - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, characterData); - - for (;;) - { - l = fread(buf, 1, sizeof(buf), fp); - if (XML_Parse(parser, buf, l, l == 0) == XML_STATUS_ERROR) - { - pool_debug(pd->pool, SOLV_ERROR, "%s: %s at line %u:%u\n", pd->filename, XML_ErrorString(XML_GetErrorCode(parser)), (unsigned int)XML_GetCurrentLineNumber(parser), (unsigned int)XML_GetCurrentColumnNumber(parser)); - XML_ParserFree(parser); - if (pd->solvable) - { - repo_free_solvable(pd->repo, pd->solvable - pd->pool->solvables, 1); - pd->solvable = 0; - } - return; - } - if (l == 0) - break; - } - XML_ParserFree(parser); + struct parsedata *pd = xmlp->userdata; + pool_debug(pd->pool, SOLV_ERROR, "%s: %s at line %u:%u\n", pd->filename, errstr, line, column); + if (pd->solvable) + { + repo_free_solvable(pd->repo, pd->solvable - pd->pool->solvables, 1); + pd->solvable = 0; + } } @@ -472,9 +312,7 @@ repo_add_code11_products(Repo *repo, const char *dirpath, int flags) { Repodata *data; struct parsedata pd; - struct stateswitch *sw; DIR *dir; - int i; data = repo_add_repodata(repo, flags); @@ -483,15 +321,7 @@ repo_add_code11_products(Repo *repo, const char *dirpath, int flags) pd.pool = repo->pool; pd.data = data; - pd.content = solv_malloc(256); - pd.acontent = 256; - - for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) - { - if (!pd.swtab[sw->from]) - pd.swtab[sw->from] = sw; - pd.sbtab[sw->to] = sw->from; - } + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement, errorCallback); if (flags & REPO_USE_ROOTDIR) dirpath = pool_prepend_rootdir(repo->pool, dirpath); @@ -521,14 +351,22 @@ repo_add_code11_products(Repo *repo, const char *dirpath, int flags) pool_error(repo->pool, 0, "%s: %s", fullpath, strerror(errno)); continue; } + if (fstat(fileno(fp), &st)) + { + pool_error(repo->pool, 0, "%s: %s", fullpath, strerror(errno)); + fclose(fp); + continue; + } + pd.currentproduct = st.st_ino; + pd.ctime = (unsigned int)st.st_ctime; pd.filename = fullpath; pd.basename = entry->d_name; - add_code11_product(&pd, fp); + solv_xmlparser_parse(&pd.xmlp, fp); fclose(fp); } closedir(dir); } - solv_free(pd.content); + solv_xmlparser_free(&pd.xmlp); join_freemem(&pd.jd); if (flags & REPO_USE_ROOTDIR) solv_free((char *)dirpath); diff --git a/ext/repo_repomdxml.c b/ext/repo_repomdxml.c index 1d1197e2..9c97e897 100644 --- a/ext/repo_repomdxml.c +++ b/ext/repo_repomdxml.c @@ -5,8 +5,6 @@ * for further information */ -#define DO_ARRAY 1 - #define _GNU_SOURCE #include #include @@ -14,11 +12,11 @@ #include #include #include -#include #include "pool.h" #include "repo.h" #include "chksum.h" +#include "solv_xmlparser.h" #include "repo_repomdxml.h" /* @@ -110,15 +108,7 @@ enum state { NUMSTATES }; -struct stateswitch { - enum state from; - char *ename; - enum state to; - int docontent; -}; - -/* !! must be sorted by first column !! */ -static struct stateswitch stateswitches[] = { +static struct solv_xmlparser_element stateswitches[] = { /* suseinfo tags */ { STATE_START, "repomd", STATE_REPOMD, 0 }, { STATE_START, "suseinfo", STATE_SUSEINFO, 0 }, @@ -153,20 +143,12 @@ static struct stateswitch stateswitches[] = { struct parsedata { int ret; - int depth; - enum state state; - int statedepth; - char *content; - int lcontent; - int acontent; - int docontent; Pool *pool; Repo *repo; Repodata *data; - XML_Parser *parser; - struct stateswitch *swtab[NUMSTATES]; - enum state sbtab[NUMSTATES]; + struct solv_xmlparser xmlp; + int timestamp; /* handles for collection structures */ @@ -180,66 +162,20 @@ struct parsedata { Id chksumtype; }; -/* - * find attribute - */ - -static inline const char * -find_attr(const char *txt, const char **atts) -{ - for (; *atts; atts += 2) - { - if (!strcmp(*atts, txt)) - return atts[1]; - } - return 0; -} - -static void XMLCALL -startElement(void *userData, const char *name, const char **atts) +static void +startElement(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts) { - struct parsedata *pd = userData; - /*Pool *pool = pd->pool;*/ - struct stateswitch *sw; - -#if 0 - fprintf(stderr, "start: [%d]%s\n", pd->state, name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth++; - return; - } + struct parsedata *pd = xmlp->userdata; - pd->depth++; - if (!pd->swtab[pd->state]) - return; - for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */ - if (!strcmp(sw->ename, name)) - break; - - if (sw->from != pd->state) - { -#if 0 - fprintf(stderr, "into unknown: %s (from: %d)\n", name, pd->state); -#endif - return; - } - pd->state = sw->to; - pd->docontent = sw->docontent; - pd->statedepth = pd->depth; - pd->lcontent = 0; - *pd->content = 0; - - switch(pd->state) + switch(state) { case STATE_REPOMD: { const char *updstr; /* this should be OBSOLETE soon */ - updstr = find_attr("updates", atts); + updstr = solv_xmlparser_find_attr("updates", atts); if (updstr) { char *value = solv_strdup(updstr); @@ -253,7 +189,7 @@ startElement(void *userData, const char *name, const char **atts) repodata_add_poolstr_array(pd->data, SOLVID_META, REPOSITORY_UPDATES, value); value = p; } - free(fvalue); + solv_free(fvalue); } break; } @@ -261,7 +197,7 @@ startElement(void *userData, const char *name, const char **atts) { /* this is extra metadata about the product this repository was designed for */ - const char *cpeid = find_attr("cpeid", atts); + const char *cpeid = solv_xmlparser_find_attr("cpeid", atts); pd->rphandle = repodata_new_handle(pd->data); /* set the cpeid for the product the label is set in the content of the tag */ @@ -273,7 +209,7 @@ startElement(void *userData, const char *name, const char **atts) { /* this is extra metadata about the product this repository was designed for */ - const char *cpeid = find_attr("cpeid", atts); + const char *cpeid = solv_xmlparser_find_attr("cpeid", atts); pd->ruhandle = repodata_new_handle(pd->data); /* set the cpeid for the product the label is set in the content of the tag */ @@ -283,7 +219,7 @@ startElement(void *userData, const char *name, const char **atts) } case STATE_DATA: { - const char *type= find_attr("type", atts); + const char *type= solv_xmlparser_find_attr("type", atts); pd->rdhandle = repodata_new_handle(pd->data); if (type) repodata_set_poolstr(pd->data, pd->rdhandle, REPOSITORY_REPOMD_TYPE, type); @@ -291,7 +227,7 @@ startElement(void *userData, const char *name, const char **atts) } case STATE_LOCATION: { - const char *href = find_attr("href", atts); + const char *href = solv_xmlparser_find_attr("href", atts); if (href) repodata_set_str(pd->data, pd->rdhandle, REPOSITORY_REPOMD_LOCATION, href); break; @@ -299,10 +235,10 @@ startElement(void *userData, const char *name, const char **atts) case STATE_CHECKSUM: case STATE_OPENCHECKSUM: { - const char *type= find_attr("type", atts); + const char *type= solv_xmlparser_find_attr("type", atts); pd->chksumtype = type && *type ? solv_chksum_str2type(type) : 0; if (!pd->chksumtype) - pd->ret = pool_error(pd->pool, -1, "line %d: unknown checksum type: %s", (unsigned int)XML_GetCurrentLineNumber(*pd->parser), type ? type : "NULL"); + pd->ret = pool_error(pd->pool, -1, "line %d: unknown checksum type: %s", solv_xmlparser_lineno(xmlp), type ? type : "NULL"); break; } default: @@ -311,27 +247,11 @@ startElement(void *userData, const char *name, const char **atts) return; } -static void XMLCALL -endElement(void *userData, const char *name) +static void +endElement(struct solv_xmlparser *xmlp, int state, char *content) { - struct parsedata *pd = userData; - /* Pool *pool = pd->pool; */ - -#if 0 - fprintf(stderr, "endElement: %s\n", name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth--; -#if 0 - fprintf(stderr, "back from unknown %d %d %d\n", pd->state, pd->depth, pd->statedepth); -#endif - return; - } - - pd->depth--; - pd->statedepth--; - switch (pd->state) + struct parsedata *pd = xmlp->userdata; + switch (state) { case STATE_REPOMD: if (pd->timestamp > 0) @@ -347,10 +267,10 @@ endElement(void *userData, const char *name) case STATE_OPENCHECKSUM: if (!pd->chksumtype) break; - if (strlen(pd->content) != 2 * solv_chksum_len(pd->chksumtype)) - pd->ret = pool_error(pd->pool, -1, "line %d: invalid checksum length for %s", (unsigned int)XML_GetCurrentLineNumber(*pd->parser), solv_chksum_type2str(pd->chksumtype)); + if (strlen(content) != 2 * solv_chksum_len(pd->chksumtype)) + pd->ret = pool_error(pd->pool, -1, "line %d: invalid checksum length for %s", solv_xmlparser_lineno(xmlp), solv_chksum_type2str(pd->chksumtype)); else - repodata_set_checksum(pd->data, pd->rdhandle, pd->state == STATE_CHECKSUM ? REPOSITORY_REPOMD_CHECKSUM : REPOSITORY_REPOMD_OPENCHECKSUM, pd->chksumtype, pd->content); + repodata_set_checksum(pd->data, pd->rdhandle, state == STATE_CHECKSUM ? REPOSITORY_REPOMD_CHECKSUM : REPOSITORY_REPOMD_OPENCHECKSUM, pd->chksumtype, content); break; case STATE_TIMESTAMP: @@ -360,7 +280,7 @@ endElement(void *userData, const char *name) * of all resources to save it as the time * the metadata was generated */ - int timestamp = atoi(pd->content); + int timestamp = atoi(content); if (timestamp) repodata_set_num(pd->data, pd->rdhandle, REPOSITORY_REPOMD_TIMESTAMP, timestamp); if (timestamp > pd->timestamp) @@ -369,7 +289,7 @@ endElement(void *userData, const char *name) } case STATE_EXPIRE: { - int expire = atoi(pd->content); + int expire = atoi(content); if (expire > 0) repodata_set_num(pd->data, SOLVID_META, REPOSITORY_EXPIRE, expire); break; @@ -377,119 +297,69 @@ endElement(void *userData, const char *name) /* repomd.xml content and suseinfo.xml keywords are equivalent */ case STATE_CONTENT: case STATE_KEYWORD: - if (*pd->content) - repodata_add_poolstr_array(pd->data, SOLVID_META, REPOSITORY_KEYWORDS, pd->content); + if (*content) + repodata_add_poolstr_array(pd->data, SOLVID_META, REPOSITORY_KEYWORDS, content); break; case STATE_REVISION: - if (*pd->content) - repodata_set_str(pd->data, SOLVID_META, REPOSITORY_REVISION, pd->content); + if (*content) + repodata_set_str(pd->data, SOLVID_META, REPOSITORY_REVISION, content); break; case STATE_DISTRO: /* distro tag is used in repomd.xml to say the product this repo is made for */ - if (*pd->content) - repodata_set_str(pd->data, pd->rphandle, REPOSITORY_PRODUCT_LABEL, pd->content); + if (*content) + repodata_set_str(pd->data, pd->rphandle, REPOSITORY_PRODUCT_LABEL, content); repodata_add_flexarray(pd->data, SOLVID_META, REPOSITORY_DISTROS, pd->rphandle); break; case STATE_UPDATES: /* updates tag is used in suseinfo.xml to say the repo updates a product however it s not yet a tag standarized for repomd.xml */ - if (*pd->content) - repodata_set_str(pd->data, pd->ruhandle, REPOSITORY_PRODUCT_LABEL, pd->content); + if (*content) + repodata_set_str(pd->data, pd->ruhandle, REPOSITORY_PRODUCT_LABEL, content); repodata_add_flexarray(pd->data, SOLVID_META, REPOSITORY_UPDATES, pd->ruhandle); break; case STATE_REPO: - if (*pd->content) - repodata_add_poolstr_array(pd->data, SOLVID_META, REPOSITORY_REPOID, pd->content); + if (*content) + repodata_add_poolstr_array(pd->data, SOLVID_META, REPOSITORY_REPOID, content); break; case STATE_SIZE: - if (*pd->content) - repodata_set_num(pd->data, pd->rdhandle, REPOSITORY_REPOMD_SIZE, strtoull(pd->content, 0, 10)); + if (*content) + repodata_set_num(pd->data, pd->rdhandle, REPOSITORY_REPOMD_SIZE, strtoull(content, 0, 10)); break; default: break; } - - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; - - return; } - -static void XMLCALL -characterData(void *userData, const XML_Char *s, int len) +static void +errorCallback(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column) { - struct parsedata *pd = userData; - int l; - char *c; - if (!pd->docontent) - return; - l = pd->lcontent + len + 1; - if (l > pd->acontent) - { - pd->content = realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - c = pd->content + pd->lcontent; - pd->lcontent += len; - while (len-- > 0) - *c++ = *s++; - *c = 0; + struct parsedata *pd = xmlp->userdata; + pd->ret = pool_error(pd->pool, -1, "repo_repomdxml: %s at line %u:%u", errstr, line, column); } -#define BUFF_SIZE 8192 - int repo_add_repomdxml(Repo *repo, FILE *fp, int flags) { Pool *pool = repo->pool; struct parsedata pd; Repodata *data; - char buf[BUFF_SIZE]; - int i, l; - struct stateswitch *sw; - XML_Parser parser; data = repo_add_repodata(repo, flags); memset(&pd, 0, sizeof(pd)); pd.timestamp = 0; - for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) - { - if (!pd.swtab[sw->from]) - pd.swtab[sw->from] = sw; - pd.sbtab[sw->to] = sw->from; - } pd.pool = pool; pd.repo = repo; pd.data = data; + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement, errorCallback); - pd.content = malloc(256); - pd.acontent = 256; - pd.lcontent = 0; - parser = XML_ParserCreate(NULL); - XML_SetUserData(parser, &pd); - pd.parser = &parser; - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, characterData); - for (;;) - { - l = fread(buf, 1, sizeof(buf), fp); - if (XML_Parse(parser, buf, l, l == 0) == XML_STATUS_ERROR) - { - pd.ret = pool_error(pool, -1, "repo_repomdxml: %s at line %u:%u", XML_ErrorString(XML_GetErrorCode(parser)), (unsigned int)XML_GetCurrentLineNumber(parser), (unsigned int)XML_GetCurrentColumnNumber(parser)); - break; - } - if (l == 0) - break; - } - XML_ParserFree(parser); + solv_xmlparser_parse(&pd.xmlp, fp); + solv_xmlparser_free(&pd.xmlp); if (!(flags & REPO_NO_INTERNALIZE)) repodata_internalize(data); - free(pd.content); return pd.ret; } diff --git a/ext/repo_rpmmd.c b/ext/repo_rpmmd.c index 42f40cce..6a88908b 100644 --- a/ext/repo_rpmmd.c +++ b/ext/repo_rpmmd.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "pool.h" #include "repo.h" @@ -19,6 +18,7 @@ #include "tools_util.h" #include "repo_rpmmd.h" #include "chksum.h" +#include "solv_xmlparser.h" #ifdef ENABLE_COMPLEX_DEPS #include "pool_parserpmrichdep.h" #endif @@ -86,8 +86,7 @@ enum state { STATE_OPTIONALURL, STATE_FLAG, - /* rpm-md dependencies inside the - format tag */ + /* rpm-md dependencies inside the format tag */ STATE_PROVIDES, STATE_REQUIRES, STATE_OBSOLETES, @@ -118,18 +117,13 @@ enum state { NUMSTATES }; -struct stateswitch { - enum state from; - char *ename; - enum state to; - int docontent; -}; - -static struct stateswitch stateswitches[] = { - /** fake tag used to enclose 2 different xml files in one **/ +static struct solv_xmlparser_element stateswitches[] = { + /** fake tag used to enclose multiple xml files in one **/ { STATE_START, "rpmmd", STATE_START, 0 }, - /** tags for different package data, we just ignore the tag **/ + /** tags for different package data, just ignore them **/ + { STATE_START, "patterns", STATE_START, 0 }, + { STATE_START, "products", STATE_START, 0 }, { STATE_START, "metadata", STATE_START, 0 }, { STATE_START, "otherdata", STATE_START, 0 }, { STATE_START, "filelists", STATE_START, 0 }, @@ -141,6 +135,8 @@ static struct stateswitch stateswitches[] = { { STATE_START, "patch", STATE_SOLVABLE, 0 }, { STATE_START, "package", STATE_SOLVABLE, 0 }, + { STATE_SOLVABLE, "format", STATE_SOLVABLE, 0 }, + { STATE_SOLVABLE, "name", STATE_NAME, 1 }, { STATE_SOLVABLE, "arch", STATE_ARCH, 1 }, { STATE_SOLVABLE, "version", STATE_VERSION, 0 }, @@ -231,23 +227,15 @@ struct parsedata { Repo *repo; Repodata *data; char *kind; - int depth; - enum state state; - int statedepth; - char *content; - int lcontent; - int acontent; - int docontent; Solvable *solvable; Offset freshens; - struct stateswitch *swtab[NUMSTATES]; - enum state sbtab[NUMSTATES]; + + struct solv_xmlparser xmlp; struct joindata jd; /* temporal to store attribute tag language */ const char *tmplang; Id chksumtype; Id handle; - XML_Parser *parser; Queue diskusageq; const char *language; /* default language */ Id langcache[ID_NUM_INTERNAL]; /* cache for the default language */ @@ -297,7 +285,7 @@ static Id makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts) { const char *e, *v, *r, *v2; - char *c; + char *c, *space; int l; e = v = r = 0; @@ -326,12 +314,7 @@ makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts) l += strlen(v); if (r) l += strlen(r) + 1; - if (l > pd->acontent) - { - pd->content = solv_realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - c = pd->content; + c = space = solv_xmlparser_contentspace(&pd->xmlp, l); if (e) { strcpy(c, e); @@ -350,33 +333,12 @@ makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts) c += strlen(c); } *c = 0; - if (!*pd->content) + if (!*space) return 0; #if 0 - fprintf(stderr, "evr: %s\n", pd->content); + fprintf(stderr, "evr: %s\n", space); #endif - return pool_str2id(pool, pd->content, 1); -} - - -/* - * find_attr - * find value for xml attribute - * I: txt, name of attribute - * I: atts, list of key/value attributes - * O: pointer to value of matching key, or NULL - * - */ - -static inline const char * -find_attr(const char *txt, const char **atts) -{ - for (; *atts; atts += 2) - { - if (!strcmp(*atts, txt)) - return atts[1]; - } - return 0; + return pool_str2id(pool, space, 1); } @@ -427,13 +389,9 @@ adddep(Pool *pool, struct parsedata *pd, unsigned int olddeps, const char **atts if (k) { int l = strlen(k) + 1 + strlen(n) + 1; - if (l > pd->acontent) - { - pd->content = solv_realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - sprintf(pd->content, "%s:%s", k, n); - id = pool_str2id(pool, pd->content, 1); + char *space = solv_xmlparser_contentspace(&pd->xmlp, l); + sprintf(space, "%s:%s", k, n); + id = pool_str2id(pool, space, 1); } #ifdef ENABLE_COMPLEX_DEPS else if (!f && n[0] == '(') @@ -666,63 +624,22 @@ fill_cshash_from_new_solvables(struct parsedata *pd) /* * startElement - * XML callback - * */ -static void XMLCALL -startElement(void *userData, const char *name, const char **atts) +static void +startElement(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; Solvable *s = pd->solvable; - struct stateswitch *sw; - const char *str; Id handle = pd->handle; + const char *str; const char *pkgid; - /* fprintf(stderr, "into %s, from %d, depth %d, statedepth %d\n", name, pd->state, pd->depth, pd->statedepth); */ - - if (pd->depth != pd->statedepth) - { - pd->depth++; - return; - } - - if (pd->state == STATE_START && !strcmp(name, "patterns")) - return; - if (pd->state == STATE_START && !strcmp(name, "products")) - return; -#if 0 - if (pd->state == STATE_START && !strcmp(name, "metadata")) - return; -#endif - if (pd->state == STATE_SOLVABLE && !strcmp(name, "format")) + if (!s && state != STATE_SOLVABLE) return; - pd->depth++; - if (!pd->swtab[pd->state]) - return; - for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) - if (!strcmp(sw->ename, name)) - break; - if (sw->from != pd->state) - { -#if 0 - fprintf(stderr, "into unknown: %s\n", name); -#endif - return; - } - pd->state = sw->to; - pd->docontent = sw->docontent; - pd->statedepth = pd->depth; - pd->lcontent = 0; - *pd->content = 0; - - if (!s && pd->state != STATE_SOLVABLE) - return; - - switch(pd->state) + switch(state) { case STATE_SOLVABLE: pd->kind = 0; @@ -750,7 +667,7 @@ startElement(void *userData, const char *name, const char **atts) one. */ pd->extending = 0; - if ((pkgid = find_attr("pkgid", atts)) != NULL) + if ((pkgid = solv_xmlparser_find_attr("pkgid", atts)) != NULL) { unsigned char chk[256]; int l; @@ -791,7 +708,7 @@ startElement(void *userData, const char *name, const char **atts) if (pd->kind && pd->kind[1] == 'r') { /* products can have a type */ - const char *type = find_attr("type", atts); + const char *type = solv_xmlparser_find_attr("type", atts); if (type && *type) repodata_set_str(pd->data, handle, PRODUCT_TYPE, type); } @@ -863,27 +780,27 @@ startElement(void *userData, const char *name, const char **atts) case STATE_SUMMARY: case STATE_CATEGORY: case STATE_DESCRIPTION: - pd->tmplang = join_dup(&pd->jd, find_attr("lang", atts)); + pd->tmplang = join_dup(&pd->jd, solv_xmlparser_find_attr("lang", atts)); break; case STATE_USERVISIBLE: repodata_set_void(pd->data, handle, SOLVABLE_ISVISIBLE); break; case STATE_INCLUDESENTRY: - str = find_attr("pattern", atts); + str = solv_xmlparser_find_attr("pattern", atts); if (str) repodata_add_poolstr_array(pd->data, handle, SOLVABLE_INCLUDES, join2(&pd->jd, "pattern", ":", str)); break; case STATE_EXTENDSENTRY: - str = find_attr("pattern", atts); + str = solv_xmlparser_find_attr("pattern", atts); if (str) repodata_add_poolstr_array(pd->data, handle, SOLVABLE_EXTENDS, join2(&pd->jd, "pattern", ":", str)); break; case STATE_LOCATION: - str = find_attr("href", atts); + str = solv_xmlparser_find_attr("href", atts); if (str) { int medianr = 0; - const char *base = find_attr("xml:base", atts); + const char *base = solv_xmlparser_find_attr("xml:base", atts); if (base && !strncmp(base, "media:", 6)) { /* check for the media number in the fragment */ @@ -899,29 +816,29 @@ startElement(void *userData, const char *name, const char **atts) } break; case STATE_CHECKSUM: - str = find_attr("type", atts); + str = solv_xmlparser_find_attr("type", atts); pd->chksumtype = str && *str ? solv_chksum_str2type(str) : 0; if (!pd->chksumtype) - pd->ret = pool_error(pool, -1, "line %d: unknown checksum type: %s", (unsigned int)XML_GetCurrentLineNumber(*pd->parser), str ? str : "NULL"); + pd->ret = pool_error(pool, -1, "line %d: unknown checksum type: %s", solv_xmlparser_lineno(xmlp), str ? str : "NULL"); break; case STATE_TIME: { unsigned int t; - str = find_attr("build", atts); + str = solv_xmlparser_find_attr("build", atts); if (str && (t = atoi(str)) != 0) repodata_set_num(pd->data, handle, SOLVABLE_BUILDTIME, t); break; } case STATE_SIZE: - if ((str = find_attr("installed", atts)) != 0) + if ((str = solv_xmlparser_find_attr("installed", atts)) != 0) repodata_set_num(pd->data, handle, SOLVABLE_INSTALLSIZE, strtoull(str, 0, 10)); - if ((str = find_attr("package", atts)) != 0) + if ((str = solv_xmlparser_find_attr("package", atts)) != 0) repodata_set_num(pd->data, handle, SOLVABLE_DOWNLOADSIZE, strtoull(str, 0, 10)); break; case STATE_HEADERRANGE: { unsigned int end; - str = find_attr("end", atts); + str = solv_xmlparser_find_attr("end", atts); if (str && (end = atoi(str)) != 0) repodata_set_num(pd->data, handle, SOLVABLE_HEADEREND, end); break; @@ -947,7 +864,7 @@ startElement(void *userData, const char *name, const char **atts) long filesz = 0, filenum = 0; Id did; - if ((str = find_attr("name", atts)) == 0) + if ((str = solv_xmlparser_find_attr("name", atts)) == 0) { pd->ret = pool_error(pool, -1, " tag without 'name' attribute"); break; @@ -959,20 +876,16 @@ startElement(void *userData, const char *name, const char **atts) else { int l = strlen(str) + 2; - if (l > pd->acontent) - { - pd->acontent = l + 256; - pd->content = solv_realloc(pd->content, pd->acontent); - } - pd->content[0] = '/'; - memcpy(pd->content + 1, str, l - 1); - str = pd->content; + char *space = solv_xmlparser_contentspace(xmlp, l); + space[0] = '/'; + memcpy(space + 1, str, l - 1); + str = space; } } did = repodata_str2dir(pd->data, str, 1); - if ((str = find_attr("size", atts)) != 0) + if ((str = solv_xmlparser_find_attr("size", atts)) != 0) filesz = strtol(str, 0, 0); - if ((str = find_attr("count", atts)) != 0) + if ((str = solv_xmlparser_find_attr("count", atts)) != 0) filenum = strtol(str, 0, 0); if (filesz || filenum) { @@ -983,9 +896,9 @@ startElement(void *userData, const char *name, const char **atts) } case STATE_CHANGELOG: pd->changelog_handle = repodata_new_handle(pd->data); - if ((str = find_attr("date", atts)) != 0) + if ((str = solv_xmlparser_find_attr("date", atts)) != 0) repodata_set_num(pd->data, pd->changelog_handle, SOLVABLE_CHANGELOG_TIME, strtoull(str, 0, 10)); - if ((str = find_attr("author", atts)) != 0) + if ((str = solv_xmlparser_find_attr("author", atts)) != 0) repodata_set_str(pd->data, pd->changelog_handle, SOLVABLE_CHANGELOG_AUTHOR, str); break; default: @@ -996,14 +909,12 @@ startElement(void *userData, const char *name, const char **atts) /* * endElement - * XML callback - * */ -static void XMLCALL -endElement(void *userData, const char *name) +static void +endElement(struct solv_xmlparser *xmlp, int state, char *content) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; Solvable *s = pd->solvable; Repo *repo = pd->repo; @@ -1011,37 +922,10 @@ endElement(void *userData, const char *name) Id id; char *p; - if (pd->depth != pd->statedepth) - { - pd->depth--; - /* printf("back from unknown %d %d %d\n", pd->state, pd->depth, pd->statedepth); */ - return; - } - - /* ignore patterns & metadata */ - if (pd->state == STATE_START && !strcmp(name, "patterns")) - return; - if (pd->state == STATE_START && !strcmp(name, "products")) - return; -#if 0 - if (pd->state == STATE_START && !strcmp(name, "metadata")) - return; -#endif - if (pd->state == STATE_SOLVABLE && !strcmp(name, "format")) - return; - - pd->depth--; - pd->statedepth--; - - if (!s) - { - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; - return; - } + return; - switch (pd->state) + switch (state) { case STATE_SOLVABLE: if (pd->extending) @@ -1064,32 +948,32 @@ endElement(void *userData, const char *name) break; case STATE_NAME: if (pd->kind) - s->name = pool_str2id(pool, join2(&pd->jd, pd->kind, ":", pd->content), 1); + s->name = pool_str2id(pool, join2(&pd->jd, pd->kind, ":", content), 1); else - s->name = pool_str2id(pool, pd->content, 1); + s->name = pool_str2id(pool, content, 1); break; case STATE_ARCH: - s->arch = pool_str2id(pool, pd->content, 1); + s->arch = pool_str2id(pool, content, 1); break; case STATE_VENDOR: - s->vendor = pool_str2id(pool, pd->content, 1); + s->vendor = pool_str2id(pool, content, 1); break; case STATE_RPM_GROUP: - repodata_set_poolstr(pd->data, handle, SOLVABLE_GROUP, pd->content); + repodata_set_poolstr(pd->data, handle, SOLVABLE_GROUP, content); break; case STATE_RPM_LICENSE: - repodata_set_poolstr(pd->data, handle, SOLVABLE_LICENSE, pd->content); + repodata_set_poolstr(pd->data, handle, SOLVABLE_LICENSE, content); break; case STATE_CHECKSUM: { unsigned char chk[256]; int l = solv_chksum_len(pd->chksumtype); - const char *str = pd->content; + const char *str = content; if (!l || l > sizeof(chk)) break; - if (solv_hex2bin(&str, chk, l) != l || pd->content[2 * l]) + if (solv_hex2bin(&str, chk, l) != l || content[2 * l]) { - pd->ret = pool_error(pool, -1, "line %u: invalid %s checksum", (unsigned int)XML_GetCurrentLineNumber(*pd->parser), solv_chksum_type2str(pd->chksumtype)); + pd->ret = pool_error(pool, -1, "line %u: invalid %s checksum", solv_xmlparser_lineno(xmlp), solv_chksum_type2str(pd->chksumtype)); break; } repodata_set_bin_checksum(pd->data, handle, SOLVABLE_CHECKSUM, pd->chksumtype, chk); @@ -1099,152 +983,124 @@ endElement(void *userData, const char *name) break; } case STATE_FILE: - if ((p = strrchr(pd->content, '/')) != 0) + if ((p = strrchr(content, '/')) != 0) { *p++ = 0; - if (pd->lastdir && !strcmp(pd->lastdirstr, pd->content)) + if (pd->lastdir && !strcmp(pd->lastdirstr, content)) { id = pd->lastdir; } else { - int l = p - pd->content; + int l = p - content; if (l + 1 > pd->lastdirstrl) /* + 1 for the possible leading / we need to insert */ { pd->lastdirstrl = l + 128; pd->lastdirstr = solv_realloc(pd->lastdirstr, pd->lastdirstrl); } - if (pd->content[0] != '/') + if (content[0] != '/') { pd->lastdirstr[0] = '/'; - memcpy(pd->lastdirstr + 1, pd->content, l); + memcpy(pd->lastdirstr + 1, content, l); id = repodata_str2dir(pd->data, pd->lastdirstr, 1); } else - id = repodata_str2dir(pd->data, pd->content, 1); + id = repodata_str2dir(pd->data, content, 1); pd->lastdir = id; - memcpy(pd->lastdirstr, pd->content, l); + memcpy(pd->lastdirstr, content, l); } } else { - p = pd->content; + p = content; id = repodata_str2dir(pd->data, "/", 1); } repodata_add_dirstr(pd->data, handle, SOLVABLE_FILELIST, id, p); break; case STATE_SUMMARY: - repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_SUMMARY, pd->tmplang), pd->content); + repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_SUMMARY, pd->tmplang), content); break; case STATE_DESCRIPTION: - set_description_author(pd->data, handle, pd->content, pd); + set_description_author(pd->data, handle, content, pd); break; case STATE_CATEGORY: - repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_CATEGORY, pd->tmplang), pd->content); + repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_CATEGORY, pd->tmplang), content); break; case STATE_DISTRIBUTION: - repodata_set_poolstr(pd->data, handle, SOLVABLE_DISTRIBUTION, pd->content); + repodata_set_poolstr(pd->data, handle, SOLVABLE_DISTRIBUTION, content); break; case STATE_URL: - if (pd->content[0]) - repodata_set_str(pd->data, handle, SOLVABLE_URL, pd->content); + if (*content) + repodata_set_str(pd->data, handle, SOLVABLE_URL, content); break; case STATE_PACKAGER: - if (pd->content[0]) - repodata_set_poolstr(pd->data, handle, SOLVABLE_PACKAGER, pd->content); + if (*content) + repodata_set_poolstr(pd->data, handle, SOLVABLE_PACKAGER, content); break; case STATE_SOURCERPM: - if (pd->content[0]) - repodata_set_sourcepkg(pd->data, handle, pd->content); + if (*content) + repodata_set_sourcepkg(pd->data, handle, content); break; case STATE_RELNOTESURL: - if (pd->content[0]) + if (*content) { - repodata_add_poolstr_array(pd->data, handle, PRODUCT_URL, pd->content); + repodata_add_poolstr_array(pd->data, handle, PRODUCT_URL, content); repodata_add_idarray(pd->data, handle, PRODUCT_URL_TYPE, pool_str2id(pool, "releasenotes", 1)); } break; case STATE_UPDATEURL: - if (pd->content[0]) + if (*content) { - repodata_add_poolstr_array(pd->data, handle, PRODUCT_URL, pd->content); + repodata_add_poolstr_array(pd->data, handle, PRODUCT_URL, content); repodata_add_idarray(pd->data, handle, PRODUCT_URL_TYPE, pool_str2id(pool, "update", 1)); } break; case STATE_OPTIONALURL: - if (pd->content[0]) + if (*content) { - repodata_add_poolstr_array(pd->data, handle, PRODUCT_URL, pd->content); + repodata_add_poolstr_array(pd->data, handle, PRODUCT_URL, content); repodata_add_idarray(pd->data, handle, PRODUCT_URL_TYPE, pool_str2id(pool, "optional", 1)); } break; case STATE_FLAG: - if (pd->content[0]) - repodata_add_poolstr_array(pd->data, handle, PRODUCT_FLAGS, pd->content); + if (*content) + repodata_add_poolstr_array(pd->data, handle, PRODUCT_FLAGS, content); break; case STATE_EULA: - if (pd->content[0]) - repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_EULA, pd->tmplang), pd->content); + if (*content) + repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_EULA, pd->tmplang), content); break; case STATE_KEYWORD: - if (pd->content[0]) - repodata_add_poolstr_array(pd->data, handle, SOLVABLE_KEYWORDS, pd->content); + if (*content) + repodata_add_poolstr_array(pd->data, handle, SOLVABLE_KEYWORDS, content); break; case STATE_DISKUSAGE: if (pd->diskusageq.count) repodata_add_diskusage(pd->data, handle, &pd->diskusageq); break; case STATE_ORDER: - if (pd->content[0]) - repodata_set_str(pd->data, handle, SOLVABLE_ORDER, pd->content); + if (*content) + repodata_set_str(pd->data, handle, SOLVABLE_ORDER, content); break; case STATE_CHANGELOG: - repodata_set_str(pd->data, pd->changelog_handle, SOLVABLE_CHANGELOG_TEXT, pd->content); + repodata_set_str(pd->data, pd->changelog_handle, SOLVABLE_CHANGELOG_TEXT, content); repodata_add_flexarray(pd->data, handle, SOLVABLE_CHANGELOG, pd->changelog_handle); pd->changelog_handle = 0; break; default: break; } - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; - /* fprintf(stderr, "back from known %d %d %d\n", pd->state, pd->depth, pd->statedepth); */ } - -/* - * characterData - * XML callback - * - */ - -static void XMLCALL -characterData(void *userData, const XML_Char *s, int len) +static void +errorCallback(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column) { - struct parsedata *pd = userData; - int l; - char *c; - - if (!pd->docontent) - return; - l = pd->lcontent + len + 1; - if (l > pd->acontent) - { - pd->content = solv_realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - c = pd->content + pd->lcontent; - pd->lcontent += len; - while (len-- > 0) - *c++ = *s++; - *c = 0; + struct parsedata *pd = xmlp->userdata; + pd->ret = pool_error(pd->pool, -1, "repo_rpmmd: %s at line %u:%u", errstr, line, column); } /*-----------------------------------------------*/ -/* 'main' */ - -#define BUFF_SIZE 8192 /* * repo_add_rpmmd @@ -1257,30 +1113,17 @@ repo_add_rpmmd(Repo *repo, FILE *fp, const char *language, int flags) { Pool *pool = repo->pool; struct parsedata pd; - char buf[BUFF_SIZE]; - int i, l; - struct stateswitch *sw; Repodata *data; unsigned int now; - XML_Parser parser; now = solv_timems(0); data = repo_add_repodata(repo, flags); memset(&pd, 0, sizeof(pd)); - for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) - { - if (!pd.swtab[sw->from]) - pd.swtab[sw->from] = sw; - pd.sbtab[sw->to] = sw->from; - } pd.pool = pool; pd.repo = repo; pd.data = data; - pd.content = solv_malloc(256); - pd.acontent = 256; - pd.lcontent = 0; pd.kind = 0; pd.language = language && *language && strcmp(language, "en") != 0 ? language : 0; queue_init(&pd.diskusageq); @@ -1293,24 +1136,10 @@ repo_add_rpmmd(Repo *repo, FILE *fp, const char *language, int flags) fill_cshash_from_repo(&pd); } - parser = XML_ParserCreate(NULL); - XML_SetUserData(parser, &pd); - pd.parser = &parser; - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, characterData); - for (;;) - { - l = fread(buf, 1, sizeof(buf), fp); - if (XML_Parse(parser, buf, l, l == 0) == XML_STATUS_ERROR) - { - pd.ret = pool_error(pool, -1, "repo_rpmmd: %s at line %u:%u", XML_ErrorString(XML_GetErrorCode(parser)), (unsigned int)XML_GetCurrentLineNumber(parser), (unsigned int)XML_GetCurrentColumnNumber(parser)); - break; - } - if (l == 0) - break; - } - XML_ParserFree(parser); - solv_free(pd.content); + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement, errorCallback); + solv_xmlparser_parse(&pd.xmlp, fp); + solv_xmlparser_free(&pd.xmlp); + solv_free(pd.lastdirstr); join_freemem(&pd.jd); free_cshash(&pd); diff --git a/ext/repo_updateinfoxml.c b/ext/repo_updateinfoxml.c index 6af74f22..5adf7b7b 100644 --- a/ext/repo_updateinfoxml.c +++ b/ext/repo_updateinfoxml.c @@ -13,11 +13,11 @@ #include #include #include -#include #include #include "pool.h" #include "repo.h" +#include "solv_xmlparser.h" #include "repo_updateinfoxml.h" #define DISABLE_SPLIT #include "tools_util.h" @@ -75,16 +75,7 @@ enum state { NUMSTATES }; -struct stateswitch { - enum state from; - char *ename; - enum state to; - int docontent; -}; - - -/* !! must be sorted by first column !! */ -static struct stateswitch stateswitches[] = { +static struct solv_xmlparser_element stateswitches[] = { { STATE_START, "updates", STATE_UPDATES, 0 }, { STATE_START, "update", STATE_UPDATE, 0 }, { STATE_UPDATES, "update", STATE_UPDATE, 0 }, @@ -112,13 +103,6 @@ static struct stateswitch stateswitches[] = { struct parsedata { int ret; - int depth; - enum state state; - int statedepth; - char *content; - int lcontent; - int acontent; - int docontent; Pool *pool; Repo *repo; Repodata *data; @@ -126,10 +110,8 @@ struct parsedata { Solvable *solvable; time_t buildtime; Id collhandle; + struct solv_xmlparser xmlp; struct joindata jd; - - struct stateswitch *swtab[NUMSTATES]; - enum state sbtab[NUMSTATES]; }; /* @@ -161,7 +143,7 @@ static Id makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts) { const char *e, *v, *r, *v2; - char *c; + char *c, *space; int l; e = v = r = 0; @@ -190,12 +172,8 @@ makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts) l += strlen(v); if (r) l += strlen(r) + 1; - if (l > pd->acontent) - { - pd->content = realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - c = pd->content; + + c = space = solv_xmlparser_contentspace(&pd->xmlp, l); if (e) { strcpy(c, e); @@ -214,60 +192,25 @@ makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts) c += strlen(c); } *c = 0; - if (!*pd->content) + if (!*space) return 0; #if 0 - fprintf(stderr, "evr: %s\n", pd->content); + fprintf(stderr, "evr: %s\n", space); #endif - return pool_str2id(pool, pd->content, 1); + return pool_str2id(pool, space, 1); } -static void XMLCALL -startElement(void *userData, const char *name, const char **atts) +static void +startElement(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; Solvable *solvable = pd->solvable; - struct stateswitch *sw; - /*const char *str; */ - -#if 0 - fprintf(stderr, "start: [%d]%s\n", pd->state, name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth++; - return; - } - pd->depth++; - if (!pd->swtab[pd->state]) - return; - for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */ - if (!strcmp(sw->ename, name)) - break; - - if (sw->from != pd->state) + switch(state) { -#if 0 - fprintf(stderr, "into unknown: %s (from: %d)\n", name, pd->state); -#endif - return; - } - pd->state = sw->to; - pd->docontent = sw->docontent; - pd->statedepth = pd->depth; - pd->lcontent = 0; - *pd->content = 0; - - switch(pd->state) - { - case STATE_START: - break; - case STATE_UPDATES: - break; /* * buildtime = (time_t)0; } break; - /* FEDORA-2007-4594 */ - case STATE_ID: - break; - /* imlib-1.9.15-6.fc8 */ - case STATE_TITLE: - break; - /* Fedora 8 */ - case STATE_RELEASE: - break; - /* - */ + case STATE_ISSUED: case STATE_UPDATED: { - const char *date = 0; - for (; *atts; atts += 2) - { - if (!strcmp(*atts, "date")) - date = atts[1]; - } + const char *date = solv_xmlparser_find_attr("date", atts); if (date) { time_t t = datestr2timestamp(date); @@ -325,13 +253,7 @@ startElement(void *userData, const char *name, const char **atts) } } break; - case STATE_REFERENCES: - break; - /* - */ + case STATE_REFERENCE: { const char *href = 0, *id = 0, *title = 0, *type = 0; @@ -359,20 +281,7 @@ startElement(void *userData, const char *name, const char **atts) repodata_add_flexarray(pd->data, pd->handle, UPDATE_REFERENCE, refhandle); } break; - /* This update ... */ - case STATE_DESCRIPTION: - break; - /* This update ... */ - case STATE_MESSAGE: - break; - case STATE_PKGLIST: - break; - /* Fedora 8 */ - case STATE_NAME: - break; + /* @@ -414,19 +323,7 @@ startElement(void *userData, const char *name, const char **atts) repodata_set_id(pd->data, pd->collhandle, UPDATE_COLLECTION_ARCH, a); break; } - /* libntlm-0.4.2-1.fc8.x86_64.rpm */ - /* libntlm-0.4.2-1.fc8.x86_64.rpm */ - case STATE_FILENAME: - break; - /* True */ - case STATE_REBOOT: - break; - /* True */ - case STATE_RESTART: - break; - /* True */ - case STATE_RELOGIN: - break; + default: break; } @@ -434,34 +331,16 @@ startElement(void *userData, const char *name, const char **atts) } -static void XMLCALL -endElement(void *userData, const char *name) +static void +endElement(struct solv_xmlparser *xmlp, int state, char *content) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; Solvable *s = pd->solvable; Repo *repo = pd->repo; -#if 0 - fprintf(stderr, "end: %s\n", name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth--; -#if 0 - fprintf(stderr, "back from unknown %d %d %d\n", pd->state, pd->depth, pd->statedepth); -#endif - return; - } - - pd->depth--; - pd->statedepth--; - switch (pd->state) + switch (state) { - case STATE_START: - break; - case STATE_UPDATES: - break; case STATE_UPDATE: s->provides = repo_addid_dep(repo, s->provides, pool_rel2id(pool, s->name, s->evr, REL_EQ, 1), 0); if (pd->buildtime) @@ -470,80 +349,75 @@ endElement(void *userData, const char *name) pd->buildtime = (time_t)0; } break; + case STATE_ID: - s->name = pool_str2id(pool, join2(&pd->jd, "patch", ":", pd->content), 1); + s->name = pool_str2id(pool, join2(&pd->jd, "patch", ":", content), 1); break; + /* imlib-1.9.15-6.fc8 */ case STATE_TITLE: - while (pd->lcontent > 0 && pd->content[pd->lcontent - 1] == '\n') - pd->content[--pd->lcontent] = 0; - repodata_set_str(pd->data, pd->handle, SOLVABLE_SUMMARY, pd->content); + /* strip trailing newlines */ + while (pd->xmlp.lcontent > 0 && content[pd->xmlp.lcontent - 1] == '\n') + content[--pd->xmlp.lcontent] = 0; + repodata_set_str(pd->data, pd->handle, SOLVABLE_SUMMARY, content); break; + case STATE_SEVERITY: - repodata_set_poolstr(pd->data, pd->handle, UPDATE_SEVERITY, pd->content); + repodata_set_poolstr(pd->data, pd->handle, UPDATE_SEVERITY, content); break; + case STATE_RIGHTS: - repodata_set_poolstr(pd->data, pd->handle, UPDATE_RIGHTS, pd->content); - break; - /* - * Fedora 8 - */ - case STATE_RELEASE: - break; - case STATE_ISSUED: - break; - case STATE_REFERENCES: - break; - case STATE_REFERENCE: + repodata_set_poolstr(pd->data, pd->handle, UPDATE_RIGHTS, content); break; + /* * This update ... */ case STATE_DESCRIPTION: - repodata_set_str(pd->data, pd->handle, SOLVABLE_DESCRIPTION, pd->content); + repodata_set_str(pd->data, pd->handle, SOLVABLE_DESCRIPTION, content); break; + /* * Warning! ... */ case STATE_MESSAGE: - repodata_set_str(pd->data, pd->handle, UPDATE_MESSAGE, pd->content); - break; - case STATE_PKGLIST: - break; - case STATE_COLLECTION: - break; - case STATE_NAME: + repodata_set_str(pd->data, pd->handle, UPDATE_MESSAGE, content); break; + case STATE_PACKAGE: repodata_add_flexarray(pd->data, pd->handle, UPDATE_COLLECTION, pd->collhandle); pd->collhandle = 0; break; + /* libntlm-0.4.2-1.fc8.x86_64.rpm */ /* libntlm-0.4.2-1.fc8.x86_64.rpm */ case STATE_FILENAME: - repodata_set_str(pd->data, pd->collhandle, UPDATE_COLLECTION_FILENAME, pd->content); + repodata_set_str(pd->data, pd->collhandle, UPDATE_COLLECTION_FILENAME, content); break; + /* True */ case STATE_REBOOT: - if (pd->content[0] == 'T' || pd->content[0] == 't'|| pd->content[0] == '1') + if (content[0] == 'T' || content[0] == 't'|| content[0] == '1') { /* FIXME: this is per-package, the global flag should be computed at runtime */ repodata_set_void(pd->data, pd->handle, UPDATE_REBOOT); repodata_set_void(pd->data, pd->collhandle, UPDATE_REBOOT); } break; + /* True */ case STATE_RESTART: - if (pd->content[0] == 'T' || pd->content[0] == 't'|| pd->content[0] == '1') + if (content[0] == 'T' || content[0] == 't'|| content[0] == '1') { /* FIXME: this is per-package, the global flag should be computed at runtime */ repodata_set_void(pd->data, pd->handle, UPDATE_RESTART); repodata_set_void(pd->data, pd->collhandle, UPDATE_RESTART); } break; + /* True */ case STATE_RELOGIN: - if (pd->content[0] == 'T' || pd->content[0] == 't'|| pd->content[0] == '1') + if (content[0] == 'T' || content[0] == 't'|| content[0] == '1') { /* FIXME: this is per-package, the global flag should be computed at runtime */ repodata_set_void(pd->data, pd->handle, UPDATE_RELOGIN); @@ -553,86 +427,31 @@ endElement(void *userData, const char *name) default: break; } - - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; } - -static void XMLCALL -characterData(void *userData, const XML_Char *s, int len) +static void +errorCallback(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column) { - struct parsedata *pd = userData; - int l; - char *c; - - if (!pd->docontent) - { -#if 0 - fprintf(stderr, "Content: [%d]'%.*s'\n", pd->state, len, s); -#endif - return; - } - l = pd->lcontent + len + 1; - if (l > pd->acontent) - { - pd->content = realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - c = pd->content + pd->lcontent; - pd->lcontent += len; - while (len-- > 0) - *c++ = *s++; - *c = 0; + struct parsedata *pd = xmlp->userdata; + pd->ret = pool_error(pd->pool, -1, "repo_updateinfoxml: %s at line %u:%u", errstr, line, column); } - -#define BUFF_SIZE 8192 - int repo_add_updateinfoxml(Repo *repo, FILE *fp, int flags) { Pool *pool = repo->pool; - struct parsedata pd; - char buf[BUFF_SIZE]; - int i, l; - struct stateswitch *sw; Repodata *data; - XML_Parser parser; + struct parsedata pd; data = repo_add_repodata(repo, flags); memset(&pd, 0, sizeof(pd)); - for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) - { - if (!pd.swtab[sw->from]) - pd.swtab[sw->from] = sw; - pd.sbtab[sw->to] = sw->from; - } pd.pool = pool; pd.repo = repo; pd.data = data; - - pd.content = malloc(256); - pd.acontent = 256; - pd.lcontent = 0; - parser = XML_ParserCreate(NULL); - XML_SetUserData(parser, &pd); - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, characterData); - for (;;) - { - l = fread(buf, 1, sizeof(buf), fp); - if (XML_Parse(parser, buf, l, l == 0) == XML_STATUS_ERROR) - { - pd.ret = pool_error(pool, -1, "repo_updateinfoxml: %s at line %u:%u", XML_ErrorString(XML_GetErrorCode(parser)), (unsigned int)XML_GetCurrentLineNumber(parser), (unsigned int)XML_GetCurrentColumnNumber(parser)); - break; - } - if (l == 0) - break; - } - XML_ParserFree(parser); - free(pd.content); + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement, errorCallback); + solv_xmlparser_parse(&pd.xmlp, fp); + solv_xmlparser_free(&pd.xmlp); join_freemem(&pd.jd); if (!(flags & REPO_NO_INTERNALIZE)) diff --git a/ext/repo_zyppdb.c b/ext/repo_zyppdb.c index 5200c293..16ed47f0 100644 --- a/ext/repo_zyppdb.c +++ b/ext/repo_zyppdb.c @@ -4,7 +4,6 @@ * Parses legacy /var/lib/zypp/db/products/... files. * They are old (pre Code11) product descriptions. See bnc#429177 * - * * Copyright (c) 2008, Novell Inc. * * This program is licensed under the BSD license, read LICENSE.BSD @@ -22,12 +21,12 @@ #include #include #include -#include #include #include "pool.h" #include "repo.h" #include "util.h" +#include "solv_xmlparser.h" #define DISABLE_SPLIT #include "tools_util.h" #include "repo_zyppdb.h" @@ -45,15 +44,7 @@ enum state { NUMSTATES }; -struct stateswitch { - enum state from; - char *ename; - enum state to; - int docontent; -}; - -/* !! must be sorted by first column !! */ -static struct stateswitch stateswitches[] = { +static struct solv_xmlparser_element stateswitches[] = { { STATE_START, "product", STATE_PRODUCT, 0 }, { STATE_PRODUCT, "name", STATE_NAME, 1 }, { STATE_PRODUCT, "version", STATE_VERSION, 0 }, @@ -65,101 +56,32 @@ static struct stateswitch stateswitches[] = { }; struct parsedata { - int depth; - enum state state; - int statedepth; - char *content; - int lcontent; - int acontent; - int docontent; Pool *pool; Repo *repo; Repodata *data; - - struct stateswitch *swtab[NUMSTATES]; - enum state sbtab[NUMSTATES]; - struct joindata jd; - + const char *filename; const char *tmplang; - Solvable *solvable; Id handle; + struct solv_xmlparser xmlp; + struct joindata jd; }; -/* - * find_attr - * find value for xml attribute - * I: txt, name of attribute - * I: atts, list of key/value attributes - * O: pointer to value of matching key, or NULL - * - */ - -static inline const char * -find_attr(const char *txt, const char **atts) -{ - for (; *atts; atts += 2) - { - if (!strcmp(*atts, txt)) - return atts[1]; - } - return 0; -} - - -/* - * XML callback: startElement - */ -static void XMLCALL -startElement(void *userData, const char *name, const char **atts) +static void +startElement(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Pool *pool = pd->pool; Solvable *s = pd->solvable; - struct stateswitch *sw; - -#if 0 - fprintf(stderr, "start: [%d]%s\n", pd->state, name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth++; - return; - } - - pd->depth++; - if (!pd->swtab[pd->state]) /* no statetable -> no substates */ - { -#if 0 - fprintf(stderr, "into unknown: %s (from: %d)\n", name, pd->state); -#endif - return; - } - for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */ - if (!strcmp(sw->ename, name)) - break; - - if (sw->from != pd->state) - { -#if 0 - fprintf(stderr, "into unknown: %s (from: %d)\n", name, pd->state); -#endif - return; - } - pd->state = sw->to; - pd->docontent = sw->docontent; - pd->statedepth = pd->depth; - pd->lcontent = 0; - *pd->content = 0; - switch(pd->state) + switch(state) { case STATE_PRODUCT: { /* parse 'type' */ - const char *type = find_attr("type", atts); + const char *type = solv_xmlparser_find_attr("type", atts); s = pd->solvable = pool_id2solvable(pool, repo_add_solvable(pd->repo)); pd->handle = s - pool->solvables; if (type) @@ -168,15 +90,14 @@ startElement(void *userData, const char *name, const char **atts) break; case STATE_VERSION: { - const char *ver = find_attr("ver", atts); - const char *rel = find_attr("rel", atts); - /* const char *epoch = find_attr("epoch", atts); ignored */ + const char *ver = solv_xmlparser_find_attr("ver", atts); + const char *rel = solv_xmlparser_find_attr("rel", atts); + /* const char *epoch = solv_xmlparser_find_attr("epoch", atts); ignored */ s->evr = makeevr(pd->pool, join2(&pd->jd, ver, "-", rel)); } break; - /* ... */ - case STATE_SUMMARY: - pd->tmplang = join_dup(&pd->jd, find_attr("lang", atts)); + case STATE_SUMMARY: /* ... */ + pd->tmplang = join_dup(&pd->jd, solv_xmlparser_find_attr("lang", atts)); break; default: break; @@ -184,28 +105,13 @@ startElement(void *userData, const char *name, const char **atts) } -static void XMLCALL -endElement(void *userData, const char *name) +static void +endElement(struct solv_xmlparser *xmlp, int state, char *content) { - struct parsedata *pd = userData; + struct parsedata *pd = xmlp->userdata; Solvable *s = pd->solvable; -#if 0 - fprintf(stderr, "end: [%d]%s\n", pd->state, name); -#endif - if (pd->depth != pd->statedepth) - { - pd->depth--; -#if 0 - fprintf(stderr, "back from unknown %d %d %d\n", pd->state, pd->depth, pd->statedepth); -#endif - return; - } - - pd->depth--; - pd->statedepth--; - - switch (pd->state) + switch (state) { case STATE_PRODUCT: if (!s->arch) @@ -217,89 +123,34 @@ endElement(void *userData, const char *name) pd->solvable = 0; break; case STATE_NAME: - s->name = pool_str2id(pd->pool, join2(&pd->jd, "product", ":", pd->content), 1); + s->name = pool_str2id(pd->pool, join2(&pd->jd, "product", ":", content), 1); break; case STATE_ARCH: - s->arch = pool_str2id(pd->pool, pd->content, 1); + s->arch = pool_str2id(pd->pool, content, 1); break; case STATE_SUMMARY: - repodata_set_str(pd->data, pd->handle, pool_id2langid(pd->pool, SOLVABLE_SUMMARY, pd->tmplang, 1), pd->content); + repodata_set_str(pd->data, pd->handle, pool_id2langid(pd->pool, SOLVABLE_SUMMARY, pd->tmplang, 1), content); break; case STATE_VENDOR: - s->vendor = pool_str2id(pd->pool, pd->content, 1); + s->vendor = pool_str2id(pd->pool, content, 1); break; case STATE_INSTALLTIME: - repodata_set_num(pd->data, pd->handle, SOLVABLE_INSTALLTIME, atol(pd->content)); + repodata_set_num(pd->data, pd->handle, SOLVABLE_INSTALLTIME, atol(content)); default: break; } - - pd->state = pd->sbtab[pd->state]; - pd->docontent = 0; - -#if 0 - fprintf(stderr, "end: [%s] -> %d\n", name, pd->state); -#endif -} - - -static void XMLCALL -characterData(void *userData, const XML_Char *s, int len) -{ - struct parsedata *pd = userData; - int l; - char *c; - if (!pd->docontent) - return; - l = pd->lcontent + len + 1; - if (l > pd->acontent) - { - pd->content = realloc(pd->content, l + 256); - pd->acontent = l + 256; - } - c = pd->content + pd->lcontent; - pd->lcontent += len; - while (len-- > 0) - *c++ = *s++; - *c = 0; } -#define BUFF_SIZE 8192 - - -/* - * add single product to repo - * - */ - static void -add_zyppdb_product(struct parsedata *pd, FILE *fp) +errorCallback(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column) { - char buf[BUFF_SIZE]; - int l; - - XML_Parser parser = XML_ParserCreate(NULL); - XML_SetUserData(parser, pd); - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, characterData); - - for (;;) + struct parsedata *pd = xmlp->userdata; + pool_debug(pd->pool, SOLV_ERROR, "repo_zyppdb: %s: %s at line %u:%u\n", pd->filename, errstr, line, column); + if (pd->solvable) { - l = fread(buf, 1, sizeof(buf), fp); - if (XML_Parse(parser, buf, l, l == 0) == XML_STATUS_ERROR) - { - pool_debug(pd->pool, SOLV_ERROR, "repo_zyppdb: %s at line %u:%u\n", XML_ErrorString(XML_GetErrorCode(parser)), (unsigned int)XML_GetCurrentLineNumber(parser), (unsigned int)XML_GetCurrentColumnNumber(parser)); - if (pd->solvable) - { - repo_free_solvable(pd->repo, pd->solvable - pd->pool->solvables, 1); - pd->solvable = 0; - } - return; - } - if (l == 0) - break; + repo_free_solvable(pd->repo, pd->solvable - pd->pool->solvables, 1); + pd->solvable = 0; } - XML_ParserFree(parser); } @@ -312,9 +163,7 @@ add_zyppdb_product(struct parsedata *pd, FILE *fp) int repo_add_zyppdb_products(Repo *repo, const char *dirpath, int flags) { - int i; struct parsedata pd; - struct stateswitch *sw; struct dirent *entry; char *fullpath; DIR *dir; @@ -326,16 +175,7 @@ repo_add_zyppdb_products(Repo *repo, const char *dirpath, int flags) pd.repo = repo; pd.pool = repo->pool; pd.data = data; - - pd.content = malloc(256); - pd.acontent = 256; - - for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) - { - if (!pd.swtab[sw->from]) - pd.swtab[sw->from] = sw; - pd.sbtab[sw->to] = sw->from; - } + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement, errorCallback); if (flags & REPO_USE_ROOTDIR) dirpath = pool_prepend_rootdir(repo->pool, dirpath); @@ -344,21 +184,22 @@ repo_add_zyppdb_products(Repo *repo, const char *dirpath, int flags) { while ((entry = readdir(dir))) { - if (strlen(entry->d_name) < 3) - continue; /* skip '.' and '..' */ + if (entry->d_name[0] == '.') + continue; /* skip dot files */ fullpath = join2(&pd.jd, dirpath, "/", entry->d_name); if ((fp = fopen(fullpath, "r")) == 0) { pool_error(repo->pool, 0, "%s: %s", fullpath, strerror(errno)); continue; } - add_zyppdb_product(&pd, fp); + pd.filename = entry->d_name; + solv_xmlparser_parse(&pd.xmlp, fp); fclose(fp); } } closedir(dir); - free(pd.content); + solv_xmlparser_free(&pd.xmlp); join_freemem(&pd.jd); if (flags & REPO_USE_ROOTDIR) solv_free((char *)dirpath); diff --git a/ext/solv_xmlparser.c b/ext/solv_xmlparser.c new file mode 100644 index 00000000..f28d168d --- /dev/null +++ b/ext/solv_xmlparser.c @@ -0,0 +1,209 @@ +/* + * solv_xmlparser.c + * + * XML parser abstraction + * + * Copyright (c) 2017, Novell Inc. + * + * This program is licensed under the BSD license, read LICENSE.BSD + * for further information + */ + +#include +#include +#include +#include + +#include + +#include "util.h" +#include "queue.h" +#include "solv_xmlparser.h" + +static inline void +add_contentspace(struct solv_xmlparser *xmlp, int l) +{ + l += xmlp->lcontent + 1; /* plus room for trailing zero */ + if (l > xmlp->acontent) + { + xmlp->acontent = l + 256; + xmlp->content = solv_realloc(xmlp->content, xmlp->acontent); + } +} + +static void XMLCALL +characterData(void *userData, const XML_Char *s, int len) +{ + struct solv_xmlparser *xmlp = userData; + char *c; + + if (!xmlp->docontent) + return; + add_contentspace(xmlp, len); + c = xmlp->content + xmlp->lcontent; + xmlp->lcontent += len; + while (len-- > 0) + *c++ = *s++; +} + +static void XMLCALL +startElement(void *userData, const char *name, const char **atts) +{ + struct solv_xmlparser *xmlp = userData; + struct solv_xmlparser_element *elements; + Id *elementhelper; + struct solv_xmlparser_element *el; + int i, oldstate; + + if (xmlp->unknowncnt) + { + xmlp->unknowncnt++; + return; + } + elementhelper = xmlp->elementhelper; + elements = xmlp->elements; + oldstate = xmlp->state; + for (i = elementhelper[xmlp->nelements + oldstate]; i; i = elementhelper[i - 1]) + if (!strcmp(elements[i - 1].element, name)) + break; + if (!i) + { +#if 0 + fprintf(stderr, "into unknown: %s\n", name); +#endif + xmlp->unknowncnt++; + return; + } + el = xmlp->elements + i - 1; + queue_push(&xmlp->elementq, xmlp->state); + xmlp->state = el->tostate; + xmlp->docontent = el->docontent; + xmlp->lcontent = 0; + if (xmlp->state != oldstate) + xmlp->startelement(xmlp, xmlp->state, el->element, atts); +} + +static void XMLCALL +endElement(void *userData, const char *name) +{ + struct solv_xmlparser *xmlp = userData; + + if (xmlp->unknowncnt) + { + xmlp->unknowncnt--; + xmlp->lcontent = 0; + xmlp->docontent = 0; + return; + } + xmlp->content[xmlp->lcontent] = 0; + if (xmlp->elementq.count && xmlp->state != xmlp->elementq.elements[xmlp->elementq.count - 1]) + xmlp->endelement(xmlp, xmlp->state, xmlp->content); + xmlp->state = queue_pop(&xmlp->elementq); + xmlp->docontent = 0; + xmlp->lcontent = 0; +} + +void +solv_xmlparser_init(struct solv_xmlparser *xmlp, + struct solv_xmlparser_element *elements, + void *userdata, + void (*startelement)(struct solv_xmlparser *, int state, const char *name, const char **atts), + void (*endelement)(struct solv_xmlparser *, int state, char *content), + void (*errorhandler)(struct solv_xmlparser *, const char *errstr, unsigned int line, unsigned int column)) +{ + int i, nstates, nelements; + struct solv_xmlparser_element *el; + Id *elementhelper; + + memset(xmlp, 0, sizeof(*xmlp)); + nstates = 0; + nelements = 0; + for (el = elements; el->element; el++) + { + nelements++; + if (el->fromstate > nstates) + nstates = el->fromstate; + if (el->tostate > nstates) + nstates = el->tostate; + } + nstates++; + + xmlp->elements = elements; + xmlp->nelements = nelements; + elementhelper = solv_calloc(nelements + nstates, sizeof(Id)); + for (i = nelements - 1; i >= 0; i--) + { + int fromstate = elements[i].fromstate; + elementhelper[i] = elementhelper[nelements + fromstate]; + elementhelper[nelements + fromstate] = i + 1; + } + xmlp->elementhelper = elementhelper; + queue_init(&xmlp->elementq); + xmlp->acontent = 256; + xmlp->content = solv_malloc(xmlp->acontent); + + xmlp->userdata = userdata; + xmlp->startelement = startelement; + xmlp->endelement = endelement; + xmlp->errorhandler = errorhandler; +} + +unsigned int +solv_xmlparser_lineno(struct solv_xmlparser *xmlp) +{ + return (unsigned int)XML_GetCurrentLineNumber(xmlp->parser); +} + +void +solv_xmlparser_free(struct solv_xmlparser *xmlp) +{ + xmlp->elementhelper = solv_free(xmlp->elementhelper); + queue_free(&xmlp->elementq); + xmlp->content = solv_free(xmlp->content); +} + +void +solv_xmlparser_parse(struct solv_xmlparser *xmlp, FILE *fp) +{ + char buf[8192]; + int l; + + xmlp->state = 0; + xmlp->unknowncnt = 0; + xmlp->docontent = 0; + xmlp->lcontent = 0; + queue_empty(&xmlp->elementq); + xmlp->parser = XML_ParserCreate(NULL); + XML_SetUserData(xmlp->parser, xmlp); + XML_SetElementHandler(xmlp->parser, startElement, endElement); + XML_SetCharacterDataHandler(xmlp->parser, characterData); + + for (;;) + { + l = fread(buf, 1, sizeof(buf), fp); + if (XML_Parse(xmlp->parser, buf, l, l == 0) == XML_STATUS_ERROR) + { + unsigned int line = XML_GetCurrentLineNumber(xmlp->parser); + unsigned int column = XML_GetCurrentColumnNumber(xmlp->parser); + xmlp->errorhandler(xmlp, XML_ErrorString(XML_GetErrorCode(xmlp->parser)), line, column); + break; + } + if (l == 0) + break; + } + XML_ParserFree(xmlp->parser); + xmlp->parser = 0; +} + +char * +solv_xmlparser_contentspace(struct solv_xmlparser *xmlp, int l) +{ + xmlp->lcontent = 0; + if (l > xmlp->acontent) + { + xmlp->acontent = l + 256; + xmlp->content = solv_realloc(xmlp->content, xmlp->acontent); + } + return xmlp->content; +} + diff --git a/ext/solv_xmlparser.h b/ext/solv_xmlparser.h new file mode 100644 index 00000000..9fb342f4 --- /dev/null +++ b/ext/solv_xmlparser.h @@ -0,0 +1,52 @@ + +struct solv_xmlparser_element { + int fromstate; + char *element; + int tostate; + int docontent; +}; + +struct solv_xmlparser { + void *userdata; + + int state; + int docontent; + + Queue elementq; + int unknowncnt; + + char *content; + int lcontent; /* current content length */ + int acontent; /* allocated content length */ + + struct solv_xmlparser_element *elements; + int nelements; + + void (*startelement)(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts); + void (*endelement)(struct solv_xmlparser *xmlp, int state, char *content); + void (*errorhandler)(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column); + + Id *elementhelper; + void *parser; +}; + +static inline const char * +solv_xmlparser_find_attr(const char *txt, const char **atts) +{ + for (; *atts; atts += 2) + if (!strcmp(*atts, txt)) + return atts[1]; + return 0; +} + +extern void solv_xmlparser_init(struct solv_xmlparser *xmlp, struct solv_xmlparser_element *elements, void *userdata, + void (*startelement)(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts), + void (*endelement)(struct solv_xmlparser *xmlp, int state, char *content), + void (*errorhandler)(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column)); + +extern void solv_xmlparser_free(struct solv_xmlparser *xmlp); +extern void solv_xmlparser_parse(struct solv_xmlparser *xmlp, FILE *fp); +unsigned int solv_xmlparser_lineno(struct solv_xmlparser *xmlp); +char *solv_xmlparser_contentspace(struct solv_xmlparser *xmlp, int l); + +