Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow incremental parsing without filelists or other xml #441

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/python/createrepo_c/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,8 @@ def _warningcb(warning_type, message):
metadata_files = {record.type: record for record in repomd.records}
parser = RepositoryReader()
parser._primary_xml_path = os.path.join(path, metadata_files["primary"].location_href)
parser._filelists_xml_path = os.path.join(path, metadata_files["filelists"].location_href)
parser._other_xml_path = os.path.join(path, metadata_files["other"].location_href)
parser._filelists_xml_path = os.path.join(path, metadata_files["filelists"].location_href) if metadata_files.get("filelists") else None
parser._other_xml_path = os.path.join(path, metadata_files["other"].location_href) if metadata_files.get("other") else None

if metadata_files.get("updateinfo"):
parser._updateinfo_path = os.path.join(path, metadata_files["updateinfo"].location_href)
Expand Down
6 changes: 3 additions & 3 deletions src/python/xml_parser-py.c
Original file line number Diff line number Diff line change
Expand Up @@ -780,14 +780,14 @@ pkg_iterator_init(_PkgIteratorObject *self, PyObject *args, PyObject *kwargs)
static char *kwlist[] = {"primary", "filelists", "other", "newpkgcb",
"warningcb", NULL};

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sssOO:pkg_iterator_init", kwlist,
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "szzOO:pkg_iterator_init", kwlist,
&primary_path, &filelists_path, &other_path, &py_newpkgcb,
&py_warningcb)) {
return -1;
}

if (!primary_path || !filelists_path || !other_path) {
PyErr_SetString(PyExc_TypeError, "file paths must be provided");
if (!primary_path) {
PyErr_SetString(PyExc_TypeError, "primary file path must be provided");
return -1;
}

Expand Down
84 changes: 44 additions & 40 deletions src/xml_parser_main_metadata_together.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,16 @@
#define ERR_DOMAIN CREATEREPO_C_ERROR

typedef struct {
GSList *in_progress_pkgs_list;
int in_progress_count_primary;
int in_progress_count_filelists;
int in_progress_count_other;
GQueue *finished_pkgs_queue;
cr_XmlParserNewPkgCb newpkgcb; // newpkgcb passed in from user
void *newpkgcb_data;// newpkgcb data passed in from user
cr_XmlParserPkgCb pkgcb; // pkgcb passed in from user
void *pkgcb_data; // pkgcb data passed in from user
GSList *in_progress_pkgs_list;
int in_progress_count_primary;
int in_progress_count_filelists;
int in_progress_count_other;
GQueue *finished_pkgs_queue;
cr_XmlParserNewPkgCb newpkgcb; // newpkgcb passed in from user
void *newpkgcb_data;// newpkgcb data passed in from user
cr_XmlParserPkgCb pkgcb; // pkgcb passed in from user
void *pkgcb_data; // pkgcb data passed in from user
cr_PackageLoadingFlags loadingflags; // which callbacks need to be called before the package is considered "finished" loading
} cr_CbData;

struct _cr_PkgIterator {
Expand Down Expand Up @@ -67,17 +68,19 @@ struct _cr_PkgIterator {
static int
queue_package_if_finished(cr_Package *pkg, cr_CbData *cb_data)
{
if (pkg && (pkg->loadingflags & CR_PACKAGE_LOADED_PRI) && (pkg->loadingflags & CR_PACKAGE_LOADED_OTH) &&
(pkg->loadingflags & CR_PACKAGE_LOADED_FIL))
if (pkg && ((pkg->loadingflags & cb_data->loadingflags) == cb_data->loadingflags))
{
//remove first element in the list
cb_data->in_progress_pkgs_list = g_slist_delete_link(cb_data->in_progress_pkgs_list, cb_data->in_progress_pkgs_list);

// One package was fully finished
cb_data->in_progress_count_primary--;
cb_data->in_progress_count_filelists--;
cb_data->in_progress_count_other--;

if (cb_data->loadingflags & CR_PACKAGE_LOADED_FIL) {
cb_data->in_progress_count_filelists--;
}
if (cb_data->loadingflags & CR_PACKAGE_LOADED_OTH) {
cb_data->in_progress_count_other--;
}
g_queue_push_tail(cb_data->finished_pkgs_queue, pkg);
}
return CR_CB_RET_OK;
Expand Down Expand Up @@ -319,8 +322,6 @@ parse_next_section(CR_FILE *target_file, const char *path, cr_ParserData *pd, GE

//TODO(amatej): there is quite some overlap with this and cr_load_xml_files,
// consider using this api to implement cr_load_xml_files?
// TODO: maybe whether or not individual files are parsed could be controlled by NULL paths? I think cr_load_xml_files
// already works that way.
cr_PkgIterator *
cr_PkgIterator_new(const char *primary_path,
const char *filelists_path,
dralley marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -332,8 +333,6 @@ cr_PkgIterator_new(const char *primary_path,
GError **err)
{
assert(primary_path);
assert(filelists_path);
assert(other_path);
assert(!err || *err == NULL);

cr_PkgIterator* new_iter = g_new0(cr_PkgIterator, 1);
Expand Down Expand Up @@ -370,30 +369,41 @@ cr_PkgIterator_new(const char *primary_path,
cbdata->newpkgcb_data = newpkgcb_data;

new_iter->tmp_err = NULL;
int parse_files_in_primary = 0;

GError* tmp_err = new_iter->tmp_err;
new_iter->primary_f = cr_open(primary_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
cbdata->loadingflags |= CR_PACKAGE_LOADED_PRI;
if (tmp_err) {
g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", primary_path);
cr_PkgIterator_free(new_iter, err);
return NULL;
}
new_iter->filelists_f = cr_open(filelists_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
if (tmp_err) {
g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", filelists_path);
cr_PkgIterator_free(new_iter, err);
return NULL;
if (new_iter->filelists_path) {
new_iter->filelists_f = cr_open(filelists_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
cbdata->loadingflags |= CR_PACKAGE_LOADED_FIL;
kontura marked this conversation as resolved.
Show resolved Hide resolved
if (tmp_err) {
g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", filelists_path);
cr_PkgIterator_free(new_iter, err);
return NULL;
}
} else {
new_iter->filelists_is_done = 1;
parse_files_in_primary = 1;
}
new_iter->other_f = cr_open(other_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
if (tmp_err) {
g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", other_path);
cr_PkgIterator_free(new_iter, err);
return NULL;
if (new_iter->other_path) {
new_iter->other_f = cr_open(other_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
cbdata->loadingflags |= CR_PACKAGE_LOADED_OTH;
if (tmp_err) {
g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", other_path);
cr_PkgIterator_free(new_iter, err);
return NULL;
}
} else {
new_iter->other_is_done = 1;
}

//TODO(amatej): In the future we could make filelists/other optional if there is a need for it. That would mean we
// should replace the last 0 in primary_parser_data_new depending on whether we have filelists or not.
new_iter->primary_pd = primary_parser_data_new(newpkgcb_primary, cbdata, pkgcb_primary, cbdata, warningcb, warningcb_data, 0);
new_iter->primary_pd = primary_parser_data_new(newpkgcb_primary, cbdata, pkgcb_primary, cbdata, warningcb, warningcb_data, parse_files_in_primary);
new_iter->filelists_pd = filelists_parser_data_new(newpkgcb_filelists, cbdata, pkgcb_filelists, cbdata, warningcb, warningcb_data);
new_iter->other_pd = other_parser_data_new(newpkgcb_other, cbdata, pkgcb_other, cbdata, warningcb, warningcb_data);
return new_iter;
Expand All @@ -404,29 +414,23 @@ cr_PkgIterator_parse_next(cr_PkgIterator *iter, GError **err) {
cr_CbData *cbdata = (cr_CbData*) iter->cbdata;

while (!cr_PkgIterator_is_finished(iter) && g_queue_is_empty(cbdata->finished_pkgs_queue)) {
while ((cbdata->in_progress_count_primary <= cbdata->in_progress_count_filelists ||
cbdata->in_progress_count_primary <= cbdata->in_progress_count_other) &&
!iter->primary_is_done)
if (!iter->primary_is_done)
{
iter->primary_is_done = parse_next_section(iter->primary_f, iter->primary_path, iter->primary_pd, err);
if (*err) {
return NULL;
}
}

while ((cbdata->in_progress_count_filelists <= cbdata->in_progress_count_primary ||
cbdata->in_progress_count_filelists <= cbdata->in_progress_count_other) &&
!iter->filelists_is_done)
while (cbdata->in_progress_count_filelists <= cbdata->in_progress_count_primary && !iter->filelists_is_done)
{
iter->filelists_is_done = parse_next_section(iter->filelists_f, iter->filelists_path, iter->filelists_pd, err);
if (*err) {
return NULL;
}
}

while ((cbdata->in_progress_count_other <= cbdata->in_progress_count_filelists ||
cbdata->in_progress_count_other <= cbdata->in_progress_count_primary) &&
!iter->other_is_done)
while (cbdata->in_progress_count_other <= cbdata->in_progress_count_primary && !iter->other_is_done)
{
iter->other_is_done = parse_next_section(iter->other_f, iter->other_path, iter->other_pd, err);
if (*err) {
Expand Down
68 changes: 68 additions & 0 deletions tests/python/tests/test_xml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,74 @@ def warningcb(warn_type, msg):
self.assertRaises(StopIteration, next, package_iterator)
self.assertTrue(package_iterator.is_finished())

def test_xml_parser_pkg_iterator_repo02_only_primary(self):
warnings = []
def warningcb(warn_type, msg):
warnings.append((warn_type, msg))

package_iterator = cr.PackageIterator(
primary_path=REPO_02_PRIXML, filelists_path=None, other_path=None,
warningcb=warningcb,
)
packages = list(package_iterator)

self.assertEqual(len(packages), 2)
self.assertEqual(packages[0].name, "fake_bash")
self.assertEqual(packages[0].files, [(None, '/usr/bin/', 'fake_bash')]) # the files still get parsed - only from primary.xml
self.assertEqual(packages[0].changelogs, [])
self.assertListEqual(warnings, [])
self.assertRaises(StopIteration, next, package_iterator)
self.assertTrue(package_iterator.is_finished())


def test_xml_parser_pkg_iterator_repo02_no_primary(self):
def test():
cr.PackageIterator(
primary_path=None, filelists_path=REPO_02_FILXML, other_path=REPO_02_OTHXML,
)

self.assertRaises(TypeError, test)

def test_xml_parser_pkg_iterator_repo02_no_filelists(self):
warnings = []
def warningcb(warn_type, msg):
warnings.append((warn_type, msg))

package_iterator = cr.PackageIterator(
primary_path=REPO_02_PRIXML, filelists_path=None, other_path=REPO_02_OTHXML,
warningcb=warningcb,
)
packages = list(package_iterator)

self.assertEqual(len(packages), 2)
self.assertEqual(packages[0].name, "fake_bash")
self.assertEqual(packages[0].changelogs, [
('Tomas Mlcoch <[email protected]> - 1.1.1-1', 1334664000, '- First release'),
])
self.assertEqual(packages[0].files, [(None, '/usr/bin/', 'fake_bash')]) # the files still get parsed - only from primary.xml
self.assertListEqual(warnings, [])
self.assertRaises(StopIteration, next, package_iterator)
self.assertTrue(package_iterator.is_finished())

def test_xml_parser_pkg_iterator_repo02_no_other(self):
dralley marked this conversation as resolved.
Show resolved Hide resolved
warnings = []
def warningcb(warn_type, msg):
warnings.append((warn_type, msg))

package_iterator = cr.PackageIterator(
primary_path=REPO_02_PRIXML, filelists_path=REPO_02_FILXML, other_path=None,
warningcb=warningcb,
)
packages = list(package_iterator)

self.assertEqual(len(packages), 2)
self.assertEqual(packages[0].name, "fake_bash")
self.assertEqual(packages[0].changelogs, [])
self.assertEqual(packages[0].files, [(None, '/usr/bin/', 'fake_bash')])
self.assertListEqual(warnings, [])
self.assertRaises(StopIteration, next, package_iterator)
self.assertTrue(package_iterator.is_finished())

def test_xml_parser_pkg_iterator_repo02_newpkgcb_as_filter(self):
def newpkgcb(pkgId, name, arch):
if name in {"fake_bash"}:
Expand Down
50 changes: 50 additions & 0 deletions tests/test_xml_parser_main_metadata_together.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,50 @@ test_cr_xml_package_iterator_00(void)
g_assert_cmpint(parsed, ==, 2);
}

static void
test_cr_xml_package_iterator_00_no_filelists(void)
{
int parsed = 0;
GError *tmp_err = NULL;
cr_Package *package = NULL;

cr_PkgIterator *pkg_iterator = cr_PkgIterator_new(
TEST_REPO_02_PRIMARY, NULL, TEST_REPO_02_OTHER, NULL, NULL, NULL, NULL, &tmp_err);

while ((package = cr_PkgIterator_parse_next(pkg_iterator, &tmp_err))) {
parsed++;
cr_package_free(package);
}

g_assert(cr_PkgIterator_is_finished(pkg_iterator));
cr_PkgIterator_free(pkg_iterator, &tmp_err);

g_assert(tmp_err == NULL);
g_assert_cmpint(parsed, ==, 2);
}

static void
test_cr_xml_package_iterator_00_no_other(void)
{
int parsed = 0;
GError *tmp_err = NULL;
cr_Package *package = NULL;

cr_PkgIterator *pkg_iterator = cr_PkgIterator_new(
TEST_REPO_02_PRIMARY, TEST_REPO_02_FILELISTS, NULL, NULL, NULL, NULL, NULL, &tmp_err);

while ((package = cr_PkgIterator_parse_next(pkg_iterator, &tmp_err))) {
parsed++;
cr_package_free(package);
}

g_assert(cr_PkgIterator_is_finished(pkg_iterator));
cr_PkgIterator_free(pkg_iterator, &tmp_err);

g_assert(tmp_err == NULL);
g_assert_cmpint(parsed, ==, 2);
}

static void
test_cr_xml_package_iterator_filelists_ext_00(void)
{
Expand Down Expand Up @@ -320,6 +364,12 @@ main(int argc, char *argv[])
g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_00",
test_cr_xml_package_iterator_00);

g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_00_no_filelists",
test_cr_xml_package_iterator_00_no_filelists);

g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_00_no_other",
test_cr_xml_package_iterator_00_no_other);

g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_filelists_ext_00",
test_cr_xml_package_iterator_filelists_ext_00);

Expand Down
Loading