Skip to content

Commit

Permalink
[issue-184] update jsonyamlxml parser and writer to validate against …
Browse files Browse the repository at this point in the history
…spec 2.2

This leads to a failing test, which should be resolved once PR spdx#247 is merged.
Signed-off-by: Meret Behrens <[email protected]>
  • Loading branch information
meretp committed Oct 26, 2022
1 parent f061ea8 commit f5af2e1
Show file tree
Hide file tree
Showing 10 changed files with 363 additions and 385 deletions.
46 changes: 26 additions & 20 deletions spdx/parsers/jsonyamlxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -1495,36 +1495,34 @@ def parse_pkg_chksum(self, pkg_chksum):
self.value_error("PKG_CHECKSUM", pkg_chksum)


def flatten_document(document):
    """
    Flatten a parsed SPDX 2.2 document (dict) to match the current data model.

    File objects listed under the top-level "files" key are nested back into
    their packages according to each package's "hasFiles" list.  Spec-style
    keys are renamed to the internal ones ("fileName" -> "name",
    "licenseInfoInFiles" -> "licenseInfoFromFiles") and a convenience "sha1"
    entry is derived from the checksum list for files and packages.

    :param document: document dict as read from JSON/YAML/XML input;
        modified in place.
    :return: the same dict, for call-chaining convenience.
    """
    files_by_id = {}
    if "files" in document:
        for f in document.get("files"):
            f["name"] = f.pop("fileName")
            # XXX must downstream rely on "sha1" property?
            # Accept both the plain "SHA1" tag and algorithm strings that
            # merely contain "sha1" (e.g. "checksumAlgorithm_sha1" emitted by
            # older XML output).
            for checksum in f["checksums"]:
                if checksum["algorithm"] == "SHA1" or "sha1" in checksum["algorithm"]:
                    f["sha1"] = checksum["checksumValue"]
                    break
            if "licenseInfoInFiles" in f:
                f["licenseInfoFromFiles"] = f.pop("licenseInfoInFiles")
            files_by_id[f["SPDXID"]] = f

    if "packages" in document:
        for package in document.get("packages"):
            # Nest the referenced file objects under the package.  NOTE: a
            # "hasFiles" SPDXID with no matching file raises KeyError here.
            if "hasFiles" in package:
                package["files"] = [
                    {"File": files_by_id[spdxid]} for spdxid in package["hasFiles"]
                ]
            # XXX must downstream rely on "sha1" property?
            for checksum in package.get("checksums", []):
                if checksum["algorithm"] == "SHA1" or "sha1" in checksum["algorithm"]:
                    package["sha1"] = checksum["checksumValue"]
                    break

    return document

Expand All @@ -1545,7 +1543,7 @@ def __init__(self, builder, logger):
def json_yaml_set_document(self, data):
# we could verify that the spdxVersion >= 2.2, but we try to be resilient in parsing
if data.get("spdxVersion"):
self.document_object = unflatten_document(data)
self.document_object = data
return
self.document_object = data.get("Document")

Expand All @@ -1555,6 +1553,7 @@ def parse(self):
"""
self.error = False
self.document = document.Document()
self.document_object = flatten_document(self.document_object)
if not isinstance(self.document_object, dict):
self.logger.log("Empty or not valid SPDX Document")
self.error = True
Expand All @@ -1580,7 +1579,8 @@ def parse(self):

self.parse_packages(self.document_object.get("packages"))

self.parse_doc_described_objects(self.document_object.get("documentDescribes"))
if self.document_object.get("documentDescribes"):
self.parse_doc_described_objects(self.document_object.get("documentDescribes"))

validation_messages = ErrorMessages()
# Report extra errors if self.error is False otherwise there will be
Expand Down Expand Up @@ -1693,11 +1693,17 @@ def parse_doc_described_objects(self, doc_described_objects):
and described.get("File") is not None,
doc_described_objects,
)
relationships = filter(
lambda described: isinstance(described, str), doc_described_objects
)
# At the moment, only single-package documents are supported, so just the last package will be stored.
for package in packages:
self.parse_package(package.get("Package"))
for file in files:
self.parse_file(file.get("File"))
for relationship in relationships:
self.parse_relationship(self.document.spdx_id, "DESCRIBES", relationship)

return True
else:
self.value_error("DOC_DESCRIBES", doc_described_objects)
Expand Down
4 changes: 4 additions & 0 deletions spdx/parsers/xmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,16 @@ def __init__(self, builder, logger):
"reviewers",
"fileTypes",
"licenseInfoFromFiles",
"licenseInfoInFiles",
"artifactOf",
"fileContributors",
"fileDependencies",
"excludedFilesNames",
"files",
"documentDescribes",
"packages",
"checksums",
"hasFiles"
}

def parse(self, file):
Expand Down
4 changes: 2 additions & 2 deletions spdx/writers/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import json

from spdx.writers.tagvalue import InvalidDocumentError
from spdx.writers.jsonyamlxml import JsonYamlWriter
from spdx.writers.jsonyamlxml import Writer
from spdx.parsers.loggers import ErrorMessages


Expand All @@ -24,6 +24,6 @@ def write_document(document, out, validate=True):
if messages:
raise InvalidDocumentError(messages)

writer = JsonYamlWriter(document)
writer = Writer(document)
document_object = writer.create_document()
json.dump(document_object, out, indent=4)
153 changes: 71 additions & 82 deletions spdx/writers/jsonyamlxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ def create_package_info(self, package):

if package.has_optional_field("check_sum"):
package_object["checksums"] = [self.checksum(checksum) for checksum in package.checksums if checksum]
assert package.check_sum.identifier == "SHA1", "First checksum must be SHA1"
package_object["sha1"] = package.check_sum.value
# assert package.check_sum.identifier == "SHA1", "First checksum must be SHA1"
# package_object["sha1"] = package.check_sum.value

if package.has_optional_field("description"):
package_object["description"] = package.description
Expand All @@ -161,7 +161,14 @@ def create_package_info(self, package):
if package.has_optional_field("homepage"):
package_object["homepage"] = package.homepage.__str__()

return package_object
files_in_package = []
if package.has_optional_field("files"):
package_object["hasFiles"] = []
for file in package.files:
package_object["hasFiles"].append(file.spdx_id)
files_in_package.append(self.create_file_info(file))

return package_object, files_in_package


class FileWriter(BaseWriter):
Expand All @@ -187,66 +194,61 @@ def create_artifact_info(self, file):

return artifact_of_objects

def create_file_info(self, package):
def create_file_info(self, file):
file_types = {
1: "fileType_source",
2: "fileType_binary",
3: "fileType_archive",
4: "fileType_other",
}
file_objects = []
files = package.files

for file in files:
file_object = dict()
file_object = dict()

file_object["name"] = file.name
file_object["SPDXID"] = self.spdx_id(file.spdx_id)
file_object["checksums"] = [self.checksum(checksum) for checksum in file.checksums if checksum]
file_object["licenseConcluded"] = self.license(file.conc_lics)
file_object["licenseInfoFromFiles"] = list(
map(self.license, file.licenses_in_file)
)
file_object["copyrightText"] = file.copyright.__str__()
file_object["fileName"] = file.name
file_object["SPDXID"] = self.spdx_id(file.spdx_id)
file_object["checksums"] = [self.checksum(file.chk_sum)]
file_object["licenseConcluded"] = self.license(file.conc_lics)
file_object["licenseInfoInFiles"] = list(
map(self.license, file.licenses_in_file)
)
file_object["copyrightText"] = file.copyright.__str__()
# assert file.chk_sum.identifier == "SHA1", "First checksum must be SHA1"
# file_object["sha1"] = file.chk_sum.value

assert file.chk_sum.identifier == "SHA1", "First checksum must be SHA1"
file_object["sha1"] = file.chk_sum.value

if file.has_optional_field("comment"):
file_object["comment"] = file.comment
if file.has_optional_field("comment"):
file_object["comment"] = file.comment

if file.has_optional_field("type"):
file_object["fileTypes"] = [file_types.get(file.type)]
if file.has_optional_field("type"):
file_object["fileTypes"] = [file_types.get(file.type)]

if file.has_optional_field("license_comment"):
file_object["licenseComments"] = file.license_comment
if file.has_optional_field("license_comment"):
file_object["licenseComments"] = file.license_comment

if file.has_optional_field("attribution_text"):
file_object["attributionTexts"] = [file.attribution_text]
if file.has_optional_field("attribution_text"):
file_object["attributionTexts"] = [file.attribution_text]

if file.has_optional_field("notice"):
file_object["noticeText"] = file.notice
if file.has_optional_field("notice"):
file_object["noticeText"] = file.notice

if file.contributors:
file_object["fileContributors"] = file.contributors.__str__()
if file.contributors:
file_object["fileContributors"] = file.contributors.__str__()

if file.dependencies:
file_object["fileDependencies"] = file.dependencies
if file.dependencies:
file_object["fileDependencies"] = file.dependencies

valid_artifacts = (
valid_artifacts = (
file.artifact_of_project_name
and len(file.artifact_of_project_name)
== len(file.artifact_of_project_home)
and len(file.artifact_of_project_home)
== len(file.artifact_of_project_uri)
)

if valid_artifacts:
file_object["artifactOf"] = self.create_artifact_info(file)
)

file_objects.append({"File": file_object})
if valid_artifacts:
file_object["artifactOf"] = self.create_artifact_info(file)

return file_objects
return file_object


class ReviewInfoWriter(BaseWriter):
Expand Down Expand Up @@ -315,6 +317,10 @@ def create_relationship_info(self):
relationship_objects = []

for relationship_term in self.document.relationships:
if relationship_term.relationshiptype == "DESCRIBES":
continue
if relationship_term.relationshiptype == "CONTAINS":
continue
relationship_object = dict()
relationship_object["spdxElementId"] = relationship_term.spdxelementid
relationship_object[
Expand Down Expand Up @@ -468,6 +474,21 @@ def create_ext_document_references(self):

return ext_document_reference_objects

def create_document_describes(self):
    """
    Return the list of SPDXIDs of elements this document DESCRIBES.

    Side effect: DESCRIBES relationships that carry no comment are removed
    from ``self.document.relationships``, since they are fully represented
    by the "documentDescribes" field and would otherwise be emitted twice.

    :return: list of related-element SPDXIDs (one per DESCRIBES relationship).
    """
    described_ids = []
    removable = []
    for relationship in self.document.relationships:
        if relationship.relationshiptype == "DESCRIBES":
            described_ids.append(relationship.relatedspdxelement)
            if not relationship.has_comment:
                removable.append(relationship)
    # Remove by object identity: filtering on relatedspdxelement (as before)
    # would also drop unrelated relationships (e.g. CONTAINS) that happen to
    # point at the same element.
    self.document.relationships = [
        rel for rel in self.document.relationships if rel not in removable
    ]
    return described_ids


def create_document(self):
self.document_object = dict()

Expand All @@ -478,15 +499,18 @@ def create_document(self):
self.document_object["SPDXID"] = self.spdx_id(self.document.spdx_id)
self.document_object["name"] = self.document.name

described_relationships = self.create_document_describes()
if described_relationships:
self.document_object["documentDescribes"] = described_relationships

package_objects = []
file_objects = []
for package in self.document.packages:
package_info_object = self.create_package_info(package)
# SPDX 2.2 says to omit if filesAnalyzed = False
if package.files:
package_info_object["files"] = self.create_file_info(package)
package_objects.append({"Package": package_info_object})

self.document_object["documentDescribes"] = package_objects
package_info_object, files_in_package = self.create_package_info(package)
package_objects.append(package_info_object)
file_objects.extend(files_in_package)
self.document_object["packages"] = package_objects
self.document_object["files"] = file_objects

if self.document.has_comment:
self.document_object["comment"] = self.document.comment
Expand Down Expand Up @@ -516,43 +540,8 @@ def create_document(self):
return self.document_object


def flatten_document(document_object):
    """
    Move nested Package -> Files to top level to conform with schema.
    """
    doc = document_object["Document"]

    # Turn the nested package wrappers into plain SPDXID references and lift
    # the package dicts themselves into a top-level "packages" list.
    nested_packages = doc["documentDescribes"]
    doc["documentDescribes"] = [pkg["Package"]["SPDXID"] for pkg in nested_packages]
    doc["packages"] = [pkg["Package"] for pkg in nested_packages]

    top_level_files = []
    for pkg_info in doc.get("packages", []):
        if "files" not in pkg_info:
            continue
        # The schema has no package-level "sha1"; drop it if present.
        pkg_info.pop("sha1", None)
        nested_files = pkg_info.pop("files")
        pkg_info["hasFiles"] = [entry["File"]["SPDXID"] for entry in nested_files]
        top_level_files.extend(entry["File"] for entry in nested_files)

    for file_info in top_level_files:
        file_info["fileName"] = file_info.pop("name")
        if "licenseInfoFromFiles" in file_info:
            file_info["licenseInfoInFiles"] = file_info.pop("licenseInfoFromFiles")
        del file_info["sha1"]

    doc["files"] = top_level_files
    return doc


class JsonYamlWriter(Writer):
    """
    Kept for backwards compatibility; behaves exactly like Writer.

    NOTE(review): the flattening step was removed from this subclass —
    presumably the base Writer now emits the flat SPDX 2.2 layout directly;
    confirm against Writer.create_document.
    """

    def create_document(self):
        # Delegate unchanged to the base implementation.
        return super().create_document()
4 changes: 2 additions & 2 deletions spdx/writers/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import yaml

from spdx.writers.tagvalue import InvalidDocumentError
from spdx.writers.jsonyamlxml import JsonYamlWriter
from spdx.writers.jsonyamlxml import Writer
from spdx.parsers.loggers import ErrorMessages


Expand All @@ -24,7 +24,7 @@ def write_document(document, out, validate=True):
if messages:
raise InvalidDocumentError(messages)

writer = JsonYamlWriter(document)
writer = Writer(document)
document_object = writer.create_document()

yaml.safe_dump(document_object, out, indent=2, explicit_start=True)
Loading

0 comments on commit f5af2e1

Please sign in to comment.