Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

changes necessary to validate against SPDX jsonschema #197

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions spdx/creationinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class Organization(Creator):
- email: Org's email address. Optional. Type: str.
"""

def __init__(self, name, email):
def __init__(self, name, email=None):
super(Organization, self).__init__(name)
self.email = email

Expand Down Expand Up @@ -80,7 +80,7 @@ class Person(Creator):
- email: person's email address. Optional. Type: str.
"""

def __init__(self, name, email):
def __init__(self, name, email=None):
super(Person, self).__init__(name)
self.email = email

Expand Down
14 changes: 13 additions & 1 deletion spdx/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def __init__(self, name, spdx_id=None, chk_sum=None):
self.spdx_id = spdx_id
self.comment = None
self.type = None
self.chk_sum = chk_sum
self.checksums = [None]
self.conc_lics = None
self.licenses_in_file = []
self.license_comment = None
Expand All @@ -82,6 +82,18 @@ def __eq__(self, other):
def __lt__(self, other):
return self.name < other.name

@property
def chk_sum(self):
"""
Backwards compatibility, return first checksum.
"""
# NOTE Package.check_sum but File.chk_sum
return self.checksums[0]

@chk_sum.setter
def chk_sum(self, value):
self.checksums[0] = value

def add_lics(self, lics):
self.licenses_in_file.append(lics)

Expand Down
16 changes: 14 additions & 2 deletions spdx/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class Package(object):
If set to "false", the package must not contain any files.
Optional, boolean.
- homepage: Optional, URL as string or NONE or NO_ASSERTION.
- verif_code: string. Mandatory if files_analyzed is True or None (omitted)
- verif_code: string. 0..1 if files_analyzed is True or None (omitted)
Must be None (omitted) if files_analyzed is False
- check_sum: Optional , spdx.checksum.Algorithm.
- source_info: Optional string.
Expand Down Expand Up @@ -82,7 +82,7 @@ def __init__(
self.files_analyzed = None
self.homepage = None
self.verif_code = None
self.check_sum = None
self.checksums = [None]
self.source_info = None
self.conc_lics = None
self.license_declared = None
Expand All @@ -103,6 +103,18 @@ def are_files_analyzed(self):
# as default None Value is False, previous line is simplification of
# return self.files_analyzed or self.files_analyzed is None

@property
def check_sum(self):
"""
Backwards compatibility, return first checksum.
"""
# NOTE Package.check_sum but File.chk_sum
return self.checksums[0]

@check_sum.setter
def check_sum(self, value):
self.checksums[0] = value

def add_file(self, fil):
self.files.append(fil)

Expand Down
35 changes: 34 additions & 1 deletion spdx/parsers/jsonyamlxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -1486,6 +1486,39 @@ def parse_pkg_chksum(self, pkg_chksum):
self.value_error("PKG_CHECKSUM", pkg_chksum)


def unflatten_document(document):
"""
Inverse of spdx.writers.jsonyamlxml.flatten_document
"""
files_by_id = {}
if "files" in document:
for f in document.pop("files"):
f["name"] = f.pop("fileName")
# XXX must downstream rely on "sha1" property?
for checksum in f["checksums"]:
if checksum["algorithm"] == "SHA1":
f["sha1"] = checksum["checksumValue"]
break
if "licenseInfoInFiles" in f:
f["licenseInfoFromFiles"] = f.pop("licenseInfoInFiles")
files_by_id[f["SPDXID"]] = f

packages = document.pop("packages")
for package in packages:
if "hasFiles" in package:
package["files"] = [{
"File": files_by_id[spdxid]} for spdxid in package["hasFiles"]
]
# XXX must downstream rely on "sha1" property?
for checksum in package.get("checksums", []):
if checksum["algorithm"] == "SHA1":
package["sha1"] = checksum["checksumValue"]
break

document["documentDescribes"] = [{ "Package": package} for package in packages ]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not comply with the json schema, which just has an array of spdx ids here: https://github.com/spdx/spdx-spec/blob/development/v2.3.1/schemas/spdx-schema.json#L219-L226

Even if it's just a temporary internal data structure, I find this highly confusing

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's very intentional, this PR didn't invent flatten/unflatten as it is a pain to follow links around when the document is in memory.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see the point of flattening the structure in this case though. The only instance where the value is accessed is this part where the packages are parsed, and this can be done just as well without modifying the data structure first. In fact, it is already done this way (although the fact that this logic is technically duplicated should be fixed).

If resolving links becomes annoying, I would find it more natural to add a utility method that handles it instead of introducing an additional intermediate data structure. This will come with a performance impact if called many times, but I would be surprised if this was an issue.

Additional remark: The current structure only considers files that are included in packages. To my understanding, files should also be allowed to be independent of any package (this might be related to the python tools still being out of date though, it seems like snippets are not considered at all, for example).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One additional comment: My understanding so far is that in a json document, any SPDX-Document DESCRIBES SPDX-Package relationship would be omitted and instead the package id would appear in documentDescribes (similar for files and snippets). This would imply that during parsing, we have to create new relationships for elements of documentDesccribes. I don't see that happening anywhere.

Relevant discussion: #242. Let's see if I'm wrong here 😅


return document

class Parser(
CreationInfoParser,
ExternalDocumentRefsParser,
Expand All @@ -1503,7 +1536,7 @@ def __init__(self, builder, logger):
def json_yaml_set_document(self, data):
# we could verify that the spdxVersion >= 2.2, but we try to be resilient in parsing
if data.get("spdxVersion"):
self.document_object = data
self.document_object = unflatten_document(data)
return
self.document_object = data.get("Document")

Expand Down
4 changes: 2 additions & 2 deletions spdx/writers/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import json

from spdx.writers.tagvalue import InvalidDocumentError
from spdx.writers.jsonyamlxml import Writer
from spdx.writers.jsonyamlxml import JsonYamlWriter
from spdx.parsers.loggers import ErrorMessages


Expand All @@ -24,6 +24,6 @@ def write_document(document, out, validate=True):
if messages:
raise InvalidDocumentError(messages)

writer = Writer(document)
writer = JsonYamlWriter(document)
document_object = writer.create_document()
json.dump(document_object, out, indent=4)
62 changes: 54 additions & 8 deletions spdx/writers/jsonyamlxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def checksum(self, checksum_field):
"""
checksum_object = dict()
checksum_object["algorithm"] = (
"checksumAlgorithm_" + checksum_field.identifier.lower()
checksum_field.identifier.upper()
)
checksum_object["checksumValue"] = checksum_field.value
return checksum_object
Expand Down Expand Up @@ -112,9 +112,10 @@ def create_package_info(self, package):
package_object["SPDXID"] = self.spdx_id(package.spdx_id)
package_object["name"] = package.name
package_object["downloadLocation"] = package.download_location.__str__()
package_object["packageVerificationCode"] = self.package_verification_code(
package
)
if package.verif_code is not None:
package_object["packageVerificationCode"] = self.package_verification_code(
package
)
package_object["licenseConcluded"] = self.license(package.conc_lics)
package_object["licenseInfoFromFiles"] = list(
map(self.license, package.licenses_from_files)
Expand All @@ -138,13 +139,14 @@ def create_package_info(self, package):
package_object["packageFileName"] = package.file_name

if package.has_optional_field("supplier"):
package_object["supplier"] = package.supplier.__str__()
package_object["supplier"] = package.supplier.to_value()

if package.has_optional_field("originator"):
package_object["originator"] = package.originator.__str__()
package_object["originator"] = package.originator.to_value()

if package.has_optional_field("check_sum"):
package_object["checksums"] = [self.checksum(package.check_sum)]
package_object["checksums"] = [self.checksum(checksum) for checksum in package.checksums if checksum]
assert package.check_sum.identifier == "SHA1", "First checksum must be SHA1"
package_object["sha1"] = package.check_sum.value

if package.has_optional_field("description"):
Expand Down Expand Up @@ -197,12 +199,14 @@ def create_file_info(self, package):

file_object["name"] = file.name
file_object["SPDXID"] = self.spdx_id(file.spdx_id)
file_object["checksums"] = [self.checksum(file.chk_sum)]
file_object["checksums"] = [self.checksum(checksum) for checksum in file.checksums if checksum]
file_object["licenseConcluded"] = self.license(file.conc_lics)
file_object["licenseInfoFromFiles"] = list(
map(self.license, file.licenses_in_file)
)
file_object["copyrightText"] = file.copyright.__str__()

assert file.chk_sum.identifier == "SHA1", "First checksum must be SHA1"
file_object["sha1"] = file.chk_sum.value

if file.has_optional_field("comment"):
Expand Down Expand Up @@ -505,3 +509,45 @@ def create_document(self):
self.document_object["relationships"] = self.create_relationship_info()

return {"Document": self.document_object}


def flatten_document(document_object):
"""
Move nested Package -> Files to top level to conform with schema.
"""

document = document_object["Document"]

# replace documentDescribes with SPDXID references
package_objects = document["documentDescribes"]

document["documentDescribes"] = [package["Package"]["SPDXID"] for package in package_objects]

document["packages"] = [package["Package"] for package in package_objects]

file_objects = []

for package_info_object in document.get("packages", []):
if not "files" in package_info_object:
continue
if "sha1" in package_info_object:
del package_info_object["sha1"]
package_info_object["hasFiles"] = [file_object["File"]["SPDXID"] for file_object in package_info_object["files"]]
file_objects.extend(file_object["File"] for file_object in package_info_object.pop("files"))

for file_object in file_objects:
file_object["fileName"] = file_object.pop("name")
if "licenseInfoFromFiles" in file_object:
file_object["licenseInfoInFiles"] = file_object.pop("licenseInfoFromFiles")
del file_object["sha1"]

document["files"] = file_objects

return document


class JsonYamlWriter(Writer):

def create_document(self):
document_object = super().create_document()
return flatten_document(document_object)
3 changes: 2 additions & 1 deletion spdx/writers/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from spdx import document
from spdx import config
from spdx import utils
from spdx.package import Package
from spdx.parsers.loggers import ErrorMessages
from spdx.writers.tagvalue import InvalidDocumentError

Expand Down Expand Up @@ -709,7 +710,7 @@ def handle_package_literal_optional(self, package, package_node, predicate, fiel
triple = (package_node, predicate, value_node)
self.graph.add(triple)

def handle_pkg_optional_fields(self, package, package_node):
def handle_pkg_optional_fields(self, package: Package, package_node):
"""
Write package optional fields.
"""
Expand Down
15 changes: 14 additions & 1 deletion spdx/writers/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,19 @@
from spdx.parsers.loggers import ErrorMessages


class XMLWriter(Writer):
def checksum(self, checksum_field):
"""
Return a dictionary representation of a spdx.checksum.Algorithm object
"""
checksum_object = dict()
checksum_object["algorithm"] = (
"checksumAlgorithm_" + checksum_field.identifier.lower()
)
checksum_object["checksumValue"] = checksum_field.value
return checksum_object


def write_document(document, out, validate=True):

if validate:
Expand All @@ -24,7 +37,7 @@ def write_document(document, out, validate=True):
if messages:
raise InvalidDocumentError(messages)

writer = Writer(document)
writer = XMLWriter(document)
document_object = {"SpdxDocument": writer.create_document()}

xmltodict.unparse(document_object, out, encoding="utf-8", pretty=True)
4 changes: 2 additions & 2 deletions spdx/writers/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import yaml

from spdx.writers.tagvalue import InvalidDocumentError
from spdx.writers.jsonyamlxml import Writer
from spdx.writers.jsonyamlxml import JsonYamlWriter
from spdx.parsers.loggers import ErrorMessages


Expand All @@ -24,7 +24,7 @@ def write_document(document, out, validate=True):
if messages:
raise InvalidDocumentError(messages)

writer = Writer(document)
writer = JsonYamlWriter(document)
document_object = writer.create_document()

yaml.safe_dump(document_object, out, indent=2, explicit_start=True)
13 changes: 10 additions & 3 deletions tests/data/doc_write/json-simple-plus.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,17 @@
"name": "Sample_Document-V2.1",
"SPDXID": "SPDXRef-DOCUMENT",
"documentNamespace": "https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301",
"creationInfo": {
"creators": [
"Organization: SPDX"
],
"created": "2021-11-15T00:00:00Z",
"licenseListVersion": "3.6"
},
"documentDescribes": [
{
"Package": {
"SPDXID": "SPDXRef-Package",
"SPDXID": "SPDXRef-Package",
"name": "some/path",
"downloadLocation": "NOASSERTION",
"copyrightText": "Some copyrught",
Expand All @@ -17,7 +24,7 @@
},
"checksums": [
{
"algorithm": "checksumAlgorithm_sha1",
"algorithm": "SHA1",
"checksumValue": "SOME-SHA1"
}
],
Expand All @@ -30,7 +37,7 @@
"SPDXID": "SPDXRef-File",
"checksums": [
{
"algorithm": "checksumAlgorithm_sha1",
"algorithm": "SHA1",
"checksumValue": "SOME-SHA1"
}
],
Expand Down
Loading