Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for multiple file checksums, file types #200

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions data/SPDXJsonExample.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
}
],
"fileTypes": [
"fileType_archive"
"ARCHIVE"
],
"SPDXID": "SPDXRef-File1"
}
Expand All @@ -54,7 +54,7 @@
}
],
"fileTypes": [
"fileType_source"
"SOURCE"
],
"SPDXID": "SPDXRef-File2"
}
Expand Down
14 changes: 14 additions & 0 deletions spdx/checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Names of the checksum algorithms accepted by Algorithm.
CHECKSUM_ALGORITHMS = ['SHA1', 'SHA256', 'SHA512']

# Algorithm name -> XML term. Built from CHECKSUM_ALGORITHMS so that adding an
# algorithm to the list is the only change needed; every XML term is
# 'checksumAlgorithm_' followed by the lower-cased algorithm name.
CHECKSUM_ALGORITHM_TO_XML_DICT = {
    name: 'checksumAlgorithm_' + name.lower() for name in CHECKSUM_ALGORITHMS
}

# XML term -> algorithm name. Derived by inverting the table above so the two
# directions can never disagree.
CHECKSUM_ALGORITHM_FROM_XML_DICT = {
    xml_term: name for name, xml_term in CHECKSUM_ALGORITHM_TO_XML_DICT.items()
}
jotterson marked this conversation as resolved.
Show resolved Hide resolved

jotterson marked this conversation as resolved.
Show resolved Hide resolved

class Algorithm(object):
"""Generic checksum algorithm."""

def __init__(self, identifier, value):
    """
    Create a checksum.

    - identifier: algorithm name; must be one of CHECKSUM_ALGORITHMS.
    - value: the checksum value, stored as given.

    Raises ValueError when identifier is not in CHECKSUM_ALGORITHMS.
    """
    if identifier not in CHECKSUM_ALGORITHMS:
        # Name the problem and the accepted values so the caller can fix
        # the input without reading this module.
        raise ValueError(
            'Unsupported checksum algorithm: {!r} (expected one of: {})'.format(
                identifier, ', '.join(CHECKSUM_ALGORITHMS)
            )
        )
    self.identifier = identifier
    self.value = value

Expand Down
4 changes: 2 additions & 2 deletions spdx/creationinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class Organization(Creator):
- email: Org's email address. Optional. Type: str.
"""

def __init__(self, name, email):
def __init__(self, name, email=None):
super(Organization, self).__init__(name)
self.email = email

Expand Down Expand Up @@ -80,7 +80,7 @@ class Person(Creator):
- email: person's email address. Optional. Type: str.
"""

def __init__(self, name, email):
def __init__(self, name, email=None):
super(Person, self).__init__(name)
self.email = email

Expand Down
132 changes: 98 additions & 34 deletions spdx/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,65 @@ class FileType(object):
AUDIO = 6
IMAGE = 7
TEXT = 8
VIDEO = 9
DOCUMENTATION = 9
SPDX = 10
VIDEO = 11


# FileType constant -> XML term.
FILE_TYPE_TO_XML_DICT = {
    FileType.SOURCE: "fileType_source",
    FileType.OTHER: "fileType_other",
    FileType.BINARY: "fileType_binary",
    FileType.ARCHIVE: "fileType_archive",
    FileType.APPLICATION: "fileType_application",
    FileType.AUDIO: "fileType_audio",
    FileType.DOCUMENTATION: "fileType_documentation",
    FileType.IMAGE: "fileType_image",
    FileType.SPDX: "fileType_spdx",
    FileType.TEXT: "fileType_text",
    FileType.VIDEO: "fileType_video",
}

# FileType constant -> plain upper-case name.
FILE_TYPE_TO_STRING_DICT = {
    FileType.SOURCE: "SOURCE",
    FileType.OTHER: "OTHER",
    FileType.BINARY: "BINARY",
    FileType.ARCHIVE: "ARCHIVE",
    FileType.APPLICATION: "APPLICATION",
    FileType.AUDIO: "AUDIO",
    FileType.DOCUMENTATION: "DOCUMENTATION",
    FileType.IMAGE: "IMAGE",
    FileType.SPDX: "SPDX",
    FileType.TEXT: "TEXT",
    FileType.VIDEO: "VIDEO",
}

# The reverse lookups are derived from the tables above instead of being
# written out a second time, so a file type added to a TO table is
# automatically parseable as well.
# NOTE(review): relies on every FileType constant having a distinct value;
# confirm against the full FileType class.
FILE_TYPE_FROM_XML_DICT = {
    xml_term: file_type for file_type, xml_term in FILE_TYPE_TO_XML_DICT.items()
}

FILE_TYPE_FROM_STRING_DICT = {
    type_name: file_type
    for file_type, type_name in FILE_TYPE_TO_STRING_DICT.items()
}


@total_ordering
Expand All @@ -40,9 +96,8 @@ class File(object):
- spdx_id: Uniquely identify any element in an SPDX document which may be
referenced by other elements. Mandatory, one. Type: str.
- comment: File comment str, Optional zero or one.
- type: one of FileType.SOURCE, FileType.BINARY, FileType.ARCHIVE
and FileType.OTHER, optional zero or one.
- chk_sum: SHA1, Mandatory one.
- file_types: list of FileType constants. Mandatory, one or more.
jotterson marked this conversation as resolved.
Show resolved Hide resolved
jotterson marked this conversation as resolved.
Show resolved Hide resolved
- checksums: list of checksum.Algorithm; must include at least a SHA1 checksum.
jotterson marked this conversation as resolved.
Show resolved Hide resolved
- conc_lics: Mandatory one. document.License or utils.NoAssert or utils.SPDXNone.
- licenses_in_file: list of licenses found in file, mandatory one or more.
document.License or utils.SPDXNone or utils.NoAssert.
Expand All @@ -58,12 +113,12 @@ class File(object):
-attribution_text: optional string.
"""

def __init__(self, name, spdx_id=None, chk_sum=None):
def __init__(self, name, spdx_id=None):
self.name = name
self.spdx_id = spdx_id
self.comment = None
self.type = None
self.chk_sum = chk_sum
self.file_types = []
self.checksums = []
self.conc_lics = None
self.licenses_in_file = []
self.license_comment = None
Expand All @@ -82,6 +137,17 @@ def __eq__(self, other):
def __lt__(self, other):
    """Order files by their ``name`` attribute."""
    return self.name < other.name

@property
def chk_sum(self):
    """
    Backwards-compatible accessor: the result of looking up the SHA1
    entry through get_checksum('SHA1').
    """
    sha1_checksum = self.get_checksum('SHA1')
    return sha1_checksum

@chk_sum.setter
def chk_sum(self, value):
    # Route the legacy attribute assignment through set_checksum.
    self.set_checksum(value)

def add_lics(self, lics):
    """Append ``lics`` to the ``licenses_in_file`` list."""
    found_licenses = self.licenses_in_file
    found_licenses.append(lics)

Expand All @@ -106,7 +172,7 @@ def validate(self, messages):
"""
messages.push_context(self.name)
self.validate_concluded_license(messages)
self.validate_type(messages)
self.validate_file_types(messages)
self.validate_checksum(messages)
self.validate_licenses_in_file(messages)
self.validate_copyright(messages)
Expand Down Expand Up @@ -162,44 +228,42 @@ def validate_concluded_license(self, messages):

return messages

def validate_type(self, messages):
if self.type not in [
None,
FileType.SOURCE,
FileType.OTHER,
FileType.BINARY,
FileType.ARCHIVE,
]:
messages.append(
"File type must be one of the constants defined in "
"class spdx.file.FileType"
)

def validate_file_types(self, messages):
    """
    Append an error to ``messages`` when ``file_types`` is empty.

    Returns the ``messages`` object that was passed in.
    """
    type_count = len(self.file_types)
    if type_count == 0:
        messages.append('At least one file type must be specified.')
    return messages

def validate_checksum(self, messages):
if not isinstance(self.chk_sum, checksum.Algorithm):
messages.append(
"File checksum must be instance of spdx.checksum.Algorithm"
)
else:
if not self.chk_sum.identifier == "SHA1":
messages.append("File checksum algorithm must be SHA1")

if self.get_checksum() is None:
jotterson marked this conversation as resolved.
Show resolved Hide resolved
messages.append("At least one file checksum algorithm must be SHA1")
return messages

def calc_chksum(self):
def calculate_checksum(self, hash_algorithm='SHA1'):
BUFFER_SIZE = 65536

file_sha1 = hashlib.sha1()
file_hash = hashlib.new(hash_algorithm.lower())
with open(self.name, "rb") as file_handle:
while True:
data = file_handle.read(BUFFER_SIZE)
if not data:
break
file_sha1.update(data)

return file_sha1.hexdigest()
file_hash.update(data)

return file_hash.hexdigest()

def get_checksum(self, hash_algorithm='SHA1'):
    """
    Return the first entry of ``self.checksums`` whose ``identifier``
    equals ``hash_algorithm`` (default 'SHA1'), or None if there is none.
    """
    matching = (
        entry for entry in self.checksums
        if entry.identifier == hash_algorithm
    )
    return next(matching, None)

def set_checksum(self, chk_sum):
    """
    Store ``chk_sum`` in ``self.checksums``.

    If an entry with the same ``identifier`` already exists, its ``value``
    is overwritten in place; otherwise ``chk_sum`` is appended. Values that
    are not checksum.Algorithm instances are ignored.
    """
    if not isinstance(chk_sum, checksum.Algorithm):
        return
    for existing in self.checksums:
        if existing.identifier == chk_sum.identifier:
            existing.value = chk_sum.value
            return
    self.checksums.append(chk_sum)
jotterson marked this conversation as resolved.
Show resolved Hide resolved

def has_optional_field(self, field):
    """Return True when attribute ``field`` exists and is not None."""
    current = getattr(self, field, None)
    return current is not None
63 changes: 41 additions & 22 deletions spdx/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def __init__(
self.files_analyzed = None
self.homepage = None
self.verif_code = None
self.check_sum = None
self.checksums = []
jotterson marked this conversation as resolved.
Show resolved Hide resolved
self.source_info = None
self.conc_lics = None
self.license_declared = None
Expand All @@ -97,6 +97,17 @@ def __init__(
self.verif_exc_files = []
self.pkg_ext_refs = []

@property
def check_sum(self):
    """
    Backwards-compatible accessor: the result of looking up the SHA1
    entry through get_checksum('SHA1').
    """
    sha1_checksum = self.get_checksum('SHA1')
    return sha1_checksum

@check_sum.setter
def check_sum(self, value):
    # Route the legacy attribute assignment through set_checksum.
    self.set_checksum(value)

@property
def are_files_analyzed(self):
    """True unless ``files_analyzed`` is exactly False."""
    explicitly_disabled = self.files_analyzed is False
    return not explicitly_disabled
Expand Down Expand Up @@ -271,32 +282,40 @@ def validate_str_fields(self, fields, optional, messages):
return messages

def validate_checksum(self, messages):
if self.check_sum is not None:
if not isinstance(self.check_sum, checksum.Algorithm):
messages.append(
"Package checksum must be instance of spdx.checksum.Algorithm"
)

if self.get_checksum() is None:
messages.append("At least one package checksum algorithm must be SHA1")
return messages

def calc_verif_code(self):
hashes = []

list_of_file_hashes = []
hash_algo_name = "SHA1"
for file_entry in self.files:
if (
isinstance(file_entry.chk_sum, checksum.Algorithm)
and file_entry.chk_sum.identifier == "SHA1"
):
sha1 = file_entry.chk_sum.value
file_chksum = file_entry.get_checksum(hash_algo_name)
if file_chksum is not None:
file_ch = file_chksum.value
else:
sha1 = file_entry.calc_chksum()
hashes.append(sha1)

hashes.sort()

sha1 = hashlib.sha1()
sha1.update("".join(hashes).encode("utf-8"))
return sha1.hexdigest()
file_ch = file_entry.calculate_checksum(hash_algo_name)
list_of_file_hashes.append(file_ch)

list_of_file_hashes.sort()

hasher = hashlib.new(hash_algo_name.lower())
hasher.update("".join(list_of_file_hashes).encode("utf-8"))
return hasher.hexdigest()

def get_checksum(self, hash_algorithm='SHA1'):
    """
    Return the first entry of ``self.checksums`` whose ``identifier``
    equals ``hash_algorithm`` (default 'SHA1'), or None if there is none.
    """
    matching = (
        entry for entry in self.checksums
        if entry.identifier == hash_algorithm
    )
    return next(matching, None)

def set_checksum(self, new_checksum):
    """
    Store ``new_checksum`` in ``self.checksums``.

    If an entry with the same ``identifier`` already exists, its ``value``
    is overwritten in place; otherwise ``new_checksum`` is appended. Values
    that are not checksum.Algorithm instances are ignored.
    """
    if not isinstance(new_checksum, checksum.Algorithm):
        return
    for existing in self.checksums:
        if existing.identifier == new_checksum.identifier:
            existing.value = new_checksum.value
            return
    self.checksums.append(new_checksum)

def has_optional_field(self, field):
    """Return True when attribute ``field`` exists and is not None."""
    current = getattr(self, field, None)
    return current is not None
Expand Down
Loading