Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for multiple file checksums, file types #200

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions data/SPDXJsonExample.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
}
],
"fileTypes": [
"fileType_archive"
"ARCHIVE"
],
"SPDXID": "SPDXRef-File1"
}
Expand All @@ -54,7 +54,7 @@
}
],
"fileTypes": [
"fileType_source"
"SOURCE"
],
"SPDXID": "SPDXRef-File2"
}
Expand Down Expand Up @@ -187,7 +187,7 @@
"name": "from linux kernel",
"copyrightText": "Copyright 2008-2010 John Smith",
"licenseConcluded": "Apache-2.0",
"licenseInfoFromSnippet": [
"licenseInfoInSnippet": [
"Apache-2.0"
],
"licenseComments": "The concluded license was taken from package xyz, from which the snippet was copied into the current file. The concluded license information was found in the COPYING.txt file in package xyz.",
Expand Down
2 changes: 1 addition & 1 deletion data/SPDXXmlExample.xml
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.</extractedText>
<name>from linux kernel</name>
<copyrightText>Copyright 2008-2010 John Smith</copyrightText>
<licenseConcluded>Apache-2.0</licenseConcluded>
<licenseInfoFromSnippet>Apache-2.0</licenseInfoFromSnippet>
<licenseInfoInSnippet>Apache-2.0</licenseInfoInSnippet>
<licenseComments>The concluded license was taken from package xyz, from which the snippet was copied into the current file. The concluded license information was found in the COPYING.txt file in package xyz.</licenseComments>
<SPDXID>SPDXRef-Snippet</SPDXID>
<fileId>SPDXRef-DoapSource</fileId>
Expand Down
2 changes: 1 addition & 1 deletion data/SPDXYamlExample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ Document:
the snippet was copied into the current file. The concluded license information
was found in the COPYING.txt file in package xyz.
licenseConcluded: Apache-2.0
licenseInfoFromSnippet:
licenseInfoInSnippet:
- Apache-2.0
name: from linux kernel
spdxVersion: SPDX-2.1
2 changes: 1 addition & 1 deletion spdx/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,5 @@ def validate_annotation_type(self, messages):
messages.append("Annotation missing annotation type.")

def validate_spdx_id(self, messages):
if self.spdx_id is None:
if self.spdx_id is not None and type(self.spdx_id) != str :
messages.append("Annotation missing SPDX Identifier Reference.")
35 changes: 35 additions & 0 deletions spdx/checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,46 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""
List of checksum algorithms from SPDX spec part 8.4.1 "Description":
SHA1, SHA224, SHA256, SHA384, SHA512, SHA3-256, SHA3-384, SHA3-512,
BLAKE2b-256, BLAKE2b-384, BLAKE2b-512, BLAKE3,
MD2, MD4, MD5, MD6, ADLER32
"""
# Maps each supported checksum algorithm identifier to its
# `checksumAlgorithm_*` counterpart (the XML-side name, going by the
# constant's name).
CHECKSUM_ALGORITHM_TO_XML_DICT = {
    'ADLER32': 'checksumAlgorithm_adler32',
    'BLAKE2b-256': 'checksumAlgorithm_blake2b-256',
    'BLAKE2b-384': 'checksumAlgorithm_blake2b-384',
    'BLAKE2b-512': 'checksumAlgorithm_blake2b-512',
    'BLAKE3': 'checksumAlgorithm_blake3',
    'MD2': 'checksumAlgorithm_md2',
    'MD4': 'checksumAlgorithm_md4',
    'MD5': 'checksumAlgorithm_md5',
    'MD6': 'checksumAlgorithm_md6',
    'SHA1': 'checksumAlgorithm_sha1',
    'SHA224': 'checksumAlgorithm_sha224',
    'SHA256': 'checksumAlgorithm_sha256',
    'SHA384': 'checksumAlgorithm_sha384',
    'SHA512': 'checksumAlgorithm_sha512',
    'SHA3-256': 'checksumAlgorithm_sha3-256',
    'SHA3-384': 'checksumAlgorithm_sha3-384',
    'SHA3-512': 'checksumAlgorithm_sha3-512',
}

# Supported algorithm identifiers, in the dict's iteration order.
CHECKSUM_ALGORITHMS = list(CHECKSUM_ALGORITHM_TO_XML_DICT)

# Inverse lookup: `checksumAlgorithm_*` name -> algorithm identifier.
CHECKSUM_ALGORITHM_FROM_XML_DICT = {
    xml_name: algorithm
    for algorithm, xml_name in CHECKSUM_ALGORITHM_TO_XML_DICT.items()
}

# Pattern for "<algorithm>: <value>" text. Group 1 is the algorithm
# identifier; group 2 is the run of lowercase hex digits that follows the
# colon and optional whitespace (the run may be empty).
CHECKSUM_REGEX = r'({}):\s*([a-f0-9]*)'.format('|'.join(CHECKSUM_ALGORITHMS))


class Algorithm(object):
"""Generic checksum algorithm."""

def __init__(self, identifier, value):
if identifier not in CHECKSUM_ALGORITHMS:
raise ValueError('checksum algorithm {} is not supported'.format(identifier))
self.identifier = identifier
self.value = value

Expand Down
2 changes: 0 additions & 2 deletions spdx/cli_tools/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ def main(file, force):
"Package Download Location: {0}".format(doc.package.download_location)
)
print("Package Homepage: {0}".format(doc.package.homepage))
if doc.package.check_sum:
print("Package Checksum: {0}".format(doc.package.check_sum.value))
print("Package Attribution Text: {0}".format(doc.package.attribution_text))
print("Package verification code: {0}".format(doc.package.verif_code))
print(
Expand Down
4 changes: 2 additions & 2 deletions spdx/creationinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class Organization(Creator):
- email: Org's email address. Optional. Type: str.
"""

def __init__(self, name, email):
def __init__(self, name, email=None):
super(Organization, self).__init__(name)
self.email = email

Expand Down Expand Up @@ -80,7 +80,7 @@ class Person(Creator):
- email: person's email address. Optional. Type: str.
"""

def __init__(self, name, email):
def __init__(self, name, email=None):
super(Person, self).__init__(name)
self.email = email

Expand Down
19 changes: 11 additions & 8 deletions spdx/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,8 @@ def __init__(
self.packages = []
if package is not None:
self.packages.append(package)
self.files = []
self.describes = []
self.extracted_licenses = []
self.reviews = []
self.annotations = []
Expand Down Expand Up @@ -350,8 +352,8 @@ def add_package(self, package):

@property
def package(self):
warnings.warn('document.package and document.files are deprecated; '
'use document.packages instead',
warnings.warn('document.package and document.file are deprecated; '
'use document.packages or document.files instead',
DeprecationWarning)
if len(self.packages) == 0:
return None
Expand All @@ -368,13 +370,14 @@ def package(self, value):
else:
self.packages[0] = value

@property
def files(self):
return self.package.files
def get_files(self):
return self.files

def set_files(self, value):
self.files = value

@files.setter
def files(self, value):
self.package.files = value
def add_file(self, fil):
self.files.append(fil)

@property
def has_comment(self):
Expand Down
97 changes: 60 additions & 37 deletions spdx/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import enum
from functools import total_ordering
import hashlib

Expand All @@ -17,7 +17,7 @@
from spdx import utils


class FileType(object):
class FileType(enum.IntEnum):
SOURCE = 1
BINARY = 2
ARCHIVE = 3
Expand All @@ -26,9 +26,21 @@ class FileType(object):
AUDIO = 6
IMAGE = 7
TEXT = 8
VIDEO = 9
DOCUMENTATION = 9
SPDX = 10
VIDEO = 11

@classmethod
def by_name(cls, name):
return FileType.__getitem__(name)


FILE_TYPE_TO_XML_DICT = {}
FILE_TYPE_FROM_XML_DICT = {}
for ft in list(FileType):
xml_name = 'fileType_{}'.format(ft.name.lower())
FILE_TYPE_TO_XML_DICT[ft] = xml_name
FILE_TYPE_FROM_XML_DICT[xml_name] = ft


@total_ordering
Expand All @@ -40,9 +52,8 @@ class File(object):
- spdx_id: Uniquely identify any element in an SPDX document which may be
referenced by other elements. Mandatory, one. Type: str.
- comment: File comment str, Optional zero or one.
- type: one of FileType.SOURCE, FileType.BINARY, FileType.ARCHIVE
and FileType.OTHER, optional zero or one.
- chk_sum: SHA1, Mandatory one.
- file_types: list of file types. cardinality 0..#FILE_TYPES
- checksums: dict mapping checksum algorithm identifier to checksum value; there must be at least a SHA1 entry.
- conc_lics: Mandatory one. document.License or utils.NoAssert or utils.SPDXNone.
- licenses_in_file: list of licenses found in file, mandatory one or more.
document.License or utils.SPDXNone or utils.NoAssert.
Expand All @@ -58,12 +69,12 @@ class File(object):
-attribution_text: optional string.
"""

def __init__(self, name, spdx_id=None, chk_sum=None):
def __init__(self, name, spdx_id=None):
self.name = name
self.spdx_id = spdx_id
self.comment = None
self.type = None
self.chk_sum = chk_sum
self.file_types = []
self.checksums = {}
self.conc_lics = None
self.licenses_in_file = []
self.license_comment = None
Expand All @@ -75,13 +86,34 @@ def __init__(self, name, spdx_id=None, chk_sum=None):
self.artifact_of_project_name = []
self.artifact_of_project_home = []
self.artifact_of_project_uri = []
self.annotations = []

def __eq__(self, other):
return isinstance(other, File) and self.name == other.name

def __lt__(self, other):
return self.name < other.name

@property
def chk_sum(self):
"""
Backwards compatibility: return the SHA1 checksum.
Note that this is deprecated; use get_checksum instead.
"""
return self.get_checksum('SHA1')

@chk_sum.setter
def chk_sum(self, value):
"""
Backwards compatibility; deprecated, please use set_checksum instead.
"""
if isinstance(value, str):
self.set_checksum(checksum.Algorithm('SHA1', value))
elif isinstance(value, checksum.Algorithm):
self.set_checksum(value)
else:
raise ValueError('cannot call chk_sum with value of type {}.'.format(type(value)))

def add_lics(self, lics):
self.licenses_in_file.append(lics)

Expand All @@ -106,7 +138,6 @@ def validate(self, messages):
"""
messages.push_context(self.name)
self.validate_concluded_license(messages)
self.validate_type(messages)
self.validate_checksum(messages)
self.validate_licenses_in_file(messages)
self.validate_copyright(messages)
Expand Down Expand Up @@ -162,44 +193,36 @@ def validate_concluded_license(self, messages):

return messages

def validate_type(self, messages):
if self.type not in [
None,
FileType.SOURCE,
FileType.OTHER,
FileType.BINARY,
FileType.ARCHIVE,
]:
messages.append(
"File type must be one of the constants defined in "
"class spdx.file.FileType"
)

return messages

def validate_checksum(self, messages):
if not isinstance(self.chk_sum, checksum.Algorithm):
messages.append(
"File checksum must be instance of spdx.checksum.Algorithm"
)
else:
if not self.chk_sum.identifier == "SHA1":
messages.append("File checksum algorithm must be SHA1")

if self.get_checksum() is None:
jotterson marked this conversation as resolved.
Show resolved Hide resolved
messages.append("At least one file checksum algorithm must be SHA1")
return messages

def calc_chksum(self):
def calculate_checksum(self, hash_algorithm='SHA1'):
if hash_algorithm not in checksum.CHECKSUM_ALGORITHMS:
raise ValueError('checksum algorithm {} is not supported'.format(hash_algorithm))
BUFFER_SIZE = 65536

file_sha1 = hashlib.sha1()
file_hash = hashlib.new(hash_algorithm.lower())
with open(self.name, "rb") as file_handle:
while True:
data = file_handle.read(BUFFER_SIZE)
if not data:
break
file_sha1.update(data)
file_hash.update(data)

return file_hash.hexdigest()

def get_checksum(self, hash_algorithm='SHA1'):
if hash_algorithm not in checksum.CHECKSUM_ALGORITHMS:
raise ValueError('checksum algorithm {} is not supported'.format(hash_algorithm))
return self.checksums.get(hash_algorithm)

return file_sha1.hexdigest()
def set_checksum(self, chk_sum):
if isinstance(chk_sum, checksum.Algorithm):
jotterson marked this conversation as resolved.
Show resolved Hide resolved
if chk_sum.identifier not in checksum.CHECKSUM_ALGORITHMS:
raise ValueError('checksum algorithm {} is not supported'.format(chk_sum.identifier))
self.checksums[chk_sum.identifier] = chk_sum.value

def has_optional_field(self, field):
return getattr(self, field, None) is not None
Loading