diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..a47be8a --- /dev/null +++ b/.flake8 @@ -0,0 +1,6 @@ +[flake8] +max-line-length = 110 +ignore = E201, E202, F401, W504 +per-file-ignores = + revdbc/project.py:E203,E221 + revdbc/version.py:E203,E221 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e65e9d3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +.eggs/ +*.egg-info/ +build/ +dist/ + +*.pyc +__pycache__/ +.mypy_cache/ +.tox/ + +docs/_build/ + +traces/ +runs/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..b82a140 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,12 @@ +# Changelog # + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] ## + +### Added ### +- Initial version + +[Unreleased]: ... diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..93ae9a8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2020 Tim Henkes
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..6df0404
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include revdbc/py.typed
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2fcc59e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+# Reverse-engineer DBC definitions from CAN dumps. #
+
+This library offers APIs to automatically reverse-engineer structural definitions of CAN packets by analyzing CAN dumps. It comes with a command line tool to conveniently analyze data from candump files as created by the [can-utils](https://github.com/linux-can/can-utils). The software focuses on reverse-engineering structures that can be described using DBC.
+
+## Installation ##
+
+Install the latest release using pip (``pip install revdbc``) or manually from source by running ``pip install .`` (preferred) or ``python setup.py install`` in the cloned repository. Doing so installs both the library for programmatic usage and an executable called `revdbc`.
Use `revdbc --help` for usage information. + +## Python API ## + +TODO + +## The Algorithm ## + +TODO diff --git a/REQUIREMENTS b/REQUIREMENTS new file mode 100644 index 0000000..153b5e1 --- /dev/null +++ b/REQUIREMENTS @@ -0,0 +1,540 @@ +Library: candumpgen - Generate realistic candump log files. +Link: https://github.com/TH-eMundo/candumpgen +Installation: via pip, package name: candumpgen +Last update of this entry: 29th of August, 2020 +License: Apache 2.0 + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 Tim Henkes + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +---------------------------------------------------------------------------------------------------- + +Library: cantools - CAN BUS tools in Python 3. +Link: https://github.com/eerimoq/cantools +Installation: via pip, package name: cantools +Last update of this entry: 29th of August, 2020 +License: MIT + +The MIT License (MIT) + +Copyright (c) 2015-2019 Erik Moqvist + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +---------------------------------------------------------------------------------------------------- + +Library: scikit-learn: machine learning in Python +Link: https://github.com/scikit-learn/scikit-learn +Installation: via pip, package name: scikit-learn +Last update of this entry: 19th of May, 2020 +License: 3-Clause BSD + +New BSD License + +Copyright (c) 2007–2020 The scikit-learn developers. +All rights reserved. + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of the Scikit-learn Developers nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. 
+ + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. + +---------------------------------------------------------------------------------------------------- + +Library: Typing Extensions – Backported and Experimental Type Hints for Python +Link: https://github.com/python/typing/blob/master/typing_extensions/README.rst +Installation: via pip, package name: typing_extensions +Last update of this entry: 25th of October, 2020 +License: Python Software Foundation License + +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations (now Zope +Corporation, see http://www.zope.com). In 2001, the Python Software +Foundation (PSF, see http://www.python.org/psf/) was formed, a +non-profit organization created specifically to own Python-related +Intellectual Property. Zope Corporation is a sponsoring member of +the PSF. + +All Python releases are Open Source (see http://www.opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? (1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2 and above 2.1.1 2001-now PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. 
+ +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014 Python Software Foundation; All Rights Reserved" are +retained in Python alone or in any derivative version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. 
Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the Internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. 
This +Agreement may also be obtained from a proxy server on the Internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. +Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. + + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. 
+ +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..cb097c4 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXPROJ = revdbc +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..281479d --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,83 @@ +# pylint: disable=invalid-name, wrong-import-position + +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a full list see +# the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, add these +# directories to sys.path here. If the directory is relative to the documentation root, +# use os.path.abspath to make it absolute, like shown here. +import os +import re +import sys + +this_file_path = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(os.path.join(this_file_path, "..", "revdbc")) + +from version import __version__ as __version # noQA: E402 # pylint: disable=no-name-in-module +from project import project as __project # noQA: E402 # pylint: disable=no-name-in-module, import-error + +# -- Project information ----------------------------------------------------------------- + +project = __project["name"] +author = __project["author"] +copyright = "{}, {}".format(__project["year"], __project["author"]) # pylint: disable=redefined-builtin + +# The short X.Y version +version = __version["short"] +# The full version, including alpha/beta/rc tags +release = __version["full"] + +# -- General configuration --------------------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions coming +# with Sphinx (named "sphinx.ext.*") or your custom ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx_autodoc_typehints" +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = [ "_templates" ] + +# List of patterns, relative to source directory, that match files and directories to +# ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. 
+exclude_patterns = [ "_build", "Thumbs.db", ".DS_Store" ] + +# -- Options for HTML output ------------------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for a list of +# builtin themes. +html_theme = "sphinx_rtd_theme" + +# Add any paths that contain custom static files (such as style sheets) here, relative to +# this directory. They are copied after the builtin static files, so a file named +# "default.css" will overwrite the builtin "default.css". +html_static_path = [ "_static" ] + +# -- Autodoc Member Skipping ------------------------------------------------------------- + +private_name_regex = re.compile(r"^_\w+__") + + +def autodoc_skip_member_handler(_app, _what, name, _obj, _skip, _options): + """ + A very simple handler for the autodoc-skip-member event that skips everything + "private", aka starting with double underscores. Everything else is left untouched. + """ + + if private_name_regex.match(name): + return True + + return None + + +def setup(app): + app.connect("autodoc-skip-member", autodoc_skip_member_handler) diff --git a/docs/getting_started.rst b/docs/getting_started.rst new file mode 100644 index 0000000..2d48f65 --- /dev/null +++ b/docs/getting_started.rst @@ -0,0 +1,4 @@ +Getting Started +=============== + + diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..b2f8f51 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,17 @@ +revdbc - Reverse-engineer DBC definitions from CAN dumps. +========================================================= + +:Version: |version| +:Release: |release| +:Date: |today| + +.. toctree:: + :caption: General + + installation + getting_started + +.. toctree:: + :caption: API Reference + + revdbc/index.rst diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 0000000..763f8ff --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,4 @@ +Installation +============ + +Install the latest release using pip (``pip install revdbc``) or manually from source by running ``pip install .`` (preferred) or ``python setup.py install`` in the cloned repository. Doing so installs both the library for programmatic usage and an executable called ``revdbc``. Use ``revdbc --help`` for usage information. diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..76aef1f --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=revdbc + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..f852e84 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +sphinx +sphinx-rtd-theme +sphinx-autodoc-typehints diff --git a/docs/revdbc/index.rst b/docs/revdbc/index.rst new file mode 100644 index 0000000..b7f4821 --- /dev/null +++ b/docs/revdbc/index.rst @@ -0,0 +1,2 @@ +API Documentation +================= diff --git a/documents/AFIT-END-DS-18-D-003.pdf b/documents/AFIT-END-DS-18-D-003.pdf new file mode 100644 index 0000000..ae5c2b2 Binary files /dev/null and b/documents/AFIT-END-DS-18-D-003.pdf differ diff --git a/documents/Bachelorarbeit_Stefan_Schoenhaerl.pdf b/documents/Bachelorarbeit_Stefan_Schoenhaerl.pdf new file mode 100644 index 0000000..5e733af Binary files /dev/null and b/documents/Bachelorarbeit_Stefan_Schoenhaerl.pdf differ diff --git a/documents/Pattern_Recognition_and_Machine_Learning.pdf b/documents/Pattern_Recognition_and_Machine_Learning.pdf new file mode 100644 index 0000000..cce1ab8 Binary files /dev/null and b/documents/Pattern_Recognition_and_Machine_Learning.pdf differ diff --git a/documents/The_CAN_Subsystem_of_the_Linux_Kernel.pdf b/documents/The_CAN_Subsystem_of_the_Linux_Kernel.pdf new file mode 100644 index 0000000..46e2005 Binary files /dev/null and b/documents/The_CAN_Subsystem_of_the_Linux_Kernel.pdf differ diff --git a/documents/car_hacking.pdf b/documents/car_hacking.pdf new file mode 100644 index 0000000..369a528 Binary files /dev/null and b/documents/car_hacking.pdf differ diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..7648699 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,5 @@ +[mypy] + +# Ignore packages that are not typed at all +[mypy-cantools.*,matplotlib.*,numpy.*,scipy.*,setuptools.*,sklearn.*] +ignore_missing_imports = True diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000..b348c4d --- /dev/null +++ b/pylintrc @@ -0,0 +1,529 @@ +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-whitelist= + +# Specify a score threshold to be exceeded before program exits with error. +fail-under=10.0 + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. 
+persistent=yes + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=missing-function-docstring, + missing-class-docstring, + missing-module-docstring, + too-many-statements, + too-many-branches, + too-many-locals, + too-many-arguments, + too-many-instance-attributes, + too-many-lines, + too-many-boolean-expressions, + too-many-public-methods, + too-few-public-methods, + fixme + + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'error', 'warning', 'refactor', and 'convention' +# which contain the number of messages in each category, as well as 'statement' +# which is the total number of statements analyzed. This score is used by the +# global evaluation report (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. 
+logging-modules=logging + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. 
+ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +#notes-rgx= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. +#class-attribute-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=a, b, # The two fields while merging/solving overlaps + db, # Database + f, # Files + i, + j, + k, + x, # x coordinate + y, # y coordinate + e, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. 
Overrides method-naming- +# style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. +#variable-rgx= + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=110 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[SIMILARITIES] + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. 
+deprecated-modules=optparse,tkinter.tix + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled). +ext-import-graph= + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled). +import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "BaseException, Exception". +overgeneral-exceptions=BaseException, + Exception diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8c07b23 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +candumpgen>=0.0.1,<0.1 +cantools>=35.3.0,<36 +scikit-learn>=0.23.2,<0.24 +typing_extensions>=3.7.4.3,<4 diff --git a/revdbc/__init__.py b/revdbc/__init__.py new file mode 100644 index 0000000..1a968c1 --- /dev/null +++ b/revdbc/__init__.py @@ -0,0 +1,9 @@ +from .revdbc import ( + AnalysisFailed, + AnalysisResult, + + CandumpRowNP, + + load_candump, + analyze_identifier +) diff --git a/revdbc/__main__.py b/revdbc/__main__.py new file mode 100644 index 0000000..0bb7988 --- /dev/null +++ b/revdbc/__main__.py @@ -0,0 +1,201 @@ +import argparse +from datetime import datetime +import logging +import os +import tempfile +from typing import Dict, Tuple + +import candumpgen +import cantools +import numpy as np + +from . 
import revdbc + + +def run_common(args: argparse.Namespace) -> Tuple[str, str]: + path = os.path.abspath(args.file) + if not os.path.exists(path): + raise Exception("`{}` does not point to a file/directory.".format(path)) + + log_level = logging.ERROR + if args.verbose > 0: + log_level = logging.WARNING + if args.verbose > 1: + log_level = logging.INFO + if args.verbose > 2: + log_level = logging.DEBUG + + output_directory = os.path.abspath(args.output_directory) + if not os.path.isdir(output_directory): + raise Exception("`--out={}` does not point to a directory.".format(output_directory)) + run_output_directory = os.path.join(output_directory, str(datetime.now())) + + try: + os.mkdir(run_output_directory) + except FileExistsError: + pass + + log_file = os.path.join(run_output_directory, "run.log") + + logging.basicConfig(format="%(asctime)s [%(levelname)s] %(message)s", level=log_level, + handlers=[ logging.StreamHandler(), logging.FileHandler(log_file) ]) + + return path, run_output_directory + + +def run_analysis(args: argparse.Namespace) -> None: + path, run_output_directory = run_common(args) + + if not os.path.isfile(path): + raise Exception("`{}` does not point to a file.".format(path)) + + candump = revdbc.load_candump(path) + + identifiers = np.unique(candump["identifier"]) + + print("Found identifiers: {}".format([ + (x, len(candump[candump["identifier"] == x])) for x in identifiers + ])) + + while True: + try: + identifier = int(input("Select one of them: ")) + if identifier in identifiers: + break + except ValueError: + pass + bodies = candump[candump["identifier"] == identifier]["data"] + sizes = np.unique(bodies["size"]) + + if len(sizes) != 1: + raise Exception( + "Can't process identifier {}, whose packet sizes differ throughout the candump." 
+ .format(identifier) + ) + + print("{} packets found for selected identifier {}.".format(len(bodies), identifier)) + + restored_dbc = revdbc.analyze_identifier( + identifier, + bodies["bits"], + sizes[0], + run_output_directory + ).restored_dbc + + cantools.subparsers.dump._dump_can_database(restored_dbc) # pylint: disable=protected-access + + +TOP_LEVEL_NAME = __name__ + + +def run_test(args: argparse.Namespace) -> None: + path, run_output_directory = run_common(args) + + if os.path.isfile(path): + test_cases = [ path ] + else: + test_cases = [ os.path.join(path, f) for f in next(os.walk(path))[2] if f.endswith(".dbc") ] + + results: Dict[str, Tuple[float, float]] = {} + for original_dbc in test_cases: + distances = [] + for run in range(args.runs): + with tempfile.TemporaryFile(mode="w+") as f: + candumpgen.generate_candump(f, original_dbc, "can0") + f.seek(0) + candump = revdbc.load_candump(f) + + for identifier in np.unique(candump["identifier"]): + bodies = candump[candump["identifier"] == identifier]["data"] + sizes = np.unique(bodies["size"]) + + if len(sizes) != 1: + logging.getLogger(TOP_LEVEL_NAME).warning( + "Skipping identifier %s, whose packet sizes differ throughout the candump.", + identifier + ) + continue + + restored_dbc_file = revdbc.analyze_identifier( + identifier, + bodies["bits"], + sizes[0], + run_output_directory, + "_{}_testrun{}".format(os.path.basename(original_dbc).replace(".", "_"), run + 1) + ).restored_dbc_file + + distance = candumpgen.dbc_dist(original_dbc, restored_dbc_file) + + logging.getLogger(TOP_LEVEL_NAME).info( + "Distance between original and restored DBC files: %s", + distance + ) + + distances.append(distance) + + average_distance = sum(distances) / len(distances) + + def squared_difference(distance: float, average_distance: float = average_distance) -> float: + return (distance - average_distance) ** 2 + + variance = sum(map(squared_difference, distances)) / len(distances) + + logging.getLogger(TOP_LEVEL_NAME).info( + "Average distance: %s; Variance: %s", + average_distance, + variance + ) + + results[original_dbc] = (average_distance, variance) + + first_col_heading = "Results ({} iterations each)".format(args.runs) + second_col_heading = "Average" + third_col_heading = "Variance" + + print("") + print("") + print("{:30s} {:7s} {:8s}".format(first_col_heading, second_col_heading, third_col_heading)) + + for dbc, (avg, var) in results.items(): + print("{:30s} {:7.2f} {:8.2f}".format(os.path.basename(dbc), avg, var)) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Reverse-engineer DBC definitions from CAN dumps.") + + parser.add_argument("-v", "--verbose", dest="verbose", action="count", default=0, + help="Increase output verbosity, up to three times.") + + parser.add_argument("-o", "--out", dest="output_directory", default=".", type=str, + help="The directory to store outputs in. A new subdirectory is created for" + " each run, next to a subdirectory for converted candumps. Defaults to" + " the working directory.") + + subparsers = parser.add_subparsers(title="subcommands", required=True) + + parser_analyze = subparsers.add_parser("analyze", help="Analyze a candump file.", aliases=["a"]) + + parser_analyze.add_argument("file", metavar="FILE", type=str, + help="The candump file to load and analyze.") + + parser_analyze.set_defaults(func=run_analysis) + + parser_test = subparsers.add_parser( + "test", + help="Test/benchmark the performance of the analysis." 
+ ) + + parser_test.add_argument("file", metavar="FILE", type=str, help="The DBC file to benchmark on.") + + parser_test.add_argument("-r", "--runs", dest="runs", default=8, type=int, + help="The number of runs to repeat and average each test case" + " (defaults to 8).") + + parser_test.set_defaults(func=run_test) + + args = parser.parse_args() + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/revdbc/maybe_open.py b/revdbc/maybe_open.py new file mode 100644 index 0000000..c1376a1 --- /dev/null +++ b/revdbc/maybe_open.py @@ -0,0 +1,39 @@ +import os +from typing import cast, Any, AnyStr, Generic, IO, Union + +PathLike = Union[str, bytes, os.PathLike] +Openable = Union[PathLike, IO[Any]] + + +class NullContextManager(Generic[AnyStr]): + def __init__(self, resource: IO[AnyStr]): + self._resource: IO[AnyStr] = resource + + def __enter__(self) -> IO[AnyStr]: + return self._resource + + def __exit__(self, *args: Any) -> None: + pass + + +def maybe_open(obj: Openable, mode: str = "r") -> Union[NullContextManager[Any], IO[Any]]: + """ + Tries to open `obj` as a file. If that attempt fails, assumes that `obj` already is an opened file. + + Args: + obj: Either some representation of a path to a file or an opened file-like object. + mode: The mode to open the file with, in case `obj` needs to be opened. Refer to :func:`open` for + details. + + Returns: + A context manager which returns either the newly opened file-like object or the original `obj`. Closes + the file when exiting the context manager if it was opened. + + Raises: + OSError: upon failure in case `obj` had to be opened. + """ + + try: + return open(os.fspath(cast(PathLike, obj)), mode) + except TypeError: + return NullContextManager(cast(IO[Any], obj)) diff --git a/revdbc/project.py b/revdbc/project.py new file mode 100644 index 0000000..29184f2 --- /dev/null +++ b/revdbc/project.py @@ -0,0 +1,12 @@ +project = { + "name" : "revdbc", + "description" : "Reverse-engineer DBC definitions from CAN dumps.", + "url" : "https://github.com/TH-eMundo/revdbc", + "year" : "2020", + "author" : "Tim Henkes", + "author_email" : "tim.henkes@e-mundo.de", + "categories" : [ + "Topic :: Security", + "Topic :: Scientific/Engineering :: Artificial Intelligence" + ] +} diff --git a/revdbc/revdbc.py b/revdbc/revdbc.py new file mode 100644 index 0000000..17472cf --- /dev/null +++ b/revdbc/revdbc.py @@ -0,0 +1,1270 @@ +from enum import Enum, auto +import logging +import os +import re +from typing import cast, Dict, List, NamedTuple, Optional, Set, Tuple, Union +import warnings + +import cantools +import cantools.subparsers.dump +import numpy as np +from scipy.spatial.distance import minkowski +import sklearn +from sklearn import cluster +from sklearn import svm +from typing_extensions import Literal + +from .maybe_open import Openable, maybe_open + +warnings.filterwarnings( + "ignore", + message="Liblinear failed to converge, increase the number of iterations.", + category=sklearn.exceptions.ConvergenceWarning, + module=sklearn.__name__ +) + +warnings.filterwarnings( + "ignore", + message="invalid value encountered in true_divide", + category=RuntimeWarning, + module=np.__name__ +) + +LOGGER = logging.getLogger(__name__) + +CANDUMP_ROW_REGEX = re.compile(r"^\((\d+\.\d+)\) (\w+) ([0-9A-F]{3,8})#([0-9A-F]*)\s*$") + +SizedBitsNP = np.dtype([ + ("size", np.uint8), # The number of valid bytes in `bits`. + ("bits", np.uint64) # Can hold up to 64 bits of data. 
+]) + +# Information about the source device/interface is not stored, all rows are assumed to belong to the +# same device/interface. +CandumpRowNP = np.dtype([ + ("timestamp", np.float64), # The timestamp with 64 bit floating point precision + ("identifier", np.uint32), # The identifier (11/29 bits), packed into a 32 bit integer + ("data", SizedBitsNP) # The packet body +]) + + +class FieldType(Enum): + MULTIPLEXER = auto() + MULTIPLEXED = auto() + CONST = auto() + VALUE = auto() + UNKNOWN = auto() + + +class FieldEndianness(Enum): + BIG = auto() + LITTLE = auto() + UNKNOWN = auto() + + def to_cantools( + self, + unknown: Union[Literal["little_endian"], Literal["big_endian"]] = "little_endian" + ) -> Literal["little_endian", "big_endian"]: + """ + Args: + unknown: The value to return for the UNKNOWN enum value. Defaults to "little_endian". + + Returns: + The string representation of this endianness as expected by cantools (i.e. "big_endian" or + "little_endian"). + """ + + if self is FieldEndianness.LITTLE: + return "little_endian" + + if self is FieldEndianness.BIG: + return "big_endian" + + return unknown + + +class Field(NamedTuple): + lsb_anchor: int + msb_anchor: int + size: int + endianness: FieldEndianness + type: FieldType + + +class AnalysisResult(NamedTuple): + restored_dbc_file: str + restored_dbc: cantools.db.Database + + +class NotEnoughData(Exception): + pass + + +class AnalysisFailed(Exception): + pass + + +def load_candump(candump: Openable) -> np.ndarray: + """ + Args: + candump: Either a path-like object pointing to the candump file to load, or an opened readable + file-like object representing the file. + + Returns: + The loaded/parsed candump file. + The `np.ndarray` contains elements of type `CandumpRowNP`. + + Raises: + OSError: if any interaction with the file system failed. + """ + entries = [] + with maybe_open(candump, "r") as f: + for row in f: + match = CANDUMP_ROW_REGEX.match(row) + if match is None: + LOGGER.warning("Skipping malformed row in candump file %s: %s", f.name, row) + continue + + timestamp = np.array(match.group(1), dtype=np.float64) + identifier = np.array(int(match.group(3), 16), dtype=np.uint32) + + # Swap the bytes after reading them to have bit indexing compatible with cantools. + body = np.array(int(match.group(4).ljust(16, "0"), 16), dtype=np.uint64).byteswap() + + # Get the size of the body by counting hex digits in the candump text and rounding up to the next + # full byte. + body_size = np.array((len(match.group(4)) * 4 + 7) // 8, dtype=np.uint8) + + body_np = np.array((body_size, body), dtype=SizedBitsNP) + + entries.append(np.array((timestamp, identifier, body_np), dtype=CandumpRowNP)) + + return np.array(entries, dtype=CandumpRowNP) + + +def analyze_identifier( + identifier: int, + bodies: np.ndarray, + size: int, + output_directory: str, + suffix: Optional[str] = None +) -> AnalysisResult: + """ + Args: + identifier: The identifier that `bodies` belong to. Used only for logging/rendering purposes and to + identify the analysis. + bodies: A (non-empty) NumPy array containing CAN packet bodies belonging to the same identifier. The + `np.ndarray` contains elements of type `np.uint64`. + size: The number of bytes in each body. All bodies must have the same byte size. + output_directory: The directory to save the results of the analysis to. Must exist. + suffix: Suffix to append to all generated file names (but in front of the extensions). + + Returns: + The result of the analysis. 
+ + Raises: + OSError: if any interaction with the file system failed. + AnalysisFailed: if the analysis failed with an unrecoverable error. + """ + + # Find predictable fields in the CAN packet bodies. Predictable fields are possibly multiplexer + # fields. + muxes = _find_predictable_fields(bodies, size) + + # Constant fields are very much predictable too, but don't offer a lot of value for + # demultiplexing. + muxes = set(filter(lambda mux: mux.type is not FieldType.CONST, muxes)) + + LOGGER.debug("Potential muxes:") + for mux in muxes: + LOGGER.debug("%s", mux) + + top_level_analyzed = set() + mux_level_analyzed: Optional[Dict[int, Set[Field]]] = None + mux_found = False + + # Demultiplex the bodies by grouping them by their multiplexer field values + for mux in set(muxes): + LOGGER.debug("-" * 100) + LOGGER.debug("Demuxing and analyzing %s", mux) + + demuxed_analysis_results = {} + demuxed = _demux_by(bodies, mux) + for mux_value, demuxed_bodies in demuxed.items(): + LOGGER.debug("*" * 100) + LOGGER.debug("Mux %s value %s", mux, mux_value) + + try: + demuxed_analysis_results[mux_value] = _solve_overlaps(analyze_demultiplexed( + demuxed_bodies, + size * 8 # Pass the size in bits + ), mux, size) + except NotEnoughData as e: + LOGGER.debug("Data too sparse: %s", e) + + if len(demuxed_analysis_results) < 2: + LOGGER.info( + "Data too sparse, couldn't demultiplex/analyze %s. Assuming it is not a multiplexer" + " field.", + mux + ) + muxes.remove(mux) + continue + + # The next step is to find out which fields seem to depend on the value of the multiplexer + # and which don't. To do so, the code finds fields that are classified the same way for all + # multiplexer values. + detected_fields = [ x.fields for x in demuxed_analysis_results.values() ] + detected_fields = list(filter(lambda x: len(x) > 0, detected_fields)) + + if len(detected_fields) == 0: + LOGGER.debug( + "No fields detected when demultiplexing %s. Assuming it is not a multiplexer" + " field.", + mux + ) + muxes.remove(mux) + continue + + all_fields = set.union(*detected_fields) + non_multiplexed_fields = set.intersection(*detected_fields) + multiplexed_fields = all_fields - non_multiplexed_fields + + LOGGER.debug("~" * 100) + # If all of the fields are independent of the multiplexer field value, it is probably not a + # multiplexer + if non_multiplexed_fields == all_fields: + LOGGER.debug( + "Mux %s does not influence the packet body. 
Assuming it is not a multiplexer" + " field.", + mux + ) + muxes.remove(mux) + else: + if mux_found: + raise AnalysisFailed("Found multiple multiplexer fields, analysis not possible.") + mux_found = True + + def is_multiplexed_field(field: Field, muxed_fields: Set[Field] = multiplexed_fields) -> bool: + return field in muxed_fields + + top_level_analyzed = non_multiplexed_fields + mux_level_analyzed = { + key: set(filter(is_multiplexed_field, value.fields)) + for key, value in demuxed_analysis_results.items() + } + + LOGGER.debug("-" * 100) + LOGGER.debug("Muxes:") + for mux in muxes: + LOGGER.debug("%s", mux) + LOGGER.debug("-" * 100) + + if len(muxes) == 0: + LOGGER.debug("No multiplexers, running global analysis:") + # In case no multiplexers were detected, perform analysis on the untouched bodies + analysis_result = analyze_demultiplexed(bodies, size * 8) # Pass the size in bits + + top_level_analyzed = analysis_result.fields + + restored_dbc_info = _restore_dbc( + int(identifier), + size, + top_level_analyzed, + mux_level_analyzed, + output_directory, + suffix + ) + + return AnalysisResult( + restored_dbc_file=restored_dbc_info[0], + restored_dbc=restored_dbc_info[1] + ) + + +def _find_predictable_fields(bodies: np.ndarray, size: int) -> Set[Field]: + """ + Args: + bodies: A (non-empty) NumPy array containing CAN packet bodies belonging to the same identifier. The + `np.ndarray` contains elements of type `np.uint64`. + size: The number of bytes in each body. All bodies must have the same byte size. + + Returns: + The set of predictable fields that were found. + """ + + # Find bit positions whose flips seem to be predictable + predictable_fields = [] + for index in range(size * 8): + # The endianness is unknown for single-bit fields + field = Field( + lsb_anchor=index, + msb_anchor=index, + size=1, + endianness=FieldEndianness.UNKNOWN, + type=FieldType.UNKNOWN + ) + + # Constant fields are predictable + if _is_field_constant(bodies, field): + predictable_fields.append(field._replace(type=FieldType.CONST)) + continue + + # If the field is not constant, perform the SVM prediction test + if _is_field_predictable(bodies, field): + predictable_fields.append(field._replace(type=FieldType.MULTIPLEXER)) + continue + + LOGGER.debug("-" * 100) + LOGGER.debug("Predictable single-bit fields:") + for predictable_field in predictable_fields: + LOGGER.debug("%s", predictable_field) + + # Try to merge constant/predictable fields to build larger ones + predictable_field_index = 0 + while predictable_field_index < len(predictable_fields) - 1: + this_predictable_field = predictable_fields[predictable_field_index] + + LOGGER.debug("-" * 100) + LOGGER.debug("Finding a merging buddy for %s", this_predictable_field) + + for other_predictable_field in predictable_fields[predictable_field_index + 1:]: + merged_field = _merge_fields(this_predictable_field, other_predictable_field) + if merged_field is None: + LOGGER.debug("Won't merge with %s", other_predictable_field) + continue + + LOGGER.debug("Would merge with %s", other_predictable_field) + LOGGER.debug("Merged field: %s", merged_field) + + if _is_field_predictable(bodies, merged_field): + merged_field = merged_field._replace(type=FieldType.MULTIPLEXER) + + # Merging a constant field doesn't affect predictability + if merged_field.type in [ FieldType.CONST, FieldType.MULTIPLEXER ]: + predictable_fields.remove(this_predictable_field) + predictable_fields.remove(other_predictable_field) + predictable_fields.insert(predictable_field_index, merged_field) + 
break + else: + predictable_field_index += 1 + + LOGGER.debug("-" * 100) + return set(predictable_fields) + + +def _is_field_constant(bodies: np.ndarray, field: Field) -> bool: + """ + Args: + bodies: A (non-empty) NumPy array containing CAN packet bodies belonging to the same identifier. The + `np.ndarray` contains elements of type `np.uint64`. + field: The field to check constantness for. + + Returns: + Whether the field is constant. + """ + + return ( + field.type is FieldType.CONST or + ( + field.type is FieldType.UNKNOWN and + len(np.unique(_extract_field(bodies, field))) == 1 + ) + ) + + +def _extract_field(bodies: np.ndarray, field: Field) -> np.ndarray: + """ + Args: + bodies: A (non-empty) NumPy array containing CAN packet bodies belonging to the same identifier. The + `np.ndarray` contains elements of type `np.uint64`. + field: The field to extract. + + Returns: + The extracted field values. + The `np.ndarray` contains elements of type `np.uint64`. + """ + + # With DBC's "sawtooth" byte ordering and bit indexing behaviour, "little endian" byte order means that + # semantically adjacent bits are also using adjacent bit indizes. I.e. the bit that semantically follows + # bit 7 (counting from 0) is bit 8. For "bit endian", the indizes do not logically follow the semantic + # ordering. For example, the semantically next bit after bit 15 is bit 0 in case of big endian. With + # little endian, bits sawtooth into the _next_ byte, while for big endian, they sawtooth into the + # _previous_ byte. These jumps make it hard to extract a semantically coherent big endian field, if it + # crosses byte borders. The following code solves this problem by swapping the bytes of the CAN packet + # body and updating the index of the starting bit of the field to extract. By swapping the bytes, fields + # that used to sawtooth into the next byte will now sawtooth into the previous byte and vice versa, thus + # big endian fields are then accessible conveniently like little endian fields. + if field.endianness is FieldEndianness.BIG: + # Swap the bytes to achieve convenient little endian-style access + bodies = bodies.byteswap() + + # Update the anchors accordingly + lsb_anchor = (7 - (field.lsb_anchor // 8)) * 8 + field.lsb_anchor % 8 + msb_anchor = (7 - (field.msb_anchor // 8)) * 8 + field.msb_anchor % 8 + + # Update the field, so that the following code for little endian fields can be reused. + field = Field( + lsb_anchor=lsb_anchor, + msb_anchor=msb_anchor, + size=field.size, + endianness=FieldEndianness.LITTLE, + type=field.type + ) + + mask = 0xFFFFFFFFFFFFFFFF if field.size == 64 else ((1 << field.size) - 1) + + return (bodies >> field.lsb_anchor) & mask + + +def _is_field_predictable(bodies: np.ndarray, field: Field) -> bool: + """ + Args: + bodies: A (non-empty) NumPy array containing CAN packet bodies belonging to the same identifier. The + `np.ndarray` contains elements of type `np.uint64`. + field: The field to check predictability for. + + Returns: + Whether the field seems to be predictable. + """ + + # Free parameters! + training_sequence_length = 64 + max_num_training_samples = 8192 + + def _seems_predictable(predicted: np.ndarray, actual: np.ndarray) -> bool: + # Both `np.ndarray` contains elements of type `np.uint64`. + + # Count bit positions which have a TAV greater than 0, indicating that they are non-constant + num_non_constant_bits: int = np.count_nonzero(_calculate_tav(actual, field.size) > 0) + + # Count entries where expected and actual differ by at least one bit. 
It doesn't matter by + # how many bits the entries differ, either the prediction is correct or it is not. + num_mistakes: int = np.count_nonzero(predicted ^ actual) + + LOGGER.debug( + "Field %s, containing %s non-constant bit%s predicted with %s mistake%s", + field, + num_non_constant_bits, + "" if num_non_constant_bits == 1 else "s", + num_mistakes, + "" if num_mistakes == 1 else "s" + ) + + # This one I'm very unsure about + return num_mistakes <= num_non_constant_bits * 2 + + if field.type is FieldType.MULTIPLEXER: + return True + + if field.type is FieldType.UNKNOWN: + # The bits corresponding to the field for each body + field_values = _extract_field(bodies, field) + + LOGGER.debug("Checking predictablility of field %s.", field) + + # Prepare training samples of length training_sequence_length + field_values_shifted = [] + for i in range(training_sequence_length): + start = i + end = field_values.shape[0] - training_sequence_length + i + + field_values_shifted.append(field_values[start:end]) + + training_samples = np.stack(field_values_shifted, axis=-1) + training_labels = field_values[training_sequence_length:] + + training_samples = training_samples[:max_num_training_samples] + training_labels = training_labels[:max_num_training_samples] + + # Check whether the field is constant after skipping the first `delay` entries + if len(np.unique(training_labels)) == 1: + LOGGER.debug( + "Field %s, containing 0 non-constant bits predicted with 0 single-bit mistakes", + field + ) + return True + + # If it is not constant, train a linear SVM and see whether it is capable of predicting bit + # flips. + classifier = svm.LinearSVC() + try: + classifier.fit(training_samples, training_labels) + except ValueError: + LOGGER.exception("Fitting the classifier for multiplexer field detection raised.") + return False + + return _seems_predictable(classifier.predict(training_samples), training_labels) + + return False + + +def _merge_fields(a: Field, b: Field) -> Optional[Field]: + """ + Try to merge two fields, based on their field positions and types. + + Args: + a, b: The fields to merge. + + Returns: + The merged field or 'None', if the fields could not be merged. + + Note: + Field merging happens only based on the position and types of the fields. This function does not check + whether the fields semantically merge. + """ + + # Merge the types: + merged_type: Optional[FieldType] = None + + # Constant fields can be merged with any other type. To make type merging easier, swap a and b if b is + # constant. + if b.type is FieldType.CONST: + a, b = b, a + + # Constant fields can be merged with any other type without losing semantics. + if a.type is FieldType.CONST: + merged_type = b.type + + # Two fields of type multiplexer or value can be merged, but semantics are potentially lost, thus the type + # is reduced to unknown. + if a.type is b.type and a.type in [ FieldType.MULTIPLEXER, FieldType.VALUE ]: + merged_type = FieldType.UNKNOWN + + # If a merged type was not found at this point, abort. 
+ if merged_type is None: + return None + + # Merge the size: + merged_size = a.size + b.size + + # Merge anchors and endianness: + merged_lsb_anchor = None + merged_msb_anchor = None + merged_endianness = None + + # Check which bytes are affected by the fields + affected_bytes_a = _get_affected_bytes(a) + affected_bytes_b = _get_affected_bytes(b) + affected_bytes_both = affected_bytes_a & affected_bytes_b + affected_bytes_any = affected_bytes_a | affected_bytes_b + + # Fields may have at most one affected byte in common, otherwise they are guaranteed to overlap. + if len(affected_bytes_both) > 1: + return None + + # If no common byte is affected by both fields, the LSB of one must be the byte after the MSB of the + # other. + if len(affected_bytes_both) == 0: + b_after_a = max(affected_bytes_a) + 1 == min(affected_bytes_b) + a_after_b = max(affected_bytes_b) + 1 == min(affected_bytes_a) + + # If a common byte is affected by both fields, it must be the MSB of one and the LSB of the other. + if len(affected_bytes_both) == 1: + b_after_a = max(affected_bytes_a) == min(affected_bytes_b) + a_after_b = max(affected_bytes_b) == min(affected_bytes_a) + + # Check whether the affected bytes follow the above rules, to rule out a byte-level overlap. + if not (b_after_a or a_after_b): + return None + + # Swap the variables so that b follows a. + if a_after_b: + affected_bytes_a, affected_bytes_b = affected_bytes_b, affected_bytes_a + a, b = b, a + + # Not used after this point but better safe than sorry + b_after_a, a_after_b = a_after_b, b_after_a + + # The next step is to rule out a bit-level overlap and to make sure that the fields are adjacent on the + # bit-level too: + # Check which bits are affected by a and b at the (potential) border between them + affected_border_bits_a = _get_affected_bits(a, max(affected_bytes_a)) + affected_border_bits_b = _get_affected_bits(b, min(affected_bytes_b)) + + # This is where endianness comes into play: unknown endianness can be merged with any other endianness, + # while big can not be merged with little. 
+ current_endianness = { a.endianness, b.endianness } + + # Check whether a merged field with unknown endianness can be created: + # - Both fields must be of unknown endianness + # - Both fields must affect the same byte + # - No other bytes must be affected (theoretically implied by being unknown in the first place) + # - The affected bits must not overlap + # - The affected bits must be adjacent + if ( + current_endianness == { FieldEndianness.UNKNOWN } and + len(affected_bytes_both) == 1 and + len(affected_bytes_any) == 1 and + len(affected_border_bits_a & affected_border_bits_b) == 0 + ): + if max(affected_border_bits_a) + 1 == min(affected_border_bits_b): + # The fields are adjacent and of unknown endianness; b follows a + merged_lsb_anchor = a.lsb_anchor + merged_msb_anchor = b.msb_anchor + merged_endianness = FieldEndianness.UNKNOWN + + if max(affected_border_bits_b) + 1 == min(affected_border_bits_a): + # The fields are adjacent and of unknown endianness; a follows b + merged_lsb_anchor = b.lsb_anchor + merged_msb_anchor = a.msb_anchor + merged_endianness = FieldEndianness.UNKNOWN + + # Check whether a merged field with little endianness can be created: + # - Both fields must be of unknown or little endianness + # - Multiple bytes must be affected + # - In case there is no commonly affected byte: + # - Bit 7 of the MSB of a must be affected + # - Bit 0 of the LSB of b must be affected + # - In case there is a commonly affected byte: + # - The affected bits must not overlap + # - The most significant bit affected by a must be adjacent to the least significant bit affected by b + if ( + current_endianness <= { FieldEndianness.LITTLE, FieldEndianness.UNKNOWN } and + len(affected_bytes_any) > 1 and + ( + ( + len(affected_bytes_both) == 0 and + 7 in affected_border_bits_a and + 0 in affected_border_bits_b + ) or + ( + len(affected_bytes_both) == 1 and + len(affected_border_bits_a & affected_border_bits_b) == 0 and + max(affected_border_bits_a) + 1 == min(affected_border_bits_b) + ) + ) + ): + merged_lsb_anchor = a.lsb_anchor + merged_msb_anchor = b.msb_anchor + merged_endianness = FieldEndianness.LITTLE + + # Check whether a merged field with big endianness can be created: + # - Both fields must be of unknown or big endianness + # - Multiple bytes must be affected + # - In case there is no commonly affected byte: + # - Bit 0 of the MSB of a must be affected + # - Bit 7 of the LSB of b must be affected + # - In case there is a commonly affected byte: + # - The affected bits must not overlap + # - The most significant bit affected by b must be adjacent to the least significant bit affected by a + if ( + current_endianness <= { FieldEndianness.BIG, FieldEndianness.UNKNOWN } and + len(affected_bytes_any) > 1 and + ( + ( + len(affected_bytes_both) == 0 and + 0 in affected_border_bits_a and + 7 in affected_border_bits_b + ) or + ( + len(affected_bytes_both) == 1 and + len(affected_border_bits_a & affected_border_bits_b) == 0 and + max(affected_border_bits_b) + 1 == min(affected_border_bits_a) + ) + ) + ): + merged_lsb_anchor = b.lsb_anchor + merged_msb_anchor = a.msb_anchor + merged_endianness = FieldEndianness.BIG + + # Make sure that all properties could be merged. 
+ if ( + merged_lsb_anchor is None or + merged_msb_anchor is None or + merged_size is None or + merged_endianness is None or + merged_type is None + ): + return None + + return Field( + lsb_anchor=merged_lsb_anchor, + msb_anchor=merged_msb_anchor, + size=merged_size, + endianness=merged_endianness, + type=merged_type + ) + + +def _get_affected_bytes(field: Field) -> Set[int]: + """ + Args: + field: A field. + + Returns: + The indices of all bytes affected by this field. + """ + + lsb = field.lsb_anchor // 8 + msb = field.msb_anchor // 8 + + return set(range(min(lsb, msb), max(lsb, msb) + 1)) + + +def _get_affected_bits(field: Field, byte: int) -> Set[int]: + """ + Args: + field: A field. + byte: A byte index. + + Returns: + The indices of all bits affected by the byte of this field. + """ + + if byte not in _get_affected_bytes(field): + return set() + + if field.endianness is FieldEndianness.BIG: + # "Convert" the big endian field into a little endian field. + byte = 2 * (field.lsb_anchor // 8) - byte + + field = field._replace(msb_anchor=(field.lsb_anchor + field.size - 1)) + + byte_lsb_anchor = byte * 8 + byte_msb_anchor = ((byte + 1) * 8) - 1 + + affected_relative_lsb_anchor = max(byte_lsb_anchor, field.lsb_anchor) % 8 + affected_relative_msb_anchor = min(byte_msb_anchor, field.msb_anchor) % 8 + + return set(range(affected_relative_lsb_anchor, affected_relative_msb_anchor + 1)) + + +def _demux_by(bodies: np.ndarray, field: Field) -> Dict[np.uint64, np.ndarray]: + """ + Args: + bodies: A (non-empty) NumPy array containing CAN packet bodies belonging to the same identifier. The + `np.ndarray` contains elements of type `np.uint64`. + field: The field to demultiplex by. + + Returns: + For each value the multiplexer field takes, a list of packet bodies. + The `np.ndarray` contains elements of type `np.uint64`. + + Raises: + OverflowError: if there are too many unique values of the multiplexer field. + """ + + # Free parameters! + max_unique_mux_values = 2 ** 8 + + if field.type is not FieldType.MULTIPLEXER: + raise ValueError("Not a multiplexer field.") + + field_values = _extract_field(bodies, field) + + # The bits corresponding to the multiplexer field for each body + unique_mux_values, inverse_body_indizes = np.unique(field_values, return_inverse=True) + if unique_mux_values.shape[0] > max_unique_mux_values: + raise OverflowError("Too many unique values of the multiplexer field ({}).".format( + unique_mux_values.shape[0] + )) + + return { v: bodies[inverse_body_indizes == i] for i, v in enumerate(unique_mux_values) } + + +class DemultiplexedAnalysisResult(NamedTuple): + tav: np.ndarray # Containing np.uint64 + relative_tav: np.ndarray # Containing np.float64 + tav_derivative: np.ndarray # Containing np.float64 + bcot: np.ndarray # Containing np.float64 + fields: Set[Field] + + +def analyze_demultiplexed(bodies: np.ndarray, size: int) -> DemultiplexedAnalysisResult: + """ + Find field boundaries in CAN packet bodies. This assumes that all bodies belong to the same CAN identifier + and that multiplexers have already been detected and the bodies were demultiplexed. + Do NOT strip the multiplexer field(s) off the bodies before passing them here. That is, pass all bodies + that have the same value for all multiplexer fields. The multiplexer fields will then be detected as + constants. + + Args: + bodies: The bodies to analyze. The `np.ndarray` contains elements of type `np.uint64`. + size: The number of bits in each body. All bodies must have the same bit size. 
+ + Returns: + The results of this analysis. + + Note: + Always pass the full CAN bodies. Failing to do so messes with bit and byte positions + required for little/big endian detection and field merging. + + Raises: + NotEnoughData: if the data is too sparse to perform analysis. + """ + + # Free parameters! + if size < 1: + raise ValueError("Bodies must consist of at least one bit.") + + if len(bodies) < 2: + raise NotEnoughData("Need at least two bodies to perform any analysis.") + + # Prepare all metrics + tav = _calculate_tav(bodies, size) + relative_tav = _calculate_relative_tav(tav) + tav_derivative = _calculate_tav_derivative(relative_tav) + bcot = _calculate_bcot(bodies, size) + + # Roughly detect fields + rough_field_separators = _find_rough_field_separators(tav_derivative, bcot) + + # Add all byte borders as potential field separators + rough_field_separators |= set(x for x in [7, 15, 23, 31, 39, 47, 55, 63] if x < size) + rough_field_separators_ascending = sorted(list(rough_field_separators)) + + LOGGER.debug("Rough field separators: %s", rough_field_separators_ascending) + + # Use the rough field separators to classify first CONST and VALUE fields + fields = [] + field_start = 0 + for separator in rough_field_separators_ascending: + field_end = separator + field_size = field_end - field_start + 1 + + field = Field( + lsb_anchor=field_start, + msb_anchor=field_end, + size=field_size, + endianness=FieldEndianness.UNKNOWN, + type=FieldType.UNKNOWN + ) + + if _is_field_constant(bodies, field): + field = field._replace(type=FieldType.CONST) + else: + field = field._replace(type=FieldType.VALUE) + + # This search finds little endian or unknown endian fields. + if len(_get_affected_bytes(field)) > 1: + field = field._replace(endianness=FieldEndianness.LITTLE) + + fields.append(field) + + field_start = separator + 1 + + # Try to merge fields to build larger ones + field_index = 0 + while field_index < len(fields) - 1: + this_field = fields[field_index] + + LOGGER.debug("\t" + "-" * 100) # pylint: disable=logging-not-lazy + LOGGER.debug("\tFinding a merging buddy for %s", this_field) + + for other_field in fields[field_index + 1:]: + merged_field = _merge_fields(this_field, other_field) + if merged_field is None: + LOGGER.debug("\tWon't merge with %s", other_field) + continue + + LOGGER.debug("\tWould merge with %s", other_field) + + merged_field = _restore_merge_semantics( + bodies, + merged_field, + this_field, + other_field + ) + + if merged_field is not None: + fields.remove(this_field) + fields.remove(other_field) + fields.insert(field_index, merged_field) + break + else: + field_index += 1 + + return DemultiplexedAnalysisResult( + tav=tav, + relative_tav=relative_tav, + tav_derivative=tav_derivative, + bcot=bcot, + fields=set(fields) + ) + + +def _calculate_tav(bodies: np.ndarray, size: int) -> np.ndarray: + """ + Args: + bodies: The bodies to analyze. The `np.ndarray` contains elements of type `np.uint64`. + size: The number of bits in each body. All bodies must have the same bit size. + + Returns: + The absolute TAV, i.e. for each bit position the absolute number of bit flips. + The `np.ndarray` contains elements of type `np.uint64`. 
+ """ + + if size < 1: + raise ValueError("Bodies must consist of at least one bit.") + + tav = np.zeros(size, dtype=np.uint64) + for bit in np.arange(size): + bits = (bodies >> bit) & 1 + tav[bit] = np.sum(bits[1:] ^ bits[:-1]) + return tav + + +def _calculate_relative_tav(tav: np.ndarray) -> np.ndarray: + """ + Args: + tav: The (absolute) TAV, as returned by `_calculate_tav`. The `np.ndarray` contains elements of type + `np.uint64`. + + Returns: + The relative TAV, i.e. for each bit position the number of bit flips in relation to all other bit + flips. The relative TAV for all bit positions adds up to 1. + The `np.ndarray` contains elements of type `np.float64`. + """ + + tav = tav.astype(np.float64) + + return tav / np.linalg.norm(tav) + + +def _calculate_tav_derivative(relative_tav: np.ndarray) -> np.ndarray: + """ + Args: + relative_tav: The relative TAV, as returned by `_calculate_relative_tav`. The `np.ndarray` contains + elements of type `np.float64`. + + Returns: + The derivative of the relative TAV. Relates adjacent bit positions, thus the entry "0" belongs to the + relation between bit positions 0 and 1. + The `np.ndarray` contains elements of type `np.float64`. + """ + + if relative_tav.shape[0] < 1: + raise ValueError("The TAV must be available for at least one bit.") + + return relative_tav[1:] - relative_tav[:-1] + + +def _calculate_bcot(bodies: np.ndarray, size: int) -> np.ndarray: + """ + Args: + bodies: The bodies to analyze. The `np.ndarray` contains elements of type `np.uint64`. + size: The number of bits in each body. All bodies must have the same bit size. + + Returns: + The Bit-Correlation-Over-Time. Like the derivative of the TAV, this metric relates adjacent bit + positions, thus the entry "0" belongs to the relation between bit positions 0 and 1. Note that entries + might be nan (= not a number), in case at least one of the correlated bits is constant. For example, + if bit 4 is constant, the entries "3" and "4" will be nan, because the correlation with a constant bit + is undefined. + The `np.ndarray` contains elements of type `np.float64`. + """ + + # Free parameters! + bcot_max_samples = 64 * 1024 + convolution_length = max(min(bodies.shape[0], bcot_max_samples) // 200, 64) + + if size < 1: + raise ValueError("Bodies must consist of at least one bit.") + + bodies = bodies[:bcot_max_samples] + + # Note: this code works with temporary Python list, which are potential bottlenecks, but the + # lists only have one entry per bit position (minus one), so the worst case is 63 entries per + # list, which should not be an issue. + # Note: Variable names are chosen as per the paper that defines this algorithm. + b = bodies[1:] ^ bodies[:-1] # pylint: disable=invalid-name + + b_t = np.array([ ((b >> col) & 1) for col in np.arange(size) ], dtype=np.uint8) + v_t = np.ones((size, convolution_length), dtype=np.uint8) + c_t = np.array([ np.convolve(b_t[row], v_t[row]) for row in np.arange(size) ]) + bcot = np.array([ np.corrcoef(c_t[row], c_t[row + 1])[1][0] for row in np.arange(size - 1) ]) + + return bcot.astype(np.float64) + + +def _find_rough_field_separators( + tav_derivative: np.ndarray, + bcot: np.ndarray +) -> Set[int]: + """ + Args: + tav_derivative: The derivative of the (relative) TAV, as returned by `_calculate_tav_derivative`. The + `np.ndarray` contains elements of type `np.float64`. + bcot: The Bit-Correlation-Over-Time, as returned by `_calculate_bcot`. The `np.ndarray` contains + elements of type `np.float64`. 
+ + Returns: + The (rough) field separators detected in the CAN packet bodies. Here "5" means that one field ends at + bit position 5 and another field starts at bit position 6. + + Raises: + NotEnoughData: if the data is too sparse to perform rough field separation. + """ + + # Free parameters! + num_classes = 2 + num_separator_classes = 1 + min_dist = .55 + + tav_derivative_vs_bcot = np.stack((bcot, tav_derivative), axis=-1) + non_nan_condition = np.logical_not(np.isnan(tav_derivative_vs_bcot[:, 0])) + non_nan_indizes = np.arange(bcot.shape[0])[non_nan_condition] + point_cloud = tav_derivative_vs_bcot[non_nan_condition] + + if len(point_cloud) == 0 or len(np.unique(point_cloud, axis=0)) < num_classes: + raise NotEnoughData("Not enough unique points to separate {} classes.".format(num_classes)) + + dist = minkowski(np.array(np.min(point_cloud[:, 0]), np.min(point_cloud[:, 1])), + np.array(np.max(point_cloud[:, 0]), np.max(point_cloud[:, 1]))) + + if dist < min_dist: + LOGGER.debug("All points are close, assuming no separators.") + return set() + + kmeans = cluster.KMeans(n_clusters=num_classes).fit(point_cloud) + + separator_classes = list(map(lambda x: x[0], sorted( + enumerate(kmeans.cluster_centers_), + key=lambda x: cast(float, np.linalg.norm(x[1])) + )[:num_separator_classes])) + + separators = [] + for separator_class in separator_classes: + separators.extend(non_nan_indizes[kmeans.labels_ == separator_class]) + + # Return the separating bit positions + return { int(x) for x in separators } + + +def _restore_merge_semantics( + bodies: np.ndarray, + merged_field: Field, + a: Field, + b: Field +) -> Optional[Field]: + """ + Args: + bodies: The bodies which are subject to analysis. The `np.ndarray` contains elements of type + `np.uint64`. + merged_field: The merged field to restore semantics for. + a: The first source field of the merged field. + b: The second source field of the merged field. + + Returns: + The merged field with restored semantics if possible, 'None' otherwise. + """ + + if merged_field.type is not FieldType.UNKNOWN: + return merged_field + + if not (a.type is b.type is FieldType.VALUE): # pylint: disable=superfluous-parens + return None + + # Extract the field bodies + merged_field_bodies = _extract_field(bodies, merged_field) + + # Prepare all metrics + tav = _calculate_tav(merged_field_bodies, merged_field.size) + relative_tav = _calculate_relative_tav(tav) + tav_derivative = _calculate_tav_derivative(relative_tav) + bcot = _calculate_bcot(merged_field_bodies, merged_field.size) + + # Roughly detect fields + try: + if len(_find_rough_field_separators(tav_derivative, bcot)) == 0: + return merged_field._replace(type=FieldType.VALUE) + except NotEnoughData: + # Not sure whether merging here is the correct decision, but it reduces single-bit fields. + return merged_field._replace(type=FieldType.VALUE) + + return None + + +def _solve_overlaps( + analysis_result: DemultiplexedAnalysisResult, + field: Field, + size: int +) -> DemultiplexedAnalysisResult: + """ + Adds `field` to the analysis result, resizing analysed fields as required to avoid overlaps. + + Args: + analysis_result: The result of an analysis. + field: The field to add/fit into the analysis result. + size: The size of the CAN message in bytes. + + Returns: + The updated analysis result including `field`. 
+ """ + + LOGGER.debug("Fitting %s into %s", field, analysis_result) + + for analyzed_field in set(analysis_result.fields): + analysis_result.fields.remove(analyzed_field) + analysis_result.fields.update(_solve_overlap(analyzed_field, field, size)) + + analysis_result.fields.add(field) + + return analysis_result + + +def _solve_overlap(analyzed_field: Field, field: Field, size: int) -> Set[Field]: + """ + Args: + analyzed_field: Field to modify to avoid overlaps with `field`. + field: The field to avoid overlaps with. + size: The size of the CAN message in bytes. + + Returns: + `analyzed_field`, modified/split/removed to avoid overlap with `field`. + """ + + # Resolving overlaps between fields is rather complicated, especially given the different + # possible combinations of endianness. The following code works around this by looking at which + # bits are affected by each field, "masking" one field off of the other and then merging the + # remaining bits back into fields. + masked_analyzed_field_bits = { + # Take the bits affected by analyzed_field and remove those affected by field. + # This effectively performs the bitwise operation 'analyzed_field & ~field' + byte: _get_affected_bits(analyzed_field, byte) & (set(range(8)) - _get_affected_bits(field, byte)) + for byte in range(size) + } + + # Restore field structures from the remaining bits + masked_bytewise_fields = [] + + # Find adjacent bits and build (small) fields for those ranges + for byte, bits in masked_analyzed_field_bits.items(): + bytewise_field = None + + for bit in range(8): + if bit in bits: + if bytewise_field is None: + bytewise_field = Field( + lsb_anchor=(byte * 8 + bit), + msb_anchor=(byte * 8 + bit), + size=1, + endianness=analyzed_field.endianness, + type=analyzed_field.type + ) + + bytewise_field = bytewise_field._replace(msb_anchor=(byte * 8 + bit)) + bytewise_field = bytewise_field._replace( + size=(bytewise_field.msb_anchor - bytewise_field.lsb_anchor + 1) + ) + else: + if bytewise_field is not None: + masked_bytewise_fields.append(bytewise_field) + bytewise_field = None + + if bytewise_field is not None: + masked_bytewise_fields.append(bytewise_field) + + # Merge the (small) bytewise fields to build the final non-overlapping field(s) + field_index = 0 + while field_index < len(masked_bytewise_fields) - 1: + this_field = masked_bytewise_fields[field_index] + + for other_field in masked_bytewise_fields[field_index + 1:]: + merged_field = _merge_fields(this_field, other_field) + if merged_field is None: + continue + + masked_bytewise_fields.remove(this_field) + masked_bytewise_fields.remove(other_field) + masked_bytewise_fields.insert(field_index, merged_field._replace( + type=analyzed_field.type + )) + break + else: + field_index += 1 + + LOGGER.debug("Avoiding overlaps between %s and %s: %s", analyzed_field, field, masked_bytewise_fields) + + return set(masked_bytewise_fields) + + +def _restore_dbc( + identifier: int, + size: int, + top_level_fields: Set[Field], + mux_level_fields: Optional[Dict[int, Set[Field]]], + output_directory: str, + suffix: Optional[str] = None +) -> Tuple[str, cantools.db.Database]: + """ + Args: + identifier: The identifier of this CAN message. + size: The size of the CAN message in bytes. + top_level_fields: Fields detected on the message top level (i.e. not multiplexed). + mux_level_fields: ... + output_directory: ... + suffix: Suffix to append to the file name (but in front of the extension). + + Returns: + The path pointing to the restored DBC file and the loaded DBC. 
+ """ + + signals: List[cantools.db.Signal] = [] + + multiplexer_signal_name = None + for field in top_level_fields: + start = field.msb_anchor if field.endianness is FieldEndianness.BIG else field.lsb_anchor + + name = "TOP_LEVEL_{}_{}_{}".format( + start, + field.size, + field.endianness.to_cantools() + ) + + if field.type is FieldType.MULTIPLEXER: + multiplexer_signal_name = name + + signals.append(cantools.database.can.Signal( + name=name, + start=start, + length=field.size, + byte_order=field.endianness.to_cantools(), + is_multiplexer=field.type is FieldType.MULTIPLEXER, + is_float=False # TODO + )) + + if mux_level_fields is not None: + for mux_value, fields in mux_level_fields.items(): + for field in fields: + start = field.msb_anchor if field.endianness is FieldEndianness.BIG else field.lsb_anchor + + name = "MUX_{}_LEVEL_{}_{}_{}".format( + mux_value, + start, + field.size, + field.endianness.to_cantools() + ) + + signals.append(cantools.database.can.Signal( + name=name, + start=start, + length=field.size, + byte_order=field.endianness.to_cantools(), + multiplexer_ids=[ int(mux_value) ], + multiplexer_signal=multiplexer_signal_name, + is_float=False # TODO + )) + + dbc_output = os.path.join(output_directory, "restored{}.dbc".format(suffix or "")) + LOGGER.info("DBC output: %s", dbc_output) + + try: + db = cantools.database.load_file(dbc_output) + LOGGER.info("Extending existing DBC.") + except (IOError, OSError): + db = cantools.database.can.Database() + LOGGER.info("Creating new DBC.") + + db.messages.append(cantools.database.can.Message( + identifier, + "ID{}".format(identifier), + size, + signals + )) + db.refresh() + + cantools.database.dump_file(db, dbc_output) + + return (dbc_output, db) diff --git a/revdbc/version.py b/revdbc/version.py new file mode 100644 index 0000000..a4eed49 --- /dev/null +++ b/revdbc/version.py @@ -0,0 +1,4 @@ +__version__ = {} +__version__["short"] = "0.0.1" +__version__["tag"] = "alpha" +__version__["full"] = "{}-{}".format(__version__["short"], __version__["tag"]) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..48cc3c8 --- /dev/null +++ b/setup.py @@ -0,0 +1,76 @@ +# pylint: disable=exec-used +import os +from typing import Dict, List, Union + +from setuptools import setup, find_packages + +source_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), "revdbc") + +version_scope: Dict[str, Dict[str, str]] = {} +with open(os.path.join(source_root, "version.py")) as f: + exec(f.read(), version_scope) +version = version_scope["__version__"] + +project_scope: Dict[str, Dict[str, Union[str, List[str]]]] = {} +with open(os.path.join(source_root, "project.py")) as f: + exec(f.read(), project_scope) +project = project_scope["project"] + +with open("README.md") as f: + long_description = f.read() + +classifiers = [ + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + + "License :: OSI Approved :: Apache Software License", + + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + + "Typing :: Typed" +] + +classifiers.extend(project["categories"]) + +if version["tag"] == "alpha": + classifiers.append("Development Status :: 3 - Alpha") + +if version["tag"] == "beta": + classifiers.append("Development Status :: 4 - 
Beta") + +if version["tag"] == "stable": + classifiers.append("Development Status :: 5 - Production/Stable") + +del project["categories"] +del project["year"] + +setup( + version=version["short"], + long_description=long_description, + long_description_content_type="text/markdown", + license="Apache 2.0", + packages=find_packages(), + entry_points={ + "console_scripts": [ + "revdbc=revdbc.__main__:main" + ], + }, + install_requires=[ + "candumpgen>=0.0.1,<0.1", + "cantools>=35.3.0,<36", + "scikit-learn>=0.23.2,<0.24", + "typing_extensions>=3.7.4.3,<4" + ], + python_requires=">=3.6", + include_package_data=True, + zip_safe=False, + classifiers=classifiers, + **project +) diff --git a/tests/test_nop.py b/tests/test_nop.py new file mode 100644 index 0000000..0c7dbd9 --- /dev/null +++ b/tests/test_nop.py @@ -0,0 +1,3 @@ +def test_nop(): + # A single succeeding test to satisfy pytest until actual tests are available. + assert True diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..9b383dc --- /dev/null +++ b/tox.ini @@ -0,0 +1,37 @@ +[tox] +envlist = py{36,37,38,39,py3} + +[testenv] +description = "Functionality tests" +deps = pytest +commands = pytest + +[testenv:docs] +description = "Build the documentation" +skip_install = true +changedir = docs/ +deps = -rdocs/requirements.txt +commands = sphinx-build -W --keep-going -b html . _build/html + +[testenv:codespell] +description = "Check code for spelling mistakes" +skip_install = true +deps = codespell +commands = codespell --skip="*.jpg,*.svg,*.pyc" *.* docs/ revdbc/ tests/ setup.py --skip docs/_build + +[testenv:mypy] +description = "Verify typings" +skip_install = true +deps = mypy +commands = mypy --strict --allow-subclassing-any --show-error-codes --implicit-reexport . + +[testenv:flake8] +description = "Check code style" +skip_install = true +deps = flake8 +commands = flake8 revdbc/ tests/ docs/ setup.py + +[testenv:pylint] +description = "Linting" +deps = pylint +commands = pylint revdbc/ tests/*.py docs/*.py setup.py
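
The console script installed by setup.py ("revdbc=revdbc.__main__:main") exposes the "analyze" and "test" subcommands, e.g. "revdbc analyze dump.log" or "revdbc test some.dbc". The same pipeline can also be driven from Python through the names re-exported in revdbc/__init__.py; the sketch below mirrors run_analysis() in revdbc/__main__.py. The file name "dump.log", the choice of the first identifier, and the output directory "." are placeholders, not part of the repository.

import numpy as np

import revdbc

# Parse the candump into a structured array of CandumpRowNP rows.
candump = revdbc.load_candump("dump.log")
identifiers = np.unique(candump["identifier"])

# Analyze one identifier; all of its bodies must share a single byte size.
bodies = candump[candump["identifier"] == identifiers[0]]["data"]
sizes = np.unique(bodies["size"])
assert len(sizes) == 1

result = revdbc.analyze_identifier(
    int(identifiers[0]),  # identifier the bodies belong to
    bodies["bits"],       # packet bodies packed into uint64 values
    int(sizes[0]),        # body size in bytes
    "."                   # existing directory to write the restored DBC into
)
print(result.restored_dbc_file)  # path of the DBC file restored by the analysis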
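
revdbc/maybe_open.py is what lets load_candump accept either a path or an already-opened file. A short standalone sketch of both paths through it (the file name is hypothetical):

from revdbc.maybe_open import maybe_open

with maybe_open("dump.log", "r") as f:  # path-like: opened here and closed on exit
    first_row = f.readline()

already_open = open("dump.log", "r")
with maybe_open(already_open) as f:     # file-like: passed through unchanged
    first_row = f.readline()
already_open.close()                    # caller keeps responsibility for closing it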
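
Field detection in revdbc/revdbc.py starts from the TAV, which _calculate_tav computes as the number of bit flips between consecutive bodies at each bit position. A standalone toy run of that same loop, with values chosen purely for illustration:

import numpy as np

bodies = np.array([0b0011, 0b0001, 0b0101], dtype=np.uint64)
size = 4  # bits per body

tav = np.zeros(size, dtype=np.uint64)
for bit in np.arange(size):
    bits = (bodies >> bit) & 1               # value of this bit position in every body
    tav[bit] = np.sum(bits[1:] ^ bits[:-1])  # count transitions between consecutive bodies

print(tav)  # [0 1 1 0]: bits 0 and 3 are constant, bits 1 and 2 each flip once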