diff --git a/ci/clang_tidy.sh b/ci/clang_tidy.sh index 562f3124e63..e27171634b4 100755 --- a/ci/clang_tidy.sh +++ b/ci/clang_tidy.sh @@ -39,4 +39,11 @@ popd # Run the build via CMake, which will run clang-tidy when CUDF_CLANG_TIDY is enabled. cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_CLANG_TIDY=ON -GNinja -cmake --build cpp/build +cmake --build cpp/build > build_output.txt 2>&1 + +# Parse the build output to extract only IWYU's proposed changes. +python cpp/scrips/parse_iwyu_output.py build_output.txt iwyu_output.txt + +# Save the IWYU output as an artifact. +mkdir -p ${RAPIDS_ARTIFACTS_DIR} +mv iwyu_output.txt ${RAPIDS_ARTIFACTS_DIR}/iwyu_output.txt diff --git a/cpp/scripts/parse_iwyu_output.py b/cpp/scripts/parse_iwyu_output.py new file mode 100644 index 00000000000..47ced4baf0b --- /dev/null +++ b/cpp/scripts/parse_iwyu_output.py @@ -0,0 +1,157 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Helper script to modify IWYU output to only include removals.""" + +import argparse +import re + +def parse_log(log_content: str) -> tuple[dict[str, list[str]], dict[str, list[str]], dict[str, list[str]]]: + """Parse the log content to extract the include lists.""" + add_includes = {} + remove_includes = {} + full_include_lists = {} + + # Regex to match "should add" and "should remove" sections + add_pattern = re.compile(r'(.+)\s+should add these lines:\n((?:.+\n)+)') + remove_pattern = re.compile(r'(.+)\s+should remove these lines:\n((?:.+\n)+)') + full_include_pattern = re.compile(r'The full include-list for (.+):\n((?:.+\n)+?)---') + + # Parse "should add these lines" + for match in add_pattern.finditer(log_content): + file_path, includes = match.groups() + add_includes[file_path.strip()] = [line.strip() for line in includes.splitlines()] + + # Parse "should remove these lines" + for match in remove_pattern.finditer(log_content): + file_path, includes = match.groups() + remove_includes[file_path.strip()] = [line.strip() for line in includes.splitlines()] + + # Parse "full include-list" + for match in full_include_pattern.finditer(log_content): + file_path, includes = match.groups() + full_include_lists[file_path.strip()] = [line.strip() for line in includes.splitlines()] + + return add_includes, remove_includes, full_include_lists + + +def extract_include_file(include_line): + """Extract the core file path from an #include directive.""" + match = re.search(r'#include\s+[<"]([^">]+)[">]', include_line) + if match: + return match.group(1) + return None + + +def process_includes(add_includes, remove_includes): + """Process the include lists to remove any add/remove duplicates.""" + # Make a copy of the dictionary keys to safely iterate over + add_keys = list(add_includes.keys()) + + for file_path in add_keys: + adds = add_includes[file_path] + add_files = {extract_include_file(line) for line in adds} + + if file_path in remove_includes: + remove_files = {extract_include_file(line) for line in remove_includes[file_path]} + + # Update remove_includes by filtering out matched files + remove_includes[file_path] = [ + line for line in remove_includes[file_path] + if extract_include_file(line) not in add_files + ] + + # Also remove matching entries from add_includes + add_includes[file_path] = [ + line for line in adds + if extract_include_file(line) not in remove_files + ] + + +def update_full_include_list(add_includes, full_include_lists): + """Update the full include-list to remove any includes that are in add_includes.""" + # Update the full include-list to remove any includes that are in add_includes based on file name + for file_path, adds in add_includes.items(): + add_files = {extract_include_file(line) for line in adds} + if file_path in full_include_lists: + full_include_lists[file_path] = [ + line for line in full_include_lists[file_path] + if extract_include_file(line) not in add_files + ] + + +def write_output(file_path, add_includes, remove_includes, full_include_lists): + """Write the output back in the desired format.""" + with open(file_path, 'w') as f: + for file in sorted(set(add_includes.keys()).union(remove_includes.keys()).union(full_include_lists.keys())): + # Write "should add these lines" + if file in add_includes and add_includes[file]: + f.write(f"{file} should add these lines:\n") + for line in add_includes[file]: + f.write(f"{line}\n") + f.write("\n") + + # Write "should remove these lines" + if file in remove_includes and remove_includes[file]: + f.write(f"{file} should remove these lines:\n") + for line in remove_includes[file]: + f.write(f"{line}\n") # No extra minus sign + f.write("\n") + + # Write "The full include-list" + if file in full_include_lists and full_include_lists[file]: + f.write(f"The full include-list for {file}:\n") + for line in full_include_lists[file]: + f.write(f"{line}\n") + f.write("---\n") + + +def modify_log(log_content, output_file="output.txt"): + """Modify the log content to only include removals.""" + # Step 1: Parse the log + add_includes, remove_includes, full_include_lists = parse_log(log_content) + + # Step 2: Process the includes + process_includes(add_includes, remove_includes) + + # Step 3: Update the full include-list + update_full_include_list(add_includes, full_include_lists) + + # Step 4: Write the output back in the desired format + write_output(output_file, add_includes, remove_includes, full_include_lists) + + + +def main(): + parser = argparse.ArgumentParser( + description="Modify IWYU output to only include removals." + ) + parser.add_argument("input", help="File containing IWYU output") + + # Add output file parameter + parser.add_argument( + "output", + nargs="?", + help="Output file to write the modified output to", + default="iwyu_output.txt", + ) + args = parser.parse_args() + with open(args.input, "r") as f: + log_content = f.read() + modify_log(log_content, args.output) + + +if __name__ == "__main__": + main()