-
Notifications
You must be signed in to change notification settings - Fork 17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: Use git attributes to identify binary files #2083
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,12 @@ | ||
import { stat } from 'node:fs/promises'; | ||
import path, { join } from 'node:path'; | ||
import { readFileSync } from 'node:fs'; | ||
|
||
import sqlite3 from 'better-sqlite3'; | ||
import assert from 'assert'; | ||
import makeDebug from 'debug'; | ||
import { existsSync } from 'fs'; | ||
import minimatch from 'minimatch'; | ||
|
||
import listProjectFiles from './listProjectFiles'; | ||
import queryKeywords from './queryKeywords'; | ||
|
@@ -80,11 +82,14 @@ export class FileIndex { | |
? await listGitProjectFiles(directory) | ||
: await listProjectFiles(directory); | ||
|
||
const gitAttributes = getGitAttributes(directory); | ||
|
||
const filteredFileNames = await filterFiles( | ||
directory, | ||
fileNames, | ||
excludePatterns, | ||
includePatterns | ||
includePatterns, | ||
gitAttributes | ||
); | ||
|
||
const options = { | ||
|
@@ -254,9 +259,9 @@ const DATA_FILE_EXTENSIONS: string[] = [ | |
'xml', | ||
].map((ext) => '.' + ext); | ||
|
||
const isBinaryFile = (fileName: string) => { | ||
return BINARY_FILE_EXTENSIONS.some((ext) => fileName.endsWith(ext)); | ||
}; | ||
const isBinaryFile = (fileName: string, gitAttributes?: ReturnType<typeof getGitAttributes>) => | ||
BINARY_FILE_EXTENSIONS.some((ext) => fileName.endsWith(ext)) || | ||
gitAttributes?.some((attr) => attr.binary && minimatch(fileName, attr.pattern, { dot: true })); | ||
|
||
const isDataFile = (fileName: string) => { | ||
return DATA_FILE_EXTENSIONS.some((ext) => fileName.endsWith(ext)); | ||
|
@@ -266,11 +271,12 @@ export async function filterFiles( | |
directory: string, | ||
fileNames: string[], | ||
excludePatterns?: RegExp[], | ||
includePatterns?: RegExp[] | ||
includePatterns?: RegExp[], | ||
gitAttributes?: ReturnType<typeof getGitAttributes> | ||
): Promise<string[]> { | ||
const result: string[] = []; | ||
for (const fileName of fileNames) { | ||
if (isBinaryFile(fileName)) continue; | ||
if (isBinaryFile(fileName, gitAttributes)) continue; | ||
|
||
const includeFile = fileNameMatchesFilterPatterns(fileName, includePatterns, excludePatterns); | ||
if (!includeFile) continue; | ||
|
@@ -298,3 +304,20 @@ export async function filterFiles( | |
} | ||
return result; | ||
} | ||
|
||
// Reads git attribute patterns from a .gitattributes file. | ||
export function getGitAttributes(directory: string) { | ||
const gitAttributesPath = join(directory, '.gitattributes'); | ||
if (!existsSync(gitAttributesPath)) return []; | ||
|
||
const gitAttributesContent = readFileSync(gitAttributesPath, 'utf-8'); | ||
Comment on lines
+311
to
+313
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
(Essentially, `existsSync` returning true doesn't guarantee that the subsequent read will succeed, so you have to handle the read error anyway — you might as well skip the existence check.) |
||
const lines = gitAttributesContent.split('\n').filter(Boolean); | ||
|
||
return lines.map((line) => { | ||
const [pattern, ...attributes] = line.split(/\s+/); | ||
return { | ||
pattern, | ||
binary: attributes.includes('binary'), | ||
}; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If it's only extracting the `binary` attribute like that, why even bother returning the other patterns? Perhaps it should just return a list of binary patterns instead. This would also simplify the upstream code — you could just add those to the excluded patterns. |
||
}); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that this is much simpler than the actual gitattributes(5) format:
This might be fine for our purposes, but the simplification warrants a comment at least — and I think skipping comments is easy enough to just do it. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `{ dot: true }` option was missing from the demonstration this morning. This fixes the issue with dot files appearing with a `**/*` pattern.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like a good fix to have. Is it in any other merged PRs?