Skip to content

Commit

Permalink
[REF] Update the BOM check script to check for BOM UTF16 also and js …
Browse files Browse the repository at this point in the history
…files
  • Loading branch information
softstartcode committed Jan 29, 2023
1 parent bf03cc2 commit 68f5c40
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 32 deletions.
34 changes: 23 additions & 11 deletions doc/devtools/check_bom_encoding.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@
require dirname(__FILE__) . '/svntools.php';
require dirname(__DIR__) . '/../lib/core/BOMChecker/Scanner.php';

$dir = __DIR__ . '/../../';
$dir = realpath(__DIR__ . '/../../') ;

$excludeFolders = [
$dir . 'vendor',
$dir . 'vendor_bundled',
$dir . 'temp'
$dir . '/vendor',
$dir . '/vendor_bundled',
$dir . '/temp',
$dir . '/.git',
];

$extensions = [
Expand All @@ -45,14 +46,25 @@
$BOMScanner = new BOMChecker_Scanner($dir, $extensions, $excludeFolders, $listFiles);
$BOMFiles = $BOMScanner->scan();
$totalFilesScanned = $BOMScanner->getScannedFiles();
$listBOMFiles = $BOMScanner->getBomFiles();

if (! empty($listBOMFiles)) {
echo color('Found ' . $totalFilesScanned . ' files with BOM encoding:', 'yellow') . PHP_EOL;
foreach ($listBOMFiles as $files) {
info($files);
echo PHP_EOL;
info($totalFilesScanned . ' files scanned...');

if ($BOMScanner->bomFilesFound()) {
foreach ($BOMScanner->getBomFilesByType() as $type => $listBOMFiles) {
if (! count($listBOMFiles)) {
continue;
}
echo PHP_EOL;
echo color('=> Found ' . $type . ' in ' . count($listBOMFiles) . ' files:', 'red') . PHP_EOL . PHP_EOL;
foreach ($listBOMFiles as $files) {
echo color($files, 'red') . PHP_EOL;
}
}
exit(1);
} else {
important('Files without BOM encoding');
echo PHP_EOL;
important('No problem found in the files.');
}

echo PHP_EOL;
exit($BOMScanner->bomFilesFound() ? 1 : 0);
96 changes: 75 additions & 21 deletions lib/core/BOMChecker/Scanner.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

class BOMChecker_Scanner
{
public const BOM_UTF8 = 'BOM-UTF8';
public const BOM_UTF16 = 'BOM-UTF16';

// Tiki source folder
protected $sourceDir = __DIR__ . '/../../../';

Expand All @@ -16,14 +19,18 @@ class BOMChecker_Scanner

protected $scanExtensions = [
'php',
'tpl'
'tpl',
'js',
];

// The number of files scanned.
protected $scannedFiles = 0;

// The list of files detected with BOM
protected $bomFiles = [];
protected $bomFiles = [
self::BOM_UTF8 => [],
self::BOM_UTF16 => [],
];

// The list of files detected without BOM
protected $withoutBomFiles = [];
Expand All @@ -38,6 +45,8 @@ public function __construct($scanDir = null, $scanExtensions = [], $excludeDir =
$this->sourceDir = $scanDir;
}

$this->sourceDir = realpath($this->sourceDir);

if (is_array($scanExtensions) && count($scanExtensions)) {
$this->scanExtensions = $scanExtensions;
}
Expand All @@ -64,7 +73,7 @@ public function scan()
$this->checkDir($this->sourceDir);
}

return $this->bomFiles;
return $this->getBomFiles();
}

/**
Expand All @@ -85,8 +94,8 @@ protected function checkDir($sourceDir)
$sourceDirHandler = opendir($sourceDir);

while ($file = readdir($sourceDirHandler)) {
// Skip ".", ".." and hidden fields (Unix).
if (substr($file, 0, 1) == '.') {
// Skip the "." and ".." directory entries.
if ($file == '.' || $file == '..') {
continue;
}

Expand All @@ -99,17 +108,16 @@ protected function checkDir($sourceDir)
if (
! is_file($sourcefilePath)
|| ! in_array($this->getFileExtension($sourcefilePath), $this->scanExtensions)
|| ! $this->checkUtf8Bom($sourcefilePath)
) {
if (
in_array($this->getFileExtension($sourcefilePath), $this->scanExtensions)
&& ! $this->checkUtf8Bom($sourcefilePath)
) {
$this->withoutBomFiles[] = $sourcefilePath;
}
continue;
}
$this->bomFiles[] = str_replace($this->sourceDir, '', $sourcefilePath);

if (! $type = $this->checkUtfBom($sourcefilePath)) {
$this->withoutBomFiles[] = $sourcefilePath;
continue;
}

$this->bomFiles[$type][] = str_replace($this->sourceDir . '/', '', $sourcefilePath);
}
}

Expand All @@ -127,10 +135,10 @@ protected function checkListFiles($listFiles)

foreach ($listFiles as $file) {
if (in_array($this->getFileExtension($file), $this->scanExtensions)) {
if (! $this->checkUtf8Bom($file)) {
if (! $type = $this->checkUtfBom($file)) {
$this->withoutBomFiles[] = $file;
} else {
$this->bomFiles[] = $file;
$this->bomFiles[$type][] = $file;
}
}
}
Expand Down Expand Up @@ -166,20 +174,31 @@ protected function getFileExtension($filePath)
}

/**
 * Detect whether a file starts with a UTF-8 or UTF-16 byte order mark (BOM).
 *
 * Every file that could be opened increments the scanned-files counter.
 *
 * @param string $filePath Path of the file to inspect
 * @return bool|string false if no BOM is found (or the file cannot be read),
 *                     otherwise self::BOM_UTF8 or self::BOM_UTF16
 */
protected function checkUtfBom($filePath)
{
    $file = fopen($filePath, 'rb');
    if ($file === false) {
        // Unreadable file: there is nothing to inspect, so report "no BOM".
        return false;
    }

    // fread() rather than fgets(): fgets($file, 3) returns at most 2 bytes
    // (length - 1) and stops at a newline, so the 3-byte UTF-8 BOM could
    // never be matched.
    $data = fread($file, 3);
    fclose($file);

    $this->scannedFiles++;

    if ($data === false) {
        return false;
    }

    if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
        return self::BOM_UTF8;
    }

    $firstTwoBytes = substr($data, 0, 2);
    if (
        $firstTwoBytes === "\xFE\xFF"    // UTF-16 big-endian BOM
        || $firstTwoBytes === "\xFF\xFE" // UTF-16 little-endian BOM
    ) {
        return self::BOM_UTF16;
    }

    return false;
}

/**
Expand All @@ -199,7 +218,26 @@ public function getScannedFiles()
*/
public function getBomFiles()
{
    // $this->bomFiles is grouped by BOM type; callers of this method expect a
    // single flat list, so the per-type lists are concatenated here.
    // array_values() drops the string type keys before unpacking, and the
    // leading [] keeps the call valid even if there are no groups at all.
    return array_merge([], ...array_values($this->bomFiles));
}

/**
 * Get the files detected with a BOM, grouped by BOM type.
 *
 * @param string|null $type One of the BOM type keys (e.g. self::BOM_UTF8),
 *                          or an empty value to get every group
 * @return array When $type is empty: the whole map of BOM type => list of files.
 *               Otherwise: the list of files for that type, or an empty array
 *               when the type is unknown.
 */
public function getBomFilesByType($type = null)
{
    if ($type) {
        // A specific type was requested; unknown types yield an empty list.
        return $this->bomFiles[$type] ?? [];
    }

    return $this->bomFiles;
}

/**
Expand All @@ -211,4 +249,20 @@ public function getWithoutBomFiles()
{
return $this->withoutBomFiles;
}

/**
 * Returns true if at least one file with a BOM was found, whatever its type.
 *
 * @return bool
 */
public function bomFilesFound()
{
    // array_filter() without a callback drops the empty per-type lists, so
    // anything left over means some type has at least one file recorded.
    $nonEmptyGroups = array_filter($this->bomFiles);

    return count($nonEmptyGroups) > 0;
}
}

0 comments on commit 68f5c40

Please sign in to comment.