-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Check sum calculator #4
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
| ||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "test1", "test1\test1.csproj", "{47A8A911-5B5E-448F-933A-0FAF7D3211B3}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Release|Any CPU = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{47A8A911-5B5E-448F-933A-0FAF7D3211B3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{47A8A911-5B5E-448F-933A-0FAF7D3211B3}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{47A8A911-5B5E-448F-933A-0FAF7D3211B3}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{47A8A911-5B5E-448F-933A-0FAF7D3211B3}.Release|Any CPU.Build.0 = Release|Any CPU | ||
EndGlobalSection | ||
EndGlobal |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
namespace test1; | ||
|
||
using System.Security.Cryptography; | ||
|
||
/// <summary>
/// Calculates the MD5 check sum of a file or of a whole directory tree.
/// </summary>
/// <remarks>
/// The check sum of a file is the MD5 hash of its contents. The check sum of a directory is the MD5 hash of
/// the directory name (UTF-8 encoded) followed by the check sums of its entries, taken in ordinal order of
/// their paths. The serial and the concurrent entry points always produce the same result for the same tree.
/// MD5 is used here purely as a change-detection check sum, not as a security primitive.
/// </remarks>
public static class CheckSumCalculator
{
    /// <summary>
    /// Calculates the check sum on the calling thread only.
    /// </summary>
    /// <param name="path">Path of a file or a directory.</param>
    /// <returns>The 16-byte MD5 check sum.</returns>
    /// <exception cref="FileNotFoundException">Thrown if the path points to neither a file nor a directory.</exception>
    public static byte[] CalculateCheckSumSerially(string path)
        => CalculateCheckSum(path, CalculateDirectoryHashSerially);

    /// <summary>
    /// Calculates the check sum, hashing the entries of every directory in parallel.
    /// </summary>
    /// <param name="path">Path of a file or a directory.</param>
    /// <returns>The 16-byte MD5 check sum; identical to the result of <see cref="CalculateCheckSumSerially"/>.</returns>
    /// <exception cref="FileNotFoundException">Thrown if the path points to neither a file nor a directory.</exception>
    /// <exception cref="AggregateException">Wraps any I/O failure raised while entries are hashed in parallel.</exception>
    public static byte[] CalculateCheckSumConcurrently(string path)
        => CalculateCheckSum(path, CalculateDirectoryHashConcurrently);

    /// <summary>
    /// Dispatches to the file hasher or to the supplied directory hasher depending on what the path points to.
    /// </summary>
    /// <param name="path">Path of a file or a directory.</param>
    /// <param name="directoryHasher">Strategy used when the path is a directory.</param>
    /// <returns>The check sum of the file or directory.</returns>
    private static byte[] CalculateCheckSum(string path, Func<string, byte[]> directoryHasher)
    {
        if (File.Exists(path))
        {
            return CalculateFileCheckSum(path);
        }

        if (Directory.Exists(path))
        {
            return directoryHasher(path);
        }

        throw new FileNotFoundException("Invalid path.", path);
    }

    /// <summary>
    /// Calculates the check sum of a single file.
    /// </summary>
    /// <param name="path">Path of the file.</param>
    /// <returns>MD5 hash of the file contents.</returns>
    private static byte[] CalculateFileCheckSum(string path)
    {
        using var file = File.OpenRead(path);
        using var md5 = MD5.Create();

        return md5.ComputeHash(file);
    }

    /// <summary>
    /// Lists the immediate files and subdirectories of a directory in a machine-independent order.
    /// </summary>
    /// <param name="path">Path of the directory.</param>
    /// <returns>Paths of all entries, sorted ordinally.</returns>
    private static string[] GetSortedItems(string path)
    {
        var items = Directory.GetFileSystemEntries(path);

        // Ordinal comparison keeps the order (and therefore the check sum) independent of the current culture.
        Array.Sort(items, StringComparer.Ordinal);

        return items;
    }

    /// <summary>
    /// Calculates the check sum of one directory entry, using the given strategy for subdirectories.
    /// </summary>
    /// <param name="item">Path of a file or subdirectory.</param>
    /// <param name="directoryHasher">Strategy used when the entry is a directory.</param>
    /// <returns>The check sum of the entry.</returns>
    private static byte[] CalculateItemHash(string item, Func<string, byte[]> directoryHasher)
        => File.Exists(item) ? CalculateFileCheckSum(item) : directoryHasher(item);

    /// <summary>
    /// Builds the check sum of a directory from its name and the check sums of its entries.
    /// </summary>
    /// <param name="path">Path of the directory.</param>
    /// <param name="itemHashes">Check sums of the entries, in the order returned by <see cref="GetSortedItems"/>.</param>
    /// <returns>MD5 hash of the directory name followed by all entry check sums.</returns>
    private static byte[] CombineDirectoryHash(string path, IReadOnlyList<byte[]> itemHashes)
    {
        // The directory name takes part in the hash, so equally filled directories with different names differ.
        var nameBytes = System.Text.Encoding.UTF8.GetBytes(new DirectoryInfo(path).Name);

        var buffer = new List<byte>(nameBytes.Length + (itemHashes.Count * 16));
        buffer.AddRange(nameBytes);
        foreach (var itemHash in itemHashes)
        {
            buffer.AddRange(itemHash);
        }

        using var md5 = MD5.Create();

        return md5.ComputeHash(buffer.ToArray());
    }

    /// <summary>
    /// Calculates the check sum of a directory tree on the calling thread.
    /// </summary>
    /// <param name="path">Path of the directory.</param>
    /// <returns>The check sum of the directory.</returns>
    private static byte[] CalculateDirectoryHashSerially(string path)
    {
        var items = GetSortedItems(path);
        var hashes = new byte[items.Length][];

        for (var i = 0; i < items.Length; i++)
        {
            hashes[i] = CalculateItemHash(items[i], CalculateDirectoryHashSerially);
        }

        return CombineDirectoryHash(path, hashes);
    }

    /// <summary>
    /// Calculates the check sum of a directory tree, hashing the entries of each directory in parallel.
    /// </summary>
    /// <param name="path">Path of the directory.</param>
    /// <returns>The check sum of the directory.</returns>
    private static byte[] CalculateDirectoryHashConcurrently(string path)
    {
        var items = GetSortedItems(path);
        var hashes = new byte[items.Length][];

        // Every iteration writes only its own slot, so no locking is needed and the sorted order is kept.
        // Scheduling is left to the library; Parallel.For also waits for completion and rethrows failures,
        // so there is no manual signalling that could deadlock.
        Parallel.For(0, items.Length, index =>
        {
            hashes[index] = CalculateItemHash(items[index], CalculateDirectoryHashConcurrently);
        });

        return CombineDirectoryHash(path, hashes);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
namespace test1; | ||
|
||
/// <summary>
/// Statistical helpers for comparing series of measurements.
/// </summary>
public static class Comparison
{
    /// <summary>
    /// Computes the population standard deviation of the measured calculation times.
    /// </summary>
    /// <param name="calculationTime">Measured times, one element per run.</param>
    /// <returns>The standard deviation of the given data, rounded to one decimal place.</returns>
    public static double CalculateDeviation(long[] calculationTime)
    {
        var mean = calculationTime.Average();
        var sampleCount = calculationTime.Length;
        var accumulated = 0.0;

        foreach (var sample in calculationTime)
        {
            // Each squared distance is scaled by the sample count before it is added up.
            var distance = sample - mean;
            accumulated += Math.Pow(distance, 2) / sampleCount;
        }

        var deviation = Math.Sqrt(accumulated);

        return Math.Round(deviation, 1);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
using System.Diagnostics; | ||
using test1; | ||
|
||
// Benchmark driver: times the serial and the concurrent check sum calculation for the path given as the
// single command-line argument and prints, for each mode, the mean time in milliseconds followed by the
// standard deviation.
if (args.Length != 1)
{
    // Report the misuse instead of exiting silently with a success code.
    Console.Error.WriteLine("Usage: test1 <path to file or directory>");
    Environment.ExitCode = 1;
    return;
}

const int runs = 10;

Report(Measure(() => CheckSumCalculator.CalculateCheckSumSerially(args[0]), runs));
Report(Measure(() => CheckSumCalculator.CalculateCheckSumConcurrently(args[0]), runs));

// Runs the action the requested number of times and returns the elapsed milliseconds of every run.
static long[] Measure(Action action, int repetitions)
{
    var timings = new long[repetitions];
    for (var i = 0; i < repetitions; i++)
    {
        var stopwatch = Stopwatch.StartNew();
        action();
        stopwatch.Stop();
        timings[i] = stopwatch.ElapsedMilliseconds;
    }

    return timings;
}

// Prints the mean and then the standard deviation of the timings, one value per line.
static void Report(long[] timings)
{
    Console.WriteLine(timings.Average());
    Console.WriteLine(Comparison.CalculateDeviation(timings));
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>net6.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
</Project> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
В .NET пространства имён называются в PascalCase