From 2e13c7c836d7742dc43fb9fcc03208df676b6c94 Mon Sep 17 00:00:00 2001
From: Kuzivanov Sergey <43182277+SergeyKuz1001@users.noreply.github.com>
Date: Sun, 1 Mar 2020 18:22:45 +0300
Subject: [PATCH 1/4] Create bitonic_sort.cu

---
 Task06/bitonic_sort.cu | 323 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 323 insertions(+)
 create mode 100644 Task06/bitonic_sort.cu

diff --git a/Task06/bitonic_sort.cu b/Task06/bitonic_sort.cu
new file mode 100644
index 0000000..3435167
--- /dev/null
+++ b/Task06/bitonic_sort.cu
@@ -0,0 +1,323 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <cuda_runtime.h>
+
+#define MAX_AMOUNT_GPU_THREADS 1024
+
+/* Largest supported array: every index below is a plain int and a block may be
+ * as large as the next power of two above arr_size, so keep that within int. */
+#define MAX_ARRAY_SIZE (1 << 30)
+
+/* Stop with a readable message as soon as a CUDA runtime call fails. */
+#define CUDA_CHECK(call) \
+    do { \
+        cudaError_t cuda_status_ = (call); \
+        if (cuda_status_ != cudaSuccess) { \
+            fprintf(stderr, "CUDA error at %s:%d: %s\n", \
+                    __FILE__, __LINE__, cudaGetErrorString(cuda_status_)); \
+            exit(EXIT_FAILURE); \
+        } \
+    } while (0)
+
+/*
+ * One compare-exchange of the sorting network, done by logical thread
+ * `thread_index`; shared by the CPU reference and the GPU kernel.
+ *
+ * The array is viewed as consecutive blocks of 2^log2_block_size elements and
+ * each thread owns one pair inside one block. When
+ * second_half_of_block_is_reversed is set, an element of the first half is
+ * paired with its mirror image in the second half; otherwise it is paired with
+ * the element half a block to its right. The smaller value always ends up at
+ * the lower index. Pairs whose right element lies past the end of the array
+ * are skipped, so arr_size does not have to be a power of two.
+ */
+static __host__ __device__ inline void bitonic_compare_exchange(int *arr,
+                                                                 int arr_size,
+                                                                 int log2_block_size,
+                                                                 bool second_half_of_block_is_reversed,
+                                                                 int thread_index) {
+    int block_size = 1 << log2_block_size,
+        log2_amount_threads_in_block = log2_block_size - 1,
+        amount_threads_in_block = block_size >> 1,
+        block_index = thread_index >> log2_amount_threads_in_block,
+        thread_index_in_block = thread_index & (amount_threads_in_block - 1),
+        first_index,
+        second_index;
+    if (second_half_of_block_is_reversed) {
+        first_index = amount_threads_in_block - thread_index_in_block - 1 + block_index * block_size;
+        second_index = first_index + thread_index_in_block * 2 + 1;
+    }
+    else {
+        first_index = thread_index_in_block + block_index * block_size;
+        second_index = first_index + amount_threads_in_block;
+    }
+    /* first_index < second_index always, so one bounds check covers both. */
+    if (second_index < arr_size && arr[first_index] > arr[second_index]) {
+        int helper = arr[first_index];
+        arr[first_index] = arr[second_index];
+        arr[second_index] = helper;
+    }
+}
+
+/*
+ * CPU stand-in for one GPU thread: (threadIdx, blockIdx, blockDim) are turned
+ * into a flat thread index exactly as the kernel does.
+ */
+void bitonic_sort_CPU_compare(int *arr,
+                              int arr_size,
+                              int log2_block_size,
+                              bool second_half_of_block_is_reversed,
+                              int threadIdx,
+                              int blockIdx,
+                              int blockDim) {
+    bitonic_compare_exchange(arr,
+                             arr_size,
+                             log2_block_size,
+                             second_half_of_block_is_reversed,
+                             threadIdx + blockDim * blockIdx);
+}
+
+/*
+ * Sorts a copy of arr[0..arr_size) in ascending order on the CPU by replaying
+ * the GPU launch schedule sequentially.
+ *
+ * Returns a malloc'ed array the caller must free, or NULL if memory could not
+ * be allocated. The input array is left untouched.
+ */
+int *bitonic_sort_CPU(int *arr, int arr_size) {
+    /* Allocate at least one element so that NULL always means failure. */
+    int *local_arr = (int*) malloc(sizeof(int) * (arr_size > 0 ? arr_size : 1));
+    if (local_arr == NULL || arr_size <= 0) {
+        return local_arr;
+    }
+    memcpy(local_arr, arr, sizeof(int) * arr_size);
+
+    int amount_gpu_blocks = (arr_size - 1) / MAX_AMOUNT_GPU_THREADS + 1,
+        total_amount_gpu_threads = (arr_size + 1) >> 1,
+        amount_gpu_threads_in_block = (total_amount_gpu_threads - 1) / amount_gpu_blocks + 1;
+
+    /* Stage s leaves every aligned run of 2^(s+1) elements sorted. */
+    for (int stage = 0; (1 << stage) < arr_size; stage++) {
+        for (int log2_block_size = stage + 1; log2_block_size > 0; log2_block_size--) {
+            bool second_half_of_block_is_reversed = (log2_block_size == stage + 1);
+            for (int blockIdx = 0; blockIdx < amount_gpu_blocks; blockIdx++) {
+                for (int threadIdx = 0; threadIdx < amount_gpu_threads_in_block; threadIdx++) {
+                    bitonic_sort_CPU_compare(
+                        local_arr,
+                        arr_size,
+                        log2_block_size,
+                        second_half_of_block_is_reversed,
+                        threadIdx,
+                        blockIdx,
+                        amount_gpu_threads_in_block
+                    );
+                }
+            }
+        }
+    }
+    return local_arr;
+}
+
+/*
+ * Kernel: every thread performs one compare-exchange of the current pass.
+ *
+ * Launch layout: 1-D grid of 1-D blocks with at least (arr_size + 1) / 2
+ * threads in total; surplus threads fall through the bounds check. No shared
+ * memory is used. Passes must run one after another, which the host gets by
+ * issuing one launch per pass into the same stream.
+ */
+__global__ void bitonic_sort_GPU_compare(int *arr,
+                                         int arr_size,
+                                         int log2_block_size,
+                                         bool second_half_of_block_is_reversed) {
+    bitonic_compare_exchange(arr,
+                             arr_size,
+                             log2_block_size,
+                             second_half_of_block_is_reversed,
+                             threadIdx.x + blockDim.x * blockIdx.x);
+}
+
+/*
+ * Sorts a copy of arr[0..arr_size) in ascending order on the GPU.
+ *
+ * Returns a malloc'ed host array the caller must free, or NULL if host memory
+ * could not be allocated. Any CUDA failure terminates the program through
+ * CUDA_CHECK. The input array is left untouched.
+ */
+int *bitonic_sort_GPU(int *arr, int arr_size) {
+    /* Allocate at least one element so that NULL always means failure. */
+    int *res_arr = (int*) malloc(sizeof(int) * (arr_size > 0 ? arr_size : 1));
+    if (res_arr == NULL || arr_size <= 0) {
+        return res_arr;
+    }
+
+    int *local_arr;
+    CUDA_CHECK(cudaMalloc(&local_arr, sizeof(int) * arr_size));
+    CUDA_CHECK(cudaMemcpy(local_arr, arr, sizeof(int) * arr_size, cudaMemcpyHostToDevice));
+
+    int amount_gpu_blocks = (arr_size - 1) / MAX_AMOUNT_GPU_THREADS + 1,
+        total_amount_gpu_threads = (arr_size + 1) >> 1,
+        amount_gpu_threads_in_block = (total_amount_gpu_threads - 1) / amount_gpu_blocks + 1;
+
+    for (int stage = 0; (1 << stage) < arr_size; stage++) {
+        for (int log2_block_size = stage + 1; log2_block_size > 0; log2_block_size--) {
+            bool second_half_of_block_is_reversed = (log2_block_size == stage + 1);
+            bitonic_sort_GPU_compare<<<amount_gpu_blocks, amount_gpu_threads_in_block>>>(
+                local_arr,
+                arr_size,
+                log2_block_size,
+                second_half_of_block_is_reversed
+            );
+            /* Launches report bad configurations only through this call. */
+            CUDA_CHECK(cudaGetLastError());
+        }
+    }
+
+    /* Blocking copy: waits for all passes and surfaces any execution error. */
+    CUDA_CHECK(cudaMemcpy(res_arr, local_arr, sizeof(int) * arr_size, cudaMemcpyDeviceToHost));
+
+    CUDA_CHECK(cudaFree(local_arr));
+
+    return res_arr;
+}
+
+/*
+ * Reads "<size> <element> <element> ..." from `in` into a freshly malloc'ed
+ * array stored in *arr (the caller frees it) and its length in *arr_size.
+ *
+ * Returns 0 on success; otherwise prints the reason to stderr, leaves *arr
+ * set to NULL and returns the exit code to use (3 bad input, 4 no memory).
+ */
+static int read_array(FILE *in, int **arr, int *arr_size) {
+    *arr = NULL;
+    if (fscanf(in, "%d", arr_size) != 1 || *arr_size < 0 || *arr_size > MAX_ARRAY_SIZE) {
+        fprintf(stderr, "Array size is missing or outside [0, %d].\n", MAX_ARRAY_SIZE);
+        return 3;
+    }
+    /* Allocate at least one element so that NULL always means failure. */
+    int *values = (int*) malloc(sizeof(int) * (*arr_size > 0 ? *arr_size : 1));
+    if (values == NULL) {
+        fprintf(stderr, "Out of memory.\n");
+        return 4;
+    }
+    for (int i = 0; i < *arr_size; i++) {
+        if (fscanf(in, "%d", values + i) != 1) {
+            fprintf(stderr, "Element %d is missing or is not an integer.\n", i);
+            free(values);
+            return 3;
+        }
+    }
+    *arr = values;
+    return 0;
+}
+
+/*
+ * Usage: bitonic_sort [-in FILE] [-out FILE] [-c | -g] [-tt]
+ *
+ *   -in,  --input_file FILE   read "<size> <elements...>" from FILE (default: stdin)
+ *   -out, --output_file FILE  write size and sorted elements to FILE (default: stdout)
+ *   -c,   --on-cpu            sort on the CPU (default)
+ *   -g,   --on-gpu            sort on the GPU
+ *   -tt,  --take-time         print the time spent sorting to stdout
+ *
+ * Exit codes: 0 success, 1 option without its value, 2 unknown option,
+ * 3 input/output failure, 4 out of memory.
+ */
+int main(int argc, char **argv) {
+    bool take_sorting_time = false,
+         processing_unit_declared_as_central = true;
+    const char *input_path = NULL,
+               *output_path = NULL;
+    for (int argi = 1; argi < argc; argi++) {
+        if (strcmp(argv[argi], "-in") == 0 || strcmp(argv[argi], "--input_file") == 0) {
+            if (argi + 1 == argc) {
+                return 1;
+            }
+            input_path = argv[++argi];
+        }
+        else if (strcmp(argv[argi], "-out") == 0 || strcmp(argv[argi], "--output_file") == 0) {
+            if (argi + 1 == argc) {
+                return 1;
+            }
+            output_path = argv[++argi];
+        }
+        else if (strcmp(argv[argi], "-c") == 0 || strcmp(argv[argi], "--on-cpu") == 0) {
+            processing_unit_declared_as_central = true;
+        }
+        else if (strcmp(argv[argi], "-g") == 0 || strcmp(argv[argi], "--on-gpu") == 0) {
+            processing_unit_declared_as_central = false;
+        }
+        else if (strcmp(argv[argi], "-tt") == 0 || strcmp(argv[argi], "--take-time") == 0) {
+            take_sorting_time = true;
+        }
+        else {
+            printf("%s is unknown option.", argv[argi]);
+            return 2;
+        }
+    }
+
+    /* Files are opened only after parsing so that no early return leaks one. */
+    FILE *input_file = stdin;
+    if (input_path != NULL) {
+        input_file = fopen(input_path, "r");
+        if (input_file == NULL) {
+            fprintf(stderr, "Cannot open %s for reading.\n", input_path);
+            return 3;
+        }
+    }
+
+    int arr_size = 0, *arr = NULL;
+    int status = read_array(input_file, &arr, &arr_size);
+    if (input_path != NULL) {
+        fclose(input_file);
+    }
+    if (status != 0) {
+        return status;
+    }
+
+    int *sorted_arr;
+    clock_t start_time = clock();
+    if (processing_unit_declared_as_central) {
+        sorted_arr = bitonic_sort_CPU(arr, arr_size);
+    }
+    else {
+        sorted_arr = bitonic_sort_GPU(arr, arr_size);
+    }
+    clock_t stop_time = clock();
+    free(arr);
+    if (sorted_arr == NULL) {
+        fprintf(stderr, "Out of memory.\n");
+        return 4;
+    }
+
+    FILE *output_file = stdout;
+    if (output_path != NULL) {
+        output_file = fopen(output_path, "w");
+        if (output_file == NULL) {
+            fprintf(stderr, "Cannot open %s for writing.\n", output_path);
+            free(sorted_arr);
+            return 3;
+        }
+    }
+    fprintf(output_file, "%d\n", arr_size);
+    for (int i = 0; i < arr_size; i++) {
+        fprintf(output_file, "%d\n", sorted_arr[i]);
+    }
+    free(sorted_arr);
+    if (output_path != NULL && fclose(output_file) != 0) {
+        fprintf(stderr, "Cannot finish writing %s.\n", output_path);
+        return 3;
+    }
+
+    if (take_sorting_time) {
+        if (output_path == NULL)
+            printf("\n");
+        /* clock() counts in CLOCKS_PER_SEC ticks, not in milliseconds. */
+        long sorting_time_ms = (long) ((double) (stop_time - start_time) * 1000.0 / CLOCKS_PER_SEC);
+        printf("Sorting time = %ld ms.", sorting_time_ms);
+    }
+
+    return 0;
+}

From 65c9409daa46fdd267f19721d2be6bf075ff4acf Mon Sep 17 00:00:00 2001
From: Kuzivanov Sergey <43182277+SergeyKuz1001@users.noreply.github.com>
Date: Sun, 1 Mar 2020 18:24:04 +0300
Subject: [PATCH 2/4] Create main.c

---
 Task06/main.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 Task06/main.c

diff --git a/Task06/main.c b/Task06/main.c
new file mode 100644
index 0000000..22913d7
--- /dev/null
+++ b/Task06/main.c
@@ -0,0 +1,112 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+
+/*
+ * Compares the two result files written by bitonic_sort: both must hold the
+ * same number of elements, be sorted in non-decreasing order and be equal
+ * element by element.
+ *
+ * Returns 0 when they agree; otherwise prints what went wrong and returns -1.
+ * Both files are closed on every path.
+ */
+static int check_answers(const char *cpu_path, const char *gpu_path) {
+    FILE *fout_cpu = fopen(cpu_path, "r"),
+         *fout_gpu = fopen(gpu_path, "r");
+    int arr_size_cpu = 0, arr_size_gpu = 0,
+        pred_elem_arr_cpu = 0, pred_elem_arr_gpu = 0,
+        elem_arr_cpu = 0, elem_arr_gpu = 0,
+        result = -1;
+
+    if (fout_cpu == NULL || fout_gpu == NULL) {
+        printf("Error: Cannot open output files");
+    }
+    else if (fscanf(fout_cpu, "%d", &arr_size_cpu) != 1 || fscanf(fout_gpu, "%d", &arr_size_gpu) != 1) {
+        printf("Error: Cannot read size of arrays");
+    }
+    else if (arr_size_cpu != arr_size_gpu) {
+        printf("Error: Size of arrays are not equal");
+    }
+    else {
+        result = 0;
+        /* Element 0 has no predecessor, so ordering is checked from j = 1 on. */
+        for (int j = 0; j < arr_size_cpu && result == 0; j++) {
+            if (fscanf(fout_cpu, "%d", &elem_arr_cpu) != 1 || fscanf(fout_gpu, "%d", &elem_arr_gpu) != 1) {
+                printf("Error: Cannot read elements with index %d", j);
+                result = -1;
+            }
+            else if (j > 0 && elem_arr_cpu < pred_elem_arr_cpu) {
+                printf("Error: CPU's array is not sorted");
+                result = -1;
+            }
+            else if (j > 0 && elem_arr_gpu < pred_elem_arr_gpu) {
+                printf("Error: GPU's array is not sorted");
+                result = -1;
+            }
+            else if (elem_arr_cpu != elem_arr_gpu) {
+                printf("Error: Elements with index %d in arrays are not equal", j);
+                result = -1;
+            }
+            pred_elem_arr_cpu = elem_arr_cpu;
+            pred_elem_arr_gpu = elem_arr_gpu;
+        }
+    }
+
+    if (fout_cpu != NULL) {
+        fclose(fout_cpu);
+    }
+    if (fout_gpu != NULL) {
+        fclose(fout_gpu);
+    }
+    return result;
+}
+
+/*
+ * Benchmark driver: for a series of array sizes it generates input.txt, sorts
+ * it with bitonic_sort.exe on the CPU and on the GPU, and cross-checks the two
+ * outputs. Returns 0 if every run agrees and -1 at the first failure.
+ */
+int main(int argc, char** argv) {
+    int depth_requests_size = 5;
+    int step_size = 100000000;
+    int amount_requests = 1 << depth_requests_size;
+    int* requests_size = (int*) malloc(sizeof(int) * amount_requests);
+    if (requests_size == NULL) {
+        printf("Error: Out of memory");
+        return -1;
+    }
+    /* Full size first, then the midpoints of ever finer subdivisions:
+     * 1/2; 1/4, 3/4; 1/8, 3/8, 5/8, 7/8; ... of the full size. */
+    requests_size[0] = step_size;
+    for (int i = 0, j = 1, ind = 1; i < depth_requests_size; i++, step_size >>= 1, j <<= 1) {
+        for (int _size = (step_size >> 1), k = 0; k < j; _size += step_size, k++, ind++) {
+            requests_size[ind] = _size;
+        }
+    }
+
+    int result = 0;
+    /* NOTE(review): i starts at -1 and is used through abs(i), so the size at
+     * index 1 is run once more before all the others - presumably a warm-up
+     * run; confirm before changing. */
+    for (int i = -1; i < amount_requests && result == 0; i++) {
+        int request_size = requests_size[abs(i)];
+        printf("\nGenerating array of size %d elements", request_size);
+        char command[100];
+        snprintf(command, sizeof(command), "generate_array.exe %d input.txt", request_size);
+        system(command);
+
+        printf("\nSorting on CPU\n");
+        system("bitonic_sort.exe -in input.txt -out output_cpu.txt -c -tt");
+
+        printf("\nSorting on GPU\n");
+        system("bitonic_sort.exe -in input.txt -out output_gpu.txt -g -tt");
+
+        printf("\nChecking answers\n");
+        result = check_answers("output_cpu.txt", "output_gpu.txt");
+    }
+
+    free(requests_size);
+    return result;
+}

From a339c6a2c93543bade1a2bdc88f1743eb8a59f12 Mon Sep 17 00:00:00 2001
From: Kuzivanov Sergey <43182277+SergeyKuz1001@users.noreply.github.com>
Date: Sun, 1 Mar 2020 18:24:45 +0300
Subject: [PATCH 3/4] Create generate_array.c

---
 Task06/generate_array.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 Task06/generate_array.c

diff --git a/Task06/generate_array.c b/Task06/generate_array.c
new file mode 100644
index 0000000..267895f
--- /dev/null
+++ b/Task06/generate_array.c
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+/* Returns a pseudo-random integer in [-1000, 1000]. Not called random():
+ * that name is already declared in <stdlib.h> on POSIX systems. */
+static int random_element(void) {
+    return (rand() % 2001) - 1000;
+}
+
+/*
+ * Usage: generate_array SIZE FILE
+ *
+ * Writes SIZE followed by SIZE random integers, separated by spaces, to FILE.
+ * Returns 0 on success and 1 on a usage or output error.
+ */
+int main(int argc, char** argv) {
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s SIZE FILE\n", argc > 0 ? argv[0] : "generate_array");
+        return 1;
+    }
+    int arr_size = atoi(argv[1]);
+    if (arr_size < 0) {
+        fprintf(stderr, "SIZE must not be negative.\n");
+        return 1;
+    }
+    FILE *fout = fopen(argv[2], "w");
+    if (fout == NULL) {
+        fprintf(stderr, "Cannot open %s for writing.\n", argv[2]);
+        return 1;
+    }
+    /* clock() is close to zero this early in every run, so seed from time(). */
+    srand((unsigned) time(NULL));
+    fprintf(fout, "%d", arr_size);
+    for (int i = 0; i < arr_size; i++) {
+        fprintf(fout, " %d", random_element());
+    }
+    if (fclose(fout) != 0) {
+        fprintf(stderr, "Cannot finish writing %s.\n", argv[2]);
+        return 1;
+    }
+    return 0;
+}

From 51612865b45eda5d09c92bab3f590afc87634914 Mon Sep 17 00:00:00 2001
From: Kuzivanov Sergey <43182277+SergeyKuz1001@users.noreply.github.com>
Date: Sun, 1 Mar 2020 18:26:17 +0300
Subject: [PATCH 4/4] Create result.txt

---
 Task06/result.txt | 223 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 223 insertions(+)
 create mode 100644 Task06/result.txt

diff --git a/Task06/result.txt b/Task06/result.txt
new file mode 100644
index 0000000..3698c5e
--- /dev/null
+++ b/Task06/result.txt
@@ -0,0 +1,223 @@
+Generating array of size 100000000 elements
+Sorting on CPU
+Sorting time = 44167 ms.
+Sorting on GPU
+Sorting time = 6448 ms.
+Checking answers
+
+Generating array of size 50000000 elements
+Sorting on CPU
+Sorting time = 21100 ms.
+Sorting on GPU
+Sorting time = 3643 ms.
+Checking answers
+
+Generating array of size 25000000 elements
+Sorting on CPU
+Sorting time = 11036 ms.
+Sorting on GPU
+Sorting time = 2277 ms.
+Checking answers
+
+Generating array of size 75000000 elements
+Sorting on CPU
+Sorting time = 36606 ms.
+Sorting on GPU
+Sorting time = 5076 ms.
+Checking answers
+
+Generating array of size 12500000 elements
+Sorting on CPU
+Sorting time = 5141 ms.
+Sorting on GPU
+Sorting time = 1584 ms.
+Checking answers
+
+Generating array of size 37500000 elements
+Sorting on CPU
+Sorting time = 19316 ms.
+Sorting on GPU
+Sorting time = 2953 ms.
+Checking answers
+
+Generating array of size 62500000 elements
+Sorting on CPU
+Sorting time = 28316 ms.
+Sorting on GPU
+Sorting time = 4238 ms.
+Checking answers
+
+Generating array of size 87500000 elements
+Sorting on CPU
+Sorting time = 40159 ms.
+Sorting on GPU
+Sorting time = 5760 ms.
+Checking answers
+
+Generating array of size 6250000 elements
+Sorting on CPU
+Sorting time = 2748 ms.
+Sorting on GPU
+Sorting time = 1275 ms.
+Checking answers
+
+Generating array of size 18750000 elements
+Sorting on CPU
+Sorting time = 8117 ms.
+Sorting on GPU
+Sorting time = 1061 ms.
+Checking answers
+
+Generating array of size 31250000 elements
+Sorting on CPU
+Sorting time = 14055 ms.
+Sorting on GPU
+Sorting time = 2528 ms.
+Checking answers
+
+Generating array of size 43750000 elements
+Sorting on CPU
+Sorting time = 20833 ms.
+Sorting on GPU
+Sorting time = 3325 ms.
+Checking answers
+
+Generating array of size 56250000 elements
+Sorting on CPU
+Sorting time = 25321 ms.
+Sorting on GPU
+Sorting time = 3913 ms.
+Checking answers
+
+Generating array of size 68750000 elements
+Sorting on CPU
+Sorting time = 32614 ms.
+Sorting on GPU
+Sorting time = 4743 ms.
+Checking answers
+
+Generating array of size 81250000 elements
+Sorting on CPU
+Sorting time = 36743 ms.
+Sorting on GPU
+Sorting time = 5444 ms.
+Checking answers
+
+Generating array of size 93750000 elements
+Sorting on CPU
+Sorting time = 43493 ms.
+Sorting on GPU
+Sorting time = 6101 ms.
+Checking answers
+
+Generating array of size 3125000 elements
+Sorting on CPU
+Sorting time = 960 ms.
+Sorting on GPU
+Sorting time = 1137 ms.
+Checking answers
+
+Generating array of size 9375000 elements
+Sorting on CPU
+Sorting time = 4047 ms.
+Sorting on GPU
+Sorting time = 593 ms.
+Checking answers
+
+Generating array of size 15625000 elements
+Sorting on CPU
+Sorting time = 7095 ms.
+Sorting on GPU
+Sorting time = 866 ms.
+Checking answers
+
+Generating array of size 21875000 elements
+Sorting on CPU
+Sorting time = 11228 ms.
+Sorting on GPU
+Sorting time = 1214 ms.
+Checking answers
+
+Generating array of size 28125000 elements
+Sorting on CPU
+Sorting time = 13477 ms.
+Sorting on GPU
+Sorting time = 2391 ms.
+Checking answers
+
+Generating array of size 34375000 elements
+Sorting on CPU
+Sorting time = 16920 ms.
+Sorting on GPU
+Sorting time = 2781 ms.
+Checking answers
+
+Generating array of size 40625000 elements
+Sorting on CPU
+Sorting time = 19470 ms.
+Sorting on GPU
+Sorting time = 3090 ms.
+Checking answers
+
+Generating array of size 46875000 elements
+Sorting on CPU
+Sorting time = 21908 ms.
+Sorting on GPU
+Sorting time = 3410 ms.
+Checking answers
+
+Generating array of size 53125000 elements
+Sorting on CPU
+Sorting time = 24442 ms.
+Sorting on GPU
+Sorting time = 3802 ms.
+Checking answers
+
+Generating array of size 59375000 elements
+Sorting on CPU
+Sorting time = 26749 ms.
+Sorting on GPU
+Sorting time = 4066 ms.
+Checking answers
+
+Generating array of size 65625000 elements
+Sorting on CPU
+Sorting time = 29501 ms.
+Sorting on GPU
+Sorting time = 4404 ms.
+Checking answers
+
+Generating array of size 71875000 elements
+Sorting on CPU
+Sorting time = 33184 ms.
+Sorting on GPU
+Sorting time = 4906 ms.
+Checking answers
+
+Generating array of size 78125000 elements
+Sorting on CPU
+Sorting time = 35587 ms.
+Sorting on GPU
+Sorting time = 5307 ms.
+Checking answers
+
+Generating array of size 84375000 elements
+Sorting on CPU
+Sorting time = 38452 ms.
+Sorting on GPU
+Sorting time = 5601 ms.
+Checking answers
+
+Generating array of size 90625000 elements
+Sorting on CPU
+Sorting time = 40645 ms.
+Sorting on GPU
+Sorting time = 5983 ms.
+Checking answers
+
+Generating array of size 96875000 elements
+Sorting on CPU
+Sorting time = 44268 ms.
+Sorting on GPU
+Sorting time = 6284 ms.
+Checking answers