bsc 3.3.0

IlyaGrebnov · Feb 12, 2023 · 1259681 · 1259681
1 parent 044c48c
commit 1259681
Show file tree

Hide file tree

Showing 20 changed files with 2,978 additions and 116 deletions.
diff --git a/CHANGES b/CHANGES
@@ -1,3 +1,7 @@
+Changes in 3.3.0 (February, 10 2023)
+- Improved GPU acceleration performance of forward ST algorithm.
+- Implemented GPU acceleration of forward Burrows–Wheeler transform.
+
 Changes in 3.2.5 (November, 23 2022)
 - Fixed data corruption issue in LZP encoder.
 - Due to this fix, an upgrade to this version is strongly recommended.

diff --git a/README b/README
@@ -10,7 +10,7 @@ block-sorting data compression algorithms.
 libbsc is a library based on bsc, it uses the same algorithms
 as bsc and enables you to compress memory blocks.
 
-Copyright (c) 2009-2022 Ilya Grebnov <[email protected]>
+Copyright (c) 2009-2023 Ilya Grebnov <[email protected]>
 
 See file AUTHORS for a full list of contributors.
 
@@ -21,7 +21,7 @@ See the bsc and libbsc web site:
 Software License:
 -----------------
 
-Copyright (c) 2009-2022 Ilya Grebnov <[email protected]>
+Copyright (c) 2009-2023 Ilya Grebnov <[email protected]>
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -54,14 +54,14 @@ Compression and decompression requirements are the same and in bytes, can
 be estimated as 16Mb + 5 x block size x number of blocks processed in parallel.
 
 GPU memory usage for NVIDIA CUDA technology is different from CPU memory usage
-and can be estimated as 20 x block size.
+and can be estimated as 20 x block size for ST and 32 x block size for BWT.
 
 
 NVIDIA GPU acceleration:
 ------------------------
 
 1. libbsc uses NVIDIA CUDA technology, resulting in a performance boost on computers
-with NVIDIA GPU of compute capability 3.5 or higher. Lists of supported GPUs
+with NVIDIA GPU of compute capability 5.0 or higher. Lists of supported GPUs
 can be found on the NVIDIA website http://developer.nvidia.com/cuda-gpus.
 You also need to install latest graphics drivers that support CUDA.
 

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-3.2.5
+3.3.0
diff --git a/bsc.cpp b/bsc.cpp
@@ -851,8 +851,8 @@ void ProcessCommandline(int argc, char * argv[])
 
 int main(int argc, char * argv[])
 {
-    fprintf(stdout, "This is bsc, Block Sorting Compressor. Version 3.2.5. 23 November 2022.\n");
-    fprintf(stdout, "Copyright (c) 2009-2022 Ilya Grebnov <[email protected]>.\n\n");
+    fprintf(stdout, "This is bsc, Block Sorting Compressor. Version 3.3.0. 10 February 2023.\n");
+    fprintf(stdout, "Copyright (c) 2009-2023 Ilya Grebnov <[email protected]>.\n\n");
 
 #if defined(_OPENMP) && defined(__INTEL_COMPILER)
 

diff --git a/libbsc/bwt/bwt.cpp b/libbsc/bwt/bwt.cpp
@@ -38,14 +38,149 @@ See also the bsc and libbsc web site:
 #include "../platform/platform.h"
 #include "../libbsc.h"
 
+#include "libcubwt/libcubwt.cuh"
 #include "libsais/libsais.h"
 
+#if defined(LIBBSC_CUDA_SUPPORT) && defined(LIBBSC_OPENMP)
+
+omp_lock_t bwt_cuda_lock;  // taken around every use of the two variables below in bsc_bwt_gpu_encode
+void *     bwt_cuda_device_storage = NULL;  // libcubwt device storage kept between calls
+int        bwt_cuda_device_storage_size = 0;  // size last requested successfully for that storage; 0 when none
+
+int bsc_bwt_init(int features)  // CUDA + OpenMP build: prepares the lock used by the GPU path
+{
+    if (features & LIBBSC_FEATURE_CUDA)  // the lock is only initialized when the CUDA feature bit is set
+    {
+        omp_init_lock(&bwt_cuda_lock);
+    }
+
+    return LIBBSC_NO_ERROR;  // this variant has no failure path
+}
+
+#else
+
+int bsc_bwt_init(int features)  // build without CUDA + OpenMP: nothing to set up, features is unused
+{
+    return LIBBSC_NO_ERROR;
+}
+
+#endif
+
+int bsc_bwt_gpu_encode(unsigned char * T, int n, unsigned char * num_indexes, int * indexes, int features)  // GPU attempt at the transform of T[0..n) via libcubwt; T is passed as both input and output
+{
+    int index = -1;  // returned unchanged when no libcubwt transform call is made; bsc_bwt_encode accepts only index >= 0
+
+    if (features & LIBBSC_FEATURE_CUDA)  // GPU path must be requested at run time...
+    {
+#ifdef LIBBSC_CUDA_SUPPORT  // ...and compiled in; otherwise the function just returns -1
+        if (num_indexes != NULL && indexes != NULL)  // both index outputs supplied: use the libcubwt_bwt_aux variant
+        {
+            int I[256];  // scratch array handed to libcubwt_bwt_aux
+
+            int mod = n / 8;  // for non-negative n, the block below leaves mod + 1 as the largest power of two <= n / 8 (1 when n < 16)
+            {
+                mod |= mod >> 1;  mod |= mod >> 2;  // smear the highest set bit into every lower bit
+                mod |= mod >> 4;  mod |= mod >> 8;
+                mod |= mod >> 16; mod >>= 1;  // then halve, giving 2^k - 1
+            }
+
+#ifdef LIBBSC_OPENMP
+            omp_set_lock(&bwt_cuda_lock);  // one thread at a time may touch the shared device storage
+
+            if (bwt_cuda_device_storage_size < n)  // cached storage is missing or too small for this block
+            {
+                if (bwt_cuda_device_storage != NULL)
+                {
+                    libcubwt_free_device_storage(bwt_cuda_device_storage);  // release the old storage before requesting a larger one
+
+                    bwt_cuda_device_storage = NULL;
+                    bwt_cuda_device_storage_size = 0;
+                }
+
+                if (libcubwt_allocate_device_storage(&bwt_cuda_device_storage, n + (n / 32)) == LIBCUBWT_NO_ERROR)  // requests n + n/32 rather than exactly n
+                {
+                    bwt_cuda_device_storage_size = n + (n / 32);  // recorded only on success
+                }
+            }
+
+            if (bwt_cuda_device_storage_size >= n)  // for n > 0 this is false when the allocation above failed and left the size at 0
+            {
+                index = (int)libcubwt_bwt_aux(bwt_cuda_device_storage, T, T, n, mod + 1, (unsigned int *)I);
+            } 
+
+            omp_unset_lock(&bwt_cuda_lock);
+#else
+            void * bwt_cuda_device_storage = NULL;  // no OpenMP: storage is allocated and freed on every call
+
+            if (libcubwt_allocate_device_storage(&bwt_cuda_device_storage, n) == LIBCUBWT_NO_ERROR)  // NOTE(review): requests n here but n + n/32 in the OpenMP path - confirm this is intended
+            {
+                index = (int)libcubwt_bwt_aux(bwt_cuda_device_storage, T, T, n, mod + 1, (unsigned int *)I);
+
+                libcubwt_free_device_storage(bwt_cuda_device_storage);
+            }
+#endif
+
+            if (index == 0)  // presumably 0 is libcubwt_bwt_aux's success code (LIBCUBWT_NO_ERROR) - TODO confirm; any other value is returned as-is
+            {
+                num_indexes[0] = (unsigned char)((n - 1) / (mod + 1));  // count of entries written to indexes[] below
+                index = I[0]; for (int t = 0; t < num_indexes[0]; ++t) indexes[t] = I[t + 1] - 1;  // I[0] becomes the return value; I[1..] are copied out minus 1
+            }
+        }
+        else  // index outputs not both supplied: use plain libcubwt_bwt
+        {
+#ifdef LIBBSC_OPENMP
+            omp_set_lock(&bwt_cuda_lock);  // same shared-storage handling as in the branch above
+
+            if (bwt_cuda_device_storage_size < n)
+            {
+                if (bwt_cuda_device_storage != NULL)
+                {
+                    libcubwt_free_device_storage(bwt_cuda_device_storage);
+
+                    bwt_cuda_device_storage = NULL;
+                    bwt_cuda_device_storage_size = 0;
+                }
+
+                if (libcubwt_allocate_device_storage(&bwt_cuda_device_storage, n + (n / 32)) == LIBCUBWT_NO_ERROR)
+                {
+                    bwt_cuda_device_storage_size = n + (n / 32);
+                }
+            }
+
+            if (bwt_cuda_device_storage_size >= n)
+            {
+                index = (int)libcubwt_bwt(bwt_cuda_device_storage, T, T, n);  // the libcubwt_bwt result is returned directly
+            } 
+
+            omp_unset_lock(&bwt_cuda_lock);
+#else
+            void * bwt_cuda_device_storage = NULL;  // no OpenMP: per-call storage
+
+            if (libcubwt_allocate_device_storage(&bwt_cuda_device_storage, n) == LIBCUBWT_NO_ERROR)
+            {
+                index = (int)libcubwt_bwt(bwt_cuda_device_storage, T, T, n);  // the libcubwt_bwt result is returned directly
+
+                libcubwt_free_device_storage(bwt_cuda_device_storage);
+            }
+#endif
+        }
+#endif
+    }
+
+    return index;  // bsc_bwt_encode continues with its CPU path when this is negative
+}
+
+
 int bsc_bwt_encode(unsigned char * T, int n, unsigned char * num_indexes, int * indexes, int features)
 {
-    if (int * RESTRICT A = (int *)bsc_malloc(n * sizeof(int)))
+    int index = bsc_bwt_gpu_encode(T, n, num_indexes, indexes, features);
+    if (index >= 0)
     {
-        int index;
+        return index;
+    }
 
+    if (int * RESTRICT A = (int *)bsc_malloc(n * sizeof(int)))
+    {
         if (num_indexes != NULL && indexes != NULL)
         {
             int I[256];

diff --git a/libbsc/bwt/bwt.h b/libbsc/bwt/bwt.h
@@ -37,6 +37,13 @@ See also the bsc and libbsc web site:
 extern "C" {
 #endif
 
+    /**
+    * You should call this function before you call any of the other functions in bwt.
+    * @param features       - the set of additional features.
+    * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise.
+    */
+    int bsc_bwt_init(int features);
+
     /**
     * Constructs the burrows wheeler transformed string of a given string.
     * @param T              - the input/output string of n chars.

diff --git a/libbsc/bwt/libcubwt/AUTHORS b/libbsc/bwt/libcubwt/AUTHORS
@@ -0,0 +1,10 @@
+-- Authors of libcubwt
+
+  Ilya Grebnov <[email protected]>
+
+-- This program is based on (at least) the work of
+
+  Leyuan Wang, Sean Baxter, John D. Owens, Yury Shukhrov,
+  Rory Mitchell, Jacopo Pantaleoni, Duane Merrill,
+  Georgy Evtushenko, Allison Vacanti, Robert Crovella,
+  Mark Harris.
diff --git a/libbsc/bwt/libcubwt/CHANGES b/libbsc/bwt/libcubwt/CHANGES
@@ -0,0 +1,3 @@
+Changes in 1.0.0 (February 10, 2023)
+- Initial public release of the libcubwt.
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		Changes in 1.0.0 (February 10, 2023)
		- Initial public release of the libcubwt.