Skip to content

Commit

Permalink
bsc 3.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyaGrebnov committed Feb 12, 2023
1 parent 044c48c commit 1259681
Show file tree
Hide file tree
Showing 20 changed files with 2,978 additions and 116 deletions.
4 changes: 4 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
Changes in 3.3.0 (February, 10 2023)
- Improved GPU acceleration performance of forward ST algorithm.
- Implemented GPU acceleration of forward Burrows-Wheeler transform.

Changes in 3.2.5 (November, 23 2022)
- Fixed data corruption issue in LZP encoder.
- Due to this fix, an upgrade to this version is strongly recommended.
Expand Down
8 changes: 4 additions & 4 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ block-sorting data compression algorithms.
libbsc is a library based on bsc, it uses the same algorithms
as bsc and enables you to compress memory blocks.

Copyright (c) 2009-2022 Ilya Grebnov <[email protected]>
Copyright (c) 2009-2023 Ilya Grebnov <[email protected]>

See file AUTHORS for a full list of contributors.

Expand All @@ -21,7 +21,7 @@ See the bsc and libbsc web site:
Software License:
-----------------

Copyright (c) 2009-2022 Ilya Grebnov <[email protected]>
Copyright (c) 2009-2023 Ilya Grebnov <[email protected]>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -54,14 +54,14 @@ Compression and decompression requirements are the same and in bytes, can
be estimated as 16Mb + 5 x block size x number of blocks processed in parallel.

GPU memory usage for NVIDIA CUDA technology is different from CPU memory usage
and can be estimated as 20 x block size.
and can be estimated as 20 x block size for ST and 32 x block size for BWT.


NVIDIA GPU acceleration:
------------------------

1. libbsc uses NVIDIA CUDA technology, resulting in a performance boost on computers
with NVIDIA GPU of compute capability 3.5 or higher. Lists of supported GPUs
with NVIDIA GPU of compute capability 5.0 or higher. Lists of supported GPUs
can be found on the NVIDIA website http://developer.nvidia.com/cuda-gpus.
You also need to install the latest graphics drivers that support CUDA.

Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.2.5
3.3.0
4 changes: 2 additions & 2 deletions bsc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -851,8 +851,8 @@ void ProcessCommandline(int argc, char * argv[])

int main(int argc, char * argv[])
{
fprintf(stdout, "This is bsc, Block Sorting Compressor. Version 3.2.5. 23 November 2022.\n");
fprintf(stdout, "Copyright (c) 2009-2022 Ilya Grebnov <[email protected]>.\n\n");
fprintf(stdout, "This is bsc, Block Sorting Compressor. Version 3.3.0. 10 February 2023.\n");
fprintf(stdout, "Copyright (c) 2009-2023 Ilya Grebnov <[email protected]>.\n\n");

#if defined(_OPENMP) && defined(__INTEL_COMPILER)

Expand Down
139 changes: 137 additions & 2 deletions libbsc/bwt/bwt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,149 @@ See also the bsc and libbsc web site:
#include "../platform/platform.h"
#include "../libbsc.h"

#include "libcubwt/libcubwt.cuh"
#include "libsais/libsais.h"

#if defined(LIBBSC_CUDA_SUPPORT) && defined(LIBBSC_OPENMP)

/* State shared by all threads that use the GPU code path in this file. */
omp_lock_t bwt_cuda_lock;                  /* taken around every use of the cached device storage */
void * bwt_cuda_device_storage = NULL;     /* cached libcubwt device storage, NULL until allocated */
int bwt_cuda_device_storage_size = 0;      /* size recorded for the cached storage, 0 when there is none */

#endif

/**
* Initializes the BWT module.
* In builds with both CUDA and OpenMP support the lock guarding the cached
* device storage is initialized when LIBBSC_FEATURE_CUDA is requested;
* in every other build configuration there is nothing to set up.
* @param features - the set of additional features.
* @return LIBBSC_NO_ERROR in all cases.
*/
int bsc_bwt_init(int features)
{
#if defined(LIBBSC_CUDA_SUPPORT) && defined(LIBBSC_OPENMP)
    const int cuda_requested = (features & LIBBSC_FEATURE_CUDA) != 0;

    if (cuda_requested)
    {
        omp_init_lock(&bwt_cuda_lock);
    }
#else
    (void)features;
#endif

    return LIBBSC_NO_ERROR;
}

#if defined(LIBBSC_CUDA_SUPPORT) && defined(LIBBSC_OPENMP)

/**
* Makes sure the cached CUDA device storage can serve a block of n bytes.
* If the recorded size is smaller than n, any existing storage is released and
* a new one is requested for n + n / 32 bytes; the recorded size is only
* updated when that allocation succeeds.
* The caller must hold bwt_cuda_lock.
* @param n - the length of the block about to be transformed.
* @return non-zero if the cached storage can be used for n bytes, zero otherwise.
*/
static int bsc_bwt_cuda_reserve_device_storage(int n)
{
    if (bwt_cuda_device_storage_size < n)
    {
        if (bwt_cuda_device_storage != NULL)
        {
            libcubwt_free_device_storage(bwt_cuda_device_storage);

            bwt_cuda_device_storage = NULL;
            bwt_cuda_device_storage_size = 0;
        }

        /* Over-allocate slightly so that somewhat larger blocks can reuse the storage. */
        const int capacity = n + (n / 32);

        if (libcubwt_allocate_device_storage(&bwt_cuda_device_storage, capacity) == LIBCUBWT_NO_ERROR)
        {
            bwt_cuda_device_storage_size = capacity;
        }
    }

    return bwt_cuda_device_storage_size >= n;
}

#endif

/**
* Tries to compute the Burrows-Wheeler transform of T in place on the GPU using libcubwt.
* Nothing is attempted unless LIBBSC_FEATURE_CUDA is set in features and the
* library was built with LIBBSC_CUDA_SUPPORT.
* @param T              - the input/output string of n chars.
* @param n              - the length of the given string.
* @param num_indexes    - the output length of secondary indexes array, can be NULL.
* @param indexes        - the output secondary indexes array, can be NULL.
* @param features       - the set of additional features.
* @return the primary index reported by libcubwt on success; -1 if the GPU path
*         was not taken or device storage could not be obtained; otherwise the
*         value returned by libcubwt (presumably a negative error code - the
*         caller treats any negative result as "fall back to the CPU").
*/
int bsc_bwt_gpu_encode(unsigned char * T, int n, unsigned char * num_indexes, int * indexes, int features)
{
    int index = -1;

    if (features & LIBBSC_FEATURE_CUDA)
    {
#ifdef LIBBSC_CUDA_SUPPORT
        if (num_indexes != NULL && indexes != NULL)
        {
            int I[256];

            /* Smear the highest set bit of n / 8 downwards and halve the result, */
            /* so that mod + 1 (the sampling step passed to libcubwt) is a power of two. */
            int mod = n / 8;
            {
                mod |= mod >> 1;  mod |= mod >> 2;
                mod |= mod >> 4;  mod |= mod >> 8;
                mod |= mod >> 16; mod >>= 1;
            }

#ifdef LIBBSC_OPENMP
            /* The cached device storage is shared between threads; serialize its use. */
            omp_set_lock(&bwt_cuda_lock);

            if (bsc_bwt_cuda_reserve_device_storage(n))
            {
                index = (int)libcubwt_bwt_aux(bwt_cuda_device_storage, T, T, n, mod + 1, (unsigned int *)I);
            }

            omp_unset_lock(&bwt_cuda_lock);
#else
            /* Without OpenMP there is no cache: allocate and release storage per call. */
            void * bwt_cuda_device_storage = NULL;

            if (libcubwt_allocate_device_storage(&bwt_cuda_device_storage, n) == LIBCUBWT_NO_ERROR)
            {
                index = (int)libcubwt_bwt_aux(bwt_cuda_device_storage, T, T, n, mod + 1, (unsigned int *)I);

                libcubwt_free_device_storage(bwt_cuda_device_storage);
            }
#endif

            /* A zero result from libcubwt_bwt_aux is treated as success: I[0] becomes the */
            /* primary index and the following samples, minus one, the secondary indexes. */
            if (index == 0)
            {
                num_indexes[0] = (unsigned char)((n - 1) / (mod + 1));
                index = I[0];

                for (int t = 0; t < num_indexes[0]; ++t)
                {
                    indexes[t] = I[t + 1] - 1;
                }
            }
        }
        else
        {
#ifdef LIBBSC_OPENMP
            /* The cached device storage is shared between threads; serialize its use. */
            omp_set_lock(&bwt_cuda_lock);

            if (bsc_bwt_cuda_reserve_device_storage(n))
            {
                index = (int)libcubwt_bwt(bwt_cuda_device_storage, T, T, n);
            }

            omp_unset_lock(&bwt_cuda_lock);
#else
            /* Without OpenMP there is no cache: allocate and release storage per call. */
            void * bwt_cuda_device_storage = NULL;

            if (libcubwt_allocate_device_storage(&bwt_cuda_device_storage, n) == LIBCUBWT_NO_ERROR)
            {
                index = (int)libcubwt_bwt(bwt_cuda_device_storage, T, T, n);

                libcubwt_free_device_storage(bwt_cuda_device_storage);
            }
#endif
        }
#endif
    }

    return index;
}


int bsc_bwt_encode(unsigned char * T, int n, unsigned char * num_indexes, int * indexes, int features)
{
if (int * RESTRICT A = (int *)bsc_malloc(n * sizeof(int)))
int index = bsc_bwt_gpu_encode(T, n, num_indexes, indexes, features);
if (index >= 0)
{
int index;
return index;
}

if (int * RESTRICT A = (int *)bsc_malloc(n * sizeof(int)))
{
if (num_indexes != NULL && indexes != NULL)
{
int I[256];
Expand Down
7 changes: 7 additions & 0 deletions libbsc/bwt/bwt.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ See also the bsc and libbsc web site:
extern "C" {
#endif

/**
* Initializes the BWT module.
* You should call this function before you call any of the other BWT functions.
* When LIBBSC_FEATURE_CUDA is requested and the library is built with both CUDA
* and OpenMP support, the implementation in bwt.cpp initializes the lock that
* guards the shared CUDA device storage.
* @param features - the set of additional features.
* @return LIBBSC_NO_ERROR if no error occurred, error code otherwise.
*/
int bsc_bwt_init(int features);

/**
* Constructs the burrows wheeler transformed string of a given string.
* @param T - the input/output string of n chars.
Expand Down
10 changes: 10 additions & 0 deletions libbsc/bwt/libcubwt/AUTHORS
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Authors of libcubwt

Ilya Grebnov <[email protected]>

-- This program is based on (at least) the work of

Leyuan Wang, Sean Baxter, John D. Owens, Yury Shukhrov,
Rory Mitchell, Jacopo Pantaleoni, Duane Merrill,
Georgy Evtushenko, Allison Vacanti, Robert Crovella,
Mark Harris.
3 changes: 3 additions & 0 deletions libbsc/bwt/libcubwt/CHANGES
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Changes in 1.0.0 (February 10, 2023)
- Initial public release of the libcubwt.

Loading

0 comments on commit 1259681

Please sign in to comment.