From 4d523e1ef99a7a8e3b29435f07f22c58c819d44b Mon Sep 17 00:00:00 2001 From: devnoname120 Date: Tue, 16 Dec 2014 23:21:08 +0100 Subject: [PATCH] Drastically improved Inferno reading speeds for CSO files, and added support for ZSO files, which are LZ4-compressed ISO files. Thanks to Codestation for his outstanding work! :D --- Common/lz4.h | 248 +++++ ISODrivers/Galaxy/SystemCtrlForKernel.S | 1 + ISODrivers/Galaxy/main.c | 16 +- ISODrivers/Inferno/Makefile | 1 + ISODrivers/Inferno/SystemCtrlForKernel.S | 6 + ISODrivers/Inferno/isoread.c | 266 +++++- Satelite/main.c | 2 +- SystemControl/Makefile | 3 +- SystemControl/exports.exp | 1 + .../libs/libpspsystemctrl_kernel/Makefile | 1 + .../SystemCtrlForKernel.S | 3 + SystemControl/lz4.c | 879 ++++++++++++++++++ Vshctrl/isoreader.c | 16 +- Vshctrl/virtual_pbp.c | 2 +- contrib/ciso.py | 186 ++-- 15 files changed, 1544 insertions(+), 87 deletions(-) create mode 100644 Common/lz4.h create mode 100644 ISODrivers/Inferno/SystemCtrlForKernel.S create mode 100644 SystemControl/lz4.c diff --git a/Common/lz4.h b/Common/lz4.h new file mode 100644 index 0000000..4b8a15b --- /dev/null +++ b/Common/lz4.h @@ -0,0 +1,248 @@ +/* + LZ4 - Fast LZ compression algorithm + Header File + Copyright (C) 2011-2013, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + - LZ4 source repository : http://code.google.com/p/lz4/ +*/ +#pragma once + +#if defined (__cplusplus) +extern "C" { +#endif + + +/************************************** + Version +**************************************/ +#define LZ4_VERSION_MAJOR 1 /* for major interface/format changes */ +#define LZ4_VERSION_MINOR 1 /* for minor interface/format changes */ +#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ + + +/************************************** + Compiler Options +**************************************/ +#if (defined(__GNUC__) && defined(__STRICT_ANSI__)) || (defined(_MSC_VER) && !defined(__cplusplus)) /* Visual Studio */ +# define inline __inline /* Visual C is not C99, but supports some kind of inline */ +#endif + + +/************************************** + Simple Functions +**************************************/ + +int LZ4_compress (const char* source, char* dest, int inputSize); +int LZ4_decompress_safe (const char* source, char* dest, int inputSize, int maxOutputSize); + +/* +LZ4_compress() : + Compresses 'inputSize' bytes from 'source' into 'dest'. + Destination buffer must be already allocated, + and must be sized to handle worst cases situations (input data not compressible) + Worst case size evaluation is provided by function LZ4_compressBound() + inputSize : Max supported value is LZ4_MAX_INPUT_VALUE + return : the number of bytes written in buffer dest + or 0 if the compression fails + +LZ4_decompress_safe() : + maxOutputSize : is the size of the destination buffer (which must be already allocated) + return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) + If the source stream is detected malformed, the function will stop decoding and return a negative result. + This function is protected against buffer overflow exploits (never writes outside of output buffer, and never reads outside of input buffer). Therefore, it is protected against malicious data packets +*/ + + +/************************************** + Advanced Functions +**************************************/ +#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ +#define LZ4_COMPRESSBOUND(isize) ((unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) + +/* +LZ4_compressBound() : + Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible) + primarily useful for memory allocation of output buffer. + inline function is recommended for the general case, + macro is also provided when result needs to be evaluated at compilation (such as stack memory allocation). + + isize : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE + return : maximum output size in a "worst case" scenario + or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE) +*/ +int LZ4_compressBound(int isize); + + +/* +LZ4_compress_limitedOutput() : + Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. + If it cannot achieve it, compression will stop, and result of the function will be zero. + This function never writes outside of provided output buffer. + + inputSize : Max supported value is LZ4_MAX_INPUT_VALUE + maxOutputSize : is the size of the destination buffer (which must be already allocated) + return : the number of bytes written in buffer 'dest' + or 0 if the compression fails +*/ +int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); + + +/* +LZ4_decompress_fast() : + originalSize : is the original and therefore uncompressed size + return : the number of bytes read from the source buffer (in other words, the compressed size) + If the source stream is malformed, the function will stop decoding and return a negative result. + note : This function is a bit faster than LZ4_decompress_safe() + This function never writes outside of output buffers, but may read beyond input buffer in case of malicious data packet. + Use this function preferably into a trusted environment (data to decode comes from a trusted source). + Destination buffer must be already allocated. Its size must be a minimum of 'outputSize' bytes. +*/ +int LZ4_decompress_fast (const char* source, char* dest, int originalSize); + + +/* +LZ4_decompress_safe_partial() : + This function decompress a compressed block of size 'inputSize' at position 'source' + into output buffer 'dest' of size 'maxOutputSize'. + The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, + reducing decompression time. + return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) + Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. + Always control how many bytes were decoded. + If the source stream is detected malformed, the function will stop decoding and return a negative result. + This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets +*/ +int LZ4_decompress_safe_partial (const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize); + + +/* +These functions are provided should you prefer to allocate memory for compression tables with your own allocation methods. +To know how much memory must be allocated for the compression tables, use : +int LZ4_sizeofState(); + +Note that tables must be aligned on 4-bytes boundaries, otherwise compression will fail (return code 0). + +The allocated memory can be provided to the compressions functions using 'void* state' parameter. +LZ4_compress_withState() and LZ4_compress_limitedOutput_withState() are equivalent to previously described functions. +They just use the externally allocated memory area instead of allocating their own (on stack, or on heap). +*/ +int LZ4_sizeofState(void); +int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); + + +/************************************** + Streaming Functions +**************************************/ +void* LZ4_create (const char* inputBuffer); +int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize); +int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize); +char* LZ4_slideInputBuffer (void* LZ4_Data); +int LZ4_free (void* LZ4_Data); + +/* +These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks. +In order to achieve this, it is necessary to start creating the LZ4 Data Structure, thanks to the function : + +void* LZ4_create (const char* inputBuffer); +The result of the function is the (void*) pointer on the LZ4 Data Structure. +This pointer will be needed in all other functions. +If the pointer returned is NULL, then the allocation has failed, and compression must be aborted. +The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. +The input buffer must be already allocated, and size at least 192KB. +'inputBuffer' will also be the 'const char* source' of the first block. + +All blocks are expected to lay next to each other within the input buffer, starting from 'inputBuffer'. +To compress each block, use either LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(). +Their behavior are identical to LZ4_compress() or LZ4_compress_limitedOutput(), +but require the LZ4 Data Structure as their first argument, and check that each block starts right after the previous one. +If next block does not begin immediately after the previous one, the compression will fail (return 0). + +When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to : +char* LZ4_slideInputBuffer(void* LZ4_Data); +must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer. +Note that, for this function to work properly, minimum size of an input buffer must be 192KB. +==> The memory position where the next input data block must start is provided as the result of the function. + +Compression can then resume, using LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(), as usual. + +When compression is completed, a call to LZ4_free() will release the memory used by the LZ4 Data Structure. +*/ + + +int LZ4_sizeofStreamState(void); +int LZ4_resetStreamState(void* state, const char* inputBuffer); + +/* +These functions achieve the same result as : +void* LZ4_create (const char* inputBuffer); + +They are provided here to allow the user program to allocate memory using its own routines. + +To know how much space must be allocated, use LZ4_sizeofStreamState(); +Note also that space must be 4-bytes aligned. + +Once space is allocated, you must initialize it using : LZ4_resetStreamState(void* state, const char* inputBuffer); +void* state is a pointer to the space allocated. +It must be aligned on 4-bytes boundaries, and be large enough. +The parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. +The input buffer must be already allocated, and size at least 192KB. +'inputBuffer' will also be the 'const char* source' of the first block. + +The same space can be re-used multiple times, just by initializing it each time with LZ4_resetStreamState(). +return value of LZ4_resetStreamState() must be 0 is OK. +Any other value means there was an error (typically, pointer is not aligned on 4-bytes boundaries). +*/ + + +int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int inputSize, int maxOutputSize); +int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int outputSize); + +/* +*_withPrefix64k() : + These decoding functions work the same as their "normal name" versions, + but can use up to 64KB of data in front of 'char* dest'. + These functions are necessary to decode inter-dependant blocks. +*/ + + +/************************************** + Obsolete Functions +**************************************/ +/* +These functions are deprecated and should no longer be used. +They are provided here for compatibility with existing user programs. +*/ +int LZ4_uncompress (const char* source, char* dest, int outputSize); +int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); + + +#if defined (__cplusplus) +} +#endif diff --git a/ISODrivers/Galaxy/SystemCtrlForKernel.S b/ISODrivers/Galaxy/SystemCtrlForKernel.S index aeca586..69c006b 100644 --- a/ISODrivers/Galaxy/SystemCtrlForKernel.S +++ b/ISODrivers/Galaxy/SystemCtrlForKernel.S @@ -8,3 +8,4 @@ IMPORT_FUNC "SystemCtrlForKernel",0xA65E8BC4,oe_free IMPORT_FUNC "SystemCtrlForKernel",0xE34A0D97,oe_mallocterminate IMPORT_FUNC "SystemCtrlForKernel",0xBA21998E,sctrlSEGetUmdFile + IMPORT_FUNC "SystemCtrlForKernel",0x16100529,LZ4_decompress_fast diff --git a/ISODrivers/Galaxy/main.c b/ISODrivers/Galaxy/main.c index bf798bc..eeba41f 100644 --- a/ISODrivers/Galaxy/main.c +++ b/ISODrivers/Galaxy/main.c @@ -29,6 +29,7 @@ #include "utils.h" #include "printk.h" #include "galaxy_patch_offset.h" +#include "lz4.h" #define CISO_IDX_BUFFER_SIZE 0x200 #define CISO_DEC_BUFFER_SIZE 0x2000 @@ -115,6 +116,8 @@ u8 g_umddata[16] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, }; +static int lz4_compressed = 0; + // 0x00000000 SceUID myKernelCreateThread(const char * name, SceKernelThreadEntry entry, @@ -158,7 +161,8 @@ int cso_open(SceUID fd) magic = (u32*)g_CISO_hdr.magic; - if(*magic == 0x4F534943) { // CISO + if(*magic == 0x4F534943 || *magic == 0x4F53495A) { // CISO or ZISO + lz4_compressed = (*magic == 0x4F53495A) ? 1 : 0; g_CISO_cur_idx = -1; ciso_total_block = g_CISO_hdr.total_bytes / g_CISO_hdr.block_size; printk("%s: total block %d\n", __func__, (int)ciso_total_block); @@ -411,7 +415,15 @@ int read_cso_sector(u8 *addr, int sector) } // loc_8B8 - ret = sceKernelDeflateDecompress(addr, ISO_SECTOR_SIZE, g_ciso_dec_buf + offset - g_ciso_dec_buf_offset, 0); + if(!lz4_compressed) { + ret = sceKernelDeflateDecompress(addr, ISO_SECTOR_SIZE, g_ciso_dec_buf + offset - g_ciso_dec_buf_offset, 0); + } else { + ret = LZ4_decompress_fast(g_ciso_dec_buf + offset - g_ciso_dec_buf_offset, addr, ISO_SECTOR_SIZE); + if(ret < 0) { + ret = -20; + printk("%s: -> %d\n", __func__, ret); + } + } return ret < 0 ? ret : ISO_SECTOR_SIZE; } diff --git a/ISODrivers/Inferno/Makefile b/ISODrivers/Inferno/Makefile index 0a46b35..b622c6c 100644 --- a/ISODrivers/Inferno/Makefile +++ b/ISODrivers/Inferno/Makefile @@ -3,6 +3,7 @@ all: TARGET = inferno OBJS = main.o iodrv_funcs.o umd.o isoread.o inferno_patch_offset.o isocache.o imports.o ../../Common/utils.o +OBJS += SystemCtrlForKernel.o CFLAGS = -Os -G0 -Wall -I. -I.. -I ../../Common -I ../../include diff --git a/ISODrivers/Inferno/SystemCtrlForKernel.S b/ISODrivers/Inferno/SystemCtrlForKernel.S new file mode 100644 index 0000000..a34a38e --- /dev/null +++ b/ISODrivers/Inferno/SystemCtrlForKernel.S @@ -0,0 +1,6 @@ + .set noreorder + +#include "pspimport.s" + + IMPORT_START "SystemCtrlForKernel",0x00090000 + IMPORT_FUNC "SystemCtrlForKernel",0x16100529,LZ4_decompress_fast diff --git a/ISODrivers/Inferno/isoread.c b/ISODrivers/Inferno/isoread.c index fffdf6f..4bb77e8 100644 --- a/ISODrivers/Inferno/isoread.c +++ b/ISODrivers/Inferno/isoread.c @@ -33,6 +33,13 @@ #include "systemctrl_se.h" #include "systemctrl_private.h" #include "inferno.h" +#include "lz4.h" + +#define CISO_IDX_MAX_ENTRIES 4096 + +#define REMAINDER(a,b) ((a) % (b)) +#define IS_DIVISIBLE(a,b) (REMAINDER(a,b) == 0) +#define GET_CSO_OFFSET(block) ((g_cso_idx_cache[block] & 0x7FFFFFFF) << g_CISO_hdr.align) // 0x00002784 struct IoReadArg g_read_arg; @@ -69,6 +76,8 @@ static int g_ciso_dec_buf_size = 0; // 0x00002720 static u32 g_ciso_total_block = 0; +static int lz4_compressed = 0; + struct CISO_header { u8 magic[4]; // 0 u32 header_size; // 4 @@ -85,6 +94,10 @@ static struct CISO_header g_CISO_hdr __attribute__((aligned(64))); // 0x00002500 static u32 g_CISO_idx_cache[CISO_IDX_BUFFER_SIZE/4] __attribute__((aligned(64))); +static u32 *g_cso_idx_cache = NULL; + +u32 g_cso_idx_start_block = -1; + // 0x00000368 static void wait_until_ms0_ready(void) { @@ -171,7 +184,8 @@ static int is_ciso(SceUID fd) magic = (u32*)g_CISO_hdr.magic; - if(*magic == 0x4F534943) { // CISO + if(*magic == 0x4F534943 || *magic == 0x4F53495A) { // CISO or ZISO + lz4_compressed = (*magic == 0x4F53495A) ? 1 : 0; g_CISO_cur_idx = -1; g_ciso_total_block = g_CISO_hdr.total_bytes / g_CISO_hdr.block_size; printk("%s: total block %d\n", __func__, (int)g_ciso_total_block); @@ -201,6 +215,19 @@ static int is_ciso(SceUID fd) if((u32)g_ciso_block_buf & 63) g_ciso_block_buf = (void*)(((u32)g_ciso_block_buf & (~63)) + 64); } + + if (g_cso_idx_cache == NULL) { + g_cso_idx_cache = oe_malloc((CISO_IDX_MAX_ENTRIES * 4) + 64); + if (g_cso_idx_cache == NULL) { + ret = -3; + printk("%s: -> %d\n", __func__, ret); + goto exit; + } + + if((u32)g_ciso_block_buf & 63) + g_cso_idx_cache = (void*)(((u32)g_cso_idx_cache & (~63)) + 64); + + } ret = 0; } else { @@ -371,7 +398,15 @@ static int read_cso_sector(u8 *addr, int sector) g_ciso_dec_buf_size = ret; } - ret = sceKernelDeflateDecompress(addr, ISO_SECTOR_SIZE, g_ciso_dec_buf + offset - g_ciso_dec_buf_offset, 0); + if(!lz4_compressed) { + ret = sceKernelDeflateDecompress(addr, ISO_SECTOR_SIZE, g_ciso_dec_buf + offset - g_ciso_dec_buf_offset, 0); + } else { + ret = LZ4_decompress_fast(g_ciso_dec_buf + offset - g_ciso_dec_buf_offset, (char *)addr, ISO_SECTOR_SIZE); + if(ret < 0) { + ret = -20; + printk("%s: -> %d\n", __func__, ret); + } + } return ret < 0 ? ret : ISO_SECTOR_SIZE; } @@ -410,11 +445,236 @@ static int read_cso_data(u8* addr, u32 size, u32 offset) return offset - o_offset; } +/** + * decompress (if necessary) a compressed block and copy it to the destination + * If offset_shift is nonzero then the decompressed data will by used from this offset. + * The block_num is used to know if it needs to be decompressed + */ +static int decompress_block(u8 *dst, u8 *src, int size, int block_num, int offset_shift) +{ + int ret; + + if(g_cso_idx_cache[block_num] & 0x80000000) { + // do not copy if the block is already in place + if(offset_shift > 0 || src != dst) { + // no decompression is needed, copy the data from the end of the buffer to the start + memmove(dst, src + offset_shift, size); + } + ret = size; + } else { + if(!lz4_compressed) { + // gzip decompress + ret = sceKernelDeflateDecompress(g_ciso_dec_buf, ISO_SECTOR_SIZE, src, 0); + } else { + // LZ4 decompress + ret = LZ4_decompress_fast((char *)src, (char *)g_ciso_dec_buf, ISO_SECTOR_SIZE); + } + if(ret < 0) { + ret = -20; + printk("%s: -> %d\n", __func__, ret); + return ret; + } + // copy the decompressed data to the destination buffer + memcpy(dst, g_ciso_dec_buf + offset_shift, size); + } + + return ret; +} + +static int refresh_cso_index(u32 size, u32 offset) { + // seek the first block offset + u32 starting_block = offset / ISO_SECTOR_SIZE; + + // calculate the last needed block and read the index + u32 ending_block = (offset + size) / ISO_SECTOR_SIZE + 1; + + u32 idx_size = (ending_block - starting_block + 1) * 4; + + if (idx_size > CISO_IDX_MAX_ENTRIES * 4) { + // the requested index size is too big + return -1; + } + + // out of scope, read cso index table again + if (starting_block < g_cso_idx_start_block|| ending_block >= g_cso_idx_start_block + CISO_IDX_MAX_ENTRIES) { + + u32 total_blocks = g_CISO_hdr.total_bytes / g_CISO_hdr.block_size; + + if (starting_block > total_blocks) { + // the requested block goes beyond the max block number + return -1; + } + + if (starting_block + 4096 > total_blocks) { + idx_size = (total_blocks - starting_block + 1) * 4; + } else { + idx_size = CISO_IDX_MAX_ENTRIES * 4; + } + + int ret = read_raw_data(g_cso_idx_cache, idx_size, starting_block * 4 + 24); + if(ret < 0) { + return ret; + } + + g_cso_idx_start_block = starting_block; + return 0; + } + + return starting_block - g_cso_idx_start_block; +} + +static int read_cso_data_ng(u8 *addr, u32 size, u32 offset) +{ + u32 cso_block; + + u32 start_blk = 0; + u32 first_block_size = 0; + u32 last_block_size = 0; + + u32 cso_read_offset, cso_read_size; + + if(offset > g_CISO_hdr.total_bytes) { + // return if the offset goes beyond the iso size + return 0; + } else if(offset + size > g_CISO_hdr.total_bytes) { + // adjust size if it tries to read beyond the game data + size = g_CISO_hdr.total_bytes - offset; + } + + if ((start_blk = refresh_cso_index(size, offset)) < 0) { + //FIXME: fallback to slower read, try to get a bigger block instead + // a game shouldn't request more than 8MiB in a single read so this + // isn't executed in normal cases + printk("Index for read of size %i is greater that allowed maximum\n", size); + return read_cso_data(addr, size, offset); + } + + // check if the first read is in the middle of a compressed block or if there is only one block + if(!IS_DIVISIBLE(offset, ISO_SECTOR_SIZE) || size <= ISO_SECTOR_SIZE) { + // calculate the offset and size of the compressed data + cso_read_offset = GET_CSO_OFFSET(start_blk); + cso_read_size = GET_CSO_OFFSET(start_blk + 1) - cso_read_offset; + + // READ #2 (only if the first block is a partial one) + read_raw_data(g_ciso_block_buf, cso_read_size, cso_read_offset); + + u32 offset_shift = REMAINDER(offset, ISO_SECTOR_SIZE); + + // calculate the real size needed from the decompressed block + if(offset_shift + size <= ISO_SECTOR_SIZE) { + // if the size + offset shift is less than the sector size then + // use the value directly for the first block size + first_block_size = size; + } else { + // else use the remainder + first_block_size = ISO_SECTOR_SIZE - offset_shift; + } + + // decompress (if required) + if(decompress_block(addr, g_ciso_block_buf, first_block_size, start_blk, offset_shift) < 0) { + return -2; + } + + // update size + size -= first_block_size; + + // only one block to read, return early + if(size == 0) { + return first_block_size; + } + + // update offset and addr + offset += first_block_size; + addr += first_block_size; + + start_blk++; + } + + { + // calculate the last block (or the remaining one) + cso_block = size / 2048 + start_blk; + + // don't go over the next block if the read size occupies all of it + if(IS_DIVISIBLE(size, ISO_SECTOR_SIZE)) { + cso_block--; + } + + cso_read_offset = GET_CSO_OFFSET(cso_block); + + // get the compressed block size + cso_read_size = GET_CSO_OFFSET(cso_block + 1) - cso_read_offset; + + // READ #3 (only if the last block is a partial one) + read_raw_data(g_ciso_block_buf, cso_read_size, cso_read_offset); + + // calculate the partial decompressed block size + last_block_size = size % 2048; + + // update size + size -= last_block_size; + + // calculate the offset to place the last decompressed block + void *last_offset = addr + ((size / ISO_SECTOR_SIZE) * ISO_SECTOR_SIZE); + + if(decompress_block(last_offset, g_ciso_block_buf, last_block_size, cso_block, 0) < 0) { + return -3; + } + + // no more blocks + if(size == 0) { + return first_block_size +last_block_size; + } + } + + // calculate the needed blocks + if(IS_DIVISIBLE(size, 2048)) { + cso_block = size / 2048; + } else { + cso_block = size / 2048 + 1; + } + + cso_read_offset = GET_CSO_OFFSET(start_blk); + cso_read_size = GET_CSO_OFFSET(start_blk + cso_block) - cso_read_offset; + + // place the compressed blocks at the end of the provided buffer + // so it can be reused in the decompression without overlap + u32 shifted_offset = cso_block * 2048 - cso_read_size; + + // READ #4 (main section of compressed blocks) + read_raw_data(addr + shifted_offset, cso_read_size, cso_read_offset); + + int i; + u32 read_size = 0; + + // process every compressed block + for(i = 0; i < cso_block ; i++) { + // shift the source with the size of the last read + void *src = addr + shifted_offset + read_size; + + // shift the destination, block by block + void *dst = addr + i * ISO_SECTOR_SIZE; + + // calculate a size in case last block is a partial one and its + // size is less that the sector size + int dec_size = size < ISO_SECTOR_SIZE ? size : ISO_SECTOR_SIZE; + + if(decompress_block(dst, src, dec_size, i + start_blk, 0) < 0) { + return -4; + } + + cso_read_offset = GET_CSO_OFFSET(start_blk + i); + u32 decompressed_size = GET_CSO_OFFSET(start_blk + i + 1) - cso_read_offset; + read_size += decompressed_size; + } + + return size + first_block_size + last_block_size; +} + // 0x00000C7C int iso_read(struct IoReadArg *args) { if(g_is_ciso != 0) { - return read_cso_data(args->address, args->size, args->offset); + return read_cso_data_ng(args->address, args->size, args->offset); } return read_raw_data(args->address, args->size, args->offset); diff --git a/Satelite/main.c b/Satelite/main.c index ec3b5f4..1e6fef4 100644 --- a/Satelite/main.c +++ b/Satelite/main.c @@ -209,7 +209,7 @@ static int get_umdvideo(UmdVideoList *list, char *path) if(p == NULL) p = dir.d_name; - if(0 == stricmp(p, ".iso") || 0 == stricmp(p, ".cso")) { + if(0 == stricmp(p, ".iso") || 0 == stricmp(p, ".cso") || 0 == stricmp(p, ".zso")) { #ifdef CONFIG_639 if(psp_fw_version == FW_639) scePaf_sprintf(fullpath, "%s/%s", path, dir.d_name); diff --git a/SystemControl/Makefile b/SystemControl/Makefile index 9b1be0d..5895c6a 100644 --- a/SystemControl/Makefile +++ b/SystemControl/Makefile @@ -35,7 +35,8 @@ OBJS = main.o \ ../Common/libs.o \ ../Common/utils.o \ ../Common/strsafe.o \ - setlongjmp.o + setlongjmp.o \ + lz4.o INCDIR = ../Common/ ../include/ CFLAGS = -Os -G0 -Wall -fno-pic diff --git a/SystemControl/exports.exp b/SystemControl/exports.exp index 808b0e5..274266a 100644 --- a/SystemControl/exports.exp +++ b/SystemControl/exports.exp @@ -134,6 +134,7 @@ PSP_EXPORT_FUNC(sctrlSetCustomStartModule) PSP_EXPORT_FUNC(sctrlKernelSetNidResolver) PSP_EXPORT_FUNC(sctrlKernelRand) PSP_EXPORT_FUNC(sctrlGetRealEthernetAddress) +PSP_EXPORT_FUNC(LZ4_decompress_fast) PSP_EXPORT_END PSP_EXPORT_START(SystemCtrlPrivate, 0, 0x0001) diff --git a/SystemControl/libs/libpspsystemctrl_kernel/Makefile b/SystemControl/libs/libpspsystemctrl_kernel/Makefile index 7aab8c2..fa0984d 100644 --- a/SystemControl/libs/libpspsystemctrl_kernel/Makefile +++ b/SystemControl/libs/libpspsystemctrl_kernel/Makefile @@ -1,6 +1,7 @@ TARGET=libpspsystemctrl_kernel.a all: $(TARGET) OBJS = SystemCtrlForKernel_0000.o SystemCtrlForKernel_0001.o SystemCtrlForKernel_0002.o SystemCtrlForKernel_0003.o SystemCtrlForKernel_0004.o SystemCtrlForKernel_0005.o SystemCtrlForKernel_0006.o SystemCtrlForKernel_0007.o SystemCtrlForKernel_0008.o SystemCtrlForKernel_0009.o SystemCtrlForKernel_0010.o SystemCtrlForKernel_0011.o SystemCtrlForKernel_0012.o SystemCtrlForKernel_0013.o SystemCtrlForKernel_0014.o SystemCtrlForKernel_0015.o SystemCtrlForKernel_0016.o SystemCtrlForKernel_0017.o SystemCtrlForKernel_0018.o SystemCtrlForKernel_0019.o SystemCtrlForKernel_0020.o SystemCtrlForKernel_0021.o SystemCtrlForKernel_0022.o SystemCtrlForKernel_0023.o SystemCtrlForKernel_0024.o SystemCtrlForKernel_0025.o SystemCtrlForKernel_0026.o SystemCtrlForKernel_0027.o SystemCtrlForKernel_0028.o SystemCtrlForKernel_0029.o SystemCtrlForKernel_0030.o SystemCtrlForKernel_0031.o SystemCtrlForKernel_0032.o SystemCtrlForKernel_0033.o SystemCtrlForKernel_0034.o SystemCtrlForKernel_0035.o SystemCtrlForKernel_0036.o SystemCtrlForKernel_0037.o SystemCtrlForKernel_0038.o SystemCtrlForKernel_0039.o SystemCtrlForKernel_0040.o SystemCtrlForKernel_0041.o SystemCtrlForKernel_0042.o SystemCtrlForKernel_0043.o SystemCtrlForKernel_0044.o SystemCtrlForKernel_0045.o SystemCtrlForKernel_0046.o SystemCtrlForKernel_0047.o SystemCtrlForKernel_0048.o SystemCtrlForKernel_0049.o SystemCtrlForKernel_0050.o SystemCtrlForKernel_0051.o SystemCtrlForKernel_0052.o SystemCtrlForKernel_0053.o SystemCtrlForKernel_0054.o SystemCtrlForKernel_0055.o SystemCtrlForKernel_0056.o +OBJS += SystemCtrlForKernel_0057.o PSPSDK=$(shell psp-config --pspsdk-path) diff --git a/SystemControl/libs/libpspsystemctrl_kernel/SystemCtrlForKernel.S b/SystemControl/libs/libpspsystemctrl_kernel/SystemCtrlForKernel.S index 48e5d7e..92ef651 100644 --- a/SystemControl/libs/libpspsystemctrl_kernel/SystemCtrlForKernel.S +++ b/SystemControl/libs/libpspsystemctrl_kernel/SystemCtrlForKernel.S @@ -176,3 +176,6 @@ #ifdef F_SystemCtrlForKernel_0056 IMPORT_FUNC "SystemCtrlForKernel",0xB364FBB4,sctrlKernelRand #endif +#ifdef F_SystemCtrlForKernel_0057 + IMPORT_FUNC "SystemCtrlForKernel",0x16100529,LZ4_decompress_fast +#endif diff --git a/SystemControl/lz4.c b/SystemControl/lz4.c new file mode 100644 index 0000000..795f16a --- /dev/null +++ b/SystemControl/lz4.c @@ -0,0 +1,879 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-2014, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/************************************** + Tuning parameters +**************************************/ +/* + * MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio + * Reduced memory usage can improve speed, due to cache effect + * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache + */ +#define MEMORY_USAGE 14 + +/* + * HEAPMODE : + * Select how default compression functions will allocate memory for their hash table, + * in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)). + */ +#define HEAPMODE 0 + + +/************************************** + CPU Feature Detection +**************************************/ +/* 32 or 64 bits ? */ +#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ + || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \ + || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \ + || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) /* Detects 64 bits mode */ +# define LZ4_ARCH64 1 +#else +# define LZ4_ARCH64 0 +#endif + +/* + * Little Endian or Big Endian ? + * Overwrite the #define below if you know your architecture endianess + */ +#if defined (__GLIBC__) +# include +# if (__BYTE_ORDER == __BIG_ENDIAN) +# define LZ4_BIG_ENDIAN 1 +# endif +#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) +# define LZ4_BIG_ENDIAN 1 +#elif defined(__sparc) || defined(__sparc__) \ + || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ + || defined(__hpux) || defined(__hppa) \ + || defined(_MIPSEB) || defined(__s390__) +# define LZ4_BIG_ENDIAN 1 +#else +/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */ +#endif + +/* + * Unaligned memory access is automatically enabled for "common" CPU, such as x86. + * For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property + * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance + */ +#if defined(__ARM_FEATURE_UNALIGNED) +# define LZ4_FORCE_UNALIGNED_ACCESS 1 +#endif + +/* Define this parameter if your target system or compiler does not support hardware bit count */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + +/* + * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : + * This option may provide a small boost to performance for some big endian cpu, although probably modest. + * You may set this option to 1 if data will remain within closed environment. + * This option is useless on Little_Endian CPU (such as x86) + */ + +/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ + + +/************************************** + Compiler Options +**************************************/ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +/* "restrict" is a known keyword */ +#else +# define restrict /* Disable restrict */ +#endif + +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# if LZ4_ARCH64 /* 64-bits */ +# pragma intrinsic(_BitScanForward64) /* For Visual 2005 */ +# pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */ +# else /* 32-bits */ +# pragma intrinsic(_BitScanForward) /* For Visual 2005 */ +# pragma intrinsic(_BitScanReverse) /* For Visual 2005 */ +# endif +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + +#ifdef _MSC_VER /* Visual Studio */ +# define lz4_bswap16(x) _byteswap_ushort(x) +#else +# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) +#endif + +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + + +/************************************** + Memory routines +**************************************/ +#include /* malloc, calloc, free */ +#define ALLOCATOR(n,s) calloc(n,s) +#define FREEMEM free +#include /* memset, memcpy */ +#define MEM_INIT memset + + +/************************************** + Includes +**************************************/ +#include "lz4.h" + + +/************************************** + Basic Types +**************************************/ +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + +#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif +#endif + +typedef struct { U16 v; } _PACKED U16_S; +typedef struct { U32 v; } _PACKED U32_S; +typedef struct { U64 v; } _PACKED U64_S; +typedef struct {size_t v;} _PACKED size_t_S; + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# if defined(__SUNPRO_C) || defined(__SUNPRO_CC) +# pragma pack(0) +# else +# pragma pack(pop) +# endif +#endif + +#define A16(x) (((U16_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) +#define AARCH(x) (((size_t_S *)(x))->v) + + +/************************************** + Constants +**************************************/ +#define LZ4_HASHLOG (MEMORY_USAGE-2) +#define HASHTABLESIZE (1 << MEMORY_USAGE) +#define HASHNBCELLS4 (1 << LZ4_HASHLOG) + +#define MINMATCH 4 + +#define COPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH+MINMATCH) +//static const int LZ4_minLength = (MFLIMIT+1); + +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +#define LZ4_64KLIMIT ((64 KB) + (MFLIMIT-1)) +#define SKIPSTRENGTH 6 /* Increasing this value will make the compression run slower on incompressible data */ + +#define MAXD_LOG 16 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) + +#define ML_BITS 4 +#define ML_MASK ((1U<=e; */ +#else +# define LZ4_WILDCOPY(d,s,e) { if (likely(e-d <= 8)) LZ4_COPY8(d,s) else do { LZ4_COPY8(d,s) } while (d>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clzll(val) >> 3); +# else + int r; + if (!(val>>32)) { r=4; } else { r=0; val>>=32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctzll(val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif +# endif +} + +#else + +FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) +{ +# if defined(LZ4_BIG_ENDIAN) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clz(val) >> 3); +# else + int r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctz(val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif +# endif +} + +#endif + +/**************************** + Compression functions +****************************/ +int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } + +FORCE_INLINE int LZ4_hashSequence(U32 sequence, tableType_t tableType) +{ + if (tableType == byU16) + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +FORCE_INLINE int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); } + +FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + switch (tableType) + { + case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; } + } +} + +FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +FORCE_INLINE const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } + if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } + { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ +} + +FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + + +FORCE_INLINE int LZ4_compress_generic( + void* ctx, + const char* source, + char* dest, + int inputSize, + int maxOutputSize, + + limitedOutput_directive limitedOutput, + tableType_t tableType, + prefix64k_directive prefix) +{ + const BYTE* ip = (const BYTE*) source; + const BYTE* const base = (prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->base : (const BYTE*) source; + const BYTE* const lowLimit = ((prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->bufferStart : (const BYTE*)source); + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; + + /* Init conditions */ + if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ + if ((prefix==withPrefix) && (ip != ((LZ4_Data_Structure*)ctx)->nextBlock)) return 0; /* must continue from end of previous block */ + if (prefix==withPrefix) ((LZ4_Data_Structure*)ctx)->nextBlock=iend; /* do it now, due to potential early exit */ + if ((tableType == byU16) && (inputSize>=(int)LZ4_64KLIMIT)) return 0; /* Size too large (not within 64K limit) */ + if (inputSize> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if (unlikely(forwardIp > mflimit)) { goto _last_literals; } + + forwardH = LZ4_hashPosition(forwardIp, tableType); + ref = LZ4_getPositionOnHash(h, ctx, tableType, base); + LZ4_putPositionOnHash(ip, h, ctx, tableType, base); + + } while ((ref + MAX_DISTANCE < ip) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip>anchor) && (ref > lowLimit) && (unlikely(ip[-1]==ref[-1]))) { ip--; ref--; } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + if ((limitedOutput) && (unlikely(op + length + (2 + 1 + LASTLITERALS) + (length/255) > oend))) return 0; /* Check output limit */ + if (length>=(int)RUN_MASK) + { + int len = length-RUN_MASK; + *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(length<>8) > oend))) return 0; /* Check output limit */ + if (length>=(int)ML_MASK) + { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } + if (length >= 255) { length-=255; *op++ = 255; } + *op++ = (BYTE)length; + } + else *token += (BYTE)(length); + + /* Test end of chunk */ + if (ip > mflimit) { anchor = ip; break; } + + /* Fill table */ + LZ4_putPosition(ip-2, ctx, tableType, base); + + /* Test next position */ + ref = LZ4_getPosition(ip, ctx, tableType, base); + LZ4_putPosition(ip, ctx, tableType, base); + if ((ref + MAX_DISTANCE >= ip) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } + + /* Prepare next loop */ + anchor = ip++; + forwardH = LZ4_hashPosition(ip, tableType); + } + +_last_literals: + /* Encode Last Literals */ + { + int lastRun = (int)(iend - anchor); + if ((limitedOutput) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun<hashTable, 0, sizeof(lz4ds->hashTable)); + lz4ds->bufferStart = base; + lz4ds->base = base; + lz4ds->nextBlock = base; +} + +int LZ4_resetStreamState(void* state, const char* inputBuffer) +{ + if ((((size_t)state) & 3) != 0) return 1; /* Error : pointer is not aligned on 4-bytes boundary */ + LZ4_init((LZ4_Data_Structure*)state, (const BYTE*)inputBuffer); + return 0; +} + +void* LZ4_create (const char* inputBuffer) +{ + void* lz4ds = ALLOCATOR(1, sizeof(LZ4_Data_Structure)); + LZ4_init ((LZ4_Data_Structure*)lz4ds, (const BYTE*)inputBuffer); + return lz4ds; +} + + +int LZ4_free (void* LZ4_Data) +{ + FREEMEM(LZ4_Data); + return (0); +} + + +char* LZ4_slideInputBuffer (void* LZ4_Data) +{ + LZ4_Data_Structure* lz4ds = (LZ4_Data_Structure*)LZ4_Data; + size_t delta = lz4ds->nextBlock - (lz4ds->bufferStart + 64 KB); + + if ( (lz4ds->base - delta > lz4ds->base) /* underflow control */ + || ((size_t)(lz4ds->nextBlock - lz4ds->base) > 0xE0000000) ) /* close to 32-bits limit */ + { + size_t deltaLimit = (lz4ds->nextBlock - 64 KB) - lz4ds->base; + int nH; + + for (nH=0; nH < HASHNBCELLS4; nH++) + { + if ((size_t)(lz4ds->hashTable[nH]) < deltaLimit) lz4ds->hashTable[nH] = 0; + else lz4ds->hashTable[nH] -= (U32)deltaLimit; + } + memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); + lz4ds->base = lz4ds->bufferStart; + lz4ds->nextBlock = lz4ds->base + 64 KB; + } + else + { + memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); + lz4ds->nextBlock -= delta; + lz4ds->base -= delta; + } + + return (char*)(lz4ds->nextBlock); +} + + +int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize) +{ + return LZ4_compress_generic(LZ4_Data, source, dest, inputSize, 0, notLimited, byU32, withPrefix); +} + + +int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize) +{ + return LZ4_compress_generic(LZ4_Data, source, dest, inputSize, maxOutputSize, limited, byU32, withPrefix); +} +#endif + +/**************************** + Decompression functions +****************************/ +/* + * This generic decompression function cover all use cases. + * It shall be instanciated several times, using different sets of directives + * Note that it is essential this generic function is really inlined, + * in order to remove useless branches during compilation optimisation. + */ +FORCE_INLINE int LZ4_decompress_generic( + const char* source, + char* dest, + int inputSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */ + + int endOnInput, /* endOnOutputSize, endOnInputSize */ + int prefix64k, /* noPrefix, withPrefix */ + int partialDecoding, /* full, partial */ + int targetOutputSize /* only used if partialDecoding==partial */ + ) +{ + /* Local Variables */ + const BYTE* restrict ip = (const BYTE*) source; + const BYTE* ref; + const BYTE* const iend = ip + inputSize; + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + outputSize; + BYTE* cpy; + BYTE* oexit = op + targetOutputSize; + + /*const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; / static reduces speed for LZ4_decompress_safe() on GCC64 */ + const size_t dec32table[] = {4-0, 4-3, 4-2, 4-3, 4-0, 4-0, 4-0, 4-0}; /* static reduces speed for LZ4_decompress_safe() on GCC64 */ + static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; + + + /* Special cases */ + if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ + if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ + if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); + + + /* Main Loop */ + while (1) + { + unsigned token; + size_t length; + + /* get runlength */ + token = *ip++; + if ((length=(token>>ML_BITS)) == RUN_MASK) + { + unsigned s=255; + while (((endOnInput)?ip(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-COPYLENGTH))) + { + if (partialDecoding) + { + if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */ + if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */ + } + else + { + if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */ + } + memcpy(op, ip, length); + ip += length; + op += length; + break; /* Necessarily EOF, due to parsing restrictions */ + } + LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; + if ((prefix64k==noPrefix) && (unlikely(ref < (BYTE* const)dest))) goto _output_error; /* Error : offset outside destination buffer */ + + /* get matchlength */ + if ((length=(token&ML_MASK)) == ML_MASK) + { + while ((!endOnInput) || (ipoend-COPYLENGTH-(STEPSIZE-4))) + { + if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last 5 bytes must be literals */ + LZ4_SECURECOPY(op, ref, (oend-COPYLENGTH)); + while(op %d\n", __func__, ret); + } + } return ret < 0 ? ret : SECTOR_SIZE; } @@ -400,7 +411,8 @@ int isoOpen(const char *path) magic = (u32*)g_ciso_h.magic; - if (*magic == 0x4F534943 && g_ciso_h.block_size == SECTOR_SIZE) { + if ((*magic == 0x4F534943 || *magic == 0x4F53495A) && g_ciso_h.block_size == SECTOR_SIZE) { + lz4_compressed = (*magic == 0x4F53495A) ? 1 : 0; g_is_compressed = 1; } else { g_is_compressed = 0; diff --git a/Vshctrl/virtual_pbp.c b/Vshctrl/virtual_pbp.c index 50df9d2..cbdabac 100644 --- a/Vshctrl/virtual_pbp.c +++ b/Vshctrl/virtual_pbp.c @@ -157,7 +157,7 @@ static int is_iso(SceIoDirent * dir) //filename length check if (ext > dir->d_name) { //check extension - if (stricmp(ext, ".iso") == 0 || stricmp(ext, ".cso") == 0) { + if (stricmp(ext, ".iso") == 0 || stricmp(ext, ".cso") == 0 || stricmp(ext, ".zso") == 0) { result = 1; } } diff --git a/contrib/ciso.py b/contrib/ciso.py index e772e99..9a5236c 100644 --- a/contrib/ciso.py +++ b/contrib/ciso.py @@ -1,7 +1,7 @@ #!/usr/bin/python # Copyright (c) 2011 by Virtuous Flame -# Based BOOSTER 1.01 CSO Compressor +# Based BOOSTER 1.01 CSO/ZSO Compressor # # GNU General Public Licence (GPL) # @@ -18,55 +18,73 @@ # Place, Suite 330, Boston, MA 02111-1307 USA # +from __future__ import print_function + __author__ = "Virtuous Flame" __license__ = "GPL" -__version__ = "1.0" +__version__ = "2.0" -import sys, os +import os +import sys +import lz4 +import zlib -from zlib import compress, decompress, error as zlibError -from struct import pack, unpack -from multiprocessing import Pool from getopt import * +from struct import pack, unpack +from multiprocessing.pool import Pool CISO_MAGIC = 0x4F534943 +ZISO_MAGIC = 0x4F53495A DEFAULT_ALIGN = 0 COMPRESS_THREHOLD = 100 DEFAULT_PADDING = br'X' +USE_LZ4 = False MP = False MP_NR = 1024 * 16 def hexdump(data): for i in data: - print("0x%02X" % ((ord(i)))), + print("%02X " % i, end="") print("") def zip_compress(plain, level=9): - compressed = compress(plain, level) + if not USE_LZ4: + compressed = zlib.compress(plain, level) + return compressed[2:] + else: + compressed = lz4.compress(plain) if level < 9 else lz4.compressHC(plain) + return compressed[4:] + # assert(compressed.startswith(b"\x78")) # We have to remove the 0xXX78 header - return compressed[2:] def zip_compress_mp(i): try: return zip_compress(i[0], i[1]) - except zlibError as e: - print("%d block: %s" % (block, e)) + except zlib.error as e: + print("mp error: %s" % (e)) sys.exit(-1) -def zip_decompress(compressed): -# hexdump(data) - return decompress(compressed, -15) + +def zip_decompress(compressed, magic): + # hexdump(data) + if magic == CISO_MAGIC: + return zlib.decompress(compressed, -15) + elif magic == ZISO_MAGIC: + return lz4.decompress(b'\x00\x08\x00\x00' + compressed) + def usage(): print("Usage: ciso [-c level] [-m] [-t percent] [-h] infile outfile") print(" -c level: 1-9 compress ISO to CSO (1=fast/large - 9=small/slow") print(" 0 decompress CSO to ISO") + print(" When using LZ4, 1-8: normal, 9: HC compression") print(" -m Use multiprocessing acceleration for compressing") print(" -t percent Compression Threshold (1-100)") print(" -a align Padding alignment 0=small/slow 6=fast/large") print(" -p pad Padding byte") + print(" -z use LZ4 compression") print(" -h this help") def open_input_output(fname_in, fname_out): @@ -75,7 +93,7 @@ def open_input_output(fname_in, fname_out): except IOError: print("Can't open %s" % (fname_in)) sys.exit(-1) - + try: fout = open(fname_out, "wb") except IOError: @@ -99,53 +117,61 @@ def generate_cso_header(magic, header_size, total_bytes, block_size, ver, align) # assert(len(data) == 0x18) return data -def show_cso_info(fname_in, fname_out, total_bytes, block_size, total_block, align): +def show_cso_info(magic, fname_in, fname_out, total_bytes, block_size, total_block, align): + if magic == CISO_MAGIC: + compression_type = "gzip" + elif magic == ZISO_MAGIC: + compression_type = "LZ4" + else: + compression_type = "unknown" + print("Decompress '%s' to '%s'" % (fname_in, fname_out)) - print("Total File Size %ld bytes" %(total_bytes)) - print("block size %d bytes" %(block_size)) - print("total blocks %d blocks" %(total_block)) - print("index align %d" % (1<= percent_period and percent_period != 0: percent_cnt = 0 - print >> sys.stderr, ("decompress %d%%\r" % (block / percent_period)), + print("decompress %d%%\r" % (block // percent_period), file=sys.stderr, end=""), - index = index_buf[block] - plain = index & 0x80000000 - index &= 0x7fffffff + index = index_buf[block] + plain = index & 0x80000000 + index &= 0x7fffffff read_pos = index << (align) if plain: read_size = block_size else: - index2 = index_buf[block+1] & 0x7fffffff - """ Have to read more bytes if align was set""" - if align: - read_size = (index2-index+1) << (align) + index2 = index_buf[block + 1] & 0x7fffffff + # Have to read more bytes if align was set + if align != 0: + read_size = (index2 - index + 1) << align else: - read_size = (index2-index) << (align) + read_size = (index2 - index) << align cso_data = seek_and_read(fin, read_pos, read_size) @@ -153,8 +179,8 @@ def decompress_cso(fname_in, fname_out, level): dec_data = cso_data else: try: - dec_data = zip_decompress(cso_data) - except zlibError as e: + dec_data = zip_decompress(cso_data, magic) + except zlib.error as e: print("%d block: 0x%08X %d %s" % (block, read_pos, read_size, e)) sys.exit(-1) @@ -166,29 +192,31 @@ def decompress_cso(fname_in, fname_out, level): print("ciso decompress completed") def show_comp_info(fname_in, fname_out, total_bytes, block_size, align, level): - print("Compress '%s' to '%s'" % (fname_in,fname_out)) - print("Total File Size %ld bytes" % (total_bytes)) - print("block size %d bytes" % (block_size)) - print("index align %d" % (1< 2GB, for MSB bit of index as the plain indicator # If we don't then the index can be larger than 2GB, which its plain indicator was improperly set @@ -198,18 +226,20 @@ def compress_cso(fname_in, fname_out, level): header = generate_cso_header(magic, header_size, total_bytes, block_size, ver, align) fout.write(header) - total_block = total_bytes / block_size - index_buf = [ 0 for i in xrange(total_block + 1) ] + total_block = total_bytes // block_size + index_buf = [0 for i in range(total_block + 1)] fout.write(b"\x00\x00\x00\x00" * len(index_buf)) show_comp_info(fname_in, fname_out, total_bytes, block_size, align, level) write_pos = fout.tell() - percent_period = total_block/100 - percent_cnt = 0 + percent_period = total_block // 100 + percent_cnt = 0 if MP: pool = Pool() + else: + pool = None block = 0 while block < total_block: @@ -222,26 +252,26 @@ def compress_cso(fname_in, fname_out, level): percent_cnt = 0 if block == 0: - print >> sys.stderr, ("compress %3d%% avarage rate %3d%%\r" % ( - block / percent_period - ,0)), + print("compress %3d%% avarage rate %3d%%\r" % ( + block // percent_period + , 0), file=sys.stderr, end="") else: - print >> sys.stderr, ("compress %3d%% avarage rate %3d%%\r" % ( - block / percent_period - ,100*write_pos/(block*0x800))), + print("compress %3d%% avarage rate %3d%%\r" % ( + block // percent_period + , 100 * write_pos // (block * 0x800)), file=sys.stderr, end="") if MP: - iso_data = [ (fin.read(block_size), level) for i in xrange(min(total_block - block, MP_NR))] + iso_data = [(fin.read(block_size), level) for i in range(min(total_block - block, MP_NR))] cso_data_all = pool.map_async(zip_compress_mp, iso_data).get(9999999) - for i in xrange(len(cso_data_all)): + for i in range(len(cso_data_all)): write_pos = set_align(fout, write_pos, align) index_buf[block] = write_pos >> align cso_data = cso_data_all[i] - if 100 * len(cso_data) / len(iso_data[i][0]) >= min(COMPRESS_THREHOLD, 100): + if 100 * len(cso_data) // len(iso_data[i][0]) >= min(COMPRESS_THREHOLD, 100): cso_data = iso_data[i][0] - index_buf[block] |= 0x80000000 # Mark as plain + index_buf[block] |= 0x80000000 # Mark as plain elif index_buf[block] & 0x80000000: print("Align error, you have to increase align by 1 or CFW won't be able to read offset above 2 ** 31 bytes") sys.exit(1) @@ -254,16 +284,16 @@ def compress_cso(fname_in, fname_out, level): try: cso_data = zip_compress(iso_data, level) - except zlibError as e: + except zlib.error as e: print("%d block: %s" % (block, e)) sys.exit(-1) write_pos = set_align(fout, write_pos, align) index_buf[block] = write_pos >> align - if 100 * len(cso_data) / len(iso_data) >= COMPRESS_THREHOLD: + if 100 * len(cso_data) // len(iso_data) >= COMPRESS_THREHOLD: cso_data = iso_data - index_buf[block] |= 0x80000000 # Mark as plain + index_buf[block] |= 0x80000000 # Mark as plain elif index_buf[block] & 0x80000000: print("Align error, you have to increase align by 1 or CFW won't be able to read offset above 2 ** 31 bytes") sys.exit(1) @@ -282,21 +312,21 @@ def compress_cso(fname_in, fname_out, level): # assert(len(idx) == 4) fout.write(idx) - print("ciso compress completed , total size = %8d bytes , rate %d%%" % (write_pos,(write_pos*100/total_bytes))) - + print("ciso compress completed , total size = %8d bytes , rate %d%%" % (write_pos, (write_pos * 100 // total_bytes))) + fin.close() fout.close() def parse_args(): - global MP, COMPRESS_THREHOLD, DEFAULT_PADDING, DEFAULT_ALIGN + global MP, COMPRESS_THREHOLD, DEFAULT_PADDING, DEFAULT_ALIGN, USE_LZ4 if len(sys.argv) < 2: usage() sys.exit(-1) try: - optlist, args = gnu_getopt(sys.argv, "c:mt:a:p:h") - except GetoptError, err: + optlist, args = gnu_getopt(sys.argv, "c:mt:a:p:h:z") + except GetoptError as err: print(str(err)) usage() sys.exit(-1) @@ -314,6 +344,8 @@ def parse_args(): DEFAULT_ALIGN = int(a) elif o == '-p': DEFAULT_PADDING = bytes(a[0]) + elif o == '-z': + USE_LZ4 = True elif o == '-h': usage() sys.exit(0) @@ -340,31 +372,31 @@ def load_sector_table(sector_table_fn, total_block, default_level = 9): a = line.split(":") if len(a) < 2: - raise ValueError("Invalid line founded: %s" % (line)) + raise ValueError("Invalid line founded: %s" % line) if -1 == a[0].find("-"): try: sector, level = int(a[0]), int(a[1]) except ValueError: - raise ValueError("Invalid line founded: %s" % (line)) + raise ValueError("Invalid line founded: %s" % line) if level < 1 or level > 9: - raise ValueError("Invalid line founded: %s" % (line)) - sectors[sector] = level + raise ValueError("Invalid line founded: %s" % line) + sectors[sector] = level else: b = a[0].split("-") try: start, end, level = int(b[0]), int(b[1]), int(a[1]) except ValueError: - raise ValueError("Invalid line founded: %s" % (line)) + raise ValueError("Invalid line founded: %s" % line) i = start while i < end: - sectors[i] = level + sectors[i] = level i += 1 return sectors def main(): - print ("ciso-python %s by %s" % (__version__, __author__)) + print("ciso-python %s by %s" % (__version__, __author__)) level, fname_in, fname_out = parse_args() if level == 0: