mirror of
https://github.com/sica42/RollFor.git
synced 2025-10-26 12:06:49 +00:00
3697 lines
127 KiB
Lua
3697 lines
127 KiB
Lua
--[[--
|
|
LibDeflate 1.0.2-release <br>
|
|
Pure Lua compressor and decompressor with high compression ratio using
|
|
DEFLATE/zlib format.
|
|
|
|
@file LibDeflate.lua
|
|
@author Haoqian He (Github: SafeteeWoW; World of Warcraft: Safetyy-Illidan(US))
|
|
@copyright LibDeflate <2018-2020> Haoqian He
|
|
@license zlib License
|
|
|
|
This library is implemented according to the following specifications. <br>
|
|
Report a bug if LibDeflate is not fully compliant with those specs. <br>
|
|
Both compressors and decompressors have been implemented in the library.<br>
|
|
1. RFC1950: DEFLATE Compressed Data Format Specification version 1.3 <br>
|
|
https://tools.ietf.org/html/rfc1951 <br>
|
|
2. RFC1951: ZLIB Compressed Data Format Specification version 3.3 <br>
|
|
https://tools.ietf.org/html/rfc1950 <br>
|
|
|
|
This library requires Lua 5.1/5.2/5.3/5.4 interpreter or LuaJIT v2.0+. <br>
|
|
This library does not have any dependencies. <br>
|
|
<br>
|
|
This file "LibDeflate.lua" is the only source file of
|
|
the library. <br>
|
|
Submit suggestions or report bugs to
|
|
https://github.com/safeteeWow/LibDeflate/issues
|
|
]]
|
|
|
|
--[[
|
|
zlib License
|
|
|
|
(C) 2018-2020 Haoqian He
|
|
|
|
This software is provided 'as-is', without any express or implied
|
|
warranty. In no event will the authors be held liable for any damages
|
|
arising from the use of this software.
|
|
|
|
Permission is granted to anyone to use this software for any purpose,
|
|
including commercial applications, and to alter it and redistribute it
|
|
freely, subject to the following restrictions:
|
|
|
|
1. The origin of this software must not be misrepresented; you must not
|
|
claim that you wrote the original software. If you use this software
|
|
in a product, an acknowledgment in the product documentation would be
|
|
appreciated but is not required.
|
|
2. Altered source versions must be plainly marked as such, and must not be
|
|
misrepresented as being the original software.
|
|
3. This notice may not be removed or altered from any source distribution.
|
|
|
|
License History:
|
|
1. GNU General Public License Version 3 in v1.0.0 and earlier versions.
|
|
2. GNU Lesser General Public License Version 3 in v1.0.1
|
|
3. the zlib License since v1.0.2
|
|
|
|
Credits and Disclaimer:
|
|
This library rewrites the code from the algorithm
|
|
and the ideas of the following projects,
|
|
and uses their code to help to test the correctness of this library,
|
|
but their code is not included directly in the library itself.
|
|
Their original licenses shall be comply when used.
|
|
|
|
1. zlib, by Jean-loup Gailly (compression) and Mark Adler (decompression).
|
|
http://www.zlib.net/
|
|
Licensed under zlib License. http://www.zlib.net/zlib_license.html
|
|
For the compression algorithm.
|
|
2. puff, by Mark Adler. https://github.com/madler/zlib/tree/master/contrib/puff
|
|
Licensed under zlib License. http://www.zlib.net/zlib_license.html
|
|
For the decompression algorithm.
|
|
3. LibCompress, by jjsheets and Galmok of European Stormrage (Horde)
|
|
https://www.wowace.com/projects/libcompress
|
|
Licensed under GPLv2.
|
|
https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
|
|
For the code to create customized codec.
|
|
4. WeakAuras2,
|
|
https://github.com/WeakAuras/WeakAuras2
|
|
Licensed under GPLv2.
|
|
For the 6bit encoding and decoding.
|
|
]]
|
|
|
|
--[[
|
|
Curseforge auto-packaging replacements:
|
|
|
|
Project Date: @project-date-iso@
|
|
Project Hash: @project-hash@
|
|
Project Version: @project-version@
|
|
--]]
|
|
|
|
local LibDeflate
|
|
|
|
do
|
|
-- Semantic version. all lowercase.
|
|
-- Suffix can be alpha1, alpha2, beta1, beta2, rc1, rc2, etc.
|
|
-- NOTE: Two version numbers needs to modify.
|
|
-- 1. On the top of LibDeflate.lua
|
|
-- 2. _VERSION
|
|
-- 3. _MINOR
|
|
|
|
-- version to store the official version of LibDeflate
|
|
local _VERSION = "1.0.2-release"
|
|
|
|
-- When MAJOR is changed, I should name it as LibDeflate2
|
|
local _MAJOR = "LibDeflate"
|
|
|
|
-- Update this whenever a new version, for LibStub version registration.
|
|
-- 0 : v0.x
|
|
-- 1 : v1.0.0
|
|
-- 2 : v1.0.1
|
|
-- 3 : v1.0.2
|
|
local _MINOR = 3
|
|
|
|
local _COPYRIGHT =
|
|
"LibDeflate " .. _VERSION
|
|
.. " Copyright (C) 2018-2020 Haoqian He."
|
|
.. " Licensed under the zlib License"
|
|
|
|
-- Register in the World of Warcraft library "LibStub" if detected.
|
|
if LibStub then
|
|
local lib, minor = LibStub:GetLibrary( _MAJOR, true )
|
|
if lib and minor and minor >= _MINOR then -- No need to update.
|
|
return lib
|
|
else -- Update or first time register
|
|
LibDeflate = LibStub:NewLibrary( _MAJOR, _MINOR )
|
|
-- NOTE: It is important that new version has implemented
|
|
-- all exported APIs and tables in the old version,
|
|
-- so the old library is fully garbage collected,
|
|
-- and we 100% ensure the backward compatibility.
|
|
end
|
|
else -- "LibStub" is not detected.
|
|
LibDeflate = {}
|
|
end
|
|
|
|
LibDeflate._VERSION = _VERSION
|
|
LibDeflate._MAJOR = _MAJOR
|
|
LibDeflate._MINOR = _MINOR
|
|
LibDeflate._COPYRIGHT = _COPYRIGHT
|
|
end
|
|
|
|
-- localize Lua api for faster access.
|
|
local assert = assert
|
|
local error = error
|
|
local pairs = pairs
|
|
local string_byte = string.byte
|
|
local string_char = string.char
|
|
local string_find = string.find
|
|
local string_gsub = string.gsub
|
|
local string_sub = string.sub
|
|
local table_concat = table.concat
|
|
local table_sort = table.sort
|
|
local tostring = tostring
|
|
local type = type
|
|
---@diagnostic disable-next-line: undefined-field
|
|
local mod = math.mod
|
|
|
|
-- Converts i to 2^i, (0<=i<=32)
|
|
-- This is used to implement bit left shift and bit right shift.
|
|
-- "x >> y" in C: "(x-x%_pow2[y])/_pow2[y]" in Lua
|
|
-- "x << y" in C: "x*_pow2[y]" in Lua
|
|
local _pow2 = {}
|
|
|
|
-- Converts any byte to a character, (0<=byte<=255)
|
|
local _byte_to_char = {}
|
|
|
|
-- _reverseBitsTbl[len][val] stores the bit reverse of
|
|
-- the number with bit length "len" and value "val"
|
|
-- For example, decimal number 6 with bits length 5 is binary 00110
|
|
-- It's reverse is binary 01100,
|
|
-- which is decimal 12 and 12 == _reverseBitsTbl[5][6]
|
|
-- 1<=len<=9, 0<=val<=2^len-1
|
|
-- The reason for 1<=len<=9 is that the max of min bitlen of huffman code
|
|
-- of a huffman alphabet is 9?
|
|
local _reverse_bits_tbl = {}
|
|
|
|
-- Convert a LZ77 length (3<=len<=258) to
|
|
-- a deflate literal/LZ77_length code (257<=code<=285)
|
|
local _length_to_deflate_code = {}
|
|
|
|
-- convert a LZ77 length (3<=len<=258) to
|
|
-- a deflate literal/LZ77_length code extra bits.
|
|
local _length_to_deflate_extra_bits = {}
|
|
|
|
-- Convert a LZ77 length (3<=len<=258) to
|
|
-- a deflate literal/LZ77_length code extra bit length.
|
|
local _length_to_deflate_extra_bitlen = {}
|
|
|
|
-- Convert a small LZ77 distance (1<=dist<=256) to a deflate code.
|
|
local _dist256_to_deflate_code = {}
|
|
|
|
-- Convert a small LZ77 distance (1<=dist<=256) to
|
|
-- a deflate distance code extra bits.
|
|
local _dist256_to_deflate_extra_bits = {}
|
|
|
|
-- Convert a small LZ77 distance (1<=dist<=256) to
|
|
-- a deflate distance code extra bit length.
|
|
local _dist256_to_deflate_extra_bitlen = {}
|
|
|
|
-- Convert a literal/LZ77_length deflate code to LZ77 base length
|
|
-- The key of the table is (code - 256), 257<=code<=285
|
|
local _literal_deflate_code_to_base_len = {
|
|
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
|
|
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258,
|
|
}
|
|
|
|
-- Convert a literal/LZ77_length deflate code to base LZ77 length extra bits
|
|
-- The key of the table is (code - 256), 257<=code<=285
|
|
local _literal_deflate_code_to_extra_bitlen = {
|
|
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
|
|
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0,
|
|
}
|
|
|
|
-- Convert a distance deflate code to base LZ77 distance. (0<=code<=29)
|
|
local _dist_deflate_code_to_base_dist = {
|
|
[ 0 ] = 1,
|
|
2,
|
|
3,
|
|
4,
|
|
5,
|
|
7,
|
|
9,
|
|
13,
|
|
17,
|
|
25,
|
|
33,
|
|
49,
|
|
65,
|
|
97,
|
|
129,
|
|
193,
|
|
257,
|
|
385,
|
|
513,
|
|
769,
|
|
1025,
|
|
1537,
|
|
2049,
|
|
3073,
|
|
4097,
|
|
6145,
|
|
8193,
|
|
12289,
|
|
16385,
|
|
24577,
|
|
}
|
|
|
|
-- Convert a distance deflate code to LZ77 bits length. (0<=code<=29)
|
|
local _dist_deflate_code_to_extra_bitlen = {
|
|
[ 0 ] = 0,
|
|
0,
|
|
0,
|
|
0,
|
|
1,
|
|
1,
|
|
2,
|
|
2,
|
|
3,
|
|
3,
|
|
4,
|
|
4,
|
|
5,
|
|
5,
|
|
6,
|
|
6,
|
|
7,
|
|
7,
|
|
8,
|
|
8,
|
|
9,
|
|
9,
|
|
10,
|
|
10,
|
|
11,
|
|
11,
|
|
12,
|
|
12,
|
|
13,
|
|
13,
|
|
}
|
|
|
|
-- The code order of the first huffman header in the dynamic deflate block.
|
|
-- See the page 12 of RFC1951
|
|
local _rle_codes_huffman_bitlen_order = { 16, 17, 18,
|
|
0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
|
|
}
|
|
|
|
-- The following tables are used by fixed deflate block.
|
|
-- The value of these tables are assigned at the bottom of the source.
|
|
|
|
-- The huffman code of the literal/LZ77_length deflate codes,
|
|
-- in fixed deflate block.
|
|
local _fix_block_literal_huffman_code
|
|
|
|
-- Convert huffman code of the literal/LZ77_length to deflate codes,
|
|
-- in fixed deflate block.
|
|
local _fix_block_literal_huffman_to_deflate_code
|
|
|
|
-- The bit length of the huffman code of literal/LZ77_length deflate codes,
|
|
-- in fixed deflate block.
|
|
local _fix_block_literal_huffman_bitlen
|
|
|
|
-- The count of each bit length of the literal/LZ77_length deflate codes,
|
|
-- in fixed deflate block.
|
|
local _fix_block_literal_huffman_bitlen_count
|
|
|
|
-- The huffman code of the distance deflate codes,
|
|
-- in fixed deflate block.
|
|
local _fix_block_dist_huffman_code
|
|
|
|
-- Convert huffman code of the distance to deflate codes,
|
|
-- in fixed deflate block.
|
|
local _fix_block_dist_huffman_to_deflate_code
|
|
|
|
-- The bit length of the huffman code of the distance deflate codes,
|
|
-- in fixed deflate block.
|
|
local _fix_block_dist_huffman_bitlen
|
|
|
|
-- The count of each bit length of the huffman code of
|
|
-- the distance deflate codes,
|
|
-- in fixed deflate block.
|
|
local _fix_block_dist_huffman_bitlen_count
|
|
|
|
for i = 0, 255 do
|
|
_byte_to_char[ i ] = string_char( i )
|
|
end
|
|
|
|
do
|
|
local pow = 1
|
|
for i = 0, 32 do
|
|
_pow2[ i ] = pow
|
|
pow = pow * 2
|
|
end
|
|
end
|
|
|
|
for i = 1, 9 do
|
|
_reverse_bits_tbl[ i ] = {}
|
|
for j = 0, _pow2[ i + 1 ] - 1 do
|
|
local reverse = 0
|
|
local value = j
|
|
for _ = 1, i do
|
|
-- The following line is equivalent to "res | (code %2)" in C.
|
|
reverse = reverse - mod( reverse, 2 )
|
|
+ (((mod( reverse, 2 ) == 1) or (mod( value, 2 )) == 1) and 1 or 0)
|
|
value = (value - mod( value, 2 )) / 2
|
|
reverse = reverse * 2
|
|
end
|
|
_reverse_bits_tbl[ i ][ j ] = (reverse - mod( reverse, 2 )) / 2
|
|
end
|
|
end
|
|
|
|
-- The source code is written according to the pattern in the numbers
|
|
-- in RFC1951 Page10.
|
|
do
|
|
local a = 18
|
|
local b = 16
|
|
local c = 265
|
|
local bitlen = 1
|
|
for len = 3, 258 do
|
|
if len <= 10 then
|
|
_length_to_deflate_code[ len ] = len + 254
|
|
_length_to_deflate_extra_bitlen[ len ] = 0
|
|
elseif len == 258 then
|
|
_length_to_deflate_code[ len ] = 285
|
|
_length_to_deflate_extra_bitlen[ len ] = 0
|
|
else
|
|
if len > a then
|
|
a = a + b
|
|
b = b * 2
|
|
c = c + 4
|
|
bitlen = bitlen + 1
|
|
end
|
|
local t = len - a - 1 + b / 2
|
|
_length_to_deflate_code[ len ] = (t - (mod( t, b / 8 ))) / (b / 8) + c
|
|
_length_to_deflate_extra_bitlen[ len ] = bitlen
|
|
_length_to_deflate_extra_bits[ len ] = mod( t, b / 8 )
|
|
end
|
|
end
|
|
end
|
|
|
|
-- The source code is written according to the pattern in the numbers
|
|
-- in RFC1951 Page11.
|
|
do
|
|
_dist256_to_deflate_code[ 1 ] = 0
|
|
_dist256_to_deflate_code[ 2 ] = 1
|
|
_dist256_to_deflate_extra_bitlen[ 1 ] = 0
|
|
_dist256_to_deflate_extra_bitlen[ 2 ] = 0
|
|
|
|
local a = 3
|
|
local b = 4
|
|
local code = 2
|
|
local bitlen = 0
|
|
for dist = 3, 256 do
|
|
if dist > b then
|
|
a = a * 2
|
|
b = b * 2
|
|
code = code + 2
|
|
bitlen = bitlen + 1
|
|
end
|
|
_dist256_to_deflate_code[ dist ] = (dist <= a) and code or (code + 1)
|
|
_dist256_to_deflate_extra_bitlen[ dist ] = (bitlen < 0) and 0 or bitlen
|
|
if b >= 8 then
|
|
_dist256_to_deflate_extra_bits[ dist ] = mod( dist - b / 2 - 1, b / 4 )
|
|
end
|
|
end
|
|
end
|
|
|
|
--- Calculate the Adler-32 checksum of the string. <br>
|
|
-- See RFC1950 Page 9 https://tools.ietf.org/html/rfc1950 for the
|
|
-- definition of Adler-32 checksum.
|
|
-- @param str [string] the input string to calcuate its Adler-32 checksum.
|
|
-- @return [integer] The Adler-32 checksum, which is greater or equal to 0,
|
|
-- and less than 2^32 (4294967296).
|
|
function LibDeflate:Adler32( str )
|
|
-- This function is loop unrolled by better performance.
|
|
--
|
|
-- Here is the minimum code:
|
|
--
|
|
-- local a = 1
|
|
-- local b = 0
|
|
-- for i=1, #str do
|
|
-- local s = string.byte(str, i, i)
|
|
-- a = (a+s)%65521
|
|
-- b = (b+a)%65521
|
|
-- end
|
|
-- return b*65536+a
|
|
if type( str ) ~= "string" then
|
|
error( string.format( "Usage: LibDeflate:Adler32(str):"
|
|
.. " 'str' - string expected got '%s'.", type( str ) ), 2 )
|
|
end
|
|
local strlen = string.len( str )
|
|
|
|
local i = 1
|
|
local a = 1
|
|
local b = 0
|
|
while i <= strlen - 15 do
|
|
local x1, x2, x3, x4, x5, x6, x7, x8,
|
|
x9, x10, x11, x12, x13, x14, x15, x16 = string_byte( str, i, i + 15 )
|
|
b = mod( b + 16 * a + 16 * x1 + 15 * x2 + 14 * x3 + 13 * x4 + 12 * x5 + 11 * x6 + 10 * x7 + 9 * x8 + 8 * x9
|
|
+ 7 * x10 + 6 * x11 + 5 * x12 + 4 * x13 + 3 * x14 + 2 * x15 + x16, 65521 )
|
|
a = mod( a + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16, 65521 )
|
|
i = i + 16
|
|
end
|
|
while (i <= strlen) do
|
|
local x = string_byte( str, i, i )
|
|
a = mod( a + x, 65521 )
|
|
b = mod( b + a, 65521 )
|
|
i = i + 1
|
|
end
|
|
return mod( b * 65536 + a, 4294967296 )
|
|
end
|
|
|
|
-- Compare adler32 checksum.
|
|
-- adler32 should be compared with a mod to avoid sign problem
|
|
-- 4072834167 (unsigned) is the same adler32 as -222133129
|
|
local function IsEqualAdler32( actual, expected )
|
|
return mod( actual, 4294967296 ) == mod( expected, 4294967296 )
|
|
end
|
|
|
|
--- Create a preset dictionary.
|
|
--
|
|
-- This function is not fast, and the memory consumption of the produced
|
|
-- dictionary is about 50 times of the input string. Therefore, it is suggestted
|
|
-- to run this function only once in your program.
|
|
--
|
|
-- It is very important to know that if you do use a preset dictionary,
|
|
-- compressors and decompressors MUST USE THE SAME dictionary. That is,
|
|
-- dictionary must be created using the same string. If you update your program
|
|
-- with a new dictionary, people with the old version won't be able to transmit
|
|
-- data with people with the new version. Therefore, changing the dictionary
|
|
-- must be very careful.
|
|
--
|
|
-- The parameters "strlen" and "adler32" add a layer of verification to ensure
|
|
-- the parameter "str" is not modified unintentionally during the program
|
|
-- development.
|
|
--
|
|
-- @usage local dict_str = "1234567890"
|
|
--
|
|
-- -- print(dict_str:len(), LibDeflate:Adler32(dict_str))
|
|
-- -- Hardcode the print result below to verify it to avoid acciently
|
|
-- -- modification of 'str' during the program development.
|
|
-- -- string length: 10, Adler-32: 187433486,
|
|
-- -- Don't calculate string length and its Adler-32 at run-time.
|
|
--
|
|
-- local dict = LibDeflate:CreateDictionary(dict_str, 10, 187433486)
|
|
--
|
|
-- @param str [string] The string used as the preset dictionary. <br>
|
|
-- You should put stuffs that frequently appears in the dictionary
|
|
-- string and preferablely put more frequently appeared stuffs toward the end
|
|
-- of the string. <br>
|
|
-- Empty string and string longer than 32768 bytes are not allowed.
|
|
-- @param strlen [integer] The length of 'str'. Please pass in this parameter
|
|
-- as a hardcoded constant, in order to verify the content of 'str'. The value
|
|
-- of this parameter should be known before your program runs.
|
|
-- @param adler32 [integer] The Adler-32 checksum of 'str'. Please pass in this
|
|
-- parameter as a hardcoded constant, in order to verify the content of 'str'.
|
|
-- The value of this parameter should be known before your program runs.
|
|
-- @return [table] The dictionary used for preset dictionary compression and
|
|
-- decompression.
|
|
-- @raise error if 'strlen' does not match the length of 'str',
|
|
-- or if 'adler32' does not match the Adler-32 checksum of 'str'.
|
|
function LibDeflate:CreateDictionary( str, strlen, adler32 )
|
|
if type( str ) ~= "string" then
|
|
error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
|
|
.. " 'str' - string expected got '%s'.", type( str ) ), 2 )
|
|
end
|
|
if type( strlen ) ~= "number" then
|
|
error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
|
|
.. " 'strlen' - number expected got '%s'.", type( strlen ) ), 2 )
|
|
end
|
|
if type( adler32 ) ~= "number" then
|
|
error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
|
|
.. " 'adler32' - number expected got '%s'.", type( adler32 ) ), 2 )
|
|
end
|
|
if strlen ~= string.len( str ) then
|
|
error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
|
|
.. " 'strlen' does not match the actual length of 'str'."
|
|
.. " 'strlen': %u, '#str': %u ."
|
|
.. " Please check if 'str' is modified unintentionally.", strlen, string.len( str ) ) )
|
|
end
|
|
if strlen == 0 then
|
|
error( ("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
|
|
.. " 'str' - Empty string is not allowed."), 2 )
|
|
end
|
|
if strlen > 32768 then
|
|
error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
|
|
.. " 'str' - string longer than 32768 bytes is not allowed."
|
|
.. " Got %d bytes.", strlen ), 2 )
|
|
end
|
|
local actual_adler32 = self:Adler32( str )
|
|
if not IsEqualAdler32( adler32, actual_adler32 ) then
|
|
error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
|
|
.. " 'adler32' does not match the actual adler32 of 'str'."
|
|
.. " 'adler32': %u, 'Adler32(str)': %u ."
|
|
.. " Please check if 'str' is modified unintentionally.", adler32, actual_adler32 ) )
|
|
end
|
|
|
|
local dictionary = {}
|
|
dictionary.adler32 = adler32
|
|
dictionary.hash_tables = {}
|
|
dictionary.string_table = {}
|
|
dictionary.strlen = strlen
|
|
local string_table = dictionary.string_table
|
|
local hash_tables = dictionary.hash_tables
|
|
string_table[ 1 ] = string_byte( str, 1, 1 )
|
|
string_table[ 2 ] = string_byte( str, 2, 2 )
|
|
if strlen >= 3 then
|
|
local i = 1
|
|
local hash = string_table[ 1 ] * 256 + string_table[ 2 ]
|
|
while i <= strlen - 2 - 3 do
|
|
local x1, x2, x3, x4 = string_byte( str, i + 2, i + 5 )
|
|
string_table[ i + 2 ] = x1
|
|
string_table[ i + 3 ] = x2
|
|
string_table[ i + 4 ] = x3
|
|
string_table[ i + 5 ] = x4
|
|
hash = mod( hash * 256 + x1, 16777216 )
|
|
local t = hash_tables[ hash ]
|
|
if not t then
|
|
t = {}; hash_tables[ hash ] = t
|
|
end
|
|
t[ table.getn( t ) + 1 ] = i - strlen
|
|
i = i + 1
|
|
hash = mod( hash * 256 + x2, 16777216 )
|
|
t = hash_tables[ hash ]
|
|
if not t then
|
|
t = {}; hash_tables[ hash ] = t
|
|
end
|
|
t[ table.getn( t ) + 1 ] = i - strlen
|
|
i = i + 1
|
|
hash = mod( hash * 256 + x3, 16777216 )
|
|
t = hash_tables[ hash ]
|
|
if not t then
|
|
t = {}; hash_tables[ hash ] = t
|
|
end
|
|
t[ table.getn( t ) + 1 ] = i - strlen
|
|
i = i + 1
|
|
hash = mod( hash * 256 + x4, 16777216 )
|
|
t = hash_tables[ hash ]
|
|
if not t then
|
|
t = {}; hash_tables[ hash ] = t
|
|
end
|
|
t[ table.getn( t ) + 1 ] = i - strlen
|
|
i = i + 1
|
|
end
|
|
while i <= strlen - 2 do
|
|
local x = string_byte( str, i + 2 )
|
|
string_table[ i + 2 ] = x
|
|
hash = mod( hash * 256 + x, 16777216 )
|
|
local t = hash_tables[ hash ]
|
|
if not t then
|
|
t = {}; hash_tables[ hash ] = t
|
|
end
|
|
t[ table.getn( t ) + 1 ] = i - strlen
|
|
i = i + 1
|
|
end
|
|
end
|
|
return dictionary
|
|
end
|
|
|
|
-- Check if the dictionary is valid.
|
|
-- @param dictionary The preset dictionary for compression and decompression.
|
|
-- @return true if valid, false if not valid.
|
|
-- @return if not valid, the error message.
|
|
local function IsValidDictionary( dictionary )
|
|
if type( dictionary ) ~= "table" then
|
|
return false, string.format( "'dictionary' - table expected got '%s'.", type( dictionary ) )
|
|
end
|
|
if type( dictionary.adler32 ) ~= "number"
|
|
or type( dictionary.string_table ) ~= "table"
|
|
or type( dictionary.strlen ) ~= "number"
|
|
or dictionary.strlen <= 0
|
|
or dictionary.strlen > 32768
|
|
or dictionary.strlen ~= table.getn( dictionary.string_table )
|
|
or type( dictionary.hash_tables ) ~= "table"
|
|
then
|
|
return false, "'dictionary' - corrupted dictionary."
|
|
end
|
|
return true, ""
|
|
end
|
|
|
|
--[[
|
|
key of the configuration table is the compression level,
|
|
and its value stores the compression setting.
|
|
These numbers come from zlib source code.
|
|
|
|
Higher compression level usually means better compression.
|
|
(Because LibDeflate uses a simplified version of zlib algorithm,
|
|
there is no guarantee that higher compression level does not create
|
|
bigger file than lower level, but I can say it's 99% likely)
|
|
|
|
Be careful with the high compression level. This is a pure lua
|
|
implementation compressor/decompressor, which is significant slower than
|
|
a C/C++ equivalant compressor/decompressor. Very high compression level
|
|
costs significant more CPU time, and usually compression size won't be
|
|
significant smaller when you increase compression level by 1, when the
|
|
level is already very high. Benchmark yourself if you can afford it.
|
|
|
|
See also https://github.com/madler/zlib/blob/master/doc/algorithm.txt,
|
|
https://github.com/madler/zlib/blob/master/deflate.c for more information.
|
|
|
|
The meaning of each field:
|
|
@field 1 use_lazy_evaluation:
|
|
true/false. Whether the program uses lazy evaluation.
|
|
See what is "lazy evaluation" in the link above.
|
|
lazy_evaluation improves ratio, but relatively slow.
|
|
@field 2 good_prev_length:
|
|
Only effective if lazy is set, Only use 1/4 of max_chain,
|
|
if prev length of lazy match is above this.
|
|
@field 3 max_insert_length/max_lazy_match:
|
|
If not using lazy evaluation,
|
|
insert new strings in the hash table only if the match length is not
|
|
greater than this length.
|
|
If using lazy evaluation, only continue lazy evaluation,
|
|
if previous match length is strictly smaller than this value.
|
|
@field 4 nice_length:
|
|
Number. Don't continue to go down the hash chain,
|
|
if match length is above this.
|
|
@field 5 max_chain:
|
|
Number. The maximum number of hash chains we look.
|
|
--]]
|
|
local _compression_level_configs = {
|
|
[ 0 ] = { false, nil, 0, 0, 0 }, -- level 0, no compression
|
|
[ 1 ] = { false, nil, 4, 8, 4 }, -- level 1, similar to zlib level 1
|
|
[ 2 ] = { false, nil, 5, 18, 8 }, -- level 2, similar to zlib level 2
|
|
[ 3 ] = { false, nil, 6, 32, 32 }, -- level 3, similar to zlib level 3
|
|
[ 4 ] = { true, 4, 4, 16, 16 }, -- level 4, similar to zlib level 4
|
|
[ 5 ] = { true, 8, 16, 32, 32 }, -- level 5, similar to zlib level 5
|
|
[ 6 ] = { true, 8, 16, 128, 128 }, -- level 6, similar to zlib level 6
|
|
[ 7 ] = { true, 8, 32, 128, 256 }, -- (SLOW) level 7, similar to zlib level 7
|
|
[ 8 ] = { true, 32, 128, 258, 1024 }, --(SLOW) level 8,similar to zlib level 8
|
|
[ 9 ] = { true, 32, 258, 258, 4096 },
|
|
-- (VERY SLOW) level 9, similar to zlib level 9
|
|
}
|
|
|
|
-- Check if the compression/decompression arguments is valid
|
|
-- @param str The input string.
|
|
-- @param check_dictionary if true, check if dictionary is valid.
|
|
-- @param dictionary The preset dictionary for compression and decompression.
|
|
-- @param check_configs if true, check if config is valid.
|
|
-- @param configs The compression configuration table
|
|
-- @return true if valid, false if not valid.
|
|
-- @return if not valid, the error message.
|
|
local function IsValidArguments( str,
|
|
check_dictionary, dictionary,
|
|
check_configs, configs )
|
|
if type( str ) ~= "string" then
|
|
return false, string.format( "'str' - string expected got '%s'.", type( str ) )
|
|
end
|
|
if check_dictionary then
|
|
local dict_valid, dict_err = IsValidDictionary( dictionary )
|
|
if not dict_valid then
|
|
return false, dict_err
|
|
end
|
|
end
|
|
if check_configs then
|
|
local type_configs = type( configs )
|
|
if type_configs ~= "nil" and type_configs ~= "table" then
|
|
return false, string.format( "'configs' - nil or table expected got '%s'.", type( configs ) )
|
|
end
|
|
if type_configs == "table" then
|
|
for k, v in pairs( configs ) do
|
|
if k ~= "level" and k ~= "strategy" then
|
|
return false,
|
|
string.format( "'configs' - unsupported table key in the configs: '%s'.", k )
|
|
elseif k == "level" and not _compression_level_configs[ v ] then
|
|
return false,
|
|
string.format( "'configs' - unsupported 'level': %s.", tostring( v ) )
|
|
elseif k == "strategy" and v ~= "fixed" and v ~= "huffman_only"
|
|
and v ~= "dynamic" then
|
|
-- random_block_type is for testing purpose
|
|
return false, string.format( "'configs' - unsupported 'strategy': '%s'.", tostring( v ) )
|
|
end
|
|
end
|
|
end
|
|
end
|
|
return true, ""
|
|
end
|
|
|
|
|
|
|
|
--[[ --------------------------------------------------------------------------
|
|
Compress code
|
|
--]] --------------------------------------------------------------------------
|
|
|
|
-- partial flush to save memory
|
|
local _FLUSH_MODE_MEMORY_CLEANUP = 0
|
|
-- full flush with partial bytes
|
|
local _FLUSH_MODE_OUTPUT = 1
|
|
-- write bytes to get to byte boundary
|
|
local _FLUSH_MODE_BYTE_BOUNDARY = 2
|
|
-- no flush, just get num of bits written so far
|
|
local _FLUSH_MODE_NO_FLUSH = 3
|
|
|
|
--[[
|
|
Create an empty writer to easily write stuffs as the unit of bits.
|
|
Return values:
|
|
1. WriteBits(code, bitlen):
|
|
2. WriteString(str):
|
|
3. Flush(mode):
|
|
--]]
|
|
local function CreateWriter()
|
|
local buffer_size = 0
|
|
local cache = 0
|
|
local cache_bitlen = 0
|
|
local total_bitlen = 0
|
|
local buffer = {}
|
|
-- When buffer is big enough, flush into result_buffer to save memory.
|
|
local result_buffer = {}
|
|
|
|
-- Write bits with value "value" and bit length of "bitlen" into writer.
|
|
-- @param value: The value being written
|
|
-- @param bitlen: The bit length of "value"
|
|
-- @return nil
|
|
local function WriteBits( value, bitlen )
|
|
cache = cache + value * _pow2[ cache_bitlen ]
|
|
cache_bitlen = cache_bitlen + bitlen
|
|
total_bitlen = total_bitlen + bitlen
|
|
-- Only bulk to buffer every 4 bytes. This is quicker.
|
|
if cache_bitlen >= 32 then
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] =
|
|
_byte_to_char[ mod( cache, 256 ) ]
|
|
.. _byte_to_char[ mod( (cache - mod( cache, 256 )) / 256, 256 ) ]
|
|
.. _byte_to_char[ mod( (cache - mod( cache, 65536 )) / 65536, 256 ) ]
|
|
.. _byte_to_char[ mod( (cache - mod( cache, 16777216 )) / 16777216, 256 ) ]
|
|
local rshift_mask = _pow2[ 32 - cache_bitlen + bitlen ]
|
|
cache = (value - mod( value, rshift_mask )) / rshift_mask
|
|
cache_bitlen = cache_bitlen - 32
|
|
end
|
|
end
|
|
|
|
-- Write the entire string into the writer.
|
|
-- @param str The string being written
|
|
-- @return nil
|
|
local function WriteString( str )
|
|
for _ = 1, cache_bitlen, 8 do
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] = string_char( mod( cache, 256 ) )
|
|
cache = (cache - mod( cache, 256 )) / 256
|
|
end
|
|
cache_bitlen = 0
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] = str
|
|
total_bitlen = total_bitlen + string.len( str ) * 8
|
|
end
|
|
|
|
-- Flush current stuffs in the writer and return it.
|
|
-- This operation will free most of the memory.
|
|
-- @param mode See the descrtion of the constant and the source code.
|
|
-- @return The total number of bits stored in the writer right now.
|
|
-- for byte boundary mode, it includes the padding bits.
|
|
-- for output mode, it does not include padding bits.
|
|
-- @return Return the outputs if mode is output.
|
|
local function FlushWriter( mode )
|
|
if mode == _FLUSH_MODE_NO_FLUSH then
|
|
return total_bitlen
|
|
end
|
|
|
|
if mode == _FLUSH_MODE_OUTPUT
|
|
or mode == _FLUSH_MODE_BYTE_BOUNDARY then
|
|
-- Full flush, also output cache.
|
|
-- Need to pad some bits if cache_bitlen is not multiple of 8.
|
|
local padding_bitlen = mod( 8 - mod( cache_bitlen, 8 ), 8 )
|
|
|
|
if cache_bitlen > 0 then
|
|
-- padding with all 1 bits, mainly because "\000" is not
|
|
-- good to be tranmitted. I do this so "\000" is a little bit
|
|
-- less frequent.
|
|
cache = cache - _pow2[ cache_bitlen ]
|
|
+ _pow2[ cache_bitlen + padding_bitlen ]
|
|
for _ = 1, cache_bitlen, 8 do
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] = _byte_to_char[ mod( cache, 256 ) ]
|
|
cache = (cache - mod( cache, 256 )) / 256
|
|
end
|
|
|
|
cache = 0
|
|
cache_bitlen = 0
|
|
end
|
|
if mode == _FLUSH_MODE_BYTE_BOUNDARY then
|
|
total_bitlen = total_bitlen + padding_bitlen
|
|
return total_bitlen
|
|
end
|
|
end
|
|
|
|
local flushed = table_concat( buffer )
|
|
buffer = {}
|
|
buffer_size = 0
|
|
result_buffer[ table.getn( result_buffer ) + 1 ] = flushed
|
|
|
|
if mode == _FLUSH_MODE_MEMORY_CLEANUP then
|
|
return total_bitlen
|
|
else
|
|
return total_bitlen, table_concat( result_buffer )
|
|
end
|
|
end
|
|
|
|
return WriteBits, WriteString, FlushWriter
|
|
end
|
|
|
|
-- Push an element into a max heap
|
|
-- @param heap A max heap whose max element is at index 1.
|
|
-- @param e The element to be pushed. Assume element "e" is a table
|
|
-- and comparison is done via its first entry e[1]
|
|
-- @param heap_size current number of elements in the heap.
|
|
-- NOTE: There may be some garbage stored in
|
|
-- heap[heap_size+1], heap[heap_size+2], etc..
|
|
-- @return nil
|
|
local function MinHeapPush( heap, e, heap_size )
|
|
heap_size = heap_size + 1
|
|
heap[ heap_size ] = e
|
|
local value = e[ 1 ]
|
|
local pos = heap_size
|
|
local parent_pos = (pos - mod( pos, 2 )) / 2
|
|
|
|
while (parent_pos >= 1 and heap[ parent_pos ][ 1 ] > value) do
|
|
local t = heap[ parent_pos ]
|
|
heap[ parent_pos ] = e
|
|
heap[ pos ] = t
|
|
pos = parent_pos
|
|
parent_pos = (parent_pos - mod( parent_pos, 2 )) / 2
|
|
end
|
|
end
|
|
|
|
-- Pop an element from a max heap
|
|
-- @param heap A max heap whose max element is at index 1.
|
|
-- @param heap_size current number of elements in the heap.
|
|
-- @return the poped element
|
|
-- Note: This function does not change table size of "heap" to save CPU time.
|
|
local function MinHeapPop( heap, heap_size )
|
|
local top = heap[ 1 ]
|
|
local e = heap[ heap_size ]
|
|
local value = e[ 1 ]
|
|
heap[ 1 ] = e
|
|
heap[ heap_size ] = top
|
|
heap_size = heap_size - 1
|
|
|
|
local pos = 1
|
|
local left_child_pos = pos * 2
|
|
local right_child_pos = left_child_pos + 1
|
|
|
|
while (left_child_pos <= heap_size) do
|
|
local left_child = heap[ left_child_pos ]
|
|
if (right_child_pos <= heap_size
|
|
and heap[ right_child_pos ][ 1 ] < left_child[ 1 ]) then
|
|
local right_child = heap[ right_child_pos ]
|
|
if right_child[ 1 ] < value then
|
|
heap[ right_child_pos ] = e
|
|
heap[ pos ] = right_child
|
|
pos = right_child_pos
|
|
left_child_pos = pos * 2
|
|
right_child_pos = left_child_pos + 1
|
|
else
|
|
break
|
|
end
|
|
else
|
|
if left_child[ 1 ] < value then
|
|
heap[ left_child_pos ] = e
|
|
heap[ pos ] = left_child
|
|
pos = left_child_pos
|
|
left_child_pos = pos * 2
|
|
right_child_pos = left_child_pos + 1
|
|
else
|
|
break
|
|
end
|
|
end
|
|
end
|
|
|
|
return top
|
|
end
|
|
|
|
-- Deflate defines a special huffman tree, which is unique once the bit length
|
|
-- of huffman code of all symbols are known.
|
|
-- @param bitlen_count Number of symbols with a specific bitlen
|
|
-- @param symbol_bitlen The bit length of a symbol
|
|
-- @param max_symbol The max symbol among all symbols,
|
|
-- which is (number of symbols - 1)
|
|
-- @param max_bitlen The max huffman bit length among all symbols.
|
|
-- @return The huffman code of all symbols.
|
|
local function GetHuffmanCodeFromBitlen( bitlen_counts, symbol_bitlens
|
|
, max_symbol, max_bitlen )
|
|
local huffman_code = 0
|
|
local next_codes = {}
|
|
local symbol_huffman_codes = {}
|
|
for bitlen = 1, max_bitlen do
|
|
huffman_code = (huffman_code + (bitlen_counts[ bitlen - 1 ] or 0)) * 2
|
|
next_codes[ bitlen ] = huffman_code
|
|
end
|
|
for symbol = 0, max_symbol do
|
|
local bitlen = symbol_bitlens[ symbol ]
|
|
if bitlen then
|
|
huffman_code = next_codes[ bitlen ]
|
|
next_codes[ bitlen ] = huffman_code + 1
|
|
|
|
-- Reverse the bits of huffman code,
|
|
-- because most signifant bits of huffman code
|
|
-- is stored first into the compressed data.
|
|
-- @see RFC1951 Page5 Section 3.1.1
|
|
if bitlen <= 9 then -- Have cached reverse for small bitlen.
|
|
symbol_huffman_codes[ symbol ] =
|
|
_reverse_bits_tbl[ bitlen ][ huffman_code ]
|
|
else
|
|
local reverse = 0
|
|
for _ = 1, bitlen do
|
|
reverse = reverse - mod( reverse, 2 )
|
|
+ (((mod( reverse, 2 ) == 1)
|
|
or mod( huffman_code, 2 ) == 1) and 1 or 0)
|
|
huffman_code = (huffman_code - mod( huffman_code, 2 )) / 2
|
|
reverse = reverse * 2
|
|
end
|
|
symbol_huffman_codes[ symbol ] = (reverse - mod( reverse, 2 )) / 2
|
|
end
|
|
end
|
|
end
|
|
return symbol_huffman_codes
|
|
end
|
|
|
|
-- A helper function to sort heap elements
|
|
-- a[1], b[1] is the huffman frequency
|
|
-- a[2], b[2] is the symbol value.
|
|
local function SortByFirstThenSecond( a, b )
|
|
return a[ 1 ] < b[ 1 ] or
|
|
(a[ 1 ] == b[ 1 ] and a[ 2 ] < b[ 2 ])
|
|
end
|
|
|
|
-- Calculate the huffman bit length and huffman code.
|
|
-- @param symbol_count: A table whose table key is the symbol, and table value
|
|
-- is the symbol frenquency (nil means 0 frequency).
|
|
-- @param max_bitlen: See description of return value.
|
|
-- @param max_symbol: The maximum symbol
|
|
-- @return a table whose key is the symbol, and the value is the huffman bit
|
|
-- bit length. We guarantee that all bit length <= max_bitlen.
|
|
-- For 0<=symbol<=max_symbol, table value could be nil if the frequency
|
|
-- of the symbol is 0 or nil.
|
|
-- @return a table whose key is the symbol, and the value is the huffman code.
|
|
-- @return a number indicating the maximum symbol whose bitlen is not 0.
|
|
local function GetHuffmanBitlenAndCode( symbol_counts, max_bitlen, max_symbol )
|
|
local heap_size
|
|
local max_non_zero_bitlen_symbol = -1
|
|
local leafs = {}
|
|
local heap = {}
|
|
local symbol_bitlens = {}
|
|
local symbol_codes = {}
|
|
local bitlen_counts = {}
|
|
|
|
--[[
|
|
tree[1]: weight, temporarily used as parent and bitLengths
|
|
tree[2]: symbol
|
|
tree[3]: left child
|
|
tree[4]: right child
|
|
--]]
|
|
local number_unique_symbols = 0
|
|
for symbol, count in pairs( symbol_counts ) do
|
|
number_unique_symbols = number_unique_symbols + 1
|
|
leafs[ number_unique_symbols ] = { count, symbol }
|
|
end
|
|
|
|
if (number_unique_symbols == 0) then
|
|
-- no code.
|
|
return {}, {}, -1
|
|
elseif (number_unique_symbols == 1) then
|
|
-- Only one code. In this case, its huffman code
|
|
-- needs to be assigned as 0, and bit length is 1.
|
|
-- This is the only case that the return result
|
|
-- represents an imcomplete huffman tree.
|
|
local symbol = leafs[ 1 ][ 2 ]
|
|
symbol_bitlens[ symbol ] = 1
|
|
symbol_codes[ symbol ] = 0
|
|
return symbol_bitlens, symbol_codes, symbol
|
|
else
|
|
table_sort( leafs, SortByFirstThenSecond )
|
|
heap_size = number_unique_symbols
|
|
for i = 1, heap_size do
|
|
heap[ i ] = leafs[ i ]
|
|
end
|
|
|
|
while (heap_size > 1) do
|
|
-- Note: pop does not change table size of heap
|
|
local leftChild = MinHeapPop( heap, heap_size )
|
|
heap_size = heap_size - 1
|
|
local rightChild = MinHeapPop( heap, heap_size )
|
|
heap_size = heap_size - 1
|
|
local newNode =
|
|
{ leftChild[ 1 ] + rightChild[ 1 ], -1, leftChild, rightChild }
|
|
MinHeapPush( heap, newNode, heap_size )
|
|
heap_size = heap_size + 1
|
|
end
|
|
|
|
-- Number of leafs whose bit length is greater than max_len.
|
|
local number_bitlen_overflow = 0
|
|
|
|
-- Calculate bit length of all nodes
|
|
local fifo = { heap[ 1 ], 0, 0, 0 } -- preallocate some spaces.
|
|
local fifo_size = 1
|
|
local index = 1
|
|
heap[ 1 ][ 1 ] = 0
|
|
while (index <= fifo_size) do -- Breath first search
|
|
local e = fifo[ index ]
|
|
local bitlen = e[ 1 ]
|
|
local symbol = e[ 2 ]
|
|
local left_child = e[ 3 ]
|
|
local right_child = e[ 4 ]
|
|
if left_child then
|
|
fifo_size = fifo_size + 1
|
|
fifo[ fifo_size ] = left_child
|
|
left_child[ 1 ] = bitlen + 1
|
|
end
|
|
if right_child then
|
|
fifo_size = fifo_size + 1
|
|
fifo[ fifo_size ] = right_child
|
|
right_child[ 1 ] = bitlen + 1
|
|
end
|
|
index = index + 1
|
|
|
|
if (bitlen > max_bitlen) then
|
|
number_bitlen_overflow = number_bitlen_overflow + 1
|
|
bitlen = max_bitlen
|
|
end
|
|
if symbol >= 0 then
|
|
symbol_bitlens[ symbol ] = bitlen
|
|
max_non_zero_bitlen_symbol =
|
|
(symbol > max_non_zero_bitlen_symbol)
|
|
and symbol or max_non_zero_bitlen_symbol
|
|
bitlen_counts[ bitlen ] = (bitlen_counts[ bitlen ] or 0) + 1
|
|
end
|
|
end
|
|
|
|
-- Resolve bit length overflow
|
|
-- @see ZLib/trees.c:gen_bitlen(s, desc), for reference
|
|
if (number_bitlen_overflow > 0) then
|
|
repeat
|
|
local bitlen = max_bitlen - 1
|
|
while ((bitlen_counts[ bitlen ] or 0) == 0) do
|
|
bitlen = bitlen - 1
|
|
end
|
|
-- move one leaf down the tree
|
|
bitlen_counts[ bitlen ] = bitlen_counts[ bitlen ] - 1
|
|
-- move one overflow item as its brother
|
|
bitlen_counts[ bitlen + 1 ] = (bitlen_counts[ bitlen + 1 ] or 0) + 2
|
|
bitlen_counts[ max_bitlen ] = bitlen_counts[ max_bitlen ] - 1
|
|
number_bitlen_overflow = number_bitlen_overflow - 2
|
|
until (number_bitlen_overflow <= 0)
|
|
|
|
index = 1
|
|
for bitlen = max_bitlen, 1, -1 do
|
|
local n = bitlen_counts[ bitlen ] or 0
|
|
while (n > 0) do
|
|
local symbol = leafs[ index ][ 2 ]
|
|
symbol_bitlens[ symbol ] = bitlen
|
|
n = n - 1
|
|
index = index + 1
|
|
end
|
|
end
|
|
end
|
|
|
|
symbol_codes = GetHuffmanCodeFromBitlen( bitlen_counts, symbol_bitlens,
|
|
max_symbol, max_bitlen )
|
|
return symbol_bitlens, symbol_codes, max_non_zero_bitlen_symbol
|
|
end
|
|
end
|
|
|
|
-- Calculate the first huffman header in the dynamic huffman block
|
|
-- @see RFC1951 Page 12
|
|
-- @param lcode_bitlen: The huffman bit length of literal/LZ77_length.
|
|
-- @param max_non_zero_bitlen_lcode: The maximum literal/LZ77_length symbol
|
|
-- whose huffman bit length is not zero.
|
|
-- @param dcode_bitlen: The huffman bit length of LZ77 distance.
|
|
-- @param max_non_zero_bitlen_dcode: The maximum LZ77 distance symbol
|
|
-- whose huffman bit length is not zero.
|
|
-- @return The run length encoded codes.
|
|
-- @return The extra bits. One entry for each rle code that needs extra bits.
|
|
-- (code == 16 or 17 or 18).
|
|
-- @return The count of appearance of each rle codes.
|
|
local function RunLengthEncodeHuffmanBitlen(
|
|
lcode_bitlens,
|
|
max_non_zero_bitlen_lcode,
|
|
dcode_bitlens,
|
|
max_non_zero_bitlen_dcode )
|
|
local rle_code_tblsize = 0
|
|
local rle_codes = {}
|
|
local rle_code_counts = {}
|
|
local rle_extra_bits_tblsize = 0
|
|
local rle_extra_bits = {}
|
|
local prev = nil
|
|
local count = 0
|
|
|
|
-- If there is no distance code, assume one distance code of bit length 0.
|
|
-- RFC1951: One distance code of zero bits means that
|
|
-- there are no distance codes used at all (the data is all literals).
|
|
max_non_zero_bitlen_dcode = (max_non_zero_bitlen_dcode < 0)
|
|
and 0 or max_non_zero_bitlen_dcode
|
|
local max_code = max_non_zero_bitlen_lcode + max_non_zero_bitlen_dcode + 1
|
|
|
|
for code = 0, max_code + 1 do
|
|
local len = (code <= max_non_zero_bitlen_lcode)
|
|
and (lcode_bitlens[ code ] or 0)
|
|
or ((code <= max_code)
|
|
and (dcode_bitlens[ code - max_non_zero_bitlen_lcode - 1 ] or 0) or nil)
|
|
if len == prev then
|
|
count = count + 1
|
|
if len ~= 0 and count == 6 then
|
|
rle_code_tblsize = rle_code_tblsize + 1
|
|
rle_codes[ rle_code_tblsize ] = 16
|
|
rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
|
|
rle_extra_bits[ rle_extra_bits_tblsize ] = 3
|
|
rle_code_counts[ 16 ] = (rle_code_counts[ 16 ] or 0) + 1
|
|
count = 0
|
|
elseif len == 0 and count == 138 then
|
|
rle_code_tblsize = rle_code_tblsize + 1
|
|
rle_codes[ rle_code_tblsize ] = 18
|
|
rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
|
|
rle_extra_bits[ rle_extra_bits_tblsize ] = 127
|
|
rle_code_counts[ 18 ] = (rle_code_counts[ 18 ] or 0) + 1
|
|
count = 0
|
|
end
|
|
else
|
|
if count == 1 then
|
|
rle_code_tblsize = rle_code_tblsize + 1
|
|
rle_codes[ rle_code_tblsize ] = prev
|
|
rle_code_counts[ prev ] = (rle_code_counts[ prev ] or 0) + 1
|
|
elseif count == 2 then
|
|
rle_code_tblsize = rle_code_tblsize + 1
|
|
rle_codes[ rle_code_tblsize ] = prev
|
|
rle_code_tblsize = rle_code_tblsize + 1
|
|
rle_codes[ rle_code_tblsize ] = prev
|
|
rle_code_counts[ prev ] = (rle_code_counts[ prev ] or 0) + 2
|
|
elseif count >= 3 then
|
|
rle_code_tblsize = rle_code_tblsize + 1
|
|
local rleCode = (prev ~= 0) and 16 or (count <= 10 and 17 or 18)
|
|
rle_codes[ rle_code_tblsize ] = rleCode
|
|
rle_code_counts[ rleCode ] = (rle_code_counts[ rleCode ] or 0) + 1
|
|
rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
|
|
rle_extra_bits[ rle_extra_bits_tblsize ] =
|
|
(count <= 10) and (count - 3) or (count - 11)
|
|
end
|
|
|
|
prev = len
|
|
if len and len ~= 0 then
|
|
rle_code_tblsize = rle_code_tblsize + 1
|
|
rle_codes[ rle_code_tblsize ] = len
|
|
rle_code_counts[ len ] = (rle_code_counts[ len ] or 0) + 1
|
|
count = 0
|
|
else
|
|
count = 1
|
|
end
|
|
end
|
|
end
|
|
|
|
return rle_codes, rle_extra_bits, rle_code_counts
|
|
end
|
|
|
|
-- Load the string into a table, in order to speed up LZ77.
|
|
-- Loop unrolled 16 times to speed this function up.
|
|
-- @param str The string to be loaded.
|
|
-- @param t The load destination
|
|
-- @param start str[index] will be the first character to be loaded.
|
|
-- @param end str[index] will be the last character to be loaded
|
|
-- @param offset str[index] will be loaded into t[index-offset]
|
|
-- @return t
|
|
local function LoadStringToTable( str, t, start, stop, offset )
|
|
local i = start - offset
|
|
while i <= stop - 15 - offset do
|
|
t[ i ], t[ i + 1 ], t[ i + 2 ], t[ i + 3 ], t[ i + 4 ], t[ i + 5 ], t[ i + 6 ], t[ i + 7 ], t[ i + 8 ],
|
|
t[ i + 9 ], t[ i + 10 ], t[ i + 11 ], t[ i + 12 ], t[ i + 13 ], t[ i + 14 ], t[ i + 15 ] =
|
|
string_byte( str, i + offset, i + 15 + offset )
|
|
i = i + 16
|
|
end
|
|
while (i <= stop - offset) do
|
|
t[ i ] = string_byte( str, i + offset, i + offset )
|
|
i = i + 1
|
|
end
|
|
return t
|
|
end
|
|
|
|
-- Do LZ77 process. This function uses the majority of the CPU time.
|
|
-- @see zlib/deflate.c:deflate_fast(), zlib/deflate.c:deflate_slow()
|
|
-- @see https://github.com/madler/zlib/blob/master/doc/algorithm.txt
|
|
-- This function uses the algorithms used above. You should read the
|
|
-- algorithm.txt above to understand what is the hash function and the
|
|
-- lazy evaluation.
|
|
--
|
|
-- The special optimization used here is hash functions used here.
|
|
-- The hash function is just the multiplication of the three consective
|
|
-- characters. So if the hash matches, it guarantees 3 characters are matched.
|
|
-- This optimization can be implemented because Lua table is a hash table.
|
|
--
|
|
-- @param level integer that describes compression level.
|
|
-- @param string_table table that stores the value of string to be compressed.
|
|
-- The index of this table starts from 1.
|
|
-- The caller needs to make sure all values needed by this function
|
|
-- are loaded.
|
|
-- Assume "str" is the origin input string into the compressor
|
|
-- str[block_start]..str[block_end+3] needs to be loaded into
|
|
-- string_table[block_start-offset]..string_table[block_end-offset]
|
|
-- If dictionary is presented, the last 258 bytes of the dictionary
|
|
-- needs to be loaded into sing_table[-257..0]
|
|
-- (See more in the description of offset.)
|
|
-- @param hash_tables. The table key is the hash value (0<=hash<=16777216=256^3)
|
|
-- The table value is an array0 that stores the indexes of the
|
|
-- input data string to be compressed, such that
|
|
-- hash == str[index]*str[index+1]*str[index+2]
|
|
-- Indexes are ordered in this array.
|
|
-- @param block_start The indexes of the input data string to be compressed.
|
|
-- that starts the LZ77 block.
|
|
-- @param block_end The indexes of the input data string to be compressed.
|
|
-- that stores the LZ77 block.
|
|
-- @param offset str[index] is stored in string_table[index-offset],
|
|
-- This offset is mainly an optimization to limit the index
|
|
-- of string_table, so lua can access this table quicker.
|
|
-- @param dictionary See LibDeflate:CreateDictionary
|
|
-- @return literal/LZ77_length deflate codes.
|
|
-- @return the extra bits of literal/LZ77_length deflate codes.
|
|
-- @return the count of each literal/LZ77 deflate code.
|
|
-- @return LZ77 distance deflate codes.
|
|
-- @return the extra bits of LZ77 distance deflate codes.
|
|
-- @return the count of each LZ77 distance deflate code.
|
|
local function GetBlockLZ77Result( level, string_table, hash_tables, block_start,
|
|
block_end, offset, dictionary )
|
|
local config = _compression_level_configs[ level ]
|
|
local config_use_lazy
|
|
, config_good_prev_length
|
|
, config_max_lazy_match
|
|
, config_nice_length
|
|
, config_max_hash_chain =
|
|
config[ 1 ], config[ 2 ], config[ 3 ], config[ 4 ], config[ 5 ]
|
|
|
|
local config_max_insert_length = (not config_use_lazy)
|
|
and config_max_lazy_match or 2147483646
|
|
local config_good_hash_chain =
|
|
(config_max_hash_chain - mod( config_max_hash_chain, 4 ) / 4)
|
|
|
|
local hash
|
|
|
|
local dict_hash_tables
|
|
local dict_string_table
|
|
local dict_string_len = 0
|
|
|
|
if dictionary then
|
|
dict_hash_tables = dictionary.hash_tables
|
|
dict_string_table = dictionary.string_table
|
|
dict_string_len = dictionary.strlen
|
|
assert( block_start == 1 )
|
|
if block_end >= block_start and dict_string_len >= 2 then
|
|
hash = dict_string_table[ dict_string_len - 1 ] * 65536
|
|
+ dict_string_table[ dict_string_len ] * 256 + string_table[ 1 ]
|
|
local t = hash_tables[ hash ]
|
|
if not t then
|
|
t = {}; hash_tables[ hash ] = t
|
|
end
|
|
t[ table.getn( t ) + 1 ] = -1
|
|
end
|
|
if block_end >= block_start + 1 and dict_string_len >= 1 then
|
|
hash = dict_string_table[ dict_string_len ] * 65536
|
|
+ string_table[ 1 ] * 256 + string_table[ 2 ]
|
|
local t = hash_tables[ hash ]
|
|
if not t then
|
|
t = {}; hash_tables[ hash ] = t
|
|
end
|
|
t[ table.getn( t ) + 1 ] = 0
|
|
end
|
|
end
|
|
|
|
local dict_string_len_plus3 = dict_string_len + 3
|
|
|
|
hash = (string_table[ block_start - offset ] or 0) * 256
|
|
+ (string_table[ block_start + 1 - offset ] or 0)
|
|
|
|
local lcodes = {}
|
|
local lcode_tblsize = 0
|
|
local lcodes_counts = {}
|
|
local dcodes = {}
|
|
local dcodes_tblsize = 0
|
|
local dcodes_counts = {}
|
|
|
|
local lextra_bits = {}
|
|
local lextra_bits_tblsize = 0
|
|
local dextra_bits = {}
|
|
local dextra_bits_tblsize = 0
|
|
|
|
local match_available = false
|
|
local prev_len
|
|
local prev_dist
|
|
local cur_len = 0
|
|
local cur_dist = 0
|
|
|
|
local index = block_start
|
|
local index_end = block_end + (config_use_lazy and 1 or 0)
|
|
|
|
-- the zlib source code writes separate code for lazy evaluation and
|
|
-- not lazy evaluation, which is easier to understand.
|
|
-- I put them together, so it is a bit harder to understand.
|
|
-- because I think this is easier for me to maintain it.
|
|
while (index <= index_end) do
|
|
local string_table_index = index - offset
|
|
local offset_minus_three = offset - 3
|
|
prev_len = cur_len
|
|
prev_dist = cur_dist
|
|
cur_len = 0
|
|
|
|
hash = mod( hash * 256 + (string_table[ string_table_index + 2 ] or 0), 16777216 )
|
|
|
|
local chain_index
|
|
local cur_chain
|
|
local hash_chain = hash_tables[ hash ]
|
|
local chain_old_size
|
|
if not hash_chain then
|
|
chain_old_size = 0
|
|
hash_chain = {}
|
|
hash_tables[ hash ] = hash_chain
|
|
if dict_hash_tables then
|
|
cur_chain = dict_hash_tables[ hash ]
|
|
chain_index = cur_chain and table.getn( cur_chain ) or 0
|
|
else
|
|
chain_index = 0
|
|
end
|
|
else
|
|
chain_old_size = table.getn( hash_chain )
|
|
cur_chain = hash_chain
|
|
chain_index = chain_old_size
|
|
end
|
|
|
|
if index <= block_end then
|
|
hash_chain[ chain_old_size + 1 ] = index
|
|
end
|
|
|
|
if (chain_index > 0 and index + 2 <= block_end
|
|
and (not config_use_lazy or prev_len < config_max_lazy_match)) then
|
|
local depth =
|
|
(config_use_lazy and prev_len >= config_good_prev_length)
|
|
and config_good_hash_chain or config_max_hash_chain
|
|
|
|
local max_len_minus_one = block_end - index
|
|
max_len_minus_one = (max_len_minus_one >= 257) and 257 or max_len_minus_one
|
|
max_len_minus_one = max_len_minus_one + string_table_index
|
|
local string_table_index_plus_three = string_table_index + 3
|
|
|
|
while chain_index >= 1 and depth > 0 do
|
|
local prev = cur_chain[ chain_index ]
|
|
|
|
if index - prev > 32768 then
|
|
break
|
|
end
|
|
if prev < index then
|
|
local sj = string_table_index_plus_three
|
|
|
|
if prev >= -257 then
|
|
local pj = prev - offset_minus_three
|
|
while (sj <= max_len_minus_one
|
|
and string_table[ pj ]
|
|
== string_table[ sj ]) do
|
|
sj = sj + 1
|
|
pj = pj + 1
|
|
end
|
|
else
|
|
local pj = dict_string_len_plus3 + prev
|
|
while (sj <= max_len_minus_one
|
|
and dict_string_table[ pj ]
|
|
== string_table[ sj ]) do
|
|
sj = sj + 1
|
|
pj = pj + 1
|
|
end
|
|
end
|
|
local j = sj - string_table_index
|
|
if j > cur_len then
|
|
cur_len = j
|
|
cur_dist = index - prev
|
|
end
|
|
if cur_len >= config_nice_length then
|
|
break
|
|
end
|
|
end
|
|
|
|
chain_index = chain_index - 1
|
|
depth = depth - 1
|
|
if chain_index == 0 and prev > 0 and dict_hash_tables then
|
|
cur_chain = dict_hash_tables[ hash ]
|
|
chain_index = cur_chain and table.getn( cur_chain ) or 0
|
|
end
|
|
end
|
|
end
|
|
|
|
if not config_use_lazy then
|
|
prev_len, prev_dist = cur_len, cur_dist
|
|
end
|
|
if ((not config_use_lazy or match_available)
|
|
and (prev_len > 3 or (prev_len == 3 and prev_dist < 4096))
|
|
and cur_len <= prev_len) then
|
|
local code = _length_to_deflate_code[ prev_len ]
|
|
local length_extra_bits_bitlen =
|
|
_length_to_deflate_extra_bitlen[ prev_len ]
|
|
local dist_code, dist_extra_bits_bitlen, dist_extra_bits
|
|
if prev_dist <= 256 then -- have cached code for small distance.
|
|
dist_code = _dist256_to_deflate_code[ prev_dist ]
|
|
dist_extra_bits = _dist256_to_deflate_extra_bits[ prev_dist ]
|
|
dist_extra_bits_bitlen =
|
|
_dist256_to_deflate_extra_bitlen[ prev_dist ]
|
|
else
|
|
dist_code = 16
|
|
dist_extra_bits_bitlen = 7
|
|
local a = 384
|
|
local b = 512
|
|
|
|
while true do
|
|
if prev_dist <= a then
|
|
dist_extra_bits = mod( prev_dist - (b / 2) - 1, b / 4 )
|
|
break
|
|
elseif prev_dist <= b then
|
|
dist_extra_bits = mod( prev_dist - (b / 2) - 1, b / 4 )
|
|
dist_code = dist_code + 1
|
|
break
|
|
else
|
|
dist_code = dist_code + 2
|
|
dist_extra_bits_bitlen = dist_extra_bits_bitlen + 1
|
|
a = a * 2
|
|
b = b * 2
|
|
end
|
|
end
|
|
end
|
|
lcode_tblsize = lcode_tblsize + 1
|
|
lcodes[ lcode_tblsize ] = code
|
|
lcodes_counts[ code ] = (lcodes_counts[ code ] or 0) + 1
|
|
|
|
dcodes_tblsize = dcodes_tblsize + 1
|
|
dcodes[ dcodes_tblsize ] = dist_code
|
|
dcodes_counts[ dist_code ] = (dcodes_counts[ dist_code ] or 0) + 1
|
|
|
|
if length_extra_bits_bitlen > 0 then
|
|
local lenExtraBits = _length_to_deflate_extra_bits[ prev_len ]
|
|
lextra_bits_tblsize = lextra_bits_tblsize + 1
|
|
lextra_bits[ lextra_bits_tblsize ] = lenExtraBits
|
|
end
|
|
if dist_extra_bits_bitlen > 0 then
|
|
dextra_bits_tblsize = dextra_bits_tblsize + 1
|
|
dextra_bits[ dextra_bits_tblsize ] = dist_extra_bits
|
|
end
|
|
|
|
for i = index + 1, index + prev_len - (config_use_lazy and 2 or 1) do
|
|
hash = mod( hash * 256 + (string_table[ i - offset + 2 ] or 0), 16777216 )
|
|
if prev_len <= config_max_insert_length then
|
|
hash_chain = hash_tables[ hash ]
|
|
if not hash_chain then
|
|
hash_chain = {}
|
|
hash_tables[ hash ] = hash_chain
|
|
end
|
|
hash_chain[ table.getn( hash_chain ) + 1 ] = i
|
|
end
|
|
end
|
|
index = index + prev_len - (config_use_lazy and 1 or 0)
|
|
match_available = false
|
|
elseif (not config_use_lazy) or match_available then
|
|
local code = string_table[ config_use_lazy
|
|
and (string_table_index - 1) or string_table_index ]
|
|
lcode_tblsize = lcode_tblsize + 1
|
|
lcodes[ lcode_tblsize ] = code
|
|
lcodes_counts[ code ] = (lcodes_counts[ code ] or 0) + 1
|
|
index = index + 1
|
|
else
|
|
match_available = true
|
|
index = index + 1
|
|
end
|
|
end
|
|
|
|
-- Write "end of block" symbol
|
|
lcode_tblsize = lcode_tblsize + 1
|
|
lcodes[ lcode_tblsize ] = 256
|
|
lcodes_counts[ 256 ] = (lcodes_counts[ 256 ] or 0) + 1
|
|
|
|
return lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
|
|
, dcodes_counts
|
|
end
|
|
|
|
-- Get the header data of dynamic block.
|
|
-- @param lcodes_count The count of each literal/LZ77_length codes.
|
|
-- @param dcodes_count The count of each Lz77 distance codes.
|
|
-- @return a lots of stuffs.
|
|
-- @see RFC1951 Page 12
|
|
local function GetBlockDynamicHuffmanHeader( lcodes_counts, dcodes_counts )
|
|
local lcodes_huffman_bitlens, lcodes_huffman_codes
|
|
, max_non_zero_bitlen_lcode =
|
|
GetHuffmanBitlenAndCode( lcodes_counts, 15, 285 )
|
|
local dcodes_huffman_bitlens, dcodes_huffman_codes
|
|
, max_non_zero_bitlen_dcode =
|
|
GetHuffmanBitlenAndCode( dcodes_counts, 15, 29 )
|
|
|
|
local rle_deflate_codes, rle_extra_bits, rle_codes_counts =
|
|
RunLengthEncodeHuffmanBitlen( lcodes_huffman_bitlens
|
|
, max_non_zero_bitlen_lcode, dcodes_huffman_bitlens
|
|
, max_non_zero_bitlen_dcode )
|
|
|
|
local rle_codes_huffman_bitlens, rle_codes_huffman_codes =
|
|
GetHuffmanBitlenAndCode( rle_codes_counts, 7, 18 )
|
|
|
|
local HCLEN = 0
|
|
for i = 1, 19 do
|
|
local symbol = _rle_codes_huffman_bitlen_order[ i ]
|
|
local length = rle_codes_huffman_bitlens[ symbol ] or 0
|
|
if length ~= 0 then
|
|
HCLEN = i
|
|
end
|
|
end
|
|
|
|
HCLEN = HCLEN - 4
|
|
local HLIT = max_non_zero_bitlen_lcode + 1 - 257
|
|
local HDIST = max_non_zero_bitlen_dcode + 1 - 1
|
|
if HDIST < 0 then HDIST = 0 end
|
|
|
|
return HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
|
|
, rle_codes_huffman_codes, rle_deflate_codes, rle_extra_bits
|
|
, lcodes_huffman_bitlens, lcodes_huffman_codes
|
|
, dcodes_huffman_bitlens, dcodes_huffman_codes
|
|
end
|
|
|
|
-- Get the size of dynamic block without writing any bits into the writer.
|
|
-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
|
|
-- @return the bit length of the dynamic block
|
|
local function GetDynamicHuffmanBlockSize( lcodes, dcodes, HCLEN
|
|
, rle_codes_huffman_bitlens, rle_deflate_codes
|
|
, lcodes_huffman_bitlens, dcodes_huffman_bitlens )
|
|
local block_bitlen = 17 -- 1+2+5+5+4
|
|
block_bitlen = block_bitlen + (HCLEN + 4) * 3
|
|
|
|
for i = 1, table.getn( rle_deflate_codes ) do
|
|
local code = rle_deflate_codes[ i ]
|
|
block_bitlen = block_bitlen + rle_codes_huffman_bitlens[ code ]
|
|
if code >= 16 then
|
|
block_bitlen = block_bitlen +
|
|
((code == 16) and 2 or (code == 17 and 3 or 7))
|
|
end
|
|
end
|
|
|
|
local length_code_count = 0
|
|
for i = 1, table.getn( lcodes ) do
|
|
local code = lcodes[ i ]
|
|
local huffman_bitlen = lcodes_huffman_bitlens[ code ]
|
|
block_bitlen = block_bitlen + huffman_bitlen
|
|
if code > 256 then -- Length code
|
|
length_code_count = length_code_count + 1
|
|
if code > 264 and code < 285 then -- Length code with extra bits
|
|
local extra_bits_bitlen =
|
|
_literal_deflate_code_to_extra_bitlen[ code - 256 ]
|
|
block_bitlen = block_bitlen + extra_bits_bitlen
|
|
end
|
|
local dist_code = dcodes[ length_code_count ]
|
|
local dist_huffman_bitlen = dcodes_huffman_bitlens[ dist_code ]
|
|
block_bitlen = block_bitlen + dist_huffman_bitlen
|
|
|
|
if dist_code > 3 then -- dist code with extra bits
|
|
local dist_extra_bits_bitlen = (dist_code - mod( dist_code, 2 )) / 2 - 1
|
|
block_bitlen = block_bitlen + dist_extra_bits_bitlen
|
|
end
|
|
end
|
|
end
|
|
return block_bitlen
|
|
end
|
|
|
|
-- Write dynamic block.
|
|
-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
|
|
local function CompressDynamicHuffmanBlock( WriteBits, is_last_block
|
|
, lcodes, lextra_bits, dcodes, dextra_bits, HLIT, HDIST, HCLEN
|
|
, rle_codes_huffman_bitlens, rle_codes_huffman_codes
|
|
, rle_deflate_codes, rle_extra_bits
|
|
, lcodes_huffman_bitlens, lcodes_huffman_codes
|
|
, dcodes_huffman_bitlens, dcodes_huffman_codes )
|
|
WriteBits( is_last_block and 1 or 0, 1 ) -- Last block identifier
|
|
WriteBits( 2, 2 ) -- Dynamic Huffman block identifier
|
|
|
|
WriteBits( HLIT, 5 )
|
|
WriteBits( HDIST, 5 )
|
|
WriteBits( HCLEN, 4 )
|
|
|
|
for i = 1, HCLEN + 4 do
|
|
local symbol = _rle_codes_huffman_bitlen_order[ i ]
|
|
local length = rle_codes_huffman_bitlens[ symbol ] or 0
|
|
WriteBits( length, 3 )
|
|
end
|
|
|
|
local rleExtraBitsIndex = 1
|
|
for i = 1, table.getn( rle_deflate_codes ) do
|
|
local code = rle_deflate_codes[ i ]
|
|
WriteBits( rle_codes_huffman_codes[ code ]
|
|
, rle_codes_huffman_bitlens[ code ] )
|
|
if code >= 16 then
|
|
local extraBits = rle_extra_bits[ rleExtraBitsIndex ]
|
|
WriteBits( extraBits, (code == 16) and 2 or (code == 17 and 3 or 7) )
|
|
rleExtraBitsIndex = rleExtraBitsIndex + 1
|
|
end
|
|
end
|
|
|
|
local length_code_count = 0
|
|
local length_code_with_extra_count = 0
|
|
local dist_code_with_extra_count = 0
|
|
|
|
for i = 1, table.getn( lcodes ) do
|
|
local deflate_codee = lcodes[ i ]
|
|
local huffman_code = lcodes_huffman_codes[ deflate_codee ]
|
|
local huffman_bitlen = lcodes_huffman_bitlens[ deflate_codee ]
|
|
WriteBits( huffman_code, huffman_bitlen )
|
|
if deflate_codee > 256 then -- Length code
|
|
length_code_count = length_code_count + 1
|
|
if deflate_codee > 264 and deflate_codee < 285 then
|
|
-- Length code with extra bits
|
|
length_code_with_extra_count = length_code_with_extra_count + 1
|
|
local extra_bits = lextra_bits[ length_code_with_extra_count ]
|
|
local extra_bits_bitlen =
|
|
_literal_deflate_code_to_extra_bitlen[ deflate_codee - 256 ]
|
|
WriteBits( extra_bits, extra_bits_bitlen )
|
|
end
|
|
-- Write distance code
|
|
local dist_deflate_code = dcodes[ length_code_count ]
|
|
local dist_huffman_code = dcodes_huffman_codes[ dist_deflate_code ]
|
|
local dist_huffman_bitlen =
|
|
dcodes_huffman_bitlens[ dist_deflate_code ]
|
|
WriteBits( dist_huffman_code, dist_huffman_bitlen )
|
|
|
|
if dist_deflate_code > 3 then -- dist code with extra bits
|
|
dist_code_with_extra_count = dist_code_with_extra_count + 1
|
|
local dist_extra_bits = dextra_bits[ dist_code_with_extra_count ]
|
|
local dist_extra_bits_bitlen =
|
|
(dist_deflate_code - mod( dist_deflate_code, 2 )) / 2 - 1
|
|
WriteBits( dist_extra_bits, dist_extra_bits_bitlen )
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
-- Get the size of fixed block without writing any bits into the writer.
|
|
-- @param lcodes literal/LZ77_length deflate codes
|
|
-- @param decodes LZ77 distance deflate codes
|
|
-- @return the bit length of the fixed block
|
|
local function GetFixedHuffmanBlockSize( lcodes, dcodes )
|
|
local block_bitlen = 3
|
|
local length_code_count = 0
|
|
for i = 1, table.getn( lcodes ) do
|
|
local code = lcodes[ i ]
|
|
local huffman_bitlen = _fix_block_literal_huffman_bitlen[ code ]
|
|
block_bitlen = block_bitlen + huffman_bitlen
|
|
if code > 256 then -- Length code
|
|
length_code_count = length_code_count + 1
|
|
if code > 264 and code < 285 then -- Length code with extra bits
|
|
local extra_bits_bitlen =
|
|
_literal_deflate_code_to_extra_bitlen[ code - 256 ]
|
|
block_bitlen = block_bitlen + extra_bits_bitlen
|
|
end
|
|
local dist_code = dcodes[ length_code_count ]
|
|
block_bitlen = block_bitlen + 5
|
|
|
|
if dist_code > 3 then -- dist code with extra bits
|
|
local dist_extra_bits_bitlen =
|
|
(dist_code - mod( dist_code, 2 )) / 2 - 1
|
|
block_bitlen = block_bitlen + dist_extra_bits_bitlen
|
|
end
|
|
end
|
|
end
|
|
return block_bitlen
|
|
end
|
|
|
|
-- Write fixed block.
|
|
-- @param lcodes literal/LZ77_length deflate codes
|
|
-- @param decodes LZ77 distance deflate codes
|
|
local function CompressFixedHuffmanBlock( WriteBits, is_last_block,
|
|
lcodes, lextra_bits, dcodes, dextra_bits )
|
|
WriteBits( is_last_block and 1 or 0, 1 ) -- Last block identifier
|
|
WriteBits( 1, 2 ) -- Fixed Huffman block identifier
|
|
local length_code_count = 0
|
|
local length_code_with_extra_count = 0
|
|
local dist_code_with_extra_count = 0
|
|
for i = 1, table.getn( lcodes ) do
|
|
local deflate_code = lcodes[ i ]
|
|
local huffman_code = _fix_block_literal_huffman_code[ deflate_code ]
|
|
local huffman_bitlen = _fix_block_literal_huffman_bitlen[ deflate_code ]
|
|
WriteBits( huffman_code, huffman_bitlen )
|
|
if deflate_code > 256 then -- Length code
|
|
length_code_count = length_code_count + 1
|
|
if deflate_code > 264 and deflate_code < 285 then
|
|
-- Length code with extra bits
|
|
length_code_with_extra_count = length_code_with_extra_count + 1
|
|
local extra_bits = lextra_bits[ length_code_with_extra_count ]
|
|
local extra_bits_bitlen =
|
|
_literal_deflate_code_to_extra_bitlen[ deflate_code - 256 ]
|
|
WriteBits( extra_bits, extra_bits_bitlen )
|
|
end
|
|
-- Write distance code
|
|
local dist_code = dcodes[ length_code_count ]
|
|
local dist_huffman_code = _fix_block_dist_huffman_code[ dist_code ]
|
|
WriteBits( dist_huffman_code, 5 )
|
|
|
|
if dist_code > 3 then -- dist code with extra bits
|
|
dist_code_with_extra_count = dist_code_with_extra_count + 1
|
|
local dist_extra_bits = dextra_bits[ dist_code_with_extra_count ]
|
|
local dist_extra_bits_bitlen = (dist_code - mod( dist_code, 2 )) / 2 - 1
|
|
WriteBits( dist_extra_bits, dist_extra_bits_bitlen )
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
-- Get the size of store block without writing any bits into the writer.
|
|
-- @param block_start The start index of the origin input string
|
|
-- @param block_end The end index of the origin input string
|
|
-- @param Total bit lens had been written into the compressed result before,
|
|
-- because store block needs to shift to byte boundary.
|
|
-- @return the bit length of the fixed block
|
|
local function GetStoreBlockSize( block_start, block_end, total_bitlen )
|
|
assert( block_end - block_start + 1 <= 65535 )
|
|
local block_bitlen = 3
|
|
total_bitlen = total_bitlen + 3
|
|
local padding_bitlen = mod( 8 - mod( total_bitlen, 8 ), 8 )
|
|
block_bitlen = block_bitlen + padding_bitlen
|
|
block_bitlen = block_bitlen + 32
|
|
block_bitlen = block_bitlen + (block_end - block_start + 1) * 8
|
|
return block_bitlen
|
|
end
|
|
|
|
-- Write the store block.
|
|
-- @param ... lots of stuffs
|
|
-- @return nil
|
|
local function CompressStoreBlock( WriteBits, WriteString, is_last_block, str
|
|
, block_start, block_end, total_bitlen )
|
|
assert( block_end - block_start + 1 <= 65535 )
|
|
WriteBits( is_last_block and 1 or 0, 1 ) -- Last block identifer.
|
|
WriteBits( 0, 2 ) -- Store block identifier.
|
|
total_bitlen = total_bitlen + 3
|
|
local padding_bitlen = mod( 8 - mod( total_bitlen, 8 ), 8 )
|
|
if padding_bitlen > 0 then
|
|
WriteBits( _pow2[ padding_bitlen ] - 1, padding_bitlen )
|
|
end
|
|
local size = block_end - block_start + 1
|
|
WriteBits( size, 16 )
|
|
|
|
-- Write size's one's complement
|
|
local comp = (255 - mod( size, 256 )) + (255 - (size - mod( size, 256 )) / 256) * 256
|
|
WriteBits( comp, 16 )
|
|
|
|
WriteString( string.sub( str, block_start, block_end ) )
|
|
end
|
|
|
|
-- Do the deflate
|
|
-- Currently using a simple way to determine the block size
|
|
-- (This is why the compression ratio is little bit worse than zlib when
|
|
-- the input size is very large
|
|
-- The first block is 64KB, the following block is 32KB.
|
|
-- After each block, there is a memory cleanup operation.
|
|
-- This is not a fast operation, but it is needed to save memory usage, so
|
|
-- the memory usage does not grow unboundly. If the data size is less than
|
|
-- 64KB, then memory cleanup won't happen.
|
|
-- This function determines whether to use store/fixed/dynamic blocks by
|
|
-- calculating the block size of each block type and chooses the smallest one.
|
|
local function Deflate( configs, WriteBits, WriteString, FlushWriter, str
|
|
, dictionary )
|
|
local string_table = {}
|
|
local hash_tables = {}
|
|
local is_last_block = nil
|
|
local block_start
|
|
local block_end
|
|
local bitlen_written
|
|
local total_bitlen = FlushWriter( _FLUSH_MODE_NO_FLUSH )
|
|
local strlen = string.len( str )
|
|
local offset
|
|
|
|
local level
|
|
local strategy
|
|
if configs then
|
|
if configs.level then
|
|
level = configs.level
|
|
end
|
|
if configs.strategy then
|
|
strategy = configs.strategy
|
|
end
|
|
end
|
|
|
|
if not level then
|
|
if strlen < 2048 then
|
|
level = 7
|
|
elseif strlen > 65536 then
|
|
level = 3
|
|
else
|
|
level = 5
|
|
end
|
|
end
|
|
|
|
while not is_last_block do
|
|
if not block_start then
|
|
block_start = 1
|
|
block_end = 64 * 1024 - 1
|
|
offset = 0
|
|
else
|
|
block_start = block_end + 1
|
|
block_end = block_end + 32 * 1024
|
|
offset = block_start - 32 * 1024 - 1
|
|
end
|
|
|
|
if block_end >= strlen then
|
|
block_end = strlen
|
|
is_last_block = true
|
|
else
|
|
is_last_block = false
|
|
end
|
|
|
|
local lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
|
|
, dcodes_counts
|
|
|
|
local HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
|
|
, rle_codes_huffman_codes, rle_deflate_codes
|
|
, rle_extra_bits, lcodes_huffman_bitlens, lcodes_huffman_codes
|
|
, dcodes_huffman_bitlens, dcodes_huffman_codes
|
|
|
|
local dynamic_block_bitlen
|
|
local fixed_block_bitlen
|
|
local store_block_bitlen
|
|
|
|
if level ~= 0 then
|
|
-- GetBlockLZ77 needs block_start to block_end+3 to be loaded.
|
|
LoadStringToTable( str, string_table, block_start, block_end + 3
|
|
, offset )
|
|
if block_start == 1 and dictionary then
|
|
local dict_string_table = dictionary.string_table
|
|
local dict_strlen = dictionary.strlen
|
|
for i = 0, (-dict_strlen + 1) < -257
|
|
and -257 or (-dict_strlen + 1), -1 do
|
|
string_table[ i ] = dict_string_table[ dict_strlen + i ]
|
|
end
|
|
end
|
|
|
|
if strategy == "huffman_only" then
|
|
lcodes = {}
|
|
LoadStringToTable( str, lcodes, block_start, block_end
|
|
, block_start - 1 )
|
|
lextra_bits = {}
|
|
lcodes_counts = {}
|
|
lcodes[ block_end - block_start + 2 ] = 256 -- end of block
|
|
for i = 1, block_end - block_start + 2 do
|
|
local code = lcodes[ i ]
|
|
lcodes_counts[ code ] = (lcodes_counts[ code ] or 0) + 1
|
|
end
|
|
dcodes = {}
|
|
dextra_bits = {}
|
|
dcodes_counts = {}
|
|
else
|
|
lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
|
|
, dcodes_counts = GetBlockLZ77Result( level, string_table
|
|
, hash_tables, block_start, block_end, offset, dictionary
|
|
)
|
|
end
|
|
|
|
HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
|
|
, rle_codes_huffman_codes, rle_deflate_codes
|
|
, rle_extra_bits, lcodes_huffman_bitlens, lcodes_huffman_codes
|
|
, dcodes_huffman_bitlens, dcodes_huffman_codes =
|
|
GetBlockDynamicHuffmanHeader( lcodes_counts, dcodes_counts )
|
|
dynamic_block_bitlen = GetDynamicHuffmanBlockSize(
|
|
lcodes, dcodes, HCLEN, rle_codes_huffman_bitlens
|
|
, rle_deflate_codes, lcodes_huffman_bitlens
|
|
, dcodes_huffman_bitlens )
|
|
fixed_block_bitlen = GetFixedHuffmanBlockSize( lcodes, dcodes )
|
|
end
|
|
|
|
store_block_bitlen = GetStoreBlockSize( block_start, block_end
|
|
, total_bitlen )
|
|
|
|
local min_bitlen = store_block_bitlen
|
|
min_bitlen = (fixed_block_bitlen and fixed_block_bitlen < min_bitlen)
|
|
and fixed_block_bitlen or min_bitlen
|
|
min_bitlen = (dynamic_block_bitlen
|
|
and dynamic_block_bitlen < min_bitlen)
|
|
and dynamic_block_bitlen or min_bitlen
|
|
|
|
if level == 0 or (strategy ~= "fixed" and strategy ~= "dynamic" and
|
|
store_block_bitlen == min_bitlen) then
|
|
CompressStoreBlock( WriteBits, WriteString, is_last_block
|
|
, str, block_start, block_end, total_bitlen )
|
|
total_bitlen = total_bitlen + store_block_bitlen
|
|
elseif strategy ~= "dynamic" and (
|
|
strategy == "fixed" or fixed_block_bitlen == min_bitlen) then
|
|
CompressFixedHuffmanBlock( WriteBits, is_last_block,
|
|
lcodes, lextra_bits, dcodes, dextra_bits )
|
|
total_bitlen = total_bitlen + fixed_block_bitlen
|
|
elseif strategy == "dynamic" or dynamic_block_bitlen == min_bitlen then
|
|
CompressDynamicHuffmanBlock( WriteBits, is_last_block, lcodes
|
|
, lextra_bits, dcodes, dextra_bits, HLIT, HDIST, HCLEN
|
|
, rle_codes_huffman_bitlens, rle_codes_huffman_codes
|
|
, rle_deflate_codes, rle_extra_bits
|
|
, lcodes_huffman_bitlens, lcodes_huffman_codes
|
|
, dcodes_huffman_bitlens, dcodes_huffman_codes )
|
|
total_bitlen = total_bitlen + dynamic_block_bitlen
|
|
end
|
|
|
|
if is_last_block then
|
|
bitlen_written = FlushWriter( _FLUSH_MODE_NO_FLUSH )
|
|
else
|
|
bitlen_written = FlushWriter( _FLUSH_MODE_MEMORY_CLEANUP )
|
|
end
|
|
|
|
assert( bitlen_written == total_bitlen )
|
|
|
|
-- Memory clean up, so memory consumption does not always grow linearly
|
|
-- , even if input string is > 64K.
|
|
-- Not a very efficient operation, but this operation won't happen
|
|
-- when the input data size is less than 64K.
|
|
if not is_last_block then
|
|
local j
|
|
if dictionary and block_start == 1 then
|
|
j = 0
|
|
while (string_table[ j ]) do
|
|
string_table[ j ] = nil
|
|
j = j - 1
|
|
end
|
|
end
|
|
dictionary = nil
|
|
j = 1
|
|
for i = block_end - 32767, block_end do
|
|
string_table[ j ] = string_table[ i - offset ]
|
|
j = j + 1
|
|
end
|
|
|
|
for k, t in pairs( hash_tables ) do
|
|
local tSize = table.getn( t )
|
|
if tSize > 0 and block_end + 1 - t[ 1 ] > 32768 then
|
|
if tSize == 1 then
|
|
hash_tables[ k ] = nil
|
|
else
|
|
local new = {}
|
|
local newSize = 0
|
|
for i = 2, tSize do
|
|
j = t[ i ]
|
|
if block_end + 1 - j <= 32768 then
|
|
newSize = newSize + 1
|
|
new[ newSize ] = j
|
|
end
|
|
end
|
|
hash_tables[ k ] = new
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
--- The description to compression configuration table. <br>
|
|
-- Any field can be nil to use its default. <br>
|
|
-- Table with keys other than those below is an invalid table.
|
|
-- @class table
|
|
-- @name compression_configs
|
|
-- @field level The compression level ranged from 0 to 9. 0 is no compression.
|
|
-- 9 is the slowest but best compression. Use nil for default level.
|
|
-- @field strategy The compression strategy. "fixed" to only use fixed deflate
|
|
-- compression block. "dynamic" to only use dynamic block. "huffman_only" to
|
|
-- do no LZ77 compression. Only do huffman compression.
|
|
|
|
|
|
-- @see LibDeflate:CompressDeflate(str, configs)
|
|
-- @see LibDeflate:CompressDeflateWithDict(str, dictionary, configs)
|
|
local function CompressDeflateInternal( str, dictionary, configs )
|
|
local WriteBits, WriteString, FlushWriter = CreateWriter()
|
|
Deflate( configs, WriteBits, WriteString, FlushWriter, str, dictionary )
|
|
local total_bitlen, result = FlushWriter( _FLUSH_MODE_OUTPUT )
|
|
local padding_bitlen = mod( 8 - mod( total_bitlen, 8 ), 8 )
|
|
return result, padding_bitlen
|
|
end
|
|
|
|
-- @see LibDeflate:CompressZlib
|
|
-- @see LibDeflate:CompressZlibWithDict
|
|
local function CompressZlibInternal( str, dictionary, configs )
|
|
local WriteBits, WriteString, FlushWriter = CreateWriter()
|
|
|
|
local CM = 8 -- Compression method
|
|
local CINFO = 7 --Window Size = 32K
|
|
local CMF = CINFO * 16 + CM
|
|
WriteBits( CMF, 8 )
|
|
|
|
local FDIST = dictionary and 1 or 0
|
|
local FLEVEL = 2 -- Default compression
|
|
local FLG = FLEVEL * 64 + FDIST * 32
|
|
local FCHECK = (31 - mod( CMF * 256 + FLG, 31 ))
|
|
-- The FCHECK value must be such that CMF and FLG,
|
|
-- when viewed as a 16-bit unsigned integer stored
|
|
-- in MSB order (CMF*256 + FLG), is a multiple of 31.
|
|
FLG = FLG + FCHECK
|
|
WriteBits( FLG, 8 )
|
|
|
|
if FDIST == 1 then
|
|
local adler32 = dictionary.adler32
|
|
local byte0 = mod( adler32, 256 )
|
|
adler32 = (adler32 - byte0) / 256
|
|
local byte1 = mod( adler32, 256 )
|
|
adler32 = (adler32 - byte1) / 256
|
|
local byte2 = mod( adler32, 256 )
|
|
adler32 = (adler32 - byte2) / 256
|
|
local byte3 = mod( adler32, 256 )
|
|
WriteBits( byte3, 8 )
|
|
WriteBits( byte2, 8 )
|
|
WriteBits( byte1, 8 )
|
|
WriteBits( byte0, 8 )
|
|
end
|
|
|
|
Deflate( configs, WriteBits, WriteString, FlushWriter, str, dictionary )
|
|
FlushWriter( _FLUSH_MODE_BYTE_BOUNDARY )
|
|
|
|
local adler32 = LibDeflate:Adler32( str )
|
|
|
|
-- Most significant byte first
|
|
local byte3 = mod( adler32, 256 )
|
|
adler32 = (adler32 - byte3) / 256
|
|
local byte2 = mod( adler32, 256 )
|
|
adler32 = (adler32 - byte2) / 256
|
|
local byte1 = mod( adler32, 256 )
|
|
adler32 = (adler32 - byte1) / 256
|
|
local byte0 = mod( adler32, 256 )
|
|
|
|
WriteBits( byte0, 8 )
|
|
WriteBits( byte1, 8 )
|
|
WriteBits( byte2, 8 )
|
|
WriteBits( byte3, 8 )
|
|
local total_bitlen, result = FlushWriter( _FLUSH_MODE_OUTPUT )
|
|
local padding_bitlen = mod( 8 - mod( total_bitlen, 8 ), 8 )
|
|
return result, padding_bitlen
|
|
end
|
|
|
|
--- Compress using the raw deflate format.
|
|
-- @param str [string] The data to be compressed.
|
|
-- @param configs [table/nil] The configuration table to control the compression
|
|
-- . If nil, use the default configuration.
|
|
-- @return [string] The compressed data.
|
|
-- @return [integer] The number of bits padded at the end of output.
|
|
-- 0 <= bits < 8 <br>
|
|
-- This means the most significant "bits" of the last byte of the returned
|
|
-- compressed data are padding bits and they don't affect decompression.
|
|
-- You don't need to use this value unless you want to do some postprocessing
|
|
-- to the compressed data.
|
|
-- @see compression_configs
|
|
-- @see LibDeflate:DecompressDeflate
|
|
function LibDeflate:CompressDeflate( str, configs )
|
|
local arg_valid, arg_err = IsValidArguments( str, false, nil, true, configs )
|
|
if not arg_valid then
|
|
error( ("Usage: LibDeflate:CompressDeflate(str, configs): "
|
|
.. arg_err), 2 )
|
|
end
|
|
return CompressDeflateInternal( str, nil, configs )
|
|
end
|
|
|
|
--- Compress using the raw deflate format with a preset dictionary.
|
|
-- @param str [string] The data to be compressed.
|
|
-- @param dictionary [table] The preset dictionary produced by
|
|
-- LibDeflate:CreateDictionary
|
|
-- @param configs [table/nil] The configuration table to control the compression
|
|
-- . If nil, use the default configuration.
|
|
-- @return [string] The compressed data.
|
|
-- @return [integer] The number of bits padded at the end of output.
|
|
-- 0 <= bits < 8 <br>
|
|
-- This means the most significant "bits" of the last byte of the returned
|
|
-- compressed data are padding bits and they don't affect decompression.
|
|
-- You don't need to use this value unless you want to do some postprocessing
|
|
-- to the compressed data.
|
|
-- @see compression_configs
|
|
-- @see LibDeflate:CreateDictionary
|
|
-- @see LibDeflate:DecompressDeflateWithDict
|
|
function LibDeflate:CompressDeflateWithDict( str, dictionary, configs )
|
|
local arg_valid, arg_err = IsValidArguments( str, true, dictionary
|
|
, true, configs )
|
|
if not arg_valid then
|
|
error( ("Usage: LibDeflate:CompressDeflateWithDict"
|
|
.. "(str, dictionary, configs): "
|
|
.. arg_err), 2 )
|
|
end
|
|
return CompressDeflateInternal( str, dictionary, configs )
|
|
end
|
|
|
|
--- Compress using the zlib format.
|
|
-- @param str [string] the data to be compressed.
|
|
-- @param configs [table/nil] The configuration table to control the compression
|
|
-- . If nil, use the default configuration.
|
|
-- @return [string] The compressed data.
|
|
-- @return [integer] The number of bits padded at the end of output.
|
|
-- Should always be 0.
|
|
-- Zlib formatted compressed data never has padding bits at the end.
|
|
-- @see compression_configs
|
|
-- @see LibDeflate:DecompressZlib
|
|
function LibDeflate:CompressZlib( str, configs )
|
|
local arg_valid, arg_err = IsValidArguments( str, false, nil, true, configs )
|
|
if not arg_valid then
|
|
error( ("Usage: LibDeflate:CompressZlib(str, configs): "
|
|
.. arg_err), 2 )
|
|
end
|
|
return CompressZlibInternal( str, nil, configs )
|
|
end
|
|
|
|
--- Compress using the zlib format with a preset dictionary.
|
|
-- @param str [string] the data to be compressed.
|
|
-- @param dictionary [table] A preset dictionary produced
|
|
-- by LibDeflate:CreateDictionary()
|
|
-- @param configs [table/nil] The configuration table to control the compression
|
|
-- . If nil, use the default configuration.
|
|
-- @return [string] The compressed data.
|
|
-- @return [integer] The number of bits padded at the end of output.
|
|
-- Should always be 0.
|
|
-- Zlib formatted compressed data never has padding bits at the end.
|
|
-- @see compression_configs
|
|
-- @see LibDeflate:CreateDictionary
|
|
-- @see LibDeflate:DecompressZlibWithDict
|
|
function LibDeflate:CompressZlibWithDict( str, dictionary, configs )
|
|
local arg_valid, arg_err = IsValidArguments( str, true, dictionary
|
|
, true, configs )
|
|
if not arg_valid then
|
|
error( ("Usage: LibDeflate:CompressZlibWithDict"
|
|
.. "(str, dictionary, configs): "
|
|
.. arg_err), 2 )
|
|
end
|
|
return CompressZlibInternal( str, dictionary, configs )
|
|
end
|
|
|
|
--[[ --------------------------------------------------------------------------
|
|
Decompress code
|
|
--]] --------------------------------------------------------------------------
|
|
|
|
--[[
|
|
Create a reader to easily reader stuffs as the unit of bits.
|
|
Return values:
|
|
1. ReadBits(bitlen)
|
|
2. ReadBytes(bytelen, buffer, buffer_size)
|
|
3. Decode(huffman_bitlen_count, huffman_symbol, min_bitlen)
|
|
4. ReaderBitlenLeft()
|
|
5. SkipToByteBoundary()
|
|
--]]
|
|
local function CreateReader( input_string )
|
|
local input = input_string
|
|
local input_strlen = string.len( input_string )
|
|
local input_next_byte_pos = 1
|
|
local cache_bitlen = 0
|
|
local cache = 0
|
|
|
|
-- Read some bits.
|
|
-- To improve speed, this function does not
|
|
-- check if the input has been exhausted.
|
|
-- Use ReaderBitlenLeft() < 0 to check it.
|
|
-- @param bitlen the number of bits to read
|
|
-- @return the data is read.
|
|
local function ReadBits( bitlen )
|
|
local rshift_mask = _pow2[ bitlen ]
|
|
local code
|
|
if bitlen <= cache_bitlen then
|
|
code = mod( cache, rshift_mask )
|
|
cache = (cache - code) / rshift_mask
|
|
cache_bitlen = cache_bitlen - bitlen
|
|
else -- Whether input has been exhausted is not checked.
|
|
local lshift_mask = _pow2[ cache_bitlen ]
|
|
local byte1, byte2, byte3, byte4 = string_byte( input
|
|
, input_next_byte_pos, input_next_byte_pos + 3 )
|
|
-- This requires lua number to be at least double ()
|
|
cache = cache + ((byte1 or 0) + (byte2 or 0) * 256
|
|
+ (byte3 or 0) * 65536 + (byte4 or 0) * 16777216) * lshift_mask
|
|
input_next_byte_pos = input_next_byte_pos + 4
|
|
cache_bitlen = cache_bitlen + 32 - bitlen
|
|
code = mod( cache, rshift_mask )
|
|
cache = (cache - code) / rshift_mask
|
|
end
|
|
return code
|
|
end
|
|
|
|
-- Read some bytes from the reader.
|
|
-- Assume reader is on the byte boundary.
|
|
-- @param bytelen The number of bytes to be read.
|
|
-- @param buffer The byte read will be stored into this buffer.
|
|
-- @param buffer_size The buffer will be modified starting from
|
|
-- buffer[buffer_size+1], ending at buffer[buffer_size+bytelen-1]
|
|
-- @return the new buffer_size
|
|
local function ReadBytes( bytelen, buffer, buffer_size )
|
|
assert( mod( cache_bitlen, 8 ) == 0 )
|
|
|
|
local byte_from_cache = (cache_bitlen / 8 < bytelen)
|
|
and (cache_bitlen / 8) or bytelen
|
|
for _ = 1, byte_from_cache do
|
|
local byte = mod( cache, 256 )
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] = string_char( byte )
|
|
cache = (cache - byte) / 256
|
|
end
|
|
cache_bitlen = cache_bitlen - byte_from_cache * 8
|
|
bytelen = bytelen - byte_from_cache
|
|
if (input_strlen - input_next_byte_pos - bytelen + 1) * 8
|
|
+ cache_bitlen < 0 then
|
|
return -1 -- out of input
|
|
end
|
|
for i = input_next_byte_pos, input_next_byte_pos + bytelen - 1 do
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] = string_sub( input, i, i )
|
|
end
|
|
|
|
input_next_byte_pos = input_next_byte_pos + bytelen
|
|
return buffer_size
|
|
end
|
|
|
|
-- Decode huffman code
|
|
-- To improve speed, this function does not check
|
|
-- if the input has been exhausted.
|
|
-- Use ReaderBitlenLeft() < 0 to check it.
|
|
-- Credits for Mark Adler. This code is from puff:Decode()
|
|
-- @see puff:Decode(...)
|
|
-- @param huffman_bitlen_count
|
|
-- @param huffman_symbol
|
|
-- @param min_bitlen The minimum huffman bit length of all symbols
|
|
-- @return The decoded deflate code.
|
|
-- Negative value is returned if decoding fails.
|
|
local function Decode( huffman_bitlen_counts, huffman_symbols, min_bitlen )
|
|
local code = 0
|
|
local first = 0
|
|
local index = 0
|
|
local count
|
|
if min_bitlen > 0 then
|
|
if cache_bitlen < 15 and input then
|
|
local lshift_mask = _pow2[ cache_bitlen ]
|
|
local byte1, byte2, byte3, byte4 =
|
|
string_byte( input, input_next_byte_pos
|
|
, input_next_byte_pos + 3 )
|
|
-- This requires lua number to be at least double ()
|
|
cache = cache + ((byte1 or 0) + (byte2 or 0) * 256
|
|
+ (byte3 or 0) * 65536 + (byte4 or 0) * 16777216) * lshift_mask
|
|
input_next_byte_pos = input_next_byte_pos + 4
|
|
cache_bitlen = cache_bitlen + 32
|
|
end
|
|
|
|
local rshift_mask = _pow2[ min_bitlen ]
|
|
cache_bitlen = cache_bitlen - min_bitlen
|
|
code = mod( cache, rshift_mask )
|
|
cache = (cache - code) / rshift_mask
|
|
-- Reverse the bits
|
|
code = _reverse_bits_tbl[ min_bitlen ][ code ]
|
|
|
|
count = huffman_bitlen_counts[ min_bitlen ]
|
|
if code < count then
|
|
return huffman_symbols[ code ]
|
|
end
|
|
index = count
|
|
first = count * 2
|
|
code = code * 2
|
|
end
|
|
|
|
for bitlen = min_bitlen + 1, 15 do
|
|
local bit
|
|
bit = mod( cache, 2 )
|
|
cache = (cache - bit) / 2
|
|
cache_bitlen = cache_bitlen - 1
|
|
|
|
code = (bit == 1) and (code + 1 - mod( code, 2 )) or code
|
|
count = huffman_bitlen_counts[ bitlen ] or 0
|
|
local diff = code - first
|
|
if diff < count then
|
|
return huffman_symbols[ index + diff ]
|
|
end
|
|
index = index + count
|
|
first = first + count
|
|
first = first * 2
|
|
code = code * 2
|
|
end
|
|
-- invalid literal/length or distance code
|
|
-- in fixed or dynamic block (run out of code)
|
|
return -10
|
|
end
|
|
|
|
local function ReaderBitlenLeft()
|
|
return (input_strlen - input_next_byte_pos + 1) * 8 + cache_bitlen
|
|
end
|
|
|
|
local function SkipToByteBoundary()
|
|
local skipped_bitlen = mod( cache_bitlen, 8 )
|
|
local rshift_mask = _pow2[ skipped_bitlen ]
|
|
cache_bitlen = cache_bitlen - skipped_bitlen
|
|
cache = (cache - mod( cache, rshift_mask )) / rshift_mask
|
|
end
|
|
|
|
return ReadBits, ReadBytes, Decode, ReaderBitlenLeft, SkipToByteBoundary
|
|
end
|
|
|
|
-- Create a deflate state, so I can pass in less arguments to functions.
|
|
-- @param str the whole string to be decompressed.
|
|
-- @param dictionary The preset dictionary. nil if not provided.
|
|
-- This dictionary should be produced by LibDeflate:CreateDictionary(str)
|
|
-- @return The decomrpess state.
|
|
local function CreateDecompressState( str, dictionary )
|
|
local ReadBits, ReadBytes, Decode, ReaderBitlenLeft
|
|
, SkipToByteBoundary = CreateReader( str )
|
|
local state =
|
|
{
|
|
ReadBits = ReadBits,
|
|
ReadBytes = ReadBytes,
|
|
Decode = Decode,
|
|
ReaderBitlenLeft = ReaderBitlenLeft,
|
|
SkipToByteBoundary = SkipToByteBoundary,
|
|
buffer_size = 0,
|
|
buffer = {},
|
|
result_buffer = {},
|
|
dictionary = dictionary,
|
|
}
|
|
return state
|
|
end
|
|
|
|
-- Get the stuffs needed to decode huffman codes
|
|
-- @see puff.c:construct(...)
|
|
-- @param huffman_bitlen The huffman bit length of the huffman codes.
|
|
-- @param max_symbol The maximum symbol
|
|
-- @param max_bitlen The min huffman bit length of all codes
|
|
-- @return zero or positive for success, negative for failure.
|
|
-- @return The count of each huffman bit length.
|
|
-- @return A table to convert huffman codes to deflate codes.
|
|
-- @return The minimum huffman bit length.
|
|
local function GetHuffmanForDecode( huffman_bitlens, max_symbol, max_bitlen )
|
|
local huffman_bitlen_counts = {}
|
|
local min_bitlen = max_bitlen
|
|
for symbol = 0, max_symbol do
|
|
local bitlen = huffman_bitlens[ symbol ] or 0
|
|
min_bitlen = (bitlen > 0 and bitlen < min_bitlen)
|
|
and bitlen or min_bitlen
|
|
huffman_bitlen_counts[ bitlen ] = (huffman_bitlen_counts[ bitlen ] or 0) + 1
|
|
end
|
|
|
|
if huffman_bitlen_counts[ 0 ] == max_symbol + 1 then -- No Codes
|
|
return 0, huffman_bitlen_counts, {}, 0 -- Complete, but decode will fail
|
|
end
|
|
|
|
local left = 1
|
|
for len = 1, max_bitlen do
|
|
left = left * 2
|
|
left = left - (huffman_bitlen_counts[ len ] or 0)
|
|
if left < 0 then
|
|
return left -- Over-subscribed, return negative
|
|
end
|
|
end
|
|
|
|
-- Generate offsets info symbol table for each length for sorting
|
|
local offsets = {}
|
|
offsets[ 1 ] = 0
|
|
for len = 1, max_bitlen - 1 do
|
|
offsets[ len + 1 ] = offsets[ len ] + (huffman_bitlen_counts[ len ] or 0)
|
|
end
|
|
|
|
local huffman_symbols = {}
|
|
for symbol = 0, max_symbol do
|
|
local bitlen = huffman_bitlens[ symbol ] or 0
|
|
if bitlen ~= 0 then
|
|
local offset = offsets[ bitlen ]
|
|
huffman_symbols[ offset ] = symbol
|
|
offsets[ bitlen ] = offsets[ bitlen ] + 1
|
|
end
|
|
end
|
|
|
|
-- Return zero for complete set, positive for incomplete set.
|
|
return left, huffman_bitlen_counts, huffman_symbols, min_bitlen
|
|
end
|
|
|
|
-- Decode a fixed or dynamic huffman blocks, excluding last block identifier
|
|
-- and block type identifer.
|
|
-- @see puff.c:codes()
|
|
-- @param state decompression state that will be modified by this function.
|
|
-- @see CreateDecompressState
|
|
-- @param ... Read the source code
|
|
-- @return 0 on success, other value on failure.
|
|
local function DecodeUntilEndOfBlock( state, lcodes_huffman_bitlens
|
|
, lcodes_huffman_symbols, lcodes_huffman_min_bitlen
|
|
, dcodes_huffman_bitlens, dcodes_huffman_symbols
|
|
, dcodes_huffman_min_bitlen )
|
|
local buffer, buffer_size, ReadBits, Decode, ReaderBitlenLeft
|
|
, result_buffer =
|
|
state.buffer, state.buffer_size, state.ReadBits, state.Decode
|
|
, state.ReaderBitlenLeft, state.result_buffer
|
|
local dictionary = state.dictionary
|
|
local dict_string_table
|
|
local dict_strlen
|
|
|
|
local buffer_end = 1
|
|
if dictionary and not buffer[ 0 ] then
|
|
-- If there is a dictionary, copy the last 258 bytes into
|
|
-- the string_table to make the copy in the main loop quicker.
|
|
-- This is done only once per decompression.
|
|
dict_string_table = dictionary.string_table
|
|
dict_strlen = dictionary.strlen
|
|
buffer_end = -dict_strlen + 1
|
|
for i = 0, (-dict_strlen + 1) < -257 and -257 or (-dict_strlen + 1), -1 do
|
|
buffer[ i ] = _byte_to_char[ dict_string_table[ dict_strlen + i ] ]
|
|
end
|
|
end
|
|
|
|
repeat
|
|
local symbol = Decode( lcodes_huffman_bitlens
|
|
, lcodes_huffman_symbols, lcodes_huffman_min_bitlen )
|
|
if symbol < 0 or symbol > 285 then
|
|
-- invalid literal/length or distance code in fixed or dynamic block
|
|
return -10
|
|
elseif symbol < 256 then -- Literal
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] = _byte_to_char[ symbol ]
|
|
elseif symbol > 256 then -- Length code
|
|
symbol = symbol - 256
|
|
local bitlen = _literal_deflate_code_to_base_len[ symbol ]
|
|
bitlen = (symbol >= 8)
|
|
and (bitlen
|
|
+ ReadBits( _literal_deflate_code_to_extra_bitlen[ symbol ] ))
|
|
or bitlen
|
|
symbol = Decode( dcodes_huffman_bitlens, dcodes_huffman_symbols
|
|
, dcodes_huffman_min_bitlen )
|
|
if symbol < 0 or symbol > 29 then
|
|
-- invalid literal/length or distance code in fixed or dynamic block
|
|
return -10
|
|
end
|
|
local dist = _dist_deflate_code_to_base_dist[ symbol ]
|
|
dist = (dist > 4) and (dist
|
|
+ ReadBits( _dist_deflate_code_to_extra_bitlen[ symbol ] )) or dist
|
|
|
|
local char_buffer_index = buffer_size - dist + 1
|
|
if char_buffer_index < buffer_end then
|
|
-- distance is too far back in fixed or dynamic block
|
|
return -11
|
|
end
|
|
if char_buffer_index >= -257 then
|
|
for _ = 1, bitlen do
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] = buffer[ char_buffer_index ]
|
|
char_buffer_index = char_buffer_index + 1
|
|
end
|
|
else
|
|
char_buffer_index = dict_strlen + char_buffer_index
|
|
for _ = 1, bitlen do
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] =
|
|
_byte_to_char[ dict_string_table[ char_buffer_index ] ]
|
|
char_buffer_index = char_buffer_index + 1
|
|
end
|
|
end
|
|
end
|
|
|
|
if ReaderBitlenLeft() < 0 then
|
|
return 2 -- available inflate data did not terminate
|
|
end
|
|
|
|
if buffer_size >= 65536 then
|
|
result_buffer[ table.getn( result_buffer ) + 1 ] =
|
|
table_concat( buffer, "", 1, 32768 )
|
|
for i = 32769, buffer_size do
|
|
buffer[ i - 32768 ] = buffer[ i ]
|
|
end
|
|
buffer_size = buffer_size - 32768
|
|
buffer[ buffer_size + 1 ] = nil
|
|
-- NOTE: buffer[32769..end] and buffer[-257..0] are not cleared.
|
|
-- This is why "buffer_size" variable is needed.
|
|
end
|
|
until symbol == 256
|
|
|
|
state.buffer_size = buffer_size
|
|
|
|
return 0
|
|
end
|
|
|
|
-- Decompress a store block
|
|
-- @param state decompression state that will be modified by this function.
|
|
-- @return 0 if succeeds, other value if fails.
|
|
local function DecompressStoreBlock( state )
|
|
local buffer, buffer_size, ReadBits, ReadBytes, ReaderBitlenLeft
|
|
, SkipToByteBoundary, result_buffer =
|
|
state.buffer, state.buffer_size, state.ReadBits, state.ReadBytes
|
|
, state.ReaderBitlenLeft, state.SkipToByteBoundary, state.result_buffer
|
|
|
|
SkipToByteBoundary()
|
|
local bytelen = ReadBits( 16 )
|
|
if ReaderBitlenLeft() < 0 then
|
|
return 2 -- available inflate data did not terminate
|
|
end
|
|
local bytelenComp = ReadBits( 16 )
|
|
if ReaderBitlenLeft() < 0 then
|
|
return 2 -- available inflate data did not terminate
|
|
end
|
|
|
|
if mod( bytelen, 256 ) + mod( bytelenComp, 256 ) ~= 255 then
|
|
return -2 -- Not one's complement
|
|
end
|
|
if (bytelen - mod( bytelen, 256 )) / 256
|
|
+ (bytelenComp - mod( bytelenComp, 256 )) / 256 ~= 255 then
|
|
return -2 -- Not one's complement
|
|
end
|
|
|
|
-- Note that ReadBytes will skip to the next byte boundary first.
|
|
buffer_size = ReadBytes( bytelen, buffer, buffer_size )
|
|
if buffer_size < 0 then
|
|
return 2 -- available inflate data did not terminate
|
|
end
|
|
|
|
-- memory clean up when there are enough bytes in the buffer.
|
|
if buffer_size >= 65536 then
|
|
result_buffer[ table.getn( result_buffer ) + 1 ] = table_concat( buffer, "", 1, 32768 )
|
|
for i = 32769, buffer_size do
|
|
buffer[ i - 32768 ] = buffer[ i ]
|
|
end
|
|
buffer_size = buffer_size - 32768
|
|
buffer[ buffer_size + 1 ] = nil
|
|
end
|
|
state.buffer_size = buffer_size
|
|
return 0
|
|
end
|
|
|
|
-- Decompress a fixed block
|
|
-- @param state decompression state that will be modified by this function.
|
|
-- @return 0 if succeeds other value if fails.
|
|
local function DecompressFixBlock( state )
|
|
return DecodeUntilEndOfBlock( state
|
|
, _fix_block_literal_huffman_bitlen_count
|
|
, _fix_block_literal_huffman_to_deflate_code, 7
|
|
, _fix_block_dist_huffman_bitlen_count
|
|
, _fix_block_dist_huffman_to_deflate_code, 5 )
|
|
end
|
|
|
|
-- Decompress a dynamic block
|
|
-- @param state decompression state that will be modified by this function.
|
|
-- @return 0 if success, other value if fails.
|
|
local function DecompressDynamicBlock( state )
|
|
local ReadBits, Decode = state.ReadBits, state.Decode
|
|
local nlen = ReadBits( 5 ) + 257
|
|
local ndist = ReadBits( 5 ) + 1
|
|
local ncode = ReadBits( 4 ) + 4
|
|
if nlen > 286 or ndist > 30 then
|
|
-- dynamic block code description: too many length or distance codes
|
|
return -3
|
|
end
|
|
|
|
local rle_codes_huffman_bitlens = {}
|
|
|
|
for i = 1, ncode do
|
|
rle_codes_huffman_bitlens[ _rle_codes_huffman_bitlen_order[ i ] ] =
|
|
ReadBits( 3 )
|
|
end
|
|
|
|
local rle_codes_err, rle_codes_huffman_bitlen_counts,
|
|
rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen =
|
|
GetHuffmanForDecode( rle_codes_huffman_bitlens, 18, 7 )
|
|
if rle_codes_err ~= 0 then -- Require complete code set here
|
|
-- dynamic block code description: code lengths codes incomplete
|
|
return -4
|
|
end
|
|
|
|
local lcodes_huffman_bitlens = {}
|
|
local dcodes_huffman_bitlens = {}
|
|
-- Read length/literal and distance code length tables
|
|
local index = 0
|
|
while index < nlen + ndist do
|
|
local symbol -- Decoded value
|
|
local bitlen -- Last length to repeat
|
|
|
|
symbol = Decode( rle_codes_huffman_bitlen_counts
|
|
, rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen )
|
|
|
|
if symbol < 0 then
|
|
return symbol -- Invalid symbol
|
|
elseif symbol < 16 then
|
|
if index < nlen then
|
|
lcodes_huffman_bitlens[ index ] = symbol
|
|
else
|
|
dcodes_huffman_bitlens[ index - nlen ] = symbol
|
|
end
|
|
index = index + 1
|
|
else
|
|
bitlen = 0
|
|
if symbol == 16 then
|
|
if index == 0 then
|
|
-- dynamic block code description: repeat lengths
|
|
-- with no first length
|
|
return -5
|
|
end
|
|
if index - 1 < nlen then
|
|
bitlen = lcodes_huffman_bitlens[ index - 1 ]
|
|
else
|
|
bitlen = dcodes_huffman_bitlens[ index - nlen - 1 ]
|
|
end
|
|
symbol = 3 + ReadBits( 2 )
|
|
elseif symbol == 17 then -- Repeat zero 3..10 times
|
|
symbol = 3 + ReadBits( 3 )
|
|
else -- == 18, repeat zero 11.138 times
|
|
symbol = 11 + ReadBits( 7 )
|
|
end
|
|
if index + symbol > nlen + ndist then
|
|
-- dynamic block code description:
|
|
-- repeat more than specified lengths
|
|
return -6
|
|
end
|
|
while symbol > 0 do -- Repeat last or zero symbol times
|
|
symbol = symbol - 1
|
|
if index < nlen then
|
|
lcodes_huffman_bitlens[ index ] = bitlen
|
|
else
|
|
dcodes_huffman_bitlens[ index - nlen ] = bitlen
|
|
end
|
|
index = index + 1
|
|
end
|
|
end
|
|
end
|
|
|
|
if (lcodes_huffman_bitlens[ 256 ] or 0) == 0 then
|
|
-- dynamic block code description: missing end-of-block code
|
|
return -9
|
|
end
|
|
|
|
local lcodes_err, lcodes_huffman_bitlen_counts
|
|
, lcodes_huffman_symbols, lcodes_huffman_min_bitlen =
|
|
GetHuffmanForDecode( lcodes_huffman_bitlens, nlen - 1, 15 )
|
|
--dynamic block code description: invalid literal/length code lengths,
|
|
-- Incomplete code ok only for single length 1 code
|
|
if (lcodes_err ~= 0 and (lcodes_err < 0
|
|
or nlen ~= (lcodes_huffman_bitlen_counts[ 0 ] or 0)
|
|
+ (lcodes_huffman_bitlen_counts[ 1 ] or 0))) then
|
|
return -7
|
|
end
|
|
|
|
local dcodes_err, dcodes_huffman_bitlen_counts
|
|
, dcodes_huffman_symbols, dcodes_huffman_min_bitlen =
|
|
GetHuffmanForDecode( dcodes_huffman_bitlens, ndist - 1, 15 )
|
|
-- dynamic block code description: invalid distance code lengths,
|
|
-- Incomplete code ok only for single length 1 code
|
|
if (dcodes_err ~= 0 and (dcodes_err < 0
|
|
or ndist ~= (dcodes_huffman_bitlen_counts[ 0 ] or 0)
|
|
+ (dcodes_huffman_bitlen_counts[ 1 ] or 0))) then
|
|
return -8
|
|
end
|
|
|
|
-- Build buffman table for literal/length codes
|
|
return DecodeUntilEndOfBlock( state, lcodes_huffman_bitlen_counts
|
|
, lcodes_huffman_symbols, lcodes_huffman_min_bitlen
|
|
, dcodes_huffman_bitlen_counts, dcodes_huffman_symbols
|
|
, dcodes_huffman_min_bitlen )
|
|
end
|
|
|
|
-- Decompress a deflate stream
|
|
-- @param state: a decompression state
|
|
-- @return the decompressed string if succeeds. nil if fails.
|
|
local function Inflate( state )
|
|
local ReadBits = state.ReadBits
|
|
|
|
local is_last_block
|
|
while not is_last_block do
|
|
is_last_block = (ReadBits( 1 ) == 1)
|
|
local block_type = ReadBits( 2 )
|
|
local status
|
|
if block_type == 0 then
|
|
status = DecompressStoreBlock( state )
|
|
elseif block_type == 1 then
|
|
status = DecompressFixBlock( state )
|
|
elseif block_type == 2 then
|
|
status = DecompressDynamicBlock( state )
|
|
else
|
|
return nil, -1 -- invalid block type (type == 3)
|
|
end
|
|
if status ~= 0 then
|
|
return nil, status
|
|
end
|
|
end
|
|
|
|
state.result_buffer[ table.getn( state.result_buffer ) + 1 ] =
|
|
table_concat( state.buffer, "", 1, state.buffer_size )
|
|
local result = table_concat( state.result_buffer )
|
|
return result
|
|
end
|
|
|
|
-- @see LibDeflate:DecompressDeflate(str)
|
|
-- @see LibDeflate:DecompressDeflateWithDict(str, dictionary)
|
|
local function DecompressDeflateInternal( str, dictionary )
|
|
local state = CreateDecompressState( str, dictionary )
|
|
local result, status = Inflate( state )
|
|
if not result then
|
|
return nil, status
|
|
end
|
|
|
|
local bitlen_left = state.ReaderBitlenLeft()
|
|
local bytelen_left = (bitlen_left - mod( bitlen_left, 8 )) / 8
|
|
return result, bytelen_left
|
|
end
|
|
|
|
-- @see LibDeflate:DecompressZlib(str)
|
|
-- @see LibDeflate:DecompressZlibWithDict(str)
|
|
local function DecompressZlibInternal( str, dictionary )
|
|
local state = CreateDecompressState( str, dictionary )
|
|
local ReadBits = state.ReadBits
|
|
|
|
local CMF = ReadBits( 8 )
|
|
if state.ReaderBitlenLeft() < 0 then
|
|
return nil, 2 -- available inflate data did not terminate
|
|
end
|
|
local CM = mod( CMF, 16 )
|
|
local CINFO = (CMF - CM) / 16
|
|
if CM ~= 8 then
|
|
return nil, -12 -- invalid compression method
|
|
end
|
|
if CINFO > 7 then
|
|
return nil, -13 -- invalid window size
|
|
end
|
|
|
|
local FLG = ReadBits( 8 )
|
|
if state.ReaderBitlenLeft() < 0 then
|
|
return nil, 2 -- available inflate data did not terminate
|
|
end
|
|
if mod( CMF * 256 + FLG, 31 ) ~= 0 then
|
|
return nil, -14 -- invalid header checksum
|
|
end
|
|
|
|
local FDIST = mod( (FLG - mod( FLG, 32 )) / 32, 2 )
|
|
local FLEVEL = mod( (FLG - mod( FLG, 64 )) / 64, 4 ) -- luacheck: ignore FLEVEL
|
|
|
|
if FDIST == 1 then
|
|
if not dictionary then
|
|
return nil, -16 -- need dictonary, but dictionary is not provided.
|
|
end
|
|
local byte3 = ReadBits( 8 )
|
|
local byte2 = ReadBits( 8 )
|
|
local byte1 = ReadBits( 8 )
|
|
local byte0 = ReadBits( 8 )
|
|
local actual_adler32 = byte3 * 16777216 + byte2 * 65536 + byte1 * 256 + byte0
|
|
if state.ReaderBitlenLeft() < 0 then
|
|
return nil, 2 -- available inflate data did not terminate
|
|
end
|
|
if not IsEqualAdler32( actual_adler32, dictionary.adler32 ) then
|
|
return nil, -17 -- dictionary adler32 does not match
|
|
end
|
|
end
|
|
local result, status = Inflate( state )
|
|
if not result then
|
|
return nil, status
|
|
end
|
|
state.SkipToByteBoundary()
|
|
|
|
local adler_byte0 = ReadBits( 8 )
|
|
local adler_byte1 = ReadBits( 8 )
|
|
local adler_byte2 = ReadBits( 8 )
|
|
local adler_byte3 = ReadBits( 8 )
|
|
if state.ReaderBitlenLeft() < 0 then
|
|
return nil, 2 -- available inflate data did not terminate
|
|
end
|
|
|
|
local adler32_expected = adler_byte0 * 16777216
|
|
+ adler_byte1 * 65536 + adler_byte2 * 256 + adler_byte3
|
|
local adler32_actual = LibDeflate:Adler32( result )
|
|
if not IsEqualAdler32( adler32_expected, adler32_actual ) then
|
|
return nil, -15 -- Adler32 checksum does not match
|
|
end
|
|
|
|
local bitlen_left = state.ReaderBitlenLeft()
|
|
local bytelen_left = (bitlen_left - mod( bitlen_left, 8 )) / 8
|
|
return result, bytelen_left
|
|
end
|
|
|
|
--- Decompress a raw deflate compressed data.
|
|
-- @param str [string] The data to be decompressed.
|
|
-- @return [string/nil] If the decompression succeeds, return the decompressed
|
|
-- data. If the decompression fails, return nil. You should check if this return
|
|
-- value is non-nil to know if the decompression succeeds.
|
|
-- @return [integer] If the decompression succeeds, return the number of
|
|
-- unprocessed bytes in the input compressed data. This return value is a
|
|
-- positive integer if the input data is a valid compressed data appended by an
|
|
-- arbitary non-empty string. This return value is 0 if the input data does not
|
|
-- contain any extra bytes.<br>
|
|
-- If the decompression fails (The first return value of this function is nil),
|
|
-- this return value is undefined.
|
|
-- @see LibDeflate:CompressDeflate
|
|
function LibDeflate:DecompressDeflate( str )
|
|
local arg_valid, arg_err = IsValidArguments( str )
|
|
if not arg_valid then
|
|
error( ("Usage: LibDeflate:DecompressDeflate(str): "
|
|
.. arg_err), 2 )
|
|
end
|
|
return DecompressDeflateInternal( str )
|
|
end
|
|
|
|
--- Decompress a raw deflate compressed data with a preset dictionary.
|
|
-- @param str [string] The data to be decompressed.
|
|
-- @param dictionary [table] The preset dictionary used by
|
|
-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
|
|
-- Decompression and compression must use the same dictionary.
|
|
-- Otherwise wrong decompressed data could be produced without generating any
|
|
-- error.
|
|
-- @return [string/nil] If the decompression succeeds, return the decompressed
|
|
-- data. If the decompression fails, return nil. You should check if this return
|
|
-- value is non-nil to know if the decompression succeeds.
|
|
-- @return [integer] If the decompression succeeds, return the number of
|
|
-- unprocessed bytes in the input compressed data. This return value is a
|
|
-- positive integer if the input data is a valid compressed data appended by an
|
|
-- arbitary non-empty string. This return value is 0 if the input data does not
|
|
-- contain any extra bytes.<br>
|
|
-- If the decompression fails (The first return value of this function is nil),
|
|
-- this return value is undefined.
|
|
-- @see LibDeflate:CompressDeflateWithDict
|
|
function LibDeflate:DecompressDeflateWithDict( str, dictionary )
|
|
local arg_valid, arg_err = IsValidArguments( str, true, dictionary )
|
|
if not arg_valid then
|
|
error( ("Usage: LibDeflate:DecompressDeflateWithDict(str, dictionary): "
|
|
.. arg_err), 2 )
|
|
end
|
|
return DecompressDeflateInternal( str, dictionary )
|
|
end
|
|
|
|
--- Decompress a zlib compressed data.
|
|
-- @param str [string] The data to be decompressed
|
|
-- @return [string/nil] If the decompression succeeds, return the decompressed
|
|
-- data. If the decompression fails, return nil. You should check if this return
|
|
-- value is non-nil to know if the decompression succeeds.
|
|
-- @return [integer] If the decompression succeeds, return the number of
|
|
-- unprocessed bytes in the input compressed data. This return value is a
|
|
-- positive integer if the input data is a valid compressed data appended by an
|
|
-- arbitary non-empty string. This return value is 0 if the input data does not
|
|
-- contain any extra bytes.<br>
|
|
-- If the decompression fails (The first return value of this function is nil),
|
|
-- this return value is undefined.
|
|
-- @see LibDeflate:CompressZlib
|
|
function LibDeflate:DecompressZlib( str )
|
|
local arg_valid, arg_err = IsValidArguments( str )
|
|
if not arg_valid then
|
|
error( ("Usage: LibDeflate:DecompressZlib(str): "
|
|
.. arg_err), 2 )
|
|
end
|
|
return DecompressZlibInternal( str )
|
|
end
|
|
|
|
--- Decompress a zlib compressed data with a preset dictionary.
|
|
-- @param str [string] The data to be decompressed
|
|
-- @param dictionary [table] The preset dictionary used by
|
|
-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
|
|
-- Decompression and compression must use the same dictionary.
|
|
-- Otherwise wrong decompressed data could be produced without generating any
|
|
-- error.
|
|
-- @return [string/nil] If the decompression succeeds, return the decompressed
|
|
-- data. If the decompression fails, return nil. You should check if this return
|
|
-- value is non-nil to know if the decompression succeeds.
|
|
-- @return [integer] If the decompression succeeds, return the number of
|
|
-- unprocessed bytes in the input compressed data. This return value is a
|
|
-- positive integer if the input data is a valid compressed data appended by an
|
|
-- arbitary non-empty string. This return value is 0 if the input data does not
|
|
-- contain any extra bytes.<br>
|
|
-- If the decompression fails (The first return value of this function is nil),
|
|
-- this return value is undefined.
|
|
-- @see LibDeflate:CompressZlibWithDict
|
|
function LibDeflate:DecompressZlibWithDict( str, dictionary )
|
|
local arg_valid, arg_err = IsValidArguments( str, true, dictionary )
|
|
if not arg_valid then
|
|
error( ("Usage: LibDeflate:DecompressZlibWithDict(str, dictionary): "
|
|
.. arg_err), 2 )
|
|
end
|
|
return DecompressZlibInternal( str, dictionary )
|
|
end
|
|
|
|
-- Calculate the huffman code of fixed block
|
|
do
|
|
_fix_block_literal_huffman_bitlen = {}
|
|
for sym = 0, 143 do
|
|
_fix_block_literal_huffman_bitlen[ sym ] = 8
|
|
end
|
|
for sym = 144, 255 do
|
|
_fix_block_literal_huffman_bitlen[ sym ] = 9
|
|
end
|
|
for sym = 256, 279 do
|
|
_fix_block_literal_huffman_bitlen[ sym ] = 7
|
|
end
|
|
for sym = 280, 287 do
|
|
_fix_block_literal_huffman_bitlen[ sym ] = 8
|
|
end
|
|
|
|
_fix_block_dist_huffman_bitlen = {}
|
|
for dist = 0, 31 do
|
|
_fix_block_dist_huffman_bitlen[ dist ] = 5
|
|
end
|
|
local status
|
|
status, _fix_block_literal_huffman_bitlen_count
|
|
, _fix_block_literal_huffman_to_deflate_code =
|
|
GetHuffmanForDecode( _fix_block_literal_huffman_bitlen, 287, 9 )
|
|
assert( status == 0 )
|
|
status, _fix_block_dist_huffman_bitlen_count,
|
|
_fix_block_dist_huffman_to_deflate_code =
|
|
GetHuffmanForDecode( _fix_block_dist_huffman_bitlen, 31, 5 )
|
|
assert( status == 0 )
|
|
|
|
_fix_block_literal_huffman_code =
|
|
GetHuffmanCodeFromBitlen( _fix_block_literal_huffman_bitlen_count
|
|
, _fix_block_literal_huffman_bitlen, 287, 9 )
|
|
_fix_block_dist_huffman_code =
|
|
GetHuffmanCodeFromBitlen( _fix_block_dist_huffman_bitlen_count
|
|
, _fix_block_dist_huffman_bitlen, 31, 5 )
|
|
end
|
|
|
|
-- Prefix encoding algorithm
|
|
-- Credits to LibCompress.
|
|
-- The code has been rewritten by the author of LibDeflate.
|
|
------------------------------------------------------------------------------
|
|
|
|
-- to be able to match any requested byte value, the search
|
|
-- string must be preprocessed characters to escape with %:
|
|
-- ( ) . % + - * ? [ ] ^ $
|
|
-- "illegal" byte values:
|
|
-- 0 is replaces %z
|
|
local _gsub_escape_table = {
|
|
[ "\000" ] = "%z",
|
|
[ "(" ] = "%(",
|
|
[ ")" ] = "%)",
|
|
[ "." ] = "%.",
|
|
[ "%" ] = "%%",
|
|
[ "+" ] = "%+",
|
|
[ "-" ] = "%-",
|
|
[ "*" ] = "%*",
|
|
[ "?" ] = "%?",
|
|
[ "[" ] = "%[",
|
|
[ "]" ] = "%]",
|
|
[ "^" ] = "%^",
|
|
[ "$" ] = "%$",
|
|
}
|
|
|
|
local function escape_for_gsub( str )
|
|
return string.gsub( str, "([%z%(%)%.%%%+%-%*%?%[%]%^%$])", _gsub_escape_table )
|
|
end
|
|
|
|
--- Create a custom codec with encoder and decoder. <br>
|
|
-- This codec is used to convert an input string to make it not contain
|
|
-- some specific bytes.
|
|
-- This created codec and the parameters of this function do NOT take
|
|
-- localization into account. One byte (0-255) in the string is exactly one
|
|
-- character (0-255).
|
|
-- Credits to LibCompress.
|
|
-- The code has been rewritten by the author of LibDeflate. <br>
|
|
-- @param reserved_chars [string] The created encoder will ensure encoded
|
|
-- data does not contain any single character in reserved_chars. This parameter
|
|
-- should be non-empty.
|
|
-- @param escape_chars [string] The escape character(s) used in the created
|
|
-- codec. The codec converts any character included in reserved\_chars /
|
|
-- escape\_chars / map\_chars to (one escape char + one character not in
|
|
-- reserved\_chars / escape\_chars / map\_chars).
|
|
-- You usually only need to provide a length-1 string for this parameter.
|
|
-- Length-2 string is only needed when
|
|
-- reserved\_chars + escape\_chars + map\_chars is longer than 127.
|
|
-- This parameter should be non-empty.
|
|
-- @param map_chars [string] The created encoder will map every
|
|
-- reserved\_chars:sub(i, i) (1 <= i <= #map\_chars) to map\_chars:sub(i, i).
|
|
-- This parameter CAN be empty string.
|
|
-- @return [table/nil] If the codec cannot be created, return nil.<br>
|
|
-- If the codec can be created according to the given
|
|
-- parameters, return the codec, which is a encode/decode table.
|
|
-- The table contains two functions: <br>
|
|
-- t:Encode(str) returns the encoded string. <br>
|
|
-- t:Decode(str) returns the decoded string if succeeds. nil if fails.
|
|
-- @return [nil/string] If the codec is successfully created, return nil.
|
|
-- If not, return a string that describes the reason why the codec cannot be
|
|
-- created.
|
|
-- @usage
|
|
-- -- Create an encoder/decoder that maps all "\000" to "\003",
|
|
-- -- and escape "\001" (and "\002" and "\003") properly
|
|
-- local codec = LibDeflate:CreateCodec("\000\001", "\002", "\003")
|
|
--
|
|
-- local encoded = codec:Encode(SOME_STRING)
|
|
-- -- "encoded" does not contain "\000" or "\001"
|
|
-- local decoded = codec:Decode(encoded)
|
|
-- -- assert(decoded == SOME_STRING)
|
|
function LibDeflate:CreateCodec( reserved_chars, escape_chars
|
|
, map_chars )
|
|
if type( reserved_chars ) ~= "string"
|
|
or type( escape_chars ) ~= "string"
|
|
or type( map_chars ) ~= "string" then
|
|
error(
|
|
"Usage: LibDeflate:CreateCodec(reserved_chars,"
|
|
.. " escape_chars, map_chars):"
|
|
.. " All arguments must be string.", 2 )
|
|
end
|
|
|
|
if escape_chars == "" then
|
|
return nil, "No escape characters supplied."
|
|
end
|
|
if string.len( reserved_chars ) < string.len( map_chars ) then
|
|
return nil, "The number of reserved characters must be"
|
|
.. " at least as many as the number of mapped chars."
|
|
end
|
|
if reserved_chars == "" then
|
|
return nil, "No characters to encode."
|
|
end
|
|
|
|
local encode_bytes = reserved_chars .. escape_chars .. map_chars
|
|
-- build list of bytes not available as a suffix to a prefix byte
|
|
local taken = {}
|
|
for i = 1, string.len( encode_bytes ) do
|
|
local byte = string_byte( encode_bytes, i, i )
|
|
if taken[ byte ] then
|
|
return nil, "There must be no duplicate characters in the"
|
|
.. " concatenation of reserved_chars, escape_chars and"
|
|
.. " map_chars."
|
|
end
|
|
taken[ byte ] = true
|
|
end
|
|
|
|
local decode_patterns = {}
|
|
local decode_repls = {}
|
|
|
|
-- the encoding can be a single gsub
|
|
-- , but the decoding can require multiple gsubs
|
|
local encode_search = {}
|
|
local encode_translate = {}
|
|
|
|
-- map single byte to single byte
|
|
if string.len( map_chars ) > 0 then
|
|
local decode_search = {}
|
|
local decode_translate = {}
|
|
for i = 1, string.len( map_chars ) do
|
|
local from = string_sub( reserved_chars, i, i )
|
|
local to = string_sub( map_chars, i, i )
|
|
encode_translate[ from ] = to
|
|
encode_search[ table.getn( encode_search ) + 1 ] = from
|
|
decode_translate[ to ] = from
|
|
decode_search[ table.getn( decode_search ) + 1 ] = to
|
|
end
|
|
decode_patterns[ table.getn( decode_patterns ) + 1 ] =
|
|
"([" .. escape_for_gsub( table_concat( decode_search ) ) .. "])"
|
|
decode_repls[ table.getn( decode_repls ) + 1 ] = decode_translate
|
|
end
|
|
|
|
local escape_char_index = 1
|
|
local escape_char = string_sub( escape_chars
|
|
, escape_char_index, escape_char_index )
|
|
-- map single byte to double-byte
|
|
local r = 0 -- suffix char value to the escapeChar
|
|
|
|
local decode_search = {}
|
|
local decode_translate = {}
|
|
for i = 1, string.len( encode_bytes ) do
|
|
local c = string_sub( encode_bytes, i, i )
|
|
if not encode_translate[ c ] then
|
|
while r >= 256 or taken[ r ] do
|
|
r = r + 1
|
|
if r > 255 then -- switch to next escapeChar
|
|
decode_patterns[ table.getn( decode_patterns ) + 1 ] =
|
|
escape_for_gsub( escape_char )
|
|
.. "(["
|
|
.. escape_for_gsub( table_concat( decode_search ) ) .. "])"
|
|
decode_repls[ table.getn( decode_repls ) + 1 ] = decode_translate
|
|
|
|
escape_char_index = escape_char_index + 1
|
|
escape_char = string_sub( escape_chars, escape_char_index
|
|
, escape_char_index )
|
|
r = 0
|
|
decode_search = {}
|
|
decode_translate = {}
|
|
|
|
if not escape_char or escape_char == "" then
|
|
-- actually I don't need to check
|
|
-- "not ecape_char", but what if Lua changes
|
|
-- the behavior of string.sub() in the future?
|
|
-- we are out of escape chars and we need more!
|
|
return nil, "Out of escape characters."
|
|
end
|
|
end
|
|
end
|
|
|
|
local char_r = _byte_to_char[ r ]
|
|
encode_translate[ c ] = escape_char .. char_r
|
|
encode_search[ table.getn( encode_search ) + 1 ] = c
|
|
decode_translate[ char_r ] = c
|
|
decode_search[ table.getn( decode_search ) + 1 ] = char_r
|
|
r = r + 1
|
|
end
|
|
if i == string.len( encode_bytes ) then
|
|
decode_patterns[ table.getn( decode_patterns ) + 1 ] =
|
|
escape_for_gsub( escape_char ) .. "(["
|
|
.. escape_for_gsub( table_concat( decode_search ) ) .. "])"
|
|
decode_repls[ table.getn( decode_repls ) + 1 ] = decode_translate
|
|
end
|
|
end
|
|
|
|
local codec = {}
|
|
|
|
local encode_pattern = "(["
|
|
.. escape_for_gsub( table_concat( encode_search ) ) .. "])"
|
|
local encode_repl = encode_translate
|
|
|
|
function codec:Encode( str )
|
|
if type( str ) ~= "string" then
|
|
error( string.format( "Usage: codec:Encode(str):"
|
|
.. " 'str' - string expected got '%s'.", type( str ) ), 2 )
|
|
end
|
|
return string_gsub( str, encode_pattern, encode_repl )
|
|
end
|
|
|
|
local decode_tblsize = table.getn( decode_patterns )
|
|
local decode_fail_pattern = "(["
|
|
.. escape_for_gsub( reserved_chars ) .. "])"
|
|
|
|
function codec:Decode( str )
|
|
if type( str ) ~= "string" then
|
|
error( string.format( "Usage: codec:Decode(str):"
|
|
.. " 'str' - string expected got '%s'.", type( str ) ), 2 )
|
|
end
|
|
if string_find( str, decode_fail_pattern ) then
|
|
return nil
|
|
end
|
|
for i = 1, decode_tblsize do
|
|
str = string_gsub( str, decode_patterns[ i ], decode_repls[ i ] )
|
|
end
|
|
return str
|
|
end
|
|
|
|
return codec
|
|
end
|
|
|
|
local _addon_channel_codec
|
|
|
|
local function GenerateWoWAddonChannelCodec()
|
|
return LibDeflate:CreateCodec( "\000", "\001", "" )
|
|
end
|
|
|
|
--- Encode the string to make it ready to be transmitted in World of
|
|
-- Warcraft addon channel. <br>
|
|
-- The encoded string is guaranteed to contain no NULL ("\000") character.
|
|
-- @param str [string] The string to be encoded.
|
|
-- @return The encoded string.
|
|
-- @see LibDeflate:DecodeForWoWAddonChannel
|
|
function LibDeflate:EncodeForWoWAddonChannel( str )
|
|
if type( str ) ~= "string" then
|
|
error( string.format( "Usage: LibDeflate:EncodeForWoWAddonChannel(str):"
|
|
.. " 'str' - string expected got '%s'.", type( str ) ), 2 )
|
|
end
|
|
if not _addon_channel_codec then
|
|
_addon_channel_codec = GenerateWoWAddonChannelCodec()
|
|
end
|
|
return _addon_channel_codec:Encode( str )
|
|
end
|
|
|
|
--- Decode the string produced by LibDeflate:EncodeForWoWAddonChannel
|
|
-- @param str [string] The string to be decoded.
|
|
-- @return [string/nil] The decoded string if succeeds. nil if fails.
|
|
-- @see LibDeflate:EncodeForWoWAddonChannel
|
|
function LibDeflate:DecodeForWoWAddonChannel( str )
|
|
if type( str ) ~= "string" then
|
|
error( string.format( "Usage: LibDeflate:DecodeForWoWAddonChannel(str):"
|
|
.. " 'str' - string expected got '%s'.", type( str ) ), 2 )
|
|
end
|
|
if not _addon_channel_codec then
|
|
_addon_channel_codec = GenerateWoWAddonChannelCodec()
|
|
end
|
|
return _addon_channel_codec:Decode( str )
|
|
end
|
|
|
|
-- For World of Warcraft Chat Channel Encoding
|
|
-- Credits to LibCompress.
|
|
-- The code has been rewritten by the author of LibDeflate. <br>
|
|
-- Following byte values are not allowed:
|
|
-- \000, s, S, \010, \013, \124, %
|
|
-- Because SendChatMessage will error
|
|
-- if an UTF8 multibyte character is incomplete,
|
|
-- all character values above 127 have to be encoded to avoid this.
|
|
-- This costs quite a bit of bandwidth (about 13-14%)
|
|
-- Also, because drunken status is unknown for the received
|
|
-- , strings used with SendChatMessage should be terminated with
|
|
-- an identifying byte value, after which the server MAY add "...hic!"
|
|
-- or as much as it can fit(!).
|
|
-- Pass the identifying byte as a reserved character to this function
|
|
-- to ensure the encoding doesn't contain that value.
|
|
-- or use this: local message, match = arg1:gsub("^(.*)\029.-$", "%1")
|
|
-- arg1 is message from channel, \029 is the string terminator
|
|
-- , but may be used in the encoded datastream as well. :-)
|
|
-- This encoding will expand data anywhere from:
|
|
-- 0% (average with pure ascii text)
|
|
-- 53.5% (average with random data valued zero to 255)
|
|
-- 100% (only encoding data that encodes to two bytes)
|
|
local function GenerateWoWChatChannelCodec()
|
|
local r = {}
|
|
for i = 128, 255 do
|
|
r[ table.getn( r ) + 1 ] = _byte_to_char[ i ]
|
|
end
|
|
|
|
local reserved_chars = "sS\000\010\013\124%" .. table_concat( r )
|
|
return LibDeflate:CreateCodec( reserved_chars
|
|
, "\029\031", "\015\020" )
|
|
end
|
|
|
|
local _chat_channel_codec
|
|
|
|
--- Encode the string to make it ready to be transmitted in World of
|
|
-- Warcraft chat channel. <br>
|
|
-- See also https://wow.gamepedia.com/ValidChatMessageCharacters
|
|
-- @param str [string] The string to be encoded.
|
|
-- @return [string] The encoded string.
|
|
-- @see LibDeflate:DecodeForWoWChatChannel
|
|
function LibDeflate:EncodeForWoWChatChannel( str )
|
|
if type( str ) ~= "string" then
|
|
error( string.format( "Usage: LibDeflate:EncodeForWoWChatChannel(str):"
|
|
.. " 'str' - string expected got '%s'.", type( str ) ), 2 )
|
|
end
|
|
if not _chat_channel_codec then
|
|
_chat_channel_codec = GenerateWoWChatChannelCodec()
|
|
end
|
|
return _chat_channel_codec:Encode( str )
|
|
end
|
|
|
|
--- Decode the string produced by LibDeflate:EncodeForWoWChatChannel.
|
|
-- @param str [string] The string to be decoded.
|
|
-- @return [string/nil] The decoded string if succeeds. nil if fails.
|
|
-- @see LibDeflate:EncodeForWoWChatChannel
|
|
function LibDeflate:DecodeForWoWChatChannel( str )
|
|
if type( str ) ~= "string" then
|
|
error( string.format( "Usage: LibDeflate:DecodeForWoWChatChannel(str):"
|
|
.. " 'str' - string expected got '%s'.", type( str ) ), 2 )
|
|
end
|
|
if not _chat_channel_codec then
|
|
_chat_channel_codec = GenerateWoWChatChannelCodec()
|
|
end
|
|
return _chat_channel_codec:Decode( str )
|
|
end
|
|
|
|
-- Credits to WeakAuras2 and Galmok for the 6 bit encoding algorithm.
|
|
-- The code has been rewritten by the author of LibDeflate.
|
|
-- The result of encoding will be 25% larger than the
|
|
-- origin string, but every single byte of the encoding result will be
|
|
-- printable characters as the following.
|
|
local _byte_to_6bit_char = {
|
|
[ 0 ] = "a",
|
|
"b",
|
|
"c",
|
|
"d",
|
|
"e",
|
|
"f",
|
|
"g",
|
|
"h",
|
|
"i",
|
|
"j",
|
|
"k",
|
|
"l",
|
|
"m",
|
|
"n",
|
|
"o",
|
|
"p",
|
|
"q",
|
|
"r",
|
|
"s",
|
|
"t",
|
|
"u",
|
|
"v",
|
|
"w",
|
|
"x",
|
|
"y",
|
|
"z",
|
|
"A",
|
|
"B",
|
|
"C",
|
|
"D",
|
|
"E",
|
|
"F",
|
|
"G",
|
|
"H",
|
|
"I",
|
|
"J",
|
|
"K",
|
|
"L",
|
|
"M",
|
|
"N",
|
|
"O",
|
|
"P",
|
|
"Q",
|
|
"R",
|
|
"S",
|
|
"T",
|
|
"U",
|
|
"V",
|
|
"W",
|
|
"X",
|
|
"Y",
|
|
"Z",
|
|
"0",
|
|
"1",
|
|
"2",
|
|
"3",
|
|
"4",
|
|
"5",
|
|
"6",
|
|
"7",
|
|
"8",
|
|
"9",
|
|
"(",
|
|
")",
|
|
}
|
|
|
|
local _6bit_to_byte = {
|
|
[ 97 ] = 0,
|
|
[ 98 ] = 1,
|
|
[ 99 ] = 2,
|
|
[ 100 ] = 3,
|
|
[ 101 ] = 4,
|
|
[ 102 ] = 5,
|
|
[ 103 ] = 6,
|
|
[ 104 ] = 7,
|
|
[ 105 ] = 8,
|
|
[ 106 ] = 9,
|
|
[ 107 ] = 10,
|
|
[ 108 ] = 11,
|
|
[ 109 ] = 12,
|
|
[ 110 ] = 13,
|
|
[ 111 ] = 14,
|
|
[ 112 ] = 15,
|
|
[ 113 ] = 16,
|
|
[ 114 ] = 17,
|
|
[ 115 ] = 18,
|
|
[ 116 ] = 19,
|
|
[ 117 ] = 20,
|
|
[ 118 ] = 21,
|
|
[ 119 ] = 22,
|
|
[ 120 ] = 23,
|
|
[ 121 ] = 24,
|
|
[ 122 ] = 25,
|
|
[ 65 ] = 26,
|
|
[ 66 ] = 27,
|
|
[ 67 ] = 28,
|
|
[ 68 ] = 29,
|
|
[ 69 ] = 30,
|
|
[ 70 ] = 31,
|
|
[ 71 ] = 32,
|
|
[ 72 ] = 33,
|
|
[ 73 ] = 34,
|
|
[ 74 ] = 35,
|
|
[ 75 ] = 36,
|
|
[ 76 ] = 37,
|
|
[ 77 ] = 38,
|
|
[ 78 ] = 39,
|
|
[ 79 ] = 40,
|
|
[ 80 ] = 41,
|
|
[ 81 ] = 42,
|
|
[ 82 ] = 43,
|
|
[ 83 ] = 44,
|
|
[ 84 ] = 45,
|
|
[ 85 ] = 46,
|
|
[ 86 ] = 47,
|
|
[ 87 ] = 48,
|
|
[ 88 ] = 49,
|
|
[ 89 ] = 50,
|
|
[ 90 ] = 51,
|
|
[ 48 ] = 52,
|
|
[ 49 ] = 53,
|
|
[ 50 ] = 54,
|
|
[ 51 ] = 55,
|
|
[ 52 ] = 56,
|
|
[ 53 ] = 57,
|
|
[ 54 ] = 58,
|
|
[ 55 ] = 59,
|
|
[ 56 ] = 60,
|
|
[ 57 ] = 61,
|
|
[ 40 ] = 62,
|
|
[ 41 ] = 63,
|
|
}
|
|
|
|
--- Encode the string to make it printable. <br>
|
|
--
|
|
-- Credit to WeakAuras2, this function is equivalant to the implementation
|
|
-- it is using right now. <br>
|
|
-- The code has been rewritten by the author of LibDeflate. <br>
|
|
-- The encoded string will be 25% larger than the origin string. However, every
|
|
-- single byte of the encoded string will be one of 64 printable ASCII
|
|
-- characters, which are can be easier copied, pasted and displayed.
|
|
-- (26 lowercase letters, 26 uppercase letters, 10 numbers digits,
|
|
-- left parenthese, or right parenthese)
|
|
-- @param str [string] The string to be encoded.
|
|
-- @return [string] The encoded string.
|
|
function LibDeflate:EncodeForPrint( str )
|
|
if type( str ) ~= "string" then
|
|
error( string.format( "Usage: LibDeflate:EncodeForPrint(str):"
|
|
.. " 'str' - string expected got '%s'.", type( str ) ), 2 )
|
|
end
|
|
local strlen = string.len( str )
|
|
local strlenMinus2 = strlen - 2
|
|
local i = 1
|
|
local buffer = {}
|
|
local buffer_size = 0
|
|
while i <= strlenMinus2 do
|
|
local x1, x2, x3 = string_byte( str, i, i + 2 )
|
|
i = i + 3
|
|
local cache = x1 + x2 * 256 + x3 * 65536
|
|
local b1 = mod( cache, 64 )
|
|
cache = (cache - b1) / 64
|
|
local b2 = mod( cache, 64 )
|
|
cache = (cache - b2) / 64
|
|
local b3 = mod( cache, 64 )
|
|
local b4 = (cache - b3) / 64
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] =
|
|
_byte_to_6bit_char[ b1 ] .. _byte_to_6bit_char[ b2 ]
|
|
.. _byte_to_6bit_char[ b3 ] .. _byte_to_6bit_char[ b4 ]
|
|
end
|
|
|
|
local cache = 0
|
|
local cache_bitlen = 0
|
|
while i <= strlen do
|
|
local x = string_byte( str, i, i )
|
|
cache = cache + x * _pow2[ cache_bitlen ]
|
|
cache_bitlen = cache_bitlen + 8
|
|
i = i + 1
|
|
end
|
|
while cache_bitlen > 0 do
|
|
local bit6 = mod( cache, 64 )
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] = _byte_to_6bit_char[ bit6 ]
|
|
cache = (cache - bit6) / 64
|
|
cache_bitlen = cache_bitlen - 6
|
|
end
|
|
|
|
return table_concat( buffer )
|
|
end
|
|
|
|
--- Decode the printable string produced by LibDeflate:EncodeForPrint.
|
|
-- "str" will have its prefixed and trailing control characters or space
|
|
-- removed before it is decoded, so it is easier to use if "str" comes form
|
|
-- user copy and paste with some prefixed or trailing spaces.
|
|
-- Then decode fails if the string contains any characters cant be produced by
|
|
-- LibDeflate:EncodeForPrint. That means, decode fails if the string contains a
|
|
-- characters NOT one of 26 lowercase letters, 26 uppercase letters,
|
|
-- 10 numbers digits, left parenthese, or right parenthese.
|
|
-- @param str [string] The string to be decoded
|
|
-- @return [string/nil] The decoded string if succeeds. nil if fails.
|
|
function LibDeflate:DecodeForPrint( str )
|
|
if type( str ) ~= "string" then
|
|
error( string.format( "Usage: LibDeflate:DecodeForPrint(str):"
|
|
.. " 'str' - string expected got '%s'.", type( str ) ), 2 )
|
|
end
|
|
str = string.gsub( str, "^[%c ]+", "" )
|
|
str = string.gsub( str, "[%c ]+$", "" )
|
|
|
|
local strlen = string.len( str )
|
|
if strlen == 1 then
|
|
return nil
|
|
end
|
|
local strlenMinus3 = strlen - 3
|
|
local i = 1
|
|
local buffer = {}
|
|
local buffer_size = 0
|
|
while i <= strlenMinus3 do
|
|
local x1, x2, x3, x4 = string_byte( str, i, i + 3 )
|
|
x1 = _6bit_to_byte[ x1 ]
|
|
x2 = _6bit_to_byte[ x2 ]
|
|
x3 = _6bit_to_byte[ x3 ]
|
|
x4 = _6bit_to_byte[ x4 ]
|
|
if not (x1 and x2 and x3 and x4) then
|
|
return nil
|
|
end
|
|
i = i + 4
|
|
local cache = x1 + x2 * 64 + x3 * 4096 + x4 * 262144
|
|
local b1 = mod( cache, 256 )
|
|
cache = (cache - b1) / 256
|
|
local b2 = mod( cache, 256 )
|
|
local b3 = (cache - b2) / 256
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] =
|
|
_byte_to_char[ b1 ] .. _byte_to_char[ b2 ] .. _byte_to_char[ b3 ]
|
|
end
|
|
|
|
local cache = 0
|
|
local cache_bitlen = 0
|
|
while i <= strlen do
|
|
local x = string_byte( str, i, i )
|
|
x = _6bit_to_byte[ x ]
|
|
if not x then
|
|
return nil
|
|
end
|
|
cache = cache + x * _pow2[ cache_bitlen ]
|
|
cache_bitlen = cache_bitlen + 6
|
|
i = i + 1
|
|
end
|
|
|
|
while cache_bitlen >= 8 do
|
|
local byte = mod( cache, 256 )
|
|
buffer_size = buffer_size + 1
|
|
buffer[ buffer_size ] = _byte_to_char[ byte ]
|
|
cache = (cache - byte) / 256
|
|
cache_bitlen = cache_bitlen - 8
|
|
end
|
|
|
|
return table_concat( buffer )
|
|
end
|
|
|
|
local function InternalClearCache()
|
|
_chat_channel_codec = nil
|
|
_addon_channel_codec = nil
|
|
end
|
|
|
|
-- For test. Don't use the functions in this table for real application.
|
|
-- Stuffs in this table is subject to change.
|
|
LibDeflate.internals = {
|
|
LoadStringToTable = LoadStringToTable,
|
|
IsValidDictionary = IsValidDictionary,
|
|
IsEqualAdler32 = IsEqualAdler32,
|
|
_byte_to_6bit_char = _byte_to_6bit_char,
|
|
_6bit_to_byte = _6bit_to_byte,
|
|
InternalClearCache = InternalClearCache,
|
|
}
|
|
|
|
--[[-- Commandline options
|
|
@class table
|
|
@name CommandlineOptions
|
|
@usage lua LibDeflate.lua [OPTION] [INPUT] [OUTPUT]
|
|
\-0 store only. no compression.
|
|
\-1 fastest compression.
|
|
\-9 slowest and best compression.
|
|
\-d do decompression instead of compression.
|
|
\--dict <filename> specify the file that contains
|
|
the entire preset dictionary.
|
|
\-h give this help.
|
|
\--strategy <fixed/huffman_only/dynamic> specify a special compression strategy.
|
|
\-v print the version and copyright info.
|
|
\--zlib use zlib format instead of raw deflate.
|
|
]]
|
|
|
|
-- currently no plan to support stdin and stdout.
|
|
-- Because Lua in Windows does not set stdout with binary mode.
|
|
if io and os and debug and _G.arg then
|
|
local io = io
|
|
local os = os
|
|
local debug = debug
|
|
local arg = _G.arg
|
|
local debug_info = debug.getinfo( 1 )
|
|
if debug_info.source == arg[ 0 ]
|
|
or debug_info.short_src == arg[ 0 ] then
|
|
-- We are indeed runnning THIS file from the commandline.
|
|
local input
|
|
local output
|
|
local i = 1
|
|
local status
|
|
local is_zlib = false
|
|
local is_decompress = false
|
|
local level
|
|
local strategy
|
|
local dictionary
|
|
while (arg[ i ]) do
|
|
local a = arg[ i ]
|
|
if a == "-h" then
|
|
print( LibDeflate._COPYRIGHT
|
|
.. "\nUsage: lua LibDeflate.lua [OPTION] [INPUT] [OUTPUT]\n"
|
|
.. " -0 store only. no compression.\n"
|
|
.. " -1 fastest compression.\n"
|
|
.. " -9 slowest and best compression.\n"
|
|
.. " -d do decompression instead of compression.\n"
|
|
.. " --dict <filename> specify the file that contains"
|
|
.. " the entire preset dictionary.\n"
|
|
.. " -h give this help.\n"
|
|
.. " --strategy <fixed/huffman_only/dynamic>"
|
|
.. " specify a special compression strategy.\n"
|
|
.. " -v print the version and copyright info.\n"
|
|
.. " --zlib use zlib format instead of raw deflate.\n" )
|
|
os.exit( 0 )
|
|
elseif a == "-v" then
|
|
print( LibDeflate._COPYRIGHT )
|
|
os.exit( 0 )
|
|
elseif string.find( a, "^%-[0-9]$" ) then
|
|
level = tonumber( string.sub( a, 2, 2 ) )
|
|
elseif a == "-d" then
|
|
is_decompress = true
|
|
elseif a == "--dict" then
|
|
i = i + 1
|
|
local dict_filename = arg[ i ]
|
|
if not dict_filename then
|
|
io.stderr:write( "You must speicify the dict filename" )
|
|
os.exit( 1 )
|
|
end
|
|
local dict_file, dict_status = io.open( dict_filename, "rb" )
|
|
if not dict_file then
|
|
io.stderr:write(
|
|
string.format( "LibDeflate: Cannot read the dictionary file '%s': %s", dict_filename, dict_status ) )
|
|
os.exit( 1 )
|
|
end
|
|
local dict_str = dict_file:read( "*all" )
|
|
dict_file:close()
|
|
-- In your lua program, you should pass in adler32 as a CONSTANT
|
|
-- , so it actually prevent you from modifying dictionary
|
|
-- unintentionally during the program development. I do this
|
|
-- here just because no convenient way to verify in commandline.
|
|
dictionary = LibDeflate:CreateDictionary( dict_str,
|
|
string.len( dict_str ), LibDeflate:Adler32( dict_str ) )
|
|
elseif a == "--strategy" then
|
|
-- Not sure if I should check error here
|
|
-- If I do, redudant code.
|
|
i = i + 1
|
|
strategy = arg[ i ]
|
|
elseif a == "--zlib" then
|
|
is_zlib = true
|
|
elseif string.find( a, "^%-" ) then
|
|
io.stderr:write( string.format( "LibDeflate: Invalid argument: %s", a ) )
|
|
os.exit( 1 )
|
|
else
|
|
if not input then
|
|
input, status = io.open( a, "rb" )
|
|
if not input then
|
|
io.stderr:write(
|
|
string.format( "LibDeflate: Cannot read the file '%s': %s", a, tostring( status ) ) )
|
|
os.exit( 1 )
|
|
end
|
|
elseif not output then
|
|
output, status = io.open( a, "wb" )
|
|
if not output then
|
|
io.stderr:write(
|
|
string.format( "LibDeflate: Cannot write the file '%s': %s", a, tostring( status ) ) )
|
|
os.exit( 1 )
|
|
end
|
|
end
|
|
end
|
|
i = i + 1
|
|
end -- while (arg[i])
|
|
|
|
if not input or not output then
|
|
io.stderr:write( "LibDeflate:"
|
|
.. " You must specify both input and output files." )
|
|
os.exit( 1 )
|
|
end
|
|
|
|
local input_data = input:read( "*all" )
|
|
local configs = {
|
|
level = level,
|
|
strategy = strategy,
|
|
}
|
|
local output_data
|
|
if not is_decompress then
|
|
if not is_zlib then
|
|
if not dictionary then
|
|
output_data =
|
|
LibDeflate:CompressDeflate( input_data, configs )
|
|
else
|
|
output_data =
|
|
LibDeflate:CompressDeflateWithDict( input_data, dictionary
|
|
, configs )
|
|
end
|
|
else
|
|
if not dictionary then
|
|
output_data =
|
|
LibDeflate:CompressZlib( input_data, configs )
|
|
else
|
|
output_data =
|
|
LibDeflate:CompressZlibWithDict( input_data, dictionary
|
|
, configs )
|
|
end
|
|
end
|
|
else
|
|
if not is_zlib then
|
|
if not dictionary then
|
|
output_data = LibDeflate:DecompressDeflate( input_data )
|
|
else
|
|
output_data = LibDeflate:DecompressDeflateWithDict(
|
|
input_data, dictionary )
|
|
end
|
|
else
|
|
if not dictionary then
|
|
output_data = LibDeflate:DecompressZlib( input_data )
|
|
else
|
|
output_data = LibDeflate:DecompressZlibWithDict(
|
|
input_data, dictionary )
|
|
end
|
|
end
|
|
end
|
|
|
|
if not output_data then
|
|
io.stderr:write( "LibDeflate: Decompress fails." )
|
|
os.exit( 1 )
|
|
end
|
|
|
|
output:write( output_data )
|
|
if input and input ~= io.stdin then
|
|
input:close()
|
|
end
|
|
if output and output ~= io.stdout then
|
|
output:close()
|
|
end
|
|
|
|
io.stderr:write( string.format( "Successfully writes %d bytes", output_data:len() ) )
|
|
os.exit( 0 )
|
|
end
|
|
end
|
|
|
|
return LibDeflate
|