RollFor/libs/vanilla/LibDeflate/LibDeflate.lua

--[[--
LibDeflate 1.0.2-release <br>
Pure Lua compressor and decompressor with high compression ratio using
DEFLATE/zlib format.

@file LibDeflate.lua
@author Haoqian He (Github: SafeteeWoW; World of Warcraft: Safetyy-Illidan(US))
@copyright LibDeflate <2018-2020> Haoqian He
@license zlib License

This library is implemented according to the following specifications. <br>
Report a bug if LibDeflate is not fully compliant with those specs. <br>
Both compressors and decompressors have been implemented in the library.<br>
1. RFC1950: DEFLATE Compressed Data Format Specification version 1.3 <br>
https://tools.ietf.org/html/rfc1951 <br>
2. RFC1951: ZLIB Compressed Data Format Specification version 3.3 <br>
https://tools.ietf.org/html/rfc1950 <br>

This library requires Lua 5.1/5.2/5.3/5.4 interpreter or LuaJIT v2.0+. <br>
This library does not have any dependencies. <br>
<br>
This file "LibDeflate.lua" is the only source file of
the library. <br>
Submit suggestions or report bugs to
https://github.com/safeteeWow/LibDeflate/issues
]]

--[[
zlib License

(C) 2018-2020 Haoqian He

This software is provided 'as-is', without any express or implied
warranty.  In no event will the authors be held liable for any damages
arising from the use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not
   claim that you wrote the original software. If you use this software
   in a product, an acknowledgment in the product documentation would be
   appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
   misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.

License History:
1. GNU General Public License Version 3 in v1.0.0 and earlier versions.
2. GNU Lesser General Public License Version 3 in v1.0.1
3. the zlib License since v1.0.2

Credits and Disclaimer:
This library rewrites the code from the algorithm
and the ideas of the following projects,
and uses their code to help to test the correctness of this library,
but their code is not included directly in the library itself.
Their original licenses shall be comply when used.

1. zlib, by Jean-loup Gailly (compression) and Mark Adler (decompression).
	http://www.zlib.net/
	Licensed under zlib License. http://www.zlib.net/zlib_license.html
	For the compression algorithm.
2. puff, by Mark Adler. https://github.com/madler/zlib/tree/master/contrib/puff
	Licensed under zlib License. http://www.zlib.net/zlib_license.html
	For the decompression algorithm.
3. LibCompress, by jjsheets and Galmok of European Stormrage (Horde)
	https://www.wowace.com/projects/libcompress
	Licensed under GPLv2.
	https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
	For the code to create customized codec.
4. WeakAuras2,
	https://github.com/WeakAuras/WeakAuras2
	Licensed under GPLv2.
	For the 6bit encoding and decoding.
]]

--[[
	Curseforge auto-packaging replacements:

	Project Date: @project-date-iso@
	Project Hash: @project-hash@
	Project Version: @project-version@
--]]

local LibDeflate

do
  -- Semantic version. all lowercase.
  -- Suffix can be alpha1, alpha2, beta1, beta2, rc1, rc2, etc.
  -- NOTE: Two version numbers needs to modify.
  -- 1. On the top of LibDeflate.lua
  -- 2. _VERSION
  -- 3. _MINOR

  -- version to store the official version of LibDeflate
  local _VERSION = "1.0.2-release"

  -- When MAJOR is changed, I should name it as LibDeflate2
  local _MAJOR = "LibDeflate"

  -- Update this whenever a new version, for LibStub version registration.
  -- 0 : v0.x
  -- 1 : v1.0.0
  -- 2 : v1.0.1
  -- 3 : v1.0.2
  local _MINOR = 3

  local _COPYRIGHT =
      "LibDeflate " .. _VERSION
      .. " Copyright (C) 2018-2020 Haoqian He."
      .. " Licensed under the zlib License"

  -- Register in the World of Warcraft library "LibStub" if detected.
  if LibStub then
    local lib, minor = LibStub:GetLibrary( _MAJOR, true )
    if lib and minor and minor >= _MINOR then -- No need to update.
      return lib
    else                                      -- Update or first time register
      LibDeflate = LibStub:NewLibrary( _MAJOR, _MINOR )
      -- NOTE: It is important that new version has implemented
      -- all exported APIs and tables in the old version,
      -- so the old library is fully garbage collected,
      -- and we 100% ensure the backward compatibility.
    end
  else -- "LibStub" is not detected.
    LibDeflate = {}
  end

  LibDeflate._VERSION = _VERSION
  LibDeflate._MAJOR = _MAJOR
  LibDeflate._MINOR = _MINOR
  LibDeflate._COPYRIGHT = _COPYRIGHT
end

-- localize Lua api for faster access.
local assert = assert
local error = error
local pairs = pairs
local string_byte = string.byte
local string_char = string.char
local string_find = string.find
local string_gsub = string.gsub
local string_sub = string.sub
local table_concat = table.concat
local table_sort = table.sort
local tostring = tostring
local type = type
---@diagnostic disable-next-line: undefined-field
local mod = math.mod

-- Converts i to 2^i, (0<=i<=32)
-- This is used to implement bit left shift and bit right shift.
-- "x >> y" in C:   "(x-x%_pow2[y])/_pow2[y]" in Lua
-- "x << y" in C:   "x*_pow2[y]" in Lua
local _pow2 = {}

-- Converts any byte to a character, (0<=byte<=255)
local _byte_to_char = {}

-- _reverseBitsTbl[len][val] stores the bit reverse of
-- the number with bit length "len" and value "val"
-- For example, decimal number 6 with bits length 5 is binary 00110
-- It's reverse is binary 01100,
-- which is decimal 12 and 12 == _reverseBitsTbl[5][6]
-- 1<=len<=9, 0<=val<=2^len-1
-- The reason for 1<=len<=9 is that the max of min bitlen of huffman code
-- of a huffman alphabet is 9?
local _reverse_bits_tbl = {}

-- Convert a LZ77 length (3<=len<=258) to
-- a deflate literal/LZ77_length code (257<=code<=285)
local _length_to_deflate_code = {}

-- convert a LZ77 length (3<=len<=258) to
-- a deflate literal/LZ77_length code extra bits.
local _length_to_deflate_extra_bits = {}

-- Convert a LZ77 length (3<=len<=258) to
-- a deflate literal/LZ77_length code extra bit length.
local _length_to_deflate_extra_bitlen = {}

-- Convert a small LZ77 distance (1<=dist<=256) to a deflate code.
local _dist256_to_deflate_code = {}

-- Convert a small LZ77 distance (1<=dist<=256) to
-- a deflate distance code extra bits.
local _dist256_to_deflate_extra_bits = {}

-- Convert a small LZ77 distance (1<=dist<=256) to
-- a deflate distance code extra bit length.
local _dist256_to_deflate_extra_bitlen = {}

-- Convert a literal/LZ77_length deflate code to LZ77 base length
-- The key of the table is (code - 256), 257<=code<=285
local _literal_deflate_code_to_base_len = {
  3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
  35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258,
}

-- Convert a literal/LZ77_length deflate code to base LZ77 length extra bits
-- The key of the table is (code - 256), 257<=code<=285
local _literal_deflate_code_to_extra_bitlen = {
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0,
}

-- Convert a distance deflate code to base LZ77 distance. (0<=code<=29)
local _dist_deflate_code_to_base_dist = {
  [ 0 ] = 1,
  2,
  3,
  4,
  5,
  7,
  9,
  13,
  17,
  25,
  33,
  49,
  65,
  97,
  129,
  193,
  257,
  385,
  513,
  769,
  1025,
  1537,
  2049,
  3073,
  4097,
  6145,
  8193,
  12289,
  16385,
  24577,
}

-- Convert a distance deflate code to LZ77 bits length. (0<=code<=29)
local _dist_deflate_code_to_extra_bitlen = {
  [ 0 ] = 0,
  0,
  0,
  0,
  1,
  1,
  2,
  2,
  3,
  3,
  4,
  4,
  5,
  5,
  6,
  6,
  7,
  7,
  8,
  8,
  9,
  9,
  10,
  10,
  11,
  11,
  12,
  12,
  13,
  13,
}

-- The code order of the first huffman header in the dynamic deflate block.
-- See the page 12 of RFC1951
local _rle_codes_huffman_bitlen_order = { 16, 17, 18,
  0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
}

-- The following tables are used by fixed deflate block.
-- The value of these tables are assigned at the bottom of the source.

-- The huffman code of the literal/LZ77_length deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_code

-- Convert huffman code of the literal/LZ77_length to deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_to_deflate_code

-- The bit length of the huffman code of literal/LZ77_length deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_bitlen

-- The count of each bit length of the literal/LZ77_length deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_bitlen_count

-- The huffman code of the distance deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_code

-- Convert huffman code of the distance to deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_to_deflate_code

-- The bit length of the huffman code of the distance deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_bitlen

-- The count of each bit length of the huffman code of
-- the distance deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_bitlen_count

for i = 0, 255 do
  _byte_to_char[ i ] = string_char( i )
end

do
  local pow = 1
  for i = 0, 32 do
    _pow2[ i ] = pow
    pow = pow * 2
  end
end

for i = 1, 9 do
  _reverse_bits_tbl[ i ] = {}
  for j = 0, _pow2[ i + 1 ] - 1 do
    local reverse = 0
    local value = j
    for _ = 1, i do
      -- The following line is equivalent to "res | (code %2)" in C.
      reverse = reverse - mod( reverse, 2 )
          + (((mod( reverse, 2 ) == 1) or (mod( value, 2 )) == 1) and 1 or 0)
      value = (value - mod( value, 2 )) / 2
      reverse = reverse * 2
    end
    _reverse_bits_tbl[ i ][ j ] = (reverse - mod( reverse, 2 )) / 2
  end
end

-- The source code is written according to the pattern in the numbers
-- in RFC1951 Page10.
do
  local a = 18
  local b = 16
  local c = 265
  local bitlen = 1
  for len = 3, 258 do
    if len <= 10 then
      _length_to_deflate_code[ len ] = len + 254
      _length_to_deflate_extra_bitlen[ len ] = 0
    elseif len == 258 then
      _length_to_deflate_code[ len ] = 285
      _length_to_deflate_extra_bitlen[ len ] = 0
    else
      if len > a then
        a = a + b
        b = b * 2
        c = c + 4
        bitlen = bitlen + 1
      end
      local t = len - a - 1 + b / 2
      _length_to_deflate_code[ len ] = (t - (mod( t, b / 8 ))) / (b / 8) + c
      _length_to_deflate_extra_bitlen[ len ] = bitlen
      _length_to_deflate_extra_bits[ len ] = mod( t, b / 8 )
    end
  end
end

-- The source code is written according to the pattern in the numbers
-- in RFC1951 Page11.
do
  _dist256_to_deflate_code[ 1 ] = 0
  _dist256_to_deflate_code[ 2 ] = 1
  _dist256_to_deflate_extra_bitlen[ 1 ] = 0
  _dist256_to_deflate_extra_bitlen[ 2 ] = 0

  local a = 3
  local b = 4
  local code = 2
  local bitlen = 0
  for dist = 3, 256 do
    if dist > b then
      a = a * 2
      b = b * 2
      code = code + 2
      bitlen = bitlen + 1
    end
    _dist256_to_deflate_code[ dist ] = (dist <= a) and code or (code + 1)
    _dist256_to_deflate_extra_bitlen[ dist ] = (bitlen < 0) and 0 or bitlen
    if b >= 8 then
      _dist256_to_deflate_extra_bits[ dist ] = mod( dist - b / 2 - 1, b / 4 )
    end
  end
end

--- Calculate the Adler-32 checksum of the string. <br>
-- See RFC1950 Page 9 https://tools.ietf.org/html/rfc1950 for the
-- definition of Adler-32 checksum.
-- @param str [string] the input string to calcuate its Adler-32 checksum.
-- @return [integer] The Adler-32 checksum, which is greater or equal to 0,
-- and less than 2^32 (4294967296).
function LibDeflate:Adler32( str )
  -- This function is loop unrolled by better performance.
  --
  -- Here is the minimum code:
  --
  -- local a = 1
  -- local b = 0
  -- for i=1, #str do
  -- 		local s = string.byte(str, i, i)
  -- 		a = (a+s)%65521
  -- 		b = (b+a)%65521
  -- 		end
  -- return b*65536+a
  if type( str ) ~= "string" then
    error( string.format( "Usage: LibDeflate:Adler32(str):"
      .. " 'str' - string expected got '%s'.", type( str ) ), 2 )
  end
  local strlen = string.len( str )

  local i = 1
  local a = 1
  local b = 0
  while i <= strlen - 15 do
    local x1, x2, x3, x4, x5, x6, x7, x8,
    x9, x10, x11, x12, x13, x14, x15, x16 = string_byte( str, i, i + 15 )
    b = mod( b + 16 * a + 16 * x1 + 15 * x2 + 14 * x3 + 13 * x4 + 12 * x5 + 11 * x6 + 10 * x7 + 9 * x8 + 8 * x9
      + 7 * x10 + 6 * x11 + 5 * x12 + 4 * x13 + 3 * x14 + 2 * x15 + x16, 65521 )
    a = mod( a + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16, 65521 )
    i = i + 16
  end
  while (i <= strlen) do
    local x = string_byte( str, i, i )
    a = mod( a + x, 65521 )
    b = mod( b + a, 65521 )
    i = i + 1
  end
  return mod( b * 65536 + a, 4294967296 )
end

-- Compare adler32 checksum.
-- adler32 should be compared with a mod to avoid sign problem
-- 4072834167 (unsigned) is the same adler32 as -222133129
local function IsEqualAdler32( actual, expected )
  return mod( actual, 4294967296 ) == mod( expected, 4294967296 )
end

--- Create a preset dictionary.
--
-- This function is not fast, and the memory consumption of the produced
-- dictionary is about 50 times of the input string. Therefore, it is suggestted
-- to run this function only once in your program.
--
-- It is very important to know that if you do use a preset dictionary,
-- compressors and decompressors MUST USE THE SAME dictionary. That is,
-- dictionary must be created using the same string. If you update your program
-- with a new dictionary, people with the old version won't be able to transmit
-- data with people with the new version. Therefore, changing the dictionary
-- must be very careful.
--
-- The parameters "strlen" and "adler32" add a layer of verification to ensure
-- the parameter "str" is not modified unintentionally during the program
-- development.
--
-- @usage local dict_str = "1234567890"
--
-- -- print(dict_str:len(), LibDeflate:Adler32(dict_str))
-- -- Hardcode the print result below to verify it to avoid acciently
-- -- modification of 'str' during the program development.
-- -- string length: 10, Adler-32: 187433486,
-- -- Don't calculate string length and its Adler-32 at run-time.
--
-- local dict = LibDeflate:CreateDictionary(dict_str, 10, 187433486)
--
-- @param str [string] The string used as the preset dictionary. <br>
-- You should put stuffs that frequently appears in the dictionary
-- string and preferablely put more frequently appeared stuffs toward the end
-- of the string. <br>
-- Empty string and string longer than 32768 bytes are not allowed.
-- @param strlen [integer] The length of 'str'. Please pass in this parameter
-- as a hardcoded constant, in order to verify the content of 'str'. The value
-- of this parameter should be known before your program runs.
-- @param adler32 [integer] The Adler-32 checksum of 'str'. Please pass in this
-- parameter as a hardcoded constant, in order to verify the content of 'str'.
-- The value of this parameter should be known before your program runs.
-- @return  [table] The dictionary used for preset dictionary compression and
-- decompression.
-- @raise error if 'strlen' does not match the length of 'str',
-- or if 'adler32' does not match the Adler-32 checksum of 'str'.
function LibDeflate:CreateDictionary( str, strlen, adler32 )
  if type( str ) ~= "string" then
    error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
      .. " 'str' - string expected got '%s'.", type( str ) ), 2 )
  end
  if type( strlen ) ~= "number" then
    error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
      .. " 'strlen' - number expected got '%s'.", type( strlen ) ), 2 )
  end
  if type( adler32 ) ~= "number" then
    error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
      .. " 'adler32' - number expected got '%s'.", type( adler32 ) ), 2 )
  end
  if strlen ~= string.len( str ) then
    error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
      .. " 'strlen' does not match the actual length of 'str'."
      .. " 'strlen': %u, '#str': %u ."
      .. " Please check if 'str' is modified unintentionally.", strlen, string.len( str ) ) )
  end
  if strlen == 0 then
    error( ("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
      .. " 'str' - Empty string is not allowed."), 2 )
  end
  if strlen > 32768 then
    error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
      .. " 'str' - string longer than 32768 bytes is not allowed."
      .. " Got %d bytes.", strlen ), 2 )
  end
  local actual_adler32 = self:Adler32( str )
  if not IsEqualAdler32( adler32, actual_adler32 ) then
    error( string.format( "Usage: LibDeflate:CreateDictionary(str, strlen, adler32):"
      .. " 'adler32' does not match the actual adler32 of 'str'."
      .. " 'adler32': %u, 'Adler32(str)': %u ."
      .. " Please check if 'str' is modified unintentionally.", adler32, actual_adler32 ) )
  end

  local dictionary = {}
  dictionary.adler32 = adler32
  dictionary.hash_tables = {}
  dictionary.string_table = {}
  dictionary.strlen = strlen
  local string_table = dictionary.string_table
  local hash_tables = dictionary.hash_tables
  string_table[ 1 ] = string_byte( str, 1, 1 )
  string_table[ 2 ] = string_byte( str, 2, 2 )
  if strlen >= 3 then
    local i = 1
    local hash = string_table[ 1 ] * 256 + string_table[ 2 ]
    while i <= strlen - 2 - 3 do
      local x1, x2, x3, x4 = string_byte( str, i + 2, i + 5 )
      string_table[ i + 2 ] = x1
      string_table[ i + 3 ] = x2
      string_table[ i + 4 ] = x3
      string_table[ i + 5 ] = x4
      hash = mod( hash * 256 + x1, 16777216 )
      local t = hash_tables[ hash ]
      if not t then
        t = {}; hash_tables[ hash ] = t
      end
      t[ table.getn( t ) + 1 ] = i - strlen
      i = i + 1
      hash = mod( hash * 256 + x2, 16777216 )
      t = hash_tables[ hash ]
      if not t then
        t = {}; hash_tables[ hash ] = t
      end
      t[ table.getn( t ) + 1 ] = i - strlen
      i = i + 1
      hash = mod( hash * 256 + x3, 16777216 )
      t = hash_tables[ hash ]
      if not t then
        t = {}; hash_tables[ hash ] = t
      end
      t[ table.getn( t ) + 1 ] = i - strlen
      i = i + 1
      hash = mod( hash * 256 + x4, 16777216 )
      t = hash_tables[ hash ]
      if not t then
        t = {}; hash_tables[ hash ] = t
      end
      t[ table.getn( t ) + 1 ] = i - strlen
      i = i + 1
    end
    while i <= strlen - 2 do
      local x = string_byte( str, i + 2 )
      string_table[ i + 2 ] = x
      hash = mod( hash * 256 + x, 16777216 )
      local t = hash_tables[ hash ]
      if not t then
        t = {}; hash_tables[ hash ] = t
      end
      t[ table.getn( t ) + 1 ] = i - strlen
      i = i + 1
    end
  end
  return dictionary
end

-- Check if the dictionary is valid.
-- @param dictionary The preset dictionary for compression and decompression.
-- @return true if valid, false if not valid.
-- @return if not valid, the error message.
local function IsValidDictionary( dictionary )
  if type( dictionary ) ~= "table" then
    return false, string.format( "'dictionary' - table expected got '%s'.", type( dictionary ) )
  end
  if type( dictionary.adler32 ) ~= "number"
      or type( dictionary.string_table ) ~= "table"
      or type( dictionary.strlen ) ~= "number"
      or dictionary.strlen <= 0
      or dictionary.strlen > 32768
      or dictionary.strlen ~= table.getn( dictionary.string_table )
      or type( dictionary.hash_tables ) ~= "table"
  then
    return false, "'dictionary' - corrupted dictionary."
  end
  return true, ""
end

--[[
	key of the configuration table is the compression level,
	and its value stores the compression setting.
	These numbers come from zlib source code.

	Higher compression level usually means better compression.
	(Because LibDeflate uses a simplified version of zlib algorithm,
	there is no guarantee that higher compression level does not create
	bigger file than lower level, but I can say it's 99% likely)

	Be careful with the high compression level. This is a pure lua
	implementation compressor/decompressor, which is significant slower than
	a C/C++ equivalant compressor/decompressor. Very high compression level
	costs significant more CPU time, and usually compression size won't be
	significant smaller when you increase compression level by 1, when the
	level is already very high. Benchmark yourself if you can afford it.

	See also https://github.com/madler/zlib/blob/master/doc/algorithm.txt,
	https://github.com/madler/zlib/blob/master/deflate.c for more information.

	The meaning of each field:
	@field 1 use_lazy_evaluation:
		true/false. Whether the program uses lazy evaluation.
		See what is "lazy evaluation" in the link above.
		lazy_evaluation improves ratio, but relatively slow.
	@field 2 good_prev_length:
		Only effective if lazy is set, Only use 1/4 of max_chain,
		if prev length of lazy match is above this.
	@field 3 max_insert_length/max_lazy_match:
		If not using lazy evaluation,
		insert new strings in the hash table only if the match length is not
		greater than this length.
		If using lazy evaluation, only continue lazy evaluation,
		if previous match length is strictly smaller than this value.
	@field 4 nice_length:
		Number. Don't continue to go down the hash chain,
		if match length is above this.
	@field 5 max_chain:
		Number. The maximum number of hash chains we look.
--]]
local _compression_level_configs = {
  [ 0 ] = { false, nil, 0, 0, 0 },      -- level 0, no compression
  [ 1 ] = { false, nil, 4, 8, 4 },      -- level 1, similar to zlib level 1
  [ 2 ] = { false, nil, 5, 18, 8 },     -- level 2, similar to zlib level 2
  [ 3 ] = { false, nil, 6, 32, 32 },    -- level 3, similar to zlib level 3
  [ 4 ] = { true, 4, 4, 16, 16 },       -- level 4, similar to zlib level 4
  [ 5 ] = { true, 8, 16, 32, 32 },      -- level 5, similar to zlib level 5
  [ 6 ] = { true, 8, 16, 128, 128 },    -- level 6, similar to zlib level 6
  [ 7 ] = { true, 8, 32, 128, 256 },    -- (SLOW) level 7, similar to zlib level 7
  [ 8 ] = { true, 32, 128, 258, 1024 }, --(SLOW) level 8,similar to zlib level 8
  [ 9 ] = { true, 32, 258, 258, 4096 },
  -- (VERY SLOW) level 9, similar to zlib level 9
}

-- Check if the compression/decompression arguments is valid
-- @param str The input string.
-- @param check_dictionary if true, check if dictionary is valid.
-- @param dictionary The preset dictionary for compression and decompression.
-- @param check_configs if true, check if config is valid.
-- @param configs The compression configuration table
-- @return true if valid, false if not valid.
-- @return if not valid, the error message.
local function IsValidArguments( str,
                                 check_dictionary, dictionary,
                                 check_configs, configs )
  if type( str ) ~= "string" then
    return false, string.format( "'str' - string expected got '%s'.", type( str ) )
  end
  if check_dictionary then
    local dict_valid, dict_err = IsValidDictionary( dictionary )
    if not dict_valid then
      return false, dict_err
    end
  end
  if check_configs then
    local type_configs = type( configs )
    if type_configs ~= "nil" and type_configs ~= "table" then
      return false, string.format( "'configs' - nil or table expected got '%s'.", type( configs ) )
    end
    if type_configs == "table" then
      for k, v in pairs( configs ) do
        if k ~= "level" and k ~= "strategy" then
          return false,
              string.format( "'configs' - unsupported table key in the configs: '%s'.", k )
        elseif k == "level" and not _compression_level_configs[ v ] then
          return false,
              string.format( "'configs' - unsupported 'level': %s.", tostring( v ) )
        elseif k == "strategy" and v ~= "fixed" and v ~= "huffman_only"
            and v ~= "dynamic" then
          -- random_block_type is for testing purpose
          return false, string.format( "'configs' - unsupported 'strategy': '%s'.", tostring( v ) )
        end
      end
    end
  end
  return true, ""
end


--[[ --------------------------------------------------------------------------
	Compress code
--]] --------------------------------------------------------------------------

-- partial flush to save memory
local _FLUSH_MODE_MEMORY_CLEANUP = 0
-- full flush with partial bytes
local _FLUSH_MODE_OUTPUT = 1
-- write bytes to get to byte boundary
local _FLUSH_MODE_BYTE_BOUNDARY = 2
-- no flush, just get num of bits written so far
local _FLUSH_MODE_NO_FLUSH = 3

--[[
	Create an empty writer to easily write stuffs as the unit of bits.
	Return values:
	1. WriteBits(code, bitlen):
	2. WriteString(str):
	3. Flush(mode):
--]]
local function CreateWriter()
  local buffer_size = 0
  local cache = 0
  local cache_bitlen = 0
  local total_bitlen = 0
  local buffer = {}
  -- When buffer is big enough, flush into result_buffer to save memory.
  local result_buffer = {}

  -- Write bits with value "value" and bit length of "bitlen" into writer.
  -- @param value: The value being written
  -- @param bitlen: The bit length of "value"
  -- @return nil
  local function WriteBits( value, bitlen )
    cache = cache + value * _pow2[ cache_bitlen ]
    cache_bitlen = cache_bitlen + bitlen
    total_bitlen = total_bitlen + bitlen
    -- Only bulk to buffer every 4 bytes. This is quicker.
    if cache_bitlen >= 32 then
      buffer_size = buffer_size + 1
      buffer[ buffer_size ] =
          _byte_to_char[ mod( cache, 256 ) ]
          .. _byte_to_char[ mod( (cache - mod( cache, 256 )) / 256, 256 ) ]
          .. _byte_to_char[ mod( (cache - mod( cache, 65536 )) / 65536, 256 ) ]
          .. _byte_to_char[ mod( (cache - mod( cache, 16777216 )) / 16777216, 256 ) ]
      local rshift_mask = _pow2[ 32 - cache_bitlen + bitlen ]
      cache = (value - mod( value, rshift_mask )) / rshift_mask
      cache_bitlen = cache_bitlen - 32
    end
  end

  -- Write the entire string into the writer.
  -- @param str The string being written
  -- @return nil
  local function WriteString( str )
    for _ = 1, cache_bitlen, 8 do
      buffer_size = buffer_size + 1
      buffer[ buffer_size ] = string_char( mod( cache, 256 ) )
      cache = (cache - mod( cache, 256 )) / 256
    end
    cache_bitlen = 0
    buffer_size = buffer_size + 1
    buffer[ buffer_size ] = str
    total_bitlen = total_bitlen + string.len( str ) * 8
  end

  -- Flush current stuffs in the writer and return it.
  -- This operation will free most of the memory.
  -- @param mode See the descrtion of the constant and the source code.
  -- @return The total number of bits stored in the writer right now.
  -- for byte boundary mode, it includes the padding bits.
  -- for output mode, it does not include padding bits.
  -- @return Return the outputs if mode is output.
  local function FlushWriter( mode )
    if mode == _FLUSH_MODE_NO_FLUSH then
      return total_bitlen
    end

    if mode == _FLUSH_MODE_OUTPUT
        or mode == _FLUSH_MODE_BYTE_BOUNDARY then
      -- Full flush, also output cache.
      -- Need to pad some bits if cache_bitlen is not multiple of 8.
      local padding_bitlen = mod( 8 - mod( cache_bitlen, 8 ), 8 )

      if cache_bitlen > 0 then
        -- padding with all 1 bits, mainly because "\000" is not
        -- good to be tranmitted. I do this so "\000" is a little bit
        -- less frequent.
        cache = cache - _pow2[ cache_bitlen ]
            + _pow2[ cache_bitlen + padding_bitlen ]
        for _ = 1, cache_bitlen, 8 do
          buffer_size = buffer_size + 1
          buffer[ buffer_size ] = _byte_to_char[ mod( cache, 256 ) ]
          cache = (cache - mod( cache, 256 )) / 256
        end

        cache = 0
        cache_bitlen = 0
      end
      if mode == _FLUSH_MODE_BYTE_BOUNDARY then
        total_bitlen = total_bitlen + padding_bitlen
        return total_bitlen
      end
    end

    local flushed = table_concat( buffer )
    buffer = {}
    buffer_size = 0
    result_buffer[ table.getn( result_buffer ) + 1 ] = flushed

    if mode == _FLUSH_MODE_MEMORY_CLEANUP then
      return total_bitlen
    else
      return total_bitlen, table_concat( result_buffer )
    end
  end

  return WriteBits, WriteString, FlushWriter
end

-- Push an element into a max heap
-- @param heap A max heap whose max element is at index 1.
-- @param e The element to be pushed. Assume element "e" is a table
--  and comparison is done via its first entry e[1]
-- @param heap_size current number of elements in the heap.
--  NOTE: There may be some garbage stored in
--  heap[heap_size+1], heap[heap_size+2], etc..
-- @return nil
local function MinHeapPush( heap, e, heap_size )
  heap_size = heap_size + 1
  heap[ heap_size ] = e
  local value = e[ 1 ]
  local pos = heap_size
  local parent_pos = (pos - mod( pos, 2 )) / 2

  while (parent_pos >= 1 and heap[ parent_pos ][ 1 ] > value) do
    local t = heap[ parent_pos ]
    heap[ parent_pos ] = e
    heap[ pos ] = t
    pos = parent_pos
    parent_pos = (parent_pos - mod( parent_pos, 2 )) / 2
  end
end

-- Pop an element from a max heap
-- @param heap A max heap whose max element is at index 1.
-- @param heap_size current number of elements in the heap.
-- @return the poped element
-- Note: This function does not change table size of "heap" to save CPU time.
local function MinHeapPop( heap, heap_size )
  local top = heap[ 1 ]
  local e = heap[ heap_size ]
  local value = e[ 1 ]
  heap[ 1 ] = e
  heap[ heap_size ] = top
  heap_size = heap_size - 1

  local pos = 1
  local left_child_pos = pos * 2
  local right_child_pos = left_child_pos + 1

  while (left_child_pos <= heap_size) do
    local left_child = heap[ left_child_pos ]
    if (right_child_pos <= heap_size
          and heap[ right_child_pos ][ 1 ] < left_child[ 1 ]) then
      local right_child = heap[ right_child_pos ]
      if right_child[ 1 ] < value then
        heap[ right_child_pos ] = e
        heap[ pos ] = right_child
        pos = right_child_pos
        left_child_pos = pos * 2
        right_child_pos = left_child_pos + 1
      else
        break
      end
    else
      if left_child[ 1 ] < value then
        heap[ left_child_pos ] = e
        heap[ pos ] = left_child
        pos = left_child_pos
        left_child_pos = pos * 2
        right_child_pos = left_child_pos + 1
      else
        break
      end
    end
  end

  return top
end

-- Deflate defines a special huffman tree, which is unique once the bit length
-- of huffman code of all symbols are known.
-- @param bitlen_count Number of symbols with a specific bitlen
-- @param symbol_bitlen The bit length of a symbol
-- @param max_symbol The max symbol among all symbols,
--		which is (number of symbols - 1)
-- @param max_bitlen The max huffman bit length among all symbols.
-- @return The huffman code of all symbols.
local function GetHuffmanCodeFromBitlen( bitlen_counts, symbol_bitlens
    , max_symbol, max_bitlen )
  local huffman_code = 0
  local next_codes = {}
  local symbol_huffman_codes = {}
  for bitlen = 1, max_bitlen do
    huffman_code = (huffman_code + (bitlen_counts[ bitlen - 1 ] or 0)) * 2
    next_codes[ bitlen ] = huffman_code
  end
  for symbol = 0, max_symbol do
    local bitlen = symbol_bitlens[ symbol ]
    if bitlen then
      huffman_code = next_codes[ bitlen ]
      next_codes[ bitlen ] = huffman_code + 1

      -- Reverse the bits of huffman code,
      -- because most signifant bits of huffman code
      -- is stored first into the compressed data.
      -- @see RFC1951 Page5 Section 3.1.1
      if bitlen <= 9 then -- Have cached reverse for small bitlen.
        symbol_huffman_codes[ symbol ] =
            _reverse_bits_tbl[ bitlen ][ huffman_code ]
      else
        local reverse = 0
        for _ = 1, bitlen do
          reverse = reverse - mod( reverse, 2 )
              + (((mod( reverse, 2 ) == 1)
                or mod( huffman_code, 2 ) == 1) and 1 or 0)
          huffman_code = (huffman_code - mod( huffman_code, 2 )) / 2
          reverse = reverse * 2
        end
        symbol_huffman_codes[ symbol ] = (reverse - mod( reverse, 2 )) / 2
      end
    end
  end
  return symbol_huffman_codes
end

-- A helper function to sort heap elements
-- a[1], b[1] is the huffman frequency
-- a[2], b[2] is the symbol value.
local function SortByFirstThenSecond( a, b )
  return a[ 1 ] < b[ 1 ] or
      (a[ 1 ] == b[ 1 ] and a[ 2 ] < b[ 2 ])
end

-- Calculate the huffman bit length and huffman code.
-- @param symbol_count: A table whose table key is the symbol, and table value
--		is the symbol frenquency (nil means 0 frequency).
-- @param max_bitlen: See description of return value.
-- @param max_symbol: The maximum symbol
-- @return a table whose key is the symbol, and the value is the huffman bit
--		bit length. We guarantee that all bit length <= max_bitlen.
--		For 0<=symbol<=max_symbol, table value could be nil if the frequency
--		of the symbol is 0 or nil.
-- @return a table whose key is the symbol, and the value is the huffman code.
-- @return a number indicating the maximum symbol whose bitlen is not 0.
local function GetHuffmanBitlenAndCode( symbol_counts, max_bitlen, max_symbol )
  local heap_size
  local max_non_zero_bitlen_symbol = -1
  local leafs = {}
  local heap = {}
  local symbol_bitlens = {}
  local symbol_codes = {}
  local bitlen_counts = {}

  --[[
		tree[1]: weight, temporarily used as parent and bitLengths
		tree[2]: symbol
		tree[3]: left child
		tree[4]: right child
	--]]
  local number_unique_symbols = 0
  for symbol, count in pairs( symbol_counts ) do
    number_unique_symbols = number_unique_symbols + 1
    leafs[ number_unique_symbols ] = { count, symbol }
  end

  if (number_unique_symbols == 0) then
    -- no code.
    return {}, {}, -1
  elseif (number_unique_symbols == 1) then
    -- Only one code. In this case, its huffman code
    -- needs to be assigned as 0, and bit length is 1.
    -- This is the only case that the return result
    -- represents an imcomplete huffman tree.
    local symbol = leafs[ 1 ][ 2 ]
    symbol_bitlens[ symbol ] = 1
    symbol_codes[ symbol ] = 0
    return symbol_bitlens, symbol_codes, symbol
  else
    table_sort( leafs, SortByFirstThenSecond )
    heap_size = number_unique_symbols
    for i = 1, heap_size do
      heap[ i ] = leafs[ i ]
    end

    while (heap_size > 1) do
      -- Note: pop does not change table size of heap
      local leftChild = MinHeapPop( heap, heap_size )
      heap_size = heap_size - 1
      local rightChild = MinHeapPop( heap, heap_size )
      heap_size = heap_size - 1
      local newNode =
      { leftChild[ 1 ] + rightChild[ 1 ], -1, leftChild, rightChild }
      MinHeapPush( heap, newNode, heap_size )
      heap_size = heap_size + 1
    end

    -- Number of leafs whose bit length is greater than max_len.
    local number_bitlen_overflow = 0

    -- Calculate bit length of all nodes
    local fifo = { heap[ 1 ], 0, 0, 0 } -- preallocate some spaces.
    local fifo_size = 1
    local index = 1
    heap[ 1 ][ 1 ] = 0
    while (index <= fifo_size) do -- Breath first search
      local e = fifo[ index ]
      local bitlen = e[ 1 ]
      local symbol = e[ 2 ]
      local left_child = e[ 3 ]
      local right_child = e[ 4 ]
      if left_child then
        fifo_size = fifo_size + 1
        fifo[ fifo_size ] = left_child
        left_child[ 1 ] = bitlen + 1
      end
      if right_child then
        fifo_size = fifo_size + 1
        fifo[ fifo_size ] = right_child
        right_child[ 1 ] = bitlen + 1
      end
      index = index + 1

      if (bitlen > max_bitlen) then
        number_bitlen_overflow = number_bitlen_overflow + 1
        bitlen = max_bitlen
      end
      if symbol >= 0 then
        symbol_bitlens[ symbol ] = bitlen
        max_non_zero_bitlen_symbol =
            (symbol > max_non_zero_bitlen_symbol)
            and symbol or max_non_zero_bitlen_symbol
        bitlen_counts[ bitlen ] = (bitlen_counts[ bitlen ] or 0) + 1
      end
    end

    -- Resolve bit length overflow
    -- @see ZLib/trees.c:gen_bitlen(s, desc), for reference
    if (number_bitlen_overflow > 0) then
      repeat
        local bitlen = max_bitlen - 1
        while ((bitlen_counts[ bitlen ] or 0) == 0) do
          bitlen = bitlen - 1
        end
        -- move one leaf down the tree
        bitlen_counts[ bitlen ] = bitlen_counts[ bitlen ] - 1
        -- move one overflow item as its brother
        bitlen_counts[ bitlen + 1 ] = (bitlen_counts[ bitlen + 1 ] or 0) + 2
        bitlen_counts[ max_bitlen ] = bitlen_counts[ max_bitlen ] - 1
        number_bitlen_overflow = number_bitlen_overflow - 2
      until (number_bitlen_overflow <= 0)

      index = 1
      for bitlen = max_bitlen, 1, -1 do
        local n = bitlen_counts[ bitlen ] or 0
        while (n > 0) do
          local symbol = leafs[ index ][ 2 ]
          symbol_bitlens[ symbol ] = bitlen
          n = n - 1
          index = index + 1
        end
      end
    end

    symbol_codes = GetHuffmanCodeFromBitlen( bitlen_counts, symbol_bitlens,
      max_symbol, max_bitlen )
    return symbol_bitlens, symbol_codes, max_non_zero_bitlen_symbol
  end
end

-- Calculate the first huffman header in the dynamic huffman block
-- @see RFC1951 Page 12
-- @param lcode_bitlen: The huffman bit length of literal/LZ77_length.
-- @param max_non_zero_bitlen_lcode: The maximum literal/LZ77_length symbol
--		whose huffman bit length is not zero.
-- @param dcode_bitlen: The huffman bit length of LZ77 distance.
-- @param max_non_zero_bitlen_dcode: The maximum LZ77 distance symbol
--		whose huffman bit length is not zero.
-- @return The run length encoded codes.
-- @return The extra bits. One entry for each rle code that needs extra bits.
--		(code == 16 or 17 or 18).
-- @return The count of appearance of each rle codes.
local function RunLengthEncodeHuffmanBitlen(
    lcode_bitlens,
    max_non_zero_bitlen_lcode,
    dcode_bitlens,
    max_non_zero_bitlen_dcode )
  local rle_code_tblsize = 0
  local rle_codes = {}
  local rle_code_counts = {}
  local rle_extra_bits_tblsize = 0
  local rle_extra_bits = {}
  local prev = nil
  local count = 0

  -- If there is no distance code, assume one distance code of bit length 0.
  -- RFC1951: One distance code of zero bits means that
  -- there are no distance codes used at all (the data is all literals).
  max_non_zero_bitlen_dcode = (max_non_zero_bitlen_dcode < 0)
      and 0 or max_non_zero_bitlen_dcode
  local max_code = max_non_zero_bitlen_lcode + max_non_zero_bitlen_dcode + 1

  for code = 0, max_code + 1 do
    local len = (code <= max_non_zero_bitlen_lcode)
        and (lcode_bitlens[ code ] or 0)
        or ((code <= max_code)
          and (dcode_bitlens[ code - max_non_zero_bitlen_lcode - 1 ] or 0) or nil)
    if len == prev then
      count = count + 1
      if len ~= 0 and count == 6 then
        rle_code_tblsize = rle_code_tblsize + 1
        rle_codes[ rle_code_tblsize ] = 16
        rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
        rle_extra_bits[ rle_extra_bits_tblsize ] = 3
        rle_code_counts[ 16 ] = (rle_code_counts[ 16 ] or 0) + 1
        count = 0
      elseif len == 0 and count == 138 then
        rle_code_tblsize = rle_code_tblsize + 1
        rle_codes[ rle_code_tblsize ] = 18
        rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
        rle_extra_bits[ rle_extra_bits_tblsize ] = 127
        rle_code_counts[ 18 ] = (rle_code_counts[ 18 ] or 0) + 1
        count = 0
      end
    else
      if count == 1 then
        rle_code_tblsize = rle_code_tblsize + 1
        rle_codes[ rle_code_tblsize ] = prev
        rle_code_counts[ prev ] = (rle_code_counts[ prev ] or 0) + 1
      elseif count == 2 then
        rle_code_tblsize = rle_code_tblsize + 1
        rle_codes[ rle_code_tblsize ] = prev
        rle_code_tblsize = rle_code_tblsize + 1
        rle_codes[ rle_code_tblsize ] = prev
        rle_code_counts[ prev ] = (rle_code_counts[ prev ] or 0) + 2
      elseif count >= 3 then
        rle_code_tblsize = rle_code_tblsize + 1
        local rleCode = (prev ~= 0) and 16 or (count <= 10 and 17 or 18)
        rle_codes[ rle_code_tblsize ] = rleCode
        rle_code_counts[ rleCode ] = (rle_code_counts[ rleCode ] or 0) + 1
        rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
        rle_extra_bits[ rle_extra_bits_tblsize ] =
            (count <= 10) and (count - 3) or (count - 11)
      end

      prev = len
      if len and len ~= 0 then
        rle_code_tblsize = rle_code_tblsize + 1
        rle_codes[ rle_code_tblsize ] = len
        rle_code_counts[ len ] = (rle_code_counts[ len ] or 0) + 1
        count = 0
      else
        count = 1
      end
    end
  end

  return rle_codes, rle_extra_bits, rle_code_counts
end

-- Load the string into a table, in order to speed up LZ77.
-- Loop unrolled 16 times to speed this function up.
-- @param str The string to be loaded.
-- @param t The load destination
-- @param start str[index] will be the first character to be loaded.
-- @param end str[index] will be the last character to be loaded
-- @param offset str[index] will be loaded into t[index-offset]
-- @return t
local function LoadStringToTable( str, t, start, stop, offset )
  local i = start - offset
  while i <= stop - 15 - offset do
    t[ i ], t[ i + 1 ], t[ i + 2 ], t[ i + 3 ], t[ i + 4 ], t[ i + 5 ], t[ i + 6 ], t[ i + 7 ], t[ i + 8 ],
    t[ i + 9 ], t[ i + 10 ], t[ i + 11 ], t[ i + 12 ], t[ i + 13 ], t[ i + 14 ], t[ i + 15 ] =
        string_byte( str, i + offset, i + 15 + offset )
    i = i + 16
  end
  while (i <= stop - offset) do
    t[ i ] = string_byte( str, i + offset, i + offset )
    i = i + 1
  end
  return t
end

-- Do LZ77 process. This function uses the majority of the CPU time.
-- @see zlib/deflate.c:deflate_fast(), zlib/deflate.c:deflate_slow()
-- @see https://github.com/madler/zlib/blob/master/doc/algorithm.txt
-- This function uses the algorithms used above. You should read the
-- algorithm.txt above to understand what is the hash function and the
-- lazy evaluation.
--
-- The special optimization used here is hash functions used here.
-- The hash function is just the multiplication of the three consective
-- characters. So if the hash matches, it guarantees 3 characters are matched.
-- This optimization can be implemented because Lua table is a hash table.
--
-- @param level integer that describes compression level.
-- @param string_table table that stores the value of string to be compressed.
--			The index of this table starts from 1.
--			The caller needs to make sure all values needed by this function
--			are loaded.
--			Assume "str" is the origin input string into the compressor
--			str[block_start]..str[block_end+3] needs to be loaded into
--			string_table[block_start-offset]..string_table[block_end-offset]
--			If dictionary is presented, the last 258 bytes of the dictionary
--			needs to be loaded into sing_table[-257..0]
--			(See more in the description of offset.)
-- @param hash_tables. The table key is the hash value (0<=hash<=16777216=256^3)
--			The table value is an array0 that stores the indexes of the
--			input data string to be compressed, such that
--			hash == str[index]*str[index+1]*str[index+2]
--			Indexes are ordered in this array.
-- @param block_start The indexes of the input data string to be compressed.
--				that starts the LZ77 block.
-- @param block_end The indexes of the input data string to be compressed.
--				that stores the LZ77 block.
-- @param offset str[index] is stored in string_table[index-offset],
--			This offset is mainly an optimization to limit the index
--			of string_table, so lua can access this table quicker.
-- @param dictionary See LibDeflate:CreateDictionary
-- @return literal/LZ77_length deflate codes.
-- @return the extra bits of literal/LZ77_length deflate codes.
-- @return the count of each literal/LZ77 deflate code.
-- @return LZ77 distance deflate codes.
-- @return the extra bits of LZ77 distance deflate codes.
-- @return the count of each LZ77 distance deflate code.
local function GetBlockLZ77Result( level, string_table, hash_tables, block_start,
                                   block_end, offset, dictionary )
  local config = _compression_level_configs[ level ]
  local config_use_lazy
  , config_good_prev_length
  , config_max_lazy_match
  , config_nice_length
  , config_max_hash_chain =
      config[ 1 ], config[ 2 ], config[ 3 ], config[ 4 ], config[ 5 ]

  local config_max_insert_length = (not config_use_lazy)
      and config_max_lazy_match or 2147483646
  local config_good_hash_chain =
      (config_max_hash_chain - mod( config_max_hash_chain, 4 ) / 4)

  local hash

  local dict_hash_tables
  local dict_string_table
  local dict_string_len = 0

  if dictionary then
    dict_hash_tables = dictionary.hash_tables
    dict_string_table = dictionary.string_table
    dict_string_len = dictionary.strlen
    assert( block_start == 1 )
    if block_end >= block_start and dict_string_len >= 2 then
      hash = dict_string_table[ dict_string_len - 1 ] * 65536
          + dict_string_table[ dict_string_len ] * 256 + string_table[ 1 ]
      local t = hash_tables[ hash ]
      if not t then
        t = {}; hash_tables[ hash ] = t
      end
      t[ table.getn( t ) + 1 ] = -1
    end
    if block_end >= block_start + 1 and dict_string_len >= 1 then
      hash = dict_string_table[ dict_string_len ] * 65536
          + string_table[ 1 ] * 256 + string_table[ 2 ]
      local t = hash_tables[ hash ]
      if not t then
        t = {}; hash_tables[ hash ] = t
      end
      t[ table.getn( t ) + 1 ] = 0
    end
  end

  local dict_string_len_plus3 = dict_string_len + 3

  hash = (string_table[ block_start - offset ] or 0) * 256
      + (string_table[ block_start + 1 - offset ] or 0)

  local lcodes = {}
  local lcode_tblsize = 0
  local lcodes_counts = {}
  local dcodes = {}
  local dcodes_tblsize = 0
  local dcodes_counts = {}

  local lextra_bits = {}
  local lextra_bits_tblsize = 0
  local dextra_bits = {}
  local dextra_bits_tblsize = 0

  local match_available = false
  local prev_len
  local prev_dist
  local cur_len = 0
  local cur_dist = 0

  local index = block_start
  local index_end = block_end + (config_use_lazy and 1 or 0)

  -- the zlib source code writes separate code for lazy evaluation and
  -- not lazy evaluation, which is easier to understand.
  -- I put them together, so it is a bit harder to understand.
  -- because I think this is easier for me to maintain it.
  while (index <= index_end) do
    local string_table_index = index - offset
    local offset_minus_three = offset - 3
    prev_len = cur_len
    prev_dist = cur_dist
    cur_len = 0

    hash = mod( hash * 256 + (string_table[ string_table_index + 2 ] or 0), 16777216 )

    local chain_index
    local cur_chain
    local hash_chain = hash_tables[ hash ]
    local chain_old_size
    if not hash_chain then
      chain_old_size = 0
      hash_chain = {}
      hash_tables[ hash ] = hash_chain
      if dict_hash_tables then
        cur_chain = dict_hash_tables[ hash ]
        chain_index = cur_chain and table.getn( cur_chain ) or 0
      else
        chain_index = 0
      end
    else
      chain_old_size = table.getn( hash_chain )
      cur_chain = hash_chain
      chain_index = chain_old_size
    end

    if index <= block_end then
      hash_chain[ chain_old_size + 1 ] = index
    end

    if (chain_index > 0 and index + 2 <= block_end
          and (not config_use_lazy or prev_len < config_max_lazy_match)) then
      local depth =
          (config_use_lazy and prev_len >= config_good_prev_length)
          and config_good_hash_chain or config_max_hash_chain

      local max_len_minus_one = block_end - index
      max_len_minus_one = (max_len_minus_one >= 257) and 257 or max_len_minus_one
      max_len_minus_one = max_len_minus_one + string_table_index
      local string_table_index_plus_three = string_table_index + 3

      while chain_index >= 1 and depth > 0 do
        local prev = cur_chain[ chain_index ]

        if index - prev > 32768 then
          break
        end
        if prev < index then
          local sj = string_table_index_plus_three

          if prev >= -257 then
            local pj = prev - offset_minus_three
            while (sj <= max_len_minus_one
                and string_table[ pj ]
                == string_table[ sj ]) do
              sj = sj + 1
              pj = pj + 1
            end
          else
            local pj = dict_string_len_plus3 + prev
            while (sj <= max_len_minus_one
                and dict_string_table[ pj ]
                == string_table[ sj ]) do
              sj = sj + 1
              pj = pj + 1
            end
          end
          local j = sj - string_table_index
          if j > cur_len then
            cur_len = j
            cur_dist = index - prev
          end
          if cur_len >= config_nice_length then
            break
          end
        end

        chain_index = chain_index - 1
        depth = depth - 1
        if chain_index == 0 and prev > 0 and dict_hash_tables then
          cur_chain = dict_hash_tables[ hash ]
          chain_index = cur_chain and table.getn( cur_chain ) or 0
        end
      end
    end

    if not config_use_lazy then
      prev_len, prev_dist = cur_len, cur_dist
    end
    if ((not config_use_lazy or match_available)
          and (prev_len > 3 or (prev_len == 3 and prev_dist < 4096))
          and cur_len <= prev_len) then
      local code = _length_to_deflate_code[ prev_len ]
      local length_extra_bits_bitlen =
          _length_to_deflate_extra_bitlen[ prev_len ]
      local dist_code, dist_extra_bits_bitlen, dist_extra_bits
      if prev_dist <= 256 then -- have cached code for small distance.
        dist_code = _dist256_to_deflate_code[ prev_dist ]
        dist_extra_bits = _dist256_to_deflate_extra_bits[ prev_dist ]
        dist_extra_bits_bitlen =
            _dist256_to_deflate_extra_bitlen[ prev_dist ]
      else
        dist_code = 16
        dist_extra_bits_bitlen = 7
        local a = 384
        local b = 512

        while true do
          if prev_dist <= a then
            dist_extra_bits = mod( prev_dist - (b / 2) - 1, b / 4 )
            break
          elseif prev_dist <= b then
            dist_extra_bits = mod( prev_dist - (b / 2) - 1, b / 4 )
            dist_code = dist_code + 1
            break
          else
            dist_code = dist_code + 2
            dist_extra_bits_bitlen = dist_extra_bits_bitlen + 1
            a = a * 2
            b = b * 2
          end
        end
      end
      lcode_tblsize = lcode_tblsize + 1
      lcodes[ lcode_tblsize ] = code
      lcodes_counts[ code ] = (lcodes_counts[ code ] or 0) + 1

      dcodes_tblsize = dcodes_tblsize + 1
      dcodes[ dcodes_tblsize ] = dist_code
      dcodes_counts[ dist_code ] = (dcodes_counts[ dist_code ] or 0) + 1

      if length_extra_bits_bitlen > 0 then
        local lenExtraBits = _length_to_deflate_extra_bits[ prev_len ]
        lextra_bits_tblsize = lextra_bits_tblsize + 1
        lextra_bits[ lextra_bits_tblsize ] = lenExtraBits
      end
      if dist_extra_bits_bitlen > 0 then
        dextra_bits_tblsize = dextra_bits_tblsize + 1
        dextra_bits[ dextra_bits_tblsize ] = dist_extra_bits
      end

      for i = index + 1, index + prev_len - (config_use_lazy and 2 or 1) do
        hash = mod( hash * 256 + (string_table[ i - offset + 2 ] or 0), 16777216 )
        if prev_len <= config_max_insert_length then
          hash_chain = hash_tables[ hash ]
          if not hash_chain then
            hash_chain = {}
            hash_tables[ hash ] = hash_chain
          end
          hash_chain[ table.getn( hash_chain ) + 1 ] = i
        end
      end
      index = index + prev_len - (config_use_lazy and 1 or 0)
      match_available = false
    elseif (not config_use_lazy) or match_available then
      local code = string_table[ config_use_lazy
      and (string_table_index - 1) or string_table_index ]
      lcode_tblsize = lcode_tblsize + 1
      lcodes[ lcode_tblsize ] = code
      lcodes_counts[ code ] = (lcodes_counts[ code ] or 0) + 1
      index = index + 1
    else
      match_available = true
      index = index + 1
    end
  end

  -- Write "end of block" symbol
  lcode_tblsize = lcode_tblsize + 1
  lcodes[ lcode_tblsize ] = 256
  lcodes_counts[ 256 ] = (lcodes_counts[ 256 ] or 0) + 1

  return lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
  , dcodes_counts
end

-- Get the header data of dynamic block.
-- @param lcodes_count The count of each literal/LZ77_length codes.
-- @param dcodes_count The count of each Lz77 distance codes.
-- @return a lots of stuffs.
-- @see RFC1951 Page 12
local function GetBlockDynamicHuffmanHeader( lcodes_counts, dcodes_counts )
  local lcodes_huffman_bitlens, lcodes_huffman_codes
  , max_non_zero_bitlen_lcode =
      GetHuffmanBitlenAndCode( lcodes_counts, 15, 285 )
  local dcodes_huffman_bitlens, dcodes_huffman_codes
  , max_non_zero_bitlen_dcode =
      GetHuffmanBitlenAndCode( dcodes_counts, 15, 29 )

  local rle_deflate_codes, rle_extra_bits, rle_codes_counts =
      RunLengthEncodeHuffmanBitlen( lcodes_huffman_bitlens
      , max_non_zero_bitlen_lcode, dcodes_huffman_bitlens
      , max_non_zero_bitlen_dcode )

  local rle_codes_huffman_bitlens, rle_codes_huffman_codes =
      GetHuffmanBitlenAndCode( rle_codes_counts, 7, 18 )

  local HCLEN = 0
  for i = 1, 19 do
    local symbol = _rle_codes_huffman_bitlen_order[ i ]
    local length = rle_codes_huffman_bitlens[ symbol ] or 0
    if length ~= 0 then
      HCLEN = i
    end
  end

  HCLEN = HCLEN - 4
  local HLIT = max_non_zero_bitlen_lcode + 1 - 257
  local HDIST = max_non_zero_bitlen_dcode + 1 - 1
  if HDIST < 0 then HDIST = 0 end

  return HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
  , rle_codes_huffman_codes, rle_deflate_codes, rle_extra_bits
  , lcodes_huffman_bitlens, lcodes_huffman_codes
  , dcodes_huffman_bitlens, dcodes_huffman_codes
end

-- Get the size of dynamic block without writing any bits into the writer.
-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
-- @return the bit length of the dynamic block
local function GetDynamicHuffmanBlockSize( lcodes, dcodes, HCLEN
    , rle_codes_huffman_bitlens, rle_deflate_codes
    , lcodes_huffman_bitlens, dcodes_huffman_bitlens )
  local block_bitlen = 17 -- 1+2+5+5+4
  block_bitlen = block_bitlen + (HCLEN + 4) * 3

  for i = 1, table.getn( rle_deflate_codes ) do
    local code = rle_deflate_codes[ i ]
    block_bitlen = block_bitlen + rle_codes_huffman_bitlens[ code ]
    if code >= 16 then
      block_bitlen = block_bitlen +
          ((code == 16) and 2 or (code == 17 and 3 or 7))
    end
  end

  local length_code_count = 0
  for i = 1, table.getn( lcodes ) do
    local code = lcodes[ i ]
    local huffman_bitlen = lcodes_huffman_bitlens[ code ]
    block_bitlen = block_bitlen + huffman_bitlen
    if code > 256 then                  -- Length code
      length_code_count = length_code_count + 1
      if code > 264 and code < 285 then -- Length code with extra bits
        local extra_bits_bitlen =
            _literal_deflate_code_to_extra_bitlen[ code - 256 ]
        block_bitlen = block_bitlen + extra_bits_bitlen
      end
      local dist_code = dcodes[ length_code_count ]
      local dist_huffman_bitlen = dcodes_huffman_bitlens[ dist_code ]
      block_bitlen = block_bitlen + dist_huffman_bitlen

      if dist_code > 3 then -- dist code with extra bits
        local dist_extra_bits_bitlen = (dist_code - mod( dist_code, 2 )) / 2 - 1
        block_bitlen = block_bitlen + dist_extra_bits_bitlen
      end
    end
  end
  return block_bitlen
end

-- Write dynamic block.
-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
local function CompressDynamicHuffmanBlock( WriteBits, is_last_block
    , lcodes, lextra_bits, dcodes, dextra_bits, HLIT, HDIST, HCLEN
    , rle_codes_huffman_bitlens, rle_codes_huffman_codes
    , rle_deflate_codes, rle_extra_bits
    , lcodes_huffman_bitlens, lcodes_huffman_codes
    , dcodes_huffman_bitlens, dcodes_huffman_codes )
  WriteBits( is_last_block and 1 or 0, 1 ) -- Last block identifier
  WriteBits( 2, 2 )                        -- Dynamic Huffman block identifier

  WriteBits( HLIT, 5 )
  WriteBits( HDIST, 5 )
  WriteBits( HCLEN, 4 )

  for i = 1, HCLEN + 4 do
    local symbol = _rle_codes_huffman_bitlen_order[ i ]
    local length = rle_codes_huffman_bitlens[ symbol ] or 0
    WriteBits( length, 3 )
  end

  local rleExtraBitsIndex = 1
  for i = 1, table.getn( rle_deflate_codes ) do
    local code = rle_deflate_codes[ i ]
    WriteBits( rle_codes_huffman_codes[ code ]
    , rle_codes_huffman_bitlens[ code ] )
    if code >= 16 then
      local extraBits = rle_extra_bits[ rleExtraBitsIndex ]
      WriteBits( extraBits, (code == 16) and 2 or (code == 17 and 3 or 7) )
      rleExtraBitsIndex = rleExtraBitsIndex + 1
    end
  end

  local length_code_count = 0
  local length_code_with_extra_count = 0
  local dist_code_with_extra_count = 0

  for i = 1, table.getn( lcodes ) do
    local deflate_codee = lcodes[ i ]
    local huffman_code = lcodes_huffman_codes[ deflate_codee ]
    local huffman_bitlen = lcodes_huffman_bitlens[ deflate_codee ]
    WriteBits( huffman_code, huffman_bitlen )
    if deflate_codee > 256 then -- Length code
      length_code_count = length_code_count + 1
      if deflate_codee > 264 and deflate_codee < 285 then
        -- Length code with extra bits
        length_code_with_extra_count = length_code_with_extra_count + 1
        local extra_bits = lextra_bits[ length_code_with_extra_count ]
        local extra_bits_bitlen =
            _literal_deflate_code_to_extra_bitlen[ deflate_codee - 256 ]
        WriteBits( extra_bits, extra_bits_bitlen )
      end
      -- Write distance code
      local dist_deflate_code = dcodes[ length_code_count ]
      local dist_huffman_code = dcodes_huffman_codes[ dist_deflate_code ]
      local dist_huffman_bitlen =
          dcodes_huffman_bitlens[ dist_deflate_code ]
      WriteBits( dist_huffman_code, dist_huffman_bitlen )

      if dist_deflate_code > 3 then -- dist code with extra bits
        dist_code_with_extra_count = dist_code_with_extra_count + 1
        local dist_extra_bits = dextra_bits[ dist_code_with_extra_count ]
        local dist_extra_bits_bitlen =
            (dist_deflate_code - mod( dist_deflate_code, 2 )) / 2 - 1
        WriteBits( dist_extra_bits, dist_extra_bits_bitlen )
      end
    end
  end
end

-- Get the size of fixed block without writing any bits into the writer.
-- @param lcodes literal/LZ77_length deflate codes
-- @param decodes LZ77 distance deflate codes
-- @return the bit length of the fixed block
local function GetFixedHuffmanBlockSize( lcodes, dcodes )
  local block_bitlen = 3
  local length_code_count = 0
  for i = 1, table.getn( lcodes ) do
    local code = lcodes[ i ]
    local huffman_bitlen = _fix_block_literal_huffman_bitlen[ code ]
    block_bitlen = block_bitlen + huffman_bitlen
    if code > 256 then                  -- Length code
      length_code_count = length_code_count + 1
      if code > 264 and code < 285 then -- Length code with extra bits
        local extra_bits_bitlen =
            _literal_deflate_code_to_extra_bitlen[ code - 256 ]
        block_bitlen = block_bitlen + extra_bits_bitlen
      end
      local dist_code = dcodes[ length_code_count ]
      block_bitlen = block_bitlen + 5

      if dist_code > 3 then -- dist code with extra bits
        local dist_extra_bits_bitlen =
            (dist_code - mod( dist_code, 2 )) / 2 - 1
        block_bitlen = block_bitlen + dist_extra_bits_bitlen
      end
    end
  end
  return block_bitlen
end

-- Write fixed block.
-- @param lcodes literal/LZ77_length deflate codes
-- @param decodes LZ77 distance deflate codes
local function CompressFixedHuffmanBlock( WriteBits, is_last_block,
                                          lcodes, lextra_bits, dcodes, dextra_bits )
  WriteBits( is_last_block and 1 or 0, 1 ) -- Last block identifier
  WriteBits( 1, 2 )                        -- Fixed Huffman block identifier
  local length_code_count = 0
  local length_code_with_extra_count = 0
  local dist_code_with_extra_count = 0
  for i = 1, table.getn( lcodes ) do
    local deflate_code = lcodes[ i ]
    local huffman_code = _fix_block_literal_huffman_code[ deflate_code ]
    local huffman_bitlen = _fix_block_literal_huffman_bitlen[ deflate_code ]
    WriteBits( huffman_code, huffman_bitlen )
    if deflate_code > 256 then -- Length code
      length_code_count = length_code_count + 1
      if deflate_code > 264 and deflate_code < 285 then
        -- Length code with extra bits
        length_code_with_extra_count = length_code_with_extra_count + 1
        local extra_bits = lextra_bits[ length_code_with_extra_count ]
        local extra_bits_bitlen =
            _literal_deflate_code_to_extra_bitlen[ deflate_code - 256 ]
        WriteBits( extra_bits, extra_bits_bitlen )
      end
      -- Write distance code
      local dist_code = dcodes[ length_code_count ]
      local dist_huffman_code = _fix_block_dist_huffman_code[ dist_code ]
      WriteBits( dist_huffman_code, 5 )

      if dist_code > 3 then -- dist code with extra bits
        dist_code_with_extra_count = dist_code_with_extra_count + 1
        local dist_extra_bits = dextra_bits[ dist_code_with_extra_count ]
        local dist_extra_bits_bitlen = (dist_code - mod( dist_code, 2 )) / 2 - 1
        WriteBits( dist_extra_bits, dist_extra_bits_bitlen )
      end
    end
  end
end

-- Get the size of store block without writing any bits into the writer.
-- @param block_start The start index of the origin input string
-- @param block_end The end index of the origin input string
-- @param Total bit lens had been written into the compressed result before,
-- because store block needs to shift to byte boundary.
-- @return the bit length of the fixed block
local function GetStoreBlockSize( block_start, block_end, total_bitlen )
  assert( block_end - block_start + 1 <= 65535 )
  local block_bitlen = 3
  total_bitlen = total_bitlen + 3
  local padding_bitlen = mod( 8 - mod( total_bitlen, 8 ), 8 )
  block_bitlen = block_bitlen + padding_bitlen
  block_bitlen = block_bitlen + 32
  block_bitlen = block_bitlen + (block_end - block_start + 1) * 8
  return block_bitlen
end

-- Write the store block.
-- @param ... lots of stuffs
-- @return nil
local function CompressStoreBlock( WriteBits, WriteString, is_last_block, str
    , block_start, block_end, total_bitlen )
  assert( block_end - block_start + 1 <= 65535 )
  WriteBits( is_last_block and 1 or 0, 1 ) -- Last block identifer.
  WriteBits( 0, 2 )                        -- Store block identifier.
  total_bitlen = total_bitlen + 3
  local padding_bitlen = mod( 8 - mod( total_bitlen, 8 ), 8 )
  if padding_bitlen > 0 then
    WriteBits( _pow2[ padding_bitlen ] - 1, padding_bitlen )
  end
  local size = block_end - block_start + 1
  WriteBits( size, 16 )

  -- Write size's one's complement
  local comp = (255 - mod( size, 256 )) + (255 - (size - mod( size, 256 )) / 256) * 256
  WriteBits( comp, 16 )

  WriteString( string.sub( str, block_start, block_end ) )
end

-- Do the deflate
-- Currently using a simple way to determine the block size
-- (This is why the compression ratio is little bit worse than zlib when
-- the input size is very large
-- The first block is 64KB, the following block is 32KB.
-- After each block, there is a memory cleanup operation.
-- This is not a fast operation, but it is needed to save memory usage, so
-- the memory usage does not grow unboundly. If the data size is less than
-- 64KB, then memory cleanup won't happen.
-- This function determines whether to use store/fixed/dynamic blocks by
-- calculating the block size of each block type and chooses the smallest one.
local function Deflate( configs, WriteBits, WriteString, FlushWriter, str
    , dictionary )
  local string_table = {}
  local hash_tables = {}
  local is_last_block = nil
  local block_start
  local block_end
  local bitlen_written
  local total_bitlen = FlushWriter( _FLUSH_MODE_NO_FLUSH )
  local strlen = string.len( str )
  local offset

  local level
  local strategy
  if configs then
    if configs.level then
      level = configs.level
    end
    if configs.strategy then
      strategy = configs.strategy
    end
  end

  if not level then
    if strlen < 2048 then
      level = 7
    elseif strlen > 65536 then
      level = 3
    else
      level = 5
    end
  end

  while not is_last_block do
    if not block_start then
      block_start = 1
      block_end = 64 * 1024 - 1
      offset = 0
    else
      block_start = block_end + 1
      block_end = block_end + 32 * 1024
      offset = block_start - 32 * 1024 - 1
    end

    if block_end >= strlen then
      block_end = strlen
      is_last_block = true
    else
      is_last_block = false
    end

    local lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
    , dcodes_counts

    local HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
    , rle_codes_huffman_codes, rle_deflate_codes
    , rle_extra_bits, lcodes_huffman_bitlens, lcodes_huffman_codes
    , dcodes_huffman_bitlens, dcodes_huffman_codes

    local dynamic_block_bitlen
    local fixed_block_bitlen
    local store_block_bitlen

    if level ~= 0 then
      -- GetBlockLZ77 needs block_start to block_end+3 to be loaded.
      LoadStringToTable( str, string_table, block_start, block_end + 3
      , offset )
      if block_start == 1 and dictionary then
        local dict_string_table = dictionary.string_table
        local dict_strlen = dictionary.strlen
        for i = 0, (-dict_strlen + 1) < -257
        and -257 or (-dict_strlen + 1), -1 do
          string_table[ i ] = dict_string_table[ dict_strlen + i ]
        end
      end

      if strategy == "huffman_only" then
        lcodes = {}
        LoadStringToTable( str, lcodes, block_start, block_end
        , block_start - 1 )
        lextra_bits = {}
        lcodes_counts = {}
        lcodes[ block_end - block_start + 2 ] = 256 -- end of block
        for i = 1, block_end - block_start + 2 do
          local code = lcodes[ i ]
          lcodes_counts[ code ] = (lcodes_counts[ code ] or 0) + 1
        end
        dcodes = {}
        dextra_bits = {}
        dcodes_counts = {}
      else
        lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits
        , dcodes_counts = GetBlockLZ77Result( level, string_table
        , hash_tables, block_start, block_end, offset, dictionary
        )
      end

      HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens
      , rle_codes_huffman_codes, rle_deflate_codes
      , rle_extra_bits, lcodes_huffman_bitlens, lcodes_huffman_codes
      , dcodes_huffman_bitlens, dcodes_huffman_codes =
          GetBlockDynamicHuffmanHeader( lcodes_counts, dcodes_counts )
      dynamic_block_bitlen = GetDynamicHuffmanBlockSize(
        lcodes, dcodes, HCLEN, rle_codes_huffman_bitlens
        , rle_deflate_codes, lcodes_huffman_bitlens
        , dcodes_huffman_bitlens )
      fixed_block_bitlen = GetFixedHuffmanBlockSize( lcodes, dcodes )
    end

    store_block_bitlen = GetStoreBlockSize( block_start, block_end
    , total_bitlen )

    local min_bitlen = store_block_bitlen
    min_bitlen = (fixed_block_bitlen and fixed_block_bitlen < min_bitlen)
        and fixed_block_bitlen or min_bitlen
    min_bitlen = (dynamic_block_bitlen
          and dynamic_block_bitlen < min_bitlen)
        and dynamic_block_bitlen or min_bitlen

    if level == 0 or (strategy ~= "fixed" and strategy ~= "dynamic" and
          store_block_bitlen == min_bitlen) then
      CompressStoreBlock( WriteBits, WriteString, is_last_block
      , str, block_start, block_end, total_bitlen )
      total_bitlen = total_bitlen + store_block_bitlen
    elseif strategy ~= "dynamic" and (
          strategy == "fixed" or fixed_block_bitlen == min_bitlen) then
      CompressFixedHuffmanBlock( WriteBits, is_last_block,
        lcodes, lextra_bits, dcodes, dextra_bits )
      total_bitlen = total_bitlen + fixed_block_bitlen
    elseif strategy == "dynamic" or dynamic_block_bitlen == min_bitlen then
      CompressDynamicHuffmanBlock( WriteBits, is_last_block, lcodes
      , lextra_bits, dcodes, dextra_bits, HLIT, HDIST, HCLEN
      , rle_codes_huffman_bitlens, rle_codes_huffman_codes
      , rle_deflate_codes, rle_extra_bits
      , lcodes_huffman_bitlens, lcodes_huffman_codes
      , dcodes_huffman_bitlens, dcodes_huffman_codes )
      total_bitlen = total_bitlen + dynamic_block_bitlen
    end

    if is_last_block then
      bitlen_written = FlushWriter( _FLUSH_MODE_NO_FLUSH )
    else
      bitlen_written = FlushWriter( _FLUSH_MODE_MEMORY_CLEANUP )
    end

    assert( bitlen_written == total_bitlen )

    -- Memory clean up, so memory consumption does not always grow linearly
    -- , even if input string is > 64K.
    -- Not a very efficient operation, but this operation won't happen
    -- when the input data size is less than 64K.
    if not is_last_block then
      local j
      if dictionary and block_start == 1 then
        j = 0
        while (string_table[ j ]) do
          string_table[ j ] = nil
          j = j - 1
        end
      end
      dictionary = nil
      j = 1
      for i = block_end - 32767, block_end do
        string_table[ j ] = string_table[ i - offset ]
        j = j + 1
      end

      for k, t in pairs( hash_tables ) do
        local tSize = table.getn( t )
        if tSize > 0 and block_end + 1 - t[ 1 ] > 32768 then
          if tSize == 1 then
            hash_tables[ k ] = nil
          else
            local new = {}
            local newSize = 0
            for i = 2, tSize do
              j = t[ i ]
              if block_end + 1 - j <= 32768 then
                newSize = newSize + 1
                new[ newSize ] = j
              end
            end
            hash_tables[ k ] = new
          end
        end
      end
    end
  end
end

--- The description to compression configuration table. <br>
-- Any field can be nil to use its default. <br>
-- Table with keys other than those below is an invalid table.
-- @class table
-- @name compression_configs
-- @field level The compression level ranged from 0 to 9. 0 is no compression.
-- 9 is the slowest but best compression. Use nil for default level.
-- @field strategy The compression strategy. "fixed" to only use fixed deflate
-- compression block. "dynamic" to only use dynamic block. "huffman_only" to
-- do no LZ77 compression. Only do huffman compression.


-- @see LibDeflate:CompressDeflate(str, configs)
-- @see LibDeflate:CompressDeflateWithDict(str, dictionary, configs)
local function CompressDeflateInternal( str, dictionary, configs )
  local WriteBits, WriteString, FlushWriter = CreateWriter()
  Deflate( configs, WriteBits, WriteString, FlushWriter, str, dictionary )
  local total_bitlen, result = FlushWriter( _FLUSH_MODE_OUTPUT )
  local padding_bitlen = mod( 8 - mod( total_bitlen, 8 ), 8 )
  return result, padding_bitlen
end

-- @see LibDeflate:CompressZlib
-- @see LibDeflate:CompressZlibWithDict
local function CompressZlibInternal( str, dictionary, configs )
  local WriteBits, WriteString, FlushWriter = CreateWriter()

  local CM = 8    -- Compression method
  local CINFO = 7 --Window Size = 32K
  local CMF = CINFO * 16 + CM
  WriteBits( CMF, 8 )

  local FDIST = dictionary and 1 or 0
  local FLEVEL = 2 -- Default compression
  local FLG = FLEVEL * 64 + FDIST * 32
  local FCHECK = (31 - mod( CMF * 256 + FLG, 31 ))
  -- The FCHECK value must be such that CMF and FLG,
  -- when viewed as a 16-bit unsigned integer stored
  -- in MSB order (CMF*256 + FLG), is a multiple of 31.
  FLG = FLG + FCHECK
  WriteBits( FLG, 8 )

  if FDIST == 1 then
    local adler32 = dictionary.adler32
    local byte0 = mod( adler32, 256 )
    adler32 = (adler32 - byte0) / 256
    local byte1 = mod( adler32, 256 )
    adler32 = (adler32 - byte1) / 256
    local byte2 = mod( adler32, 256 )
    adler32 = (adler32 - byte2) / 256
    local byte3 = mod( adler32, 256 )
    WriteBits( byte3, 8 )
    WriteBits( byte2, 8 )
    WriteBits( byte1, 8 )
    WriteBits( byte0, 8 )
  end

  Deflate( configs, WriteBits, WriteString, FlushWriter, str, dictionary )
  FlushWriter( _FLUSH_MODE_BYTE_BOUNDARY )

  local adler32 = LibDeflate:Adler32( str )

  -- Most significant byte first
  local byte3 = mod( adler32, 256 )
  adler32 = (adler32 - byte3) / 256
  local byte2 = mod( adler32, 256 )
  adler32 = (adler32 - byte2) / 256
  local byte1 = mod( adler32, 256 )
  adler32 = (adler32 - byte1) / 256
  local byte0 = mod( adler32, 256 )

  WriteBits( byte0, 8 )
  WriteBits( byte1, 8 )
  WriteBits( byte2, 8 )
  WriteBits( byte3, 8 )
  local total_bitlen, result = FlushWriter( _FLUSH_MODE_OUTPUT )
  local padding_bitlen = mod( 8 - mod( total_bitlen, 8 ), 8 )
  return result, padding_bitlen
end

--- Compress using the raw deflate format.
-- @param str [string] The data to be compressed.
-- @param configs [table/nil] The configuration table to control the compression
-- . If nil, use the default configuration.
-- @return [string] The compressed data.
-- @return [integer] The number of bits padded at the end of output.
-- 0 <= bits < 8  <br>
-- This means the most significant "bits" of the last byte of the returned
-- compressed data are padding bits and they don't affect decompression.
-- You don't need to use this value unless you want to do some postprocessing
-- to the compressed data.
-- @see compression_configs
-- @see LibDeflate:DecompressDeflate
function LibDeflate:CompressDeflate( str, configs )
  local arg_valid, arg_err = IsValidArguments( str, false, nil, true, configs )
  if not arg_valid then
    error( ("Usage: LibDeflate:CompressDeflate(str, configs): "
      .. arg_err), 2 )
  end
  return CompressDeflateInternal( str, nil, configs )
end

--- Compress using the raw deflate format with a preset dictionary.
-- @param str [string] The data to be compressed.
-- @param dictionary [table] The preset dictionary produced by
-- LibDeflate:CreateDictionary
-- @param configs [table/nil] The configuration table to control the compression
-- . If nil, use the default configuration.
-- @return [string] The compressed data.
-- @return [integer] The number of bits padded at the end of output.
-- 0 <= bits < 8  <br>
-- This means the most significant "bits" of the last byte of the returned
-- compressed data are padding bits and they don't affect decompression.
-- You don't need to use this value unless you want to do some postprocessing
-- to the compressed data.
-- @see compression_configs
-- @see LibDeflate:CreateDictionary
-- @see LibDeflate:DecompressDeflateWithDict
function LibDeflate:CompressDeflateWithDict( str, dictionary, configs )
  local arg_valid, arg_err = IsValidArguments( str, true, dictionary
  , true, configs )
  if not arg_valid then
    error( ("Usage: LibDeflate:CompressDeflateWithDict"
      .. "(str, dictionary, configs): "
      .. arg_err), 2 )
  end
  return CompressDeflateInternal( str, dictionary, configs )
end

--- Compress using the zlib format.
-- @param str [string] the data to be compressed.
-- @param configs [table/nil] The configuration table to control the compression
-- . If nil, use the default configuration.
-- @return [string] The compressed data.
-- @return [integer] The number of bits padded at the end of output.
-- Should always be 0.
-- Zlib formatted compressed data never has padding bits at the end.
-- @see compression_configs
-- @see LibDeflate:DecompressZlib
function LibDeflate:CompressZlib( str, configs )
  local arg_valid, arg_err = IsValidArguments( str, false, nil, true, configs )
  if not arg_valid then
    error( ("Usage: LibDeflate:CompressZlib(str, configs): "
      .. arg_err), 2 )
  end
  return CompressZlibInternal( str, nil, configs )
end

--- Compress using the zlib format with a preset dictionary.
-- @param str [string] the data to be compressed.
-- @param dictionary [table] A preset dictionary produced
-- by LibDeflate:CreateDictionary()
-- @param configs [table/nil] The configuration table to control the compression
-- . If nil, use the default configuration.
-- @return [string] The compressed data.
-- @return [integer] The number of bits padded at the end of output.
-- Should always be 0.
-- Zlib formatted compressed data never has padding bits at the end.
-- @see compression_configs
-- @see LibDeflate:CreateDictionary
-- @see LibDeflate:DecompressZlibWithDict
function LibDeflate:CompressZlibWithDict( str, dictionary, configs )
  local arg_valid, arg_err = IsValidArguments( str, true, dictionary
  , true, configs )
  if not arg_valid then
    error( ("Usage: LibDeflate:CompressZlibWithDict"
      .. "(str, dictionary, configs): "
      .. arg_err), 2 )
  end
  return CompressZlibInternal( str, dictionary, configs )
end

--[[ --------------------------------------------------------------------------
	Decompress code
--]] --------------------------------------------------------------------------

--[[
	Create a reader to easily reader stuffs as the unit of bits.
	Return values:
	1. ReadBits(bitlen)
	2. ReadBytes(bytelen, buffer, buffer_size)
	3. Decode(huffman_bitlen_count, huffman_symbol, min_bitlen)
	4. ReaderBitlenLeft()
	5. SkipToByteBoundary()
--]]
local function CreateReader( input_string )
  local input = input_string
  local input_strlen = string.len( input_string )
  local input_next_byte_pos = 1
  local cache_bitlen = 0
  local cache = 0

  -- Read some bits.
  -- To improve speed, this function does not
  -- check if the input has been exhausted.
  -- Use ReaderBitlenLeft() < 0 to check it.
  -- @param bitlen the number of bits to read
  -- @return the data is read.
  local function ReadBits( bitlen )
    local rshift_mask = _pow2[ bitlen ]
    local code
    if bitlen <= cache_bitlen then
      code = mod( cache, rshift_mask )
      cache = (cache - code) / rshift_mask
      cache_bitlen = cache_bitlen - bitlen
    else -- Whether input has been exhausted is not checked.
      local lshift_mask = _pow2[ cache_bitlen ]
      local byte1, byte2, byte3, byte4 = string_byte( input
      , input_next_byte_pos, input_next_byte_pos + 3 )
      -- This requires lua number to be at least double ()
      cache = cache + ((byte1 or 0) + (byte2 or 0) * 256
        + (byte3 or 0) * 65536 + (byte4 or 0) * 16777216) * lshift_mask
      input_next_byte_pos = input_next_byte_pos + 4
      cache_bitlen = cache_bitlen + 32 - bitlen
      code = mod( cache, rshift_mask )
      cache = (cache - code) / rshift_mask
    end
    return code
  end

  -- Read some bytes from the reader.
  -- Assume reader is on the byte boundary.
  -- @param bytelen The number of bytes to be read.
  -- @param buffer The byte read will be stored into this buffer.
  -- @param buffer_size The buffer will be modified starting from
  --	buffer[buffer_size+1], ending at buffer[buffer_size+bytelen-1]
  -- @return the new buffer_size
  local function ReadBytes( bytelen, buffer, buffer_size )
    assert( mod( cache_bitlen, 8 ) == 0 )

    local byte_from_cache = (cache_bitlen / 8 < bytelen)
        and (cache_bitlen / 8) or bytelen
    for _ = 1, byte_from_cache do
      local byte = mod( cache, 256 )
      buffer_size = buffer_size + 1
      buffer[ buffer_size ] = string_char( byte )
      cache = (cache - byte) / 256
    end
    cache_bitlen = cache_bitlen - byte_from_cache * 8
    bytelen = bytelen - byte_from_cache
    if (input_strlen - input_next_byte_pos - bytelen + 1) * 8
        + cache_bitlen < 0 then
      return -1 -- out of input
    end
    for i = input_next_byte_pos, input_next_byte_pos + bytelen - 1 do
      buffer_size = buffer_size + 1
      buffer[ buffer_size ] = string_sub( input, i, i )
    end

    input_next_byte_pos = input_next_byte_pos + bytelen
    return buffer_size
  end

  -- Decode huffman code
  -- To improve speed, this function does not check
  -- if the input has been exhausted.
  -- Use ReaderBitlenLeft() < 0 to check it.
  -- Credits for Mark Adler. This code is from puff:Decode()
  -- @see puff:Decode(...)
  -- @param huffman_bitlen_count
  -- @param huffman_symbol
  -- @param min_bitlen The minimum huffman bit length of all symbols
  -- @return The decoded deflate code.
  --	Negative value is returned if decoding fails.
  local function Decode( huffman_bitlen_counts, huffman_symbols, min_bitlen )
    local code = 0
    local first = 0
    local index = 0
    local count
    if min_bitlen > 0 then
      if cache_bitlen < 15 and input then
        local lshift_mask = _pow2[ cache_bitlen ]
        local byte1, byte2, byte3, byte4 =
            string_byte( input, input_next_byte_pos
            , input_next_byte_pos + 3 )
        -- This requires lua number to be at least double ()
        cache = cache + ((byte1 or 0) + (byte2 or 0) * 256
          + (byte3 or 0) * 65536 + (byte4 or 0) * 16777216) * lshift_mask
        input_next_byte_pos = input_next_byte_pos + 4
        cache_bitlen = cache_bitlen + 32
      end

      local rshift_mask = _pow2[ min_bitlen ]
      cache_bitlen = cache_bitlen - min_bitlen
      code = mod( cache, rshift_mask )
      cache = (cache - code) / rshift_mask
      -- Reverse the bits
      code = _reverse_bits_tbl[ min_bitlen ][ code ]

      count = huffman_bitlen_counts[ min_bitlen ]
      if code < count then
        return huffman_symbols[ code ]
      end
      index = count
      first = count * 2
      code = code * 2
    end

    for bitlen = min_bitlen + 1, 15 do
      local bit
      bit = mod( cache, 2 )
      cache = (cache - bit) / 2
      cache_bitlen = cache_bitlen - 1

      code = (bit == 1) and (code + 1 - mod( code, 2 )) or code
      count = huffman_bitlen_counts[ bitlen ] or 0
      local diff = code - first
      if diff < count then
        return huffman_symbols[ index + diff ]
      end
      index = index + count
      first = first + count
      first = first * 2
      code = code * 2
    end
    -- invalid literal/length or distance code
    -- in fixed or dynamic block (run out of code)
    return -10
  end

  local function ReaderBitlenLeft()
    return (input_strlen - input_next_byte_pos + 1) * 8 + cache_bitlen
  end

  local function SkipToByteBoundary()
    local skipped_bitlen = mod( cache_bitlen, 8 )
    local rshift_mask = _pow2[ skipped_bitlen ]
    cache_bitlen = cache_bitlen - skipped_bitlen
    cache = (cache - mod( cache, rshift_mask )) / rshift_mask
  end

  return ReadBits, ReadBytes, Decode, ReaderBitlenLeft, SkipToByteBoundary
end

-- Create a deflate state, so I can pass in less arguments to functions.
-- @param str the whole string to be decompressed.
-- @param dictionary The preset dictionary. nil if not provided.
--		This dictionary should be produced by LibDeflate:CreateDictionary(str)
-- @return The decomrpess state.
local function CreateDecompressState( str, dictionary )
  local ReadBits, ReadBytes, Decode, ReaderBitlenLeft
  , SkipToByteBoundary = CreateReader( str )
  local state =
  {
    ReadBits = ReadBits,
    ReadBytes = ReadBytes,
    Decode = Decode,
    ReaderBitlenLeft = ReaderBitlenLeft,
    SkipToByteBoundary = SkipToByteBoundary,
    buffer_size = 0,
    buffer = {},
    result_buffer = {},
    dictionary = dictionary,
  }
  return state
end

-- Get the stuffs needed to decode huffman codes
-- @see puff.c:construct(...)
-- @param huffman_bitlen The huffman bit length of the huffman codes.
-- @param max_symbol The maximum symbol
-- @param max_bitlen The min huffman bit length of all codes
-- @return zero or positive for success, negative for failure.
-- @return The count of each huffman bit length.
-- @return A table to convert huffman codes to deflate codes.
-- @return The minimum huffman bit length.
local function GetHuffmanForDecode( huffman_bitlens, max_symbol, max_bitlen )
  local huffman_bitlen_counts = {}
  local min_bitlen = max_bitlen
  for symbol = 0, max_symbol do
    local bitlen = huffman_bitlens[ symbol ] or 0
    min_bitlen = (bitlen > 0 and bitlen < min_bitlen)
        and bitlen or min_bitlen
    huffman_bitlen_counts[ bitlen ] = (huffman_bitlen_counts[ bitlen ] or 0) + 1
  end

  if huffman_bitlen_counts[ 0 ] == max_symbol + 1 then -- No Codes
    return 0, huffman_bitlen_counts, {}, 0             -- Complete, but decode will fail
  end

  local left = 1
  for len = 1, max_bitlen do
    left = left * 2
    left = left - (huffman_bitlen_counts[ len ] or 0)
    if left < 0 then
      return left -- Over-subscribed, return negative
    end
  end

  -- Generate offsets info symbol table for each length for sorting
  local offsets = {}
  offsets[ 1 ] = 0
  for len = 1, max_bitlen - 1 do
    offsets[ len + 1 ] = offsets[ len ] + (huffman_bitlen_counts[ len ] or 0)
  end

  local huffman_symbols = {}
  for symbol = 0, max_symbol do
    local bitlen = huffman_bitlens[ symbol ] or 0
    if bitlen ~= 0 then
      local offset = offsets[ bitlen ]
      huffman_symbols[ offset ] = symbol
      offsets[ bitlen ] = offsets[ bitlen ] + 1
    end
  end

  -- Return zero for complete set, positive for incomplete set.
  return left, huffman_bitlen_counts, huffman_symbols, min_bitlen
end

-- Decode a fixed or dynamic huffman blocks, excluding last block identifier
-- and block type identifer.
-- @see puff.c:codes()
-- @param state decompression state that will be modified by this function.
--	@see CreateDecompressState
-- @param ... Read the source code
-- @return 0 on success, other value on failure.
local function DecodeUntilEndOfBlock( state, lcodes_huffman_bitlens
    , lcodes_huffman_symbols, lcodes_huffman_min_bitlen
    , dcodes_huffman_bitlens, dcodes_huffman_symbols
    , dcodes_huffman_min_bitlen )
  local buffer, buffer_size, ReadBits, Decode, ReaderBitlenLeft
  , result_buffer =
      state.buffer, state.buffer_size, state.ReadBits, state.Decode
      , state.ReaderBitlenLeft, state.result_buffer
  local dictionary = state.dictionary
  local dict_string_table
  local dict_strlen

  local buffer_end = 1
  if dictionary and not buffer[ 0 ] then
    -- If there is a dictionary, copy the last 258 bytes into
    -- the string_table to make the copy in the main loop quicker.
    -- This is done only once per decompression.
    dict_string_table = dictionary.string_table
    dict_strlen = dictionary.strlen
    buffer_end = -dict_strlen + 1
    for i = 0, (-dict_strlen + 1) < -257 and -257 or (-dict_strlen + 1), -1 do
      buffer[ i ] = _byte_to_char[ dict_string_table[ dict_strlen + i ] ]
    end
  end

  repeat
    local symbol = Decode( lcodes_huffman_bitlens
    , lcodes_huffman_symbols, lcodes_huffman_min_bitlen )
    if symbol < 0 or symbol > 285 then
      -- invalid literal/length or distance code in fixed or dynamic block
      return -10
    elseif symbol < 256 then -- Literal
      buffer_size = buffer_size + 1
      buffer[ buffer_size ] = _byte_to_char[ symbol ]
    elseif symbol > 256 then -- Length code
      symbol = symbol - 256
      local bitlen = _literal_deflate_code_to_base_len[ symbol ]
      bitlen = (symbol >= 8)
          and (bitlen
            + ReadBits( _literal_deflate_code_to_extra_bitlen[ symbol ] ))
          or bitlen
      symbol = Decode( dcodes_huffman_bitlens, dcodes_huffman_symbols
      , dcodes_huffman_min_bitlen )
      if symbol < 0 or symbol > 29 then
        -- invalid literal/length or distance code in fixed or dynamic block
        return -10
      end
      local dist = _dist_deflate_code_to_base_dist[ symbol ]
      dist = (dist > 4) and (dist
        + ReadBits( _dist_deflate_code_to_extra_bitlen[ symbol ] )) or dist

      local char_buffer_index = buffer_size - dist + 1
      if char_buffer_index < buffer_end then
        -- distance is too far back in fixed or dynamic block
        return -11
      end
      if char_buffer_index >= -257 then
        for _ = 1, bitlen do
          buffer_size = buffer_size + 1
          buffer[ buffer_size ] = buffer[ char_buffer_index ]
          char_buffer_index = char_buffer_index + 1
        end
      else
        char_buffer_index = dict_strlen + char_buffer_index
        for _ = 1, bitlen do
          buffer_size = buffer_size + 1
          buffer[ buffer_size ] =
              _byte_to_char[ dict_string_table[ char_buffer_index ] ]
          char_buffer_index = char_buffer_index + 1
        end
      end
    end

    if ReaderBitlenLeft() < 0 then
      return 2 -- available inflate data did not terminate
    end

    if buffer_size >= 65536 then
      result_buffer[ table.getn( result_buffer ) + 1 ] =
          table_concat( buffer, "", 1, 32768 )
      for i = 32769, buffer_size do
        buffer[ i - 32768 ] = buffer[ i ]
      end
      buffer_size = buffer_size - 32768
      buffer[ buffer_size + 1 ] = nil
      -- NOTE: buffer[32769..end] and buffer[-257..0] are not cleared.
      -- This is why "buffer_size" variable is needed.
    end
  until symbol == 256

  state.buffer_size = buffer_size

  return 0
end

-- Decompress a store block
-- @param state decompression state that will be modified by this function.
-- @return 0 if succeeds, other value if fails.
local function DecompressStoreBlock( state )
  local buffer, buffer_size, ReadBits, ReadBytes, ReaderBitlenLeft
  , SkipToByteBoundary, result_buffer =
      state.buffer, state.buffer_size, state.ReadBits, state.ReadBytes
      , state.ReaderBitlenLeft, state.SkipToByteBoundary, state.result_buffer

  SkipToByteBoundary()
  local bytelen = ReadBits( 16 )
  if ReaderBitlenLeft() < 0 then
    return 2 -- available inflate data did not terminate
  end
  local bytelenComp = ReadBits( 16 )
  if ReaderBitlenLeft() < 0 then
    return 2 -- available inflate data did not terminate
  end

  if mod( bytelen, 256 ) + mod( bytelenComp, 256 ) ~= 255 then
    return -2 -- Not one's complement
  end
  if (bytelen - mod( bytelen, 256 )) / 256
      + (bytelenComp - mod( bytelenComp, 256 )) / 256 ~= 255 then
    return -2 -- Not one's complement
  end

  -- Note that ReadBytes will skip to the next byte boundary first.
  buffer_size = ReadBytes( bytelen, buffer, buffer_size )
  if buffer_size < 0 then
    return 2 -- available inflate data did not terminate
  end

  -- memory clean up when there are enough bytes in the buffer.
  if buffer_size >= 65536 then
    result_buffer[ table.getn( result_buffer ) + 1 ] = table_concat( buffer, "", 1, 32768 )
    for i = 32769, buffer_size do
      buffer[ i - 32768 ] = buffer[ i ]
    end
    buffer_size = buffer_size - 32768
    buffer[ buffer_size + 1 ] = nil
  end
  state.buffer_size = buffer_size
  return 0
end

-- Decompress a fixed block
-- @param state decompression state that will be modified by this function.
-- @return 0 if succeeds other value if fails.
local function DecompressFixBlock( state )
  return DecodeUntilEndOfBlock( state
  , _fix_block_literal_huffman_bitlen_count
  , _fix_block_literal_huffman_to_deflate_code, 7
  , _fix_block_dist_huffman_bitlen_count
  , _fix_block_dist_huffman_to_deflate_code, 5 )
end

-- Decompress a dynamic block
-- @param state decompression state that will be modified by this function.
-- @return 0 if success, other value if fails.
local function DecompressDynamicBlock( state )
  local ReadBits, Decode = state.ReadBits, state.Decode
  local nlen = ReadBits( 5 ) + 257
  local ndist = ReadBits( 5 ) + 1
  local ncode = ReadBits( 4 ) + 4
  if nlen > 286 or ndist > 30 then
    -- dynamic block code description: too many length or distance codes
    return -3
  end

  local rle_codes_huffman_bitlens = {}

  for i = 1, ncode do
    rle_codes_huffman_bitlens[ _rle_codes_huffman_bitlen_order[ i ] ] =
        ReadBits( 3 )
  end

  local rle_codes_err, rle_codes_huffman_bitlen_counts,
  rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen =
      GetHuffmanForDecode( rle_codes_huffman_bitlens, 18, 7 )
  if rle_codes_err ~= 0 then -- Require complete code set here
    -- dynamic block code description: code lengths codes incomplete
    return -4
  end

  local lcodes_huffman_bitlens = {}
  local dcodes_huffman_bitlens = {}
  -- Read length/literal and distance code length tables
  local index = 0
  while index < nlen + ndist do
    local symbol -- Decoded value
    local bitlen -- Last length to repeat

    symbol = Decode( rle_codes_huffman_bitlen_counts
    , rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen )

    if symbol < 0 then
      return symbol -- Invalid symbol
    elseif symbol < 16 then
      if index < nlen then
        lcodes_huffman_bitlens[ index ] = symbol
      else
        dcodes_huffman_bitlens[ index - nlen ] = symbol
      end
      index = index + 1
    else
      bitlen = 0
      if symbol == 16 then
        if index == 0 then
          -- dynamic block code description: repeat lengths
          -- with no first length
          return -5
        end
        if index - 1 < nlen then
          bitlen = lcodes_huffman_bitlens[ index - 1 ]
        else
          bitlen = dcodes_huffman_bitlens[ index - nlen - 1 ]
        end
        symbol = 3 + ReadBits( 2 )
      elseif symbol == 17 then -- Repeat zero 3..10 times
        symbol = 3 + ReadBits( 3 )
      else                     -- == 18, repeat zero 11.138 times
        symbol = 11 + ReadBits( 7 )
      end
      if index + symbol > nlen + ndist then
        -- dynamic block code description:
        -- repeat more than specified lengths
        return -6
      end
      while symbol > 0 do -- Repeat last or zero symbol times
        symbol = symbol - 1
        if index < nlen then
          lcodes_huffman_bitlens[ index ] = bitlen
        else
          dcodes_huffman_bitlens[ index - nlen ] = bitlen
        end
        index = index + 1
      end
    end
  end

  if (lcodes_huffman_bitlens[ 256 ] or 0) == 0 then
    -- dynamic block code description: missing end-of-block code
    return -9
  end

  local lcodes_err, lcodes_huffman_bitlen_counts
  , lcodes_huffman_symbols, lcodes_huffman_min_bitlen =
      GetHuffmanForDecode( lcodes_huffman_bitlens, nlen - 1, 15 )
  --dynamic block code description: invalid literal/length code lengths,
  -- Incomplete code ok only for single length 1 code
  if (lcodes_err ~= 0 and (lcodes_err < 0
        or nlen ~= (lcodes_huffman_bitlen_counts[ 0 ] or 0)
        + (lcodes_huffman_bitlen_counts[ 1 ] or 0))) then
    return -7
  end

  local dcodes_err, dcodes_huffman_bitlen_counts
  , dcodes_huffman_symbols, dcodes_huffman_min_bitlen =
      GetHuffmanForDecode( dcodes_huffman_bitlens, ndist - 1, 15 )
  -- dynamic block code description: invalid distance code lengths,
  -- Incomplete code ok only for single length 1 code
  if (dcodes_err ~= 0 and (dcodes_err < 0
        or ndist ~= (dcodes_huffman_bitlen_counts[ 0 ] or 0)
        + (dcodes_huffman_bitlen_counts[ 1 ] or 0))) then
    return -8
  end

  -- Build buffman table for literal/length codes
  return DecodeUntilEndOfBlock( state, lcodes_huffman_bitlen_counts
  , lcodes_huffman_symbols, lcodes_huffman_min_bitlen
  , dcodes_huffman_bitlen_counts, dcodes_huffman_symbols
  , dcodes_huffman_min_bitlen )
end

-- Decompress a deflate stream
-- @param state: a decompression state
-- @return the decompressed string if succeeds. nil if fails.
local function Inflate( state )
  local ReadBits = state.ReadBits

  local is_last_block
  while not is_last_block do
    is_last_block = (ReadBits( 1 ) == 1)
    local block_type = ReadBits( 2 )
    local status
    if block_type == 0 then
      status = DecompressStoreBlock( state )
    elseif block_type == 1 then
      status = DecompressFixBlock( state )
    elseif block_type == 2 then
      status = DecompressDynamicBlock( state )
    else
      return nil, -1 -- invalid block type (type == 3)
    end
    if status ~= 0 then
      return nil, status
    end
  end

  state.result_buffer[ table.getn( state.result_buffer ) + 1 ] =
      table_concat( state.buffer, "", 1, state.buffer_size )
  local result = table_concat( state.result_buffer )
  return result
end

-- @see LibDeflate:DecompressDeflate(str)
-- @see LibDeflate:DecompressDeflateWithDict(str, dictionary)
local function DecompressDeflateInternal( str, dictionary )
  local state = CreateDecompressState( str, dictionary )
  local result, status = Inflate( state )
  if not result then
    return nil, status
  end

  local bitlen_left = state.ReaderBitlenLeft()
  local bytelen_left = (bitlen_left - mod( bitlen_left, 8 )) / 8
  return result, bytelen_left
end

-- @see LibDeflate:DecompressZlib(str)
-- @see LibDeflate:DecompressZlibWithDict(str)
local function DecompressZlibInternal( str, dictionary )
  local state = CreateDecompressState( str, dictionary )
  local ReadBits = state.ReadBits

  local CMF = ReadBits( 8 )
  if state.ReaderBitlenLeft() < 0 then
    return nil, 2 -- available inflate data did not terminate
  end
  local CM = mod( CMF, 16 )
  local CINFO = (CMF - CM) / 16
  if CM ~= 8 then
    return nil, -12 -- invalid compression method
  end
  if CINFO > 7 then
    return nil, -13 -- invalid window size
  end

  local FLG = ReadBits( 8 )
  if state.ReaderBitlenLeft() < 0 then
    return nil, 2 -- available inflate data did not terminate
  end
  if mod( CMF * 256 + FLG, 31 ) ~= 0 then
    return nil, -14 -- invalid header checksum
  end

  local FDIST = mod( (FLG - mod( FLG, 32 )) / 32, 2 )
  local FLEVEL = mod( (FLG - mod( FLG, 64 )) / 64, 4 ) -- luacheck: ignore FLEVEL

  if FDIST == 1 then
    if not dictionary then
      return nil, -16 -- need dictonary, but dictionary is not provided.
    end
    local byte3 = ReadBits( 8 )
    local byte2 = ReadBits( 8 )
    local byte1 = ReadBits( 8 )
    local byte0 = ReadBits( 8 )
    local actual_adler32 = byte3 * 16777216 + byte2 * 65536 + byte1 * 256 + byte0
    if state.ReaderBitlenLeft() < 0 then
      return nil, 2 -- available inflate data did not terminate
    end
    if not IsEqualAdler32( actual_adler32, dictionary.adler32 ) then
      return nil, -17 -- dictionary adler32 does not match
    end
  end
  local result, status = Inflate( state )
  if not result then
    return nil, status
  end
  state.SkipToByteBoundary()

  local adler_byte0 = ReadBits( 8 )
  local adler_byte1 = ReadBits( 8 )
  local adler_byte2 = ReadBits( 8 )
  local adler_byte3 = ReadBits( 8 )
  if state.ReaderBitlenLeft() < 0 then
    return nil, 2 -- available inflate data did not terminate
  end

  local adler32_expected = adler_byte0 * 16777216
      + adler_byte1 * 65536 + adler_byte2 * 256 + adler_byte3
  local adler32_actual = LibDeflate:Adler32( result )
  if not IsEqualAdler32( adler32_expected, adler32_actual ) then
    return nil, -15 -- Adler32 checksum does not match
  end

  local bitlen_left = state.ReaderBitlenLeft()
  local bytelen_left = (bitlen_left - mod( bitlen_left, 8 )) / 8
  return result, bytelen_left
end

--- Decompress a raw deflate compressed data.
-- @param str [string] The data to be decompressed.
-- @return [string/nil] If the decompression succeeds, return the decompressed
-- data. If the decompression fails, return nil. You should check if this return
-- value is non-nil to know if the decompression succeeds.
-- @return [integer] If the decompression succeeds, return the number of
-- unprocessed bytes in the input compressed data. This return value is a
-- positive integer if the input data is a valid compressed data appended by an
-- arbitary non-empty string. This return value is 0 if the input data does not
-- contain any extra bytes.<br>
-- If the decompression fails (The first return value of this function is nil),
-- this return value is undefined.
-- @see LibDeflate:CompressDeflate
function LibDeflate:DecompressDeflate( str )
  local arg_valid, arg_err = IsValidArguments( str )
  if not arg_valid then
    error( ("Usage: LibDeflate:DecompressDeflate(str): "
      .. arg_err), 2 )
  end
  return DecompressDeflateInternal( str )
end

--- Decompress a raw deflate compressed data with a preset dictionary.
-- @param str [string] The data to be decompressed.
-- @param dictionary [table] The preset dictionary used by
-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
-- Decompression and compression must use the same dictionary.
-- Otherwise wrong decompressed data could be produced without generating any
-- error.
-- @return [string/nil] If the decompression succeeds, return the decompressed
-- data. If the decompression fails, return nil. You should check if this return
-- value is non-nil to know if the decompression succeeds.
-- @return [integer] If the decompression succeeds, return the number of
-- unprocessed bytes in the input compressed data. This return value is a
-- positive integer if the input data is a valid compressed data appended by an
-- arbitary non-empty string. This return value is 0 if the input data does not
-- contain any extra bytes.<br>
-- If the decompression fails (The first return value of this function is nil),
-- this return value is undefined.
-- @see LibDeflate:CompressDeflateWithDict
function LibDeflate:DecompressDeflateWithDict( str, dictionary )
  local arg_valid, arg_err = IsValidArguments( str, true, dictionary )
  if not arg_valid then
    error( ("Usage: LibDeflate:DecompressDeflateWithDict(str, dictionary): "
      .. arg_err), 2 )
  end
  return DecompressDeflateInternal( str, dictionary )
end

--- Decompress a zlib compressed data.
-- @param str [string] The data to be decompressed
-- @return [string/nil] If the decompression succeeds, return the decompressed
-- data. If the decompression fails, return nil. You should check if this return
-- value is non-nil to know if the decompression succeeds.
-- @return [integer] If the decompression succeeds, return the number of
-- unprocessed bytes in the input compressed data. This return value is a
-- positive integer if the input data is a valid compressed data appended by an
-- arbitary non-empty string. This return value is 0 if the input data does not
-- contain any extra bytes.<br>
-- If the decompression fails (The first return value of this function is nil),
-- this return value is undefined.
-- @see LibDeflate:CompressZlib
function LibDeflate:DecompressZlib( str )
  local arg_valid, arg_err = IsValidArguments( str )
  if not arg_valid then
    error( ("Usage: LibDeflate:DecompressZlib(str): "
      .. arg_err), 2 )
  end
  return DecompressZlibInternal( str )
end

--- Decompress a zlib compressed data with a preset dictionary.
-- @param str [string] The data to be decompressed
-- @param dictionary [table] The preset dictionary used by
-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
-- Decompression and compression must use the same dictionary.
-- Otherwise wrong decompressed data could be produced without generating any
-- error.
-- @return [string/nil] If the decompression succeeds, return the decompressed
-- data. If the decompression fails, return nil. You should check if this return
-- value is non-nil to know if the decompression succeeds.
-- @return [integer] If the decompression succeeds, return the number of
-- unprocessed bytes in the input compressed data. This return value is a
-- positive integer if the input data is a valid compressed data appended by an
-- arbitary non-empty string. This return value is 0 if the input data does not
-- contain any extra bytes.<br>
-- If the decompression fails (The first return value of this function is nil),
-- this return value is undefined.
-- @see LibDeflate:CompressZlibWithDict
function LibDeflate:DecompressZlibWithDict( str, dictionary )
  local arg_valid, arg_err = IsValidArguments( str, true, dictionary )
  if not arg_valid then
    error( ("Usage: LibDeflate:DecompressZlibWithDict(str, dictionary): "
      .. arg_err), 2 )
  end
  return DecompressZlibInternal( str, dictionary )
end

-- Calculate the huffman code of fixed block
do
  _fix_block_literal_huffman_bitlen = {}
  for sym = 0, 143 do
    _fix_block_literal_huffman_bitlen[ sym ] = 8
  end
  for sym = 144, 255 do
    _fix_block_literal_huffman_bitlen[ sym ] = 9
  end
  for sym = 256, 279 do
    _fix_block_literal_huffman_bitlen[ sym ] = 7
  end
  for sym = 280, 287 do
    _fix_block_literal_huffman_bitlen[ sym ] = 8
  end

  _fix_block_dist_huffman_bitlen = {}
  for dist = 0, 31 do
    _fix_block_dist_huffman_bitlen[ dist ] = 5
  end
  local status
  status, _fix_block_literal_huffman_bitlen_count
  , _fix_block_literal_huffman_to_deflate_code =
      GetHuffmanForDecode( _fix_block_literal_huffman_bitlen, 287, 9 )
  assert( status == 0 )
  status, _fix_block_dist_huffman_bitlen_count,
  _fix_block_dist_huffman_to_deflate_code =
      GetHuffmanForDecode( _fix_block_dist_huffman_bitlen, 31, 5 )
  assert( status == 0 )

  _fix_block_literal_huffman_code =
      GetHuffmanCodeFromBitlen( _fix_block_literal_huffman_bitlen_count
      , _fix_block_literal_huffman_bitlen, 287, 9 )
  _fix_block_dist_huffman_code =
      GetHuffmanCodeFromBitlen( _fix_block_dist_huffman_bitlen_count
      , _fix_block_dist_huffman_bitlen, 31, 5 )
end

-- Prefix encoding algorithm
-- Credits to LibCompress.
-- The code has been rewritten by the author of LibDeflate.
------------------------------------------------------------------------------

-- to be able to match any requested byte value, the search
-- string must be preprocessed characters to escape with %:
-- ( ) . % + - * ? [ ] ^ $
-- "illegal" byte values:
-- 0 is replaces %z
local _gsub_escape_table = {
  [ "\000" ] = "%z",
  [ "(" ] = "%(",
  [ ")" ] = "%)",
  [ "." ] = "%.",
  [ "%" ] = "%%",
  [ "+" ] = "%+",
  [ "-" ] = "%-",
  [ "*" ] = "%*",
  [ "?" ] = "%?",
  [ "[" ] = "%[",
  [ "]" ] = "%]",
  [ "^" ] = "%^",
  [ "$" ] = "%$",
}

local function escape_for_gsub( str )
  return string.gsub( str, "([%z%(%)%.%%%+%-%*%?%[%]%^%$])", _gsub_escape_table )
end

--- Create a custom codec with encoder and decoder. <br>
-- This codec is used to convert an input string to make it not contain
-- some specific bytes.
-- This created codec and the parameters of this function do NOT take
-- localization into account. One byte (0-255) in the string is exactly one
-- character (0-255).
-- Credits to LibCompress.
-- The code has been rewritten by the author of LibDeflate. <br>
-- @param reserved_chars [string] The created encoder will ensure encoded
-- data does not contain any single character in reserved_chars. This parameter
-- should be non-empty.
-- @param escape_chars [string] The escape character(s) used in the created
-- codec. The codec converts any character included in reserved\_chars /
-- escape\_chars / map\_chars to (one escape char + one character not in
-- reserved\_chars / escape\_chars / map\_chars).
-- You usually only need to provide a length-1 string for this parameter.
-- Length-2 string is only needed when
-- reserved\_chars + escape\_chars + map\_chars is longer than 127.
-- This parameter should be non-empty.
-- @param map_chars [string] The created encoder will map every
-- reserved\_chars:sub(i, i) (1 <= i <= #map\_chars) to map\_chars:sub(i, i).
-- This parameter CAN be empty string.
-- @return [table/nil] If the codec cannot be created, return nil.<br>
-- If the codec can be created according to the given
-- parameters, return the codec, which is a encode/decode table.
-- The table contains two functions: <br>
-- t:Encode(str) returns the encoded string. <br>
-- t:Decode(str) returns the decoded string if succeeds. nil if fails.
-- @return [nil/string] If the codec is successfully created, return nil.
-- If not, return a string that describes the reason why the codec cannot be
-- created.
-- @usage
-- -- Create an encoder/decoder that maps all "\000" to "\003",
-- -- and escape "\001" (and "\002" and "\003") properly
-- local codec = LibDeflate:CreateCodec("\000\001", "\002", "\003")
--
-- local encoded = codec:Encode(SOME_STRING)
-- -- "encoded" does not contain "\000" or "\001"
-- local decoded = codec:Decode(encoded)
-- -- assert(decoded == SOME_STRING)
function LibDeflate:CreateCodec( reserved_chars, escape_chars
    , map_chars )
  if type( reserved_chars ) ~= "string"
      or type( escape_chars ) ~= "string"
      or type( map_chars ) ~= "string" then
    error(
      "Usage: LibDeflate:CreateCodec(reserved_chars,"
      .. " escape_chars, map_chars):"
      .. " All arguments must be string.", 2 )
  end

  if escape_chars == "" then
    return nil, "No escape characters supplied."
  end
  if string.len( reserved_chars ) < string.len( map_chars ) then
    return nil, "The number of reserved characters must be"
        .. " at least as many as the number of mapped chars."
  end
  if reserved_chars == "" then
    return nil, "No characters to encode."
  end

  local encode_bytes = reserved_chars .. escape_chars .. map_chars
  -- build list of bytes not available as a suffix to a prefix byte
  local taken = {}
  for i = 1, string.len( encode_bytes ) do
    local byte = string_byte( encode_bytes, i, i )
    if taken[ byte ] then
      return nil, "There must be no duplicate characters in the"
          .. " concatenation of reserved_chars, escape_chars and"
          .. " map_chars."
    end
    taken[ byte ] = true
  end

  local decode_patterns = {}
  local decode_repls = {}

  -- the encoding can be a single gsub
  -- , but the decoding can require multiple gsubs
  local encode_search = {}
  local encode_translate = {}

  -- map single byte to single byte
  if string.len( map_chars ) > 0 then
    local decode_search = {}
    local decode_translate = {}
    for i = 1, string.len( map_chars ) do
      local from = string_sub( reserved_chars, i, i )
      local to = string_sub( map_chars, i, i )
      encode_translate[ from ] = to
      encode_search[ table.getn( encode_search ) + 1 ] = from
      decode_translate[ to ] = from
      decode_search[ table.getn( decode_search ) + 1 ] = to
    end
    decode_patterns[ table.getn( decode_patterns ) + 1 ] =
        "([" .. escape_for_gsub( table_concat( decode_search ) ) .. "])"
    decode_repls[ table.getn( decode_repls ) + 1 ] = decode_translate
  end

  local escape_char_index = 1
  local escape_char = string_sub( escape_chars
  , escape_char_index, escape_char_index )
  -- map single byte to double-byte
  local r = 0 -- suffix char value to the escapeChar

  local decode_search = {}
  local decode_translate = {}
  for i = 1, string.len( encode_bytes ) do
    local c = string_sub( encode_bytes, i, i )
    if not encode_translate[ c ] then
      while r >= 256 or taken[ r ] do
        r = r + 1
        if r > 255 then -- switch to next escapeChar
          decode_patterns[ table.getn( decode_patterns ) + 1 ] =
              escape_for_gsub( escape_char )
              .. "(["
              .. escape_for_gsub( table_concat( decode_search ) ) .. "])"
          decode_repls[ table.getn( decode_repls ) + 1 ] = decode_translate

          escape_char_index = escape_char_index + 1
          escape_char = string_sub( escape_chars, escape_char_index
          , escape_char_index )
          r = 0
          decode_search = {}
          decode_translate = {}

          if not escape_char or escape_char == "" then
            -- actually I don't need to check
            -- "not ecape_char", but what if Lua changes
            -- the behavior of string.sub() in the future?
            -- we are out of escape chars and we need more!
            return nil, "Out of escape characters."
          end
        end
      end

      local char_r = _byte_to_char[ r ]
      encode_translate[ c ] = escape_char .. char_r
      encode_search[ table.getn( encode_search ) + 1 ] = c
      decode_translate[ char_r ] = c
      decode_search[ table.getn( decode_search ) + 1 ] = char_r
      r = r + 1
    end
    if i == string.len( encode_bytes ) then
      decode_patterns[ table.getn( decode_patterns ) + 1 ] =
          escape_for_gsub( escape_char ) .. "(["
          .. escape_for_gsub( table_concat( decode_search ) ) .. "])"
      decode_repls[ table.getn( decode_repls ) + 1 ] = decode_translate
    end
  end

  local codec = {}

  local encode_pattern = "(["
      .. escape_for_gsub( table_concat( encode_search ) ) .. "])"
  local encode_repl = encode_translate

  function codec:Encode( str )
    if type( str ) ~= "string" then
      error( string.format( "Usage: codec:Encode(str):"
        .. " 'str' - string expected got '%s'.", type( str ) ), 2 )
    end
    return string_gsub( str, encode_pattern, encode_repl )
  end

  local decode_tblsize = table.getn( decode_patterns )
  local decode_fail_pattern = "(["
      .. escape_for_gsub( reserved_chars ) .. "])"

  function codec:Decode( str )
    if type( str ) ~= "string" then
      error( string.format( "Usage: codec:Decode(str):"
        .. " 'str' - string expected got '%s'.", type( str ) ), 2 )
    end
    if string_find( str, decode_fail_pattern ) then
      return nil
    end
    for i = 1, decode_tblsize do
      str = string_gsub( str, decode_patterns[ i ], decode_repls[ i ] )
    end
    return str
  end

  return codec
end

local _addon_channel_codec

local function GenerateWoWAddonChannelCodec()
  return LibDeflate:CreateCodec( "\000", "\001", "" )
end

--- Encode the string to make it ready to be transmitted in World of
-- Warcraft addon channel. <br>
-- The encoded string is guaranteed to contain no NULL ("\000") character.
-- @param str [string] The string to be encoded.
-- @return The encoded string.
-- @see LibDeflate:DecodeForWoWAddonChannel
function LibDeflate:EncodeForWoWAddonChannel( str )
  if type( str ) ~= "string" then
    error( string.format( "Usage: LibDeflate:EncodeForWoWAddonChannel(str):"
      .. " 'str' - string expected got '%s'.", type( str ) ), 2 )
  end
  if not _addon_channel_codec then
    _addon_channel_codec = GenerateWoWAddonChannelCodec()
  end
  return _addon_channel_codec:Encode( str )
end

--- Decode the string produced by LibDeflate:EncodeForWoWAddonChannel
-- @param str [string] The string to be decoded.
-- @return [string/nil] The decoded string if succeeds. nil if fails.
-- @see LibDeflate:EncodeForWoWAddonChannel
function LibDeflate:DecodeForWoWAddonChannel( str )
  if type( str ) ~= "string" then
    error( string.format( "Usage: LibDeflate:DecodeForWoWAddonChannel(str):"
      .. " 'str' - string expected got '%s'.", type( str ) ), 2 )
  end
  if not _addon_channel_codec then
    _addon_channel_codec = GenerateWoWAddonChannelCodec()
  end
  return _addon_channel_codec:Decode( str )
end

-- For World of Warcraft Chat Channel Encoding
-- Credits to LibCompress.
-- The code has been rewritten by the author of LibDeflate. <br>
-- Following byte values are not allowed:
-- \000, s, S, \010, \013, \124, %
-- Because SendChatMessage will error
-- if an UTF8 multibyte character is incomplete,
-- all character values above 127 have to be encoded to avoid this.
-- This costs quite a bit of bandwidth (about 13-14%)
-- Also, because drunken status is unknown for the received
-- , strings used with SendChatMessage should be terminated with
-- an identifying byte value, after which the server MAY add "...hic!"
-- or as much as it can fit(!).
-- Pass the identifying byte as a reserved character to this function
-- to ensure the encoding doesn't contain that value.
-- or use this: local message, match = arg1:gsub("^(.*)\029.-$", "%1")
-- arg1 is message from channel, \029 is the string terminator
-- , but may be used in the encoded datastream as well. :-)
-- This encoding will expand data anywhere from:
-- 0% (average with pure ascii text)
-- 53.5% (average with random data valued zero to 255)
-- 100% (only encoding data that encodes to two bytes)
local function GenerateWoWChatChannelCodec()
  local r = {}
  for i = 128, 255 do
    r[ table.getn( r ) + 1 ] = _byte_to_char[ i ]
  end

  local reserved_chars = "sS\000\010\013\124%" .. table_concat( r )
  return LibDeflate:CreateCodec( reserved_chars
  , "\029\031", "\015\020" )
end

local _chat_channel_codec

--- Encode the string to make it ready to be transmitted in World of
-- Warcraft chat channel. <br>
-- See also https://wow.gamepedia.com/ValidChatMessageCharacters
-- @param str [string] The string to be encoded.
-- @return [string] The encoded string.
-- @see LibDeflate:DecodeForWoWChatChannel
function LibDeflate:EncodeForWoWChatChannel( str )
  if type( str ) ~= "string" then
    error( string.format( "Usage: LibDeflate:EncodeForWoWChatChannel(str):"
      .. " 'str' - string expected got '%s'.", type( str ) ), 2 )
  end
  if not _chat_channel_codec then
    _chat_channel_codec = GenerateWoWChatChannelCodec()
  end
  return _chat_channel_codec:Encode( str )
end

--- Decode the string produced by LibDeflate:EncodeForWoWChatChannel.
-- @param str [string] The string to be decoded.
-- @return [string/nil] The decoded string if succeeds. nil if fails.
-- @see LibDeflate:EncodeForWoWChatChannel
function LibDeflate:DecodeForWoWChatChannel( str )
  if type( str ) ~= "string" then
    error( string.format( "Usage: LibDeflate:DecodeForWoWChatChannel(str):"
      .. " 'str' - string expected got '%s'.", type( str ) ), 2 )
  end
  if not _chat_channel_codec then
    _chat_channel_codec = GenerateWoWChatChannelCodec()
  end
  return _chat_channel_codec:Decode( str )
end

-- Credits to WeakAuras2 and Galmok for the 6 bit encoding algorithm.
-- The code has been rewritten by the author of LibDeflate.
-- The result of encoding will be 25% larger than the
-- origin string, but every single byte of the encoding result will be
-- printable characters as the following.
local _byte_to_6bit_char = {
  [ 0 ] = "a",
  "b",
  "c",
  "d",
  "e",
  "f",
  "g",
  "h",
  "i",
  "j",
  "k",
  "l",
  "m",
  "n",
  "o",
  "p",
  "q",
  "r",
  "s",
  "t",
  "u",
  "v",
  "w",
  "x",
  "y",
  "z",
  "A",
  "B",
  "C",
  "D",
  "E",
  "F",
  "G",
  "H",
  "I",
  "J",
  "K",
  "L",
  "M",
  "N",
  "O",
  "P",
  "Q",
  "R",
  "S",
  "T",
  "U",
  "V",
  "W",
  "X",
  "Y",
  "Z",
  "0",
  "1",
  "2",
  "3",
  "4",
  "5",
  "6",
  "7",
  "8",
  "9",
  "(",
  ")",
}

local _6bit_to_byte = {
  [ 97 ] = 0,
  [ 98 ] = 1,
  [ 99 ] = 2,
  [ 100 ] = 3,
  [ 101 ] = 4,
  [ 102 ] = 5,
  [ 103 ] = 6,
  [ 104 ] = 7,
  [ 105 ] = 8,
  [ 106 ] = 9,
  [ 107 ] = 10,
  [ 108 ] = 11,
  [ 109 ] = 12,
  [ 110 ] = 13,
  [ 111 ] = 14,
  [ 112 ] = 15,
  [ 113 ] = 16,
  [ 114 ] = 17,
  [ 115 ] = 18,
  [ 116 ] = 19,
  [ 117 ] = 20,
  [ 118 ] = 21,
  [ 119 ] = 22,
  [ 120 ] = 23,
  [ 121 ] = 24,
  [ 122 ] = 25,
  [ 65 ] = 26,
  [ 66 ] = 27,
  [ 67 ] = 28,
  [ 68 ] = 29,
  [ 69 ] = 30,
  [ 70 ] = 31,
  [ 71 ] = 32,
  [ 72 ] = 33,
  [ 73 ] = 34,
  [ 74 ] = 35,
  [ 75 ] = 36,
  [ 76 ] = 37,
  [ 77 ] = 38,
  [ 78 ] = 39,
  [ 79 ] = 40,
  [ 80 ] = 41,
  [ 81 ] = 42,
  [ 82 ] = 43,
  [ 83 ] = 44,
  [ 84 ] = 45,
  [ 85 ] = 46,
  [ 86 ] = 47,
  [ 87 ] = 48,
  [ 88 ] = 49,
  [ 89 ] = 50,
  [ 90 ] = 51,
  [ 48 ] = 52,
  [ 49 ] = 53,
  [ 50 ] = 54,
  [ 51 ] = 55,
  [ 52 ] = 56,
  [ 53 ] = 57,
  [ 54 ] = 58,
  [ 55 ] = 59,
  [ 56 ] = 60,
  [ 57 ] = 61,
  [ 40 ] = 62,
  [ 41 ] = 63,
}

--- Encode the string to make it printable. <br>
--
-- Credit to WeakAuras2, this function is equivalant to the implementation
-- it is using right now. <br>
-- The code has been rewritten by the author of LibDeflate. <br>
-- The encoded string will be 25% larger than the origin string. However, every
-- single byte of the encoded string will be one of 64 printable ASCII
-- characters, which are can be easier copied, pasted and displayed.
-- (26 lowercase letters, 26 uppercase letters, 10 numbers digits,
-- left parenthese, or right parenthese)
-- @param str [string] The string to be encoded.
-- @return [string] The encoded string.
function LibDeflate:EncodeForPrint( str )
  if type( str ) ~= "string" then
    error( string.format( "Usage: LibDeflate:EncodeForPrint(str):"
      .. " 'str' - string expected got '%s'.", type( str ) ), 2 )
  end
  local strlen = string.len( str )
  local strlenMinus2 = strlen - 2
  local i = 1
  local buffer = {}
  local buffer_size = 0
  while i <= strlenMinus2 do
    local x1, x2, x3 = string_byte( str, i, i + 2 )
    i = i + 3
    local cache = x1 + x2 * 256 + x3 * 65536
    local b1 = mod( cache, 64 )
    cache = (cache - b1) / 64
    local b2 = mod( cache, 64 )
    cache = (cache - b2) / 64
    local b3 = mod( cache, 64 )
    local b4 = (cache - b3) / 64
    buffer_size = buffer_size + 1
    buffer[ buffer_size ] =
        _byte_to_6bit_char[ b1 ] .. _byte_to_6bit_char[ b2 ]
        .. _byte_to_6bit_char[ b3 ] .. _byte_to_6bit_char[ b4 ]
  end

  local cache = 0
  local cache_bitlen = 0
  while i <= strlen do
    local x = string_byte( str, i, i )
    cache = cache + x * _pow2[ cache_bitlen ]
    cache_bitlen = cache_bitlen + 8
    i = i + 1
  end
  while cache_bitlen > 0 do
    local bit6 = mod( cache, 64 )
    buffer_size = buffer_size + 1
    buffer[ buffer_size ] = _byte_to_6bit_char[ bit6 ]
    cache = (cache - bit6) / 64
    cache_bitlen = cache_bitlen - 6
  end

  return table_concat( buffer )
end

--- Decode the printable string produced by LibDeflate:EncodeForPrint.
-- "str" will have its prefixed and trailing control characters or space
-- removed before it is decoded, so it is easier to use if "str" comes form
-- user copy and paste with some prefixed or trailing spaces.
-- Then decode fails if the string contains any characters cant be produced by
-- LibDeflate:EncodeForPrint. That means, decode fails if the string contains a
-- characters NOT one of 26 lowercase letters, 26 uppercase letters,
-- 10 numbers digits, left parenthese, or right parenthese.
-- @param str [string] The string to be decoded
-- @return [string/nil] The decoded string if succeeds. nil if fails.
function LibDeflate:DecodeForPrint( str )
  if type( str ) ~= "string" then
    error( string.format( "Usage: LibDeflate:DecodeForPrint(str):"
      .. " 'str' - string expected got '%s'.", type( str ) ), 2 )
  end
  str = string.gsub( str, "^[%c ]+", "" )
  str = string.gsub( str, "[%c ]+$", "" )

  local strlen = string.len( str )
  if strlen == 1 then
    return nil
  end
  local strlenMinus3 = strlen - 3
  local i = 1
  local buffer = {}
  local buffer_size = 0
  while i <= strlenMinus3 do
    local x1, x2, x3, x4 = string_byte( str, i, i + 3 )
    x1 = _6bit_to_byte[ x1 ]
    x2 = _6bit_to_byte[ x2 ]
    x3 = _6bit_to_byte[ x3 ]
    x4 = _6bit_to_byte[ x4 ]
    if not (x1 and x2 and x3 and x4) then
      return nil
    end
    i = i + 4
    local cache = x1 + x2 * 64 + x3 * 4096 + x4 * 262144
    local b1 = mod( cache, 256 )
    cache = (cache - b1) / 256
    local b2 = mod( cache, 256 )
    local b3 = (cache - b2) / 256
    buffer_size = buffer_size + 1
    buffer[ buffer_size ] =
        _byte_to_char[ b1 ] .. _byte_to_char[ b2 ] .. _byte_to_char[ b3 ]
  end

  local cache        = 0
  local cache_bitlen = 0
  while i <= strlen do
    local x = string_byte( str, i, i )
    x = _6bit_to_byte[ x ]
    if not x then
      return nil
    end
    cache = cache + x * _pow2[ cache_bitlen ]
    cache_bitlen = cache_bitlen + 6
    i = i + 1
  end

  while cache_bitlen >= 8 do
    local byte = mod( cache, 256 )
    buffer_size = buffer_size + 1
    buffer[ buffer_size ] = _byte_to_char[ byte ]
    cache = (cache - byte) / 256
    cache_bitlen = cache_bitlen - 8
  end

  return table_concat( buffer )
end

local function InternalClearCache()
  _chat_channel_codec = nil
  _addon_channel_codec = nil
end

-- For test. Don't use the functions in this table for real application.
-- Stuffs in this table is subject to change.
LibDeflate.internals = {
  LoadStringToTable = LoadStringToTable,
  IsValidDictionary = IsValidDictionary,
  IsEqualAdler32 = IsEqualAdler32,
  _byte_to_6bit_char = _byte_to_6bit_char,
  _6bit_to_byte = _6bit_to_byte,
  InternalClearCache = InternalClearCache,
}

--[[-- Commandline options
@class table
@name CommandlineOptions
@usage lua LibDeflate.lua [OPTION] [INPUT] [OUTPUT]
\-0    store only. no compression.
\-1    fastest compression.
\-9    slowest and best compression.
\-d    do decompression instead of compression.
\--dict <filename> specify the file that contains
the entire preset dictionary.
\-h    give this help.
\--strategy <fixed/huffman_only/dynamic> specify a special compression strategy.
\-v    print the version and copyright info.
\--zlib  use zlib format instead of raw deflate.
]]

-- currently no plan to support stdin and stdout.
-- Because Lua in Windows does not set stdout with binary mode.
if io and os and debug and _G.arg then
  local io = io
  local os = os
  local debug = debug
  local arg = _G.arg
  local debug_info = debug.getinfo( 1 )
  if debug_info.source == arg[ 0 ]
      or debug_info.short_src == arg[ 0 ] then
    -- We are indeed runnning THIS file from the commandline.
    local input
    local output
    local i = 1
    local status
    local is_zlib = false
    local is_decompress = false
    local level
    local strategy
    local dictionary
    while (arg[ i ]) do
      local a = arg[ i ]
      if a == "-h" then
        print( LibDeflate._COPYRIGHT
          .. "\nUsage: lua LibDeflate.lua [OPTION] [INPUT] [OUTPUT]\n"
          .. "  -0    store only. no compression.\n"
          .. "  -1    fastest compression.\n"
          .. "  -9    slowest and best compression.\n"
          .. "  -d    do decompression instead of compression.\n"
          .. "  --dict <filename> specify the file that contains"
          .. " the entire preset dictionary.\n"
          .. "  -h    give this help.\n"
          .. "  --strategy <fixed/huffman_only/dynamic>"
          .. " specify a special compression strategy.\n"
          .. "  -v    print the version and copyright info.\n"
          .. "  --zlib  use zlib format instead of raw deflate.\n" )
        os.exit( 0 )
      elseif a == "-v" then
        print( LibDeflate._COPYRIGHT )
        os.exit( 0 )
      elseif string.find( a, "^%-[0-9]$" ) then
        level = tonumber( string.sub( a, 2, 2 ) )
      elseif a == "-d" then
        is_decompress = true
      elseif a == "--dict" then
        i = i + 1
        local dict_filename = arg[ i ]
        if not dict_filename then
          io.stderr:write( "You must speicify the dict filename" )
          os.exit( 1 )
        end
        local dict_file, dict_status = io.open( dict_filename, "rb" )
        if not dict_file then
          io.stderr:write(
            string.format( "LibDeflate: Cannot read the dictionary file '%s': %s", dict_filename, dict_status ) )
          os.exit( 1 )
        end
        local dict_str = dict_file:read( "*all" )
        dict_file:close()
        -- In your lua program, you should pass in adler32 as a CONSTANT
        -- , so it actually prevent you from modifying dictionary
        -- unintentionally during the program development. I do this
        -- here just because no convenient way to verify in commandline.
        dictionary = LibDeflate:CreateDictionary( dict_str,
          string.len( dict_str ), LibDeflate:Adler32( dict_str ) )
      elseif a == "--strategy" then
        -- Not sure if I should check error here
        -- If I do, redudant code.
        i = i + 1
        strategy = arg[ i ]
      elseif a == "--zlib" then
        is_zlib = true
      elseif string.find( a, "^%-" ) then
        io.stderr:write( string.format( "LibDeflate: Invalid argument: %s", a ) )
        os.exit( 1 )
      else
        if not input then
          input, status = io.open( a, "rb" )
          if not input then
            io.stderr:write(
              string.format( "LibDeflate: Cannot read the file '%s': %s", a, tostring( status ) ) )
            os.exit( 1 )
          end
        elseif not output then
          output, status = io.open( a, "wb" )
          if not output then
            io.stderr:write(
              string.format( "LibDeflate: Cannot write the file '%s': %s", a, tostring( status ) ) )
            os.exit( 1 )
          end
        end
      end
      i = i + 1
    end -- while (arg[i])

    if not input or not output then
      io.stderr:write( "LibDeflate:"
        .. " You must specify both input and output files." )
      os.exit( 1 )
    end

    local input_data = input:read( "*all" )
    local configs = {
      level = level,
      strategy = strategy,
    }
    local output_data
    if not is_decompress then
      if not is_zlib then
        if not dictionary then
          output_data =
              LibDeflate:CompressDeflate( input_data, configs )
        else
          output_data =
              LibDeflate:CompressDeflateWithDict( input_data, dictionary
              , configs )
        end
      else
        if not dictionary then
          output_data =
              LibDeflate:CompressZlib( input_data, configs )
        else
          output_data =
              LibDeflate:CompressZlibWithDict( input_data, dictionary
              , configs )
        end
      end
    else
      if not is_zlib then
        if not dictionary then
          output_data = LibDeflate:DecompressDeflate( input_data )
        else
          output_data = LibDeflate:DecompressDeflateWithDict(
            input_data, dictionary )
        end
      else
        if not dictionary then
          output_data = LibDeflate:DecompressZlib( input_data )
        else
          output_data = LibDeflate:DecompressZlibWithDict(
            input_data, dictionary )
        end
      end
    end

    if not output_data then
      io.stderr:write( "LibDeflate: Decompress fails." )
      os.exit( 1 )
    end

    output:write( output_data )
    if input and input ~= io.stdin then
      input:close()
    end
    if output and output ~= io.stdout then
      output:close()
    end

    io.stderr:write( string.format( "Successfully writes %d bytes", output_data:len() ) )
    os.exit( 0 )
  end
end

return LibDeflate