728 lines
21 KiB
Lua

-- luacheck: push ignore
--[[**************************************************************************]]
-- base64.lua
-- Copyright 2014 Ernest R. Ewert
--
-- This Lua module contains the implementation of a Lua base64 encode
-- and decode library.
--
-- The library exposes these methods.
--
-- Method Args
-- ----------- ----------------------------------------------
-- encode String in / out
-- decode String in / out
--
-- encode String, function(value) predicate
-- decode String, function(value) predicate
--
-- encode file, function(value) predicate
-- deocde file, function(value) predicate
--
-- encode file, file
-- deocde file, file
--
-- alpha alphabet, term char
--
--------------------------------------------------------------------------------
-- known_base64_alphabets
--
--
local known_base64_alphabets=
{
base64= -- RFC 2045 (Ignores max line length restrictions)
{
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
_strip="[^%a%d%+%/%=]",
_term="="
},
base64noterm= -- RFC 2045 (Ignores max line length restrictions)
{
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
_strip="[^%a%d%+%/]",
_term=""
},
base64url= -- RFC 4648 'base64url'
{
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
_strip="[^%a%d%+%-%_=]",
_term=""
},
}
local c_alpha=known_base64_alphabets.base64
local pattern_strip
--[[**************************************************************************]]
--[[****************************** Encoding **********************************]]
--[[**************************************************************************]]
-- Precomputed tables (compromise using more memory for speed)
local b64e -- 6 bit patterns to ANSI 'char' values
local b64e_a -- ready to use
local b64e_a2 -- byte addend
local b64e_b1 -- byte addend
local b64e_b2 -- byte addend
local b64e_c1 -- byte addend
local b64e_c -- ready to use
-- Tail padding values
local tail_padd64=
{
"==", -- two bytes modulo
"=" -- one byte modulo
}
--------------------------------------------------------------------------------
-- e64
--
-- Helper function to convert three eight bit values into four encoded
-- 6 (significant) bit values.
--
-- 7 0 7 0 7 0
-- e64(a a a a a a a a,b b b b b b b b,c c c c c c c c)
-- | | | |
-- return [ a a a a a a]| | |
-- [ a a b b b b]| |
-- [ b b b b c c]|
-- [ c c c c c c]
--
local function e64( a, b, c )
-- Return pre-calculated values for encoded value 1 and 4
-- Get the pre-calculated extractions for value 2 and 3, look them
-- up and return the proper value.
--
return b64e_a[a],
b64e[ b64e_a2[a]+b64e_b1[b] ],
b64e[ b64e_b2[b]+b64e_c1[c] ],
b64e_c[c]
end
--------------------------------------------------------------------------------
-- encode_tail64
--
-- Send a tail pad value to the output predicate provided.
--
local function encode_tail64( out, x, y )
-- If we have a number of input bytes that isn't exactly divisible
-- by 3 then we need to pad the tail
if x ~= nil then
local a,b,r = x,0,1
if y ~= nil then
r = 2
b = y
end
-- Encode three bytes of info, with the tail byte as zeros and
-- ignore any fourth encoded ASCII value. (We should NOT have a
-- forth byte at this point.)
local b1, b2, b3 = e64( a, b, 0 )
-- always add the first 2 six bit values to the res table
-- 1 remainder input byte needs 8 output bits
local tail_value = string.char( b1, b2 )
-- two remainder input bytes will need 18 output bits (2 as pad)
if r == 2 then
tail_value=tail_value..string.char( b3 )
end
-- send the last 4 byte sequence with appropriate tail padding
out( tail_value .. tail_padd64[r] )
end
end
--------------------------------------------------------------------------------
-- encode64_io_iterator
--
-- Create an io input iterator to read an input file and split values for
-- proper encoding.
--
local function encode64_io_iterator(file)
assert( io.type(file) == "file", "argument must be readable file handle" )
assert( file.read ~= nil, "argument must be readable file handle" )
local ii = { } -- Table for the input iterator
setmetatable(ii,{ __tostring=function() return "base64.io_iterator" end})
-- Begin returns an input read iterator
--
function ii.begin()
local sb = string.byte
-- The iterator returns three bytes from the file for encoding or nil
-- when the end of the file has been reached.
--
return function()
s = file:read(3)
if s ~= nil and #s == 3 then
return sb(s,1,3)
end
return nil
end
end
-- The tail method on the iterator allows the routines to run faster
-- because each sequence of bytes doesn't have to test for EOF.
--
function ii.tail()
-- If one or two "overflow" bytes exist, return those.
--
if s ~= nil then return s:byte(1,2) end
end
return ii
end
--------------------------------------------------------------------------------
-- encode64_with_ii
--
-- Convert the value provided by an encode iterator that provides a begin
-- method, a tail method, and an iterator that returns three bytes for
-- each call until at the end. The tail method should return either 1 or 2
-- tail bytes (for source values that are not evenly divisible by three).
--
local function encode64_with_ii( ii, out )
local sc=string.char
for a, b, c in ii.begin() do
out( sc( e64( a, b, c ) ) )
end
encode_tail64( out, ii.tail() )
end
--------------------------------------------------------------------------------
-- encode64_with_predicate
--
-- Implements the basic raw data --> base64 conversion. Each three byte
-- sequence in the input string is converted to the encoded string and
-- given to the predicate provided in 4 output byte chunks. This method
-- is slightly faster for traversing existing strings in memory.
--
local function encode64_with_predicate( raw, out )
local rem=#raw%3 -- remainder
local len=#raw-rem -- 3 byte input adjusted
local sb=string.byte -- Mostly notational (slight performance)
local sc=string.char -- Mostly notational (slight performance)
-- Main encode loop converts three input bytes to 4 base64 encoded
-- ACSII values and calls the predicate with the value.
for i=1,len,3 do
-- This really isn't intended as obfuscation. It is more about
-- loop optimization and removing temporaries.
--
out( sc( e64( sb( raw ,i , i+3 ) ) ) )
-- | | |
-- | | byte i to i + 3
-- | |
-- | returns 4 encoded values
-- |
-- creates a string with the 4 returned values
end
-- If we have a number of input bytes that isn't exactly divisible
-- by 3 then we need to pad the tail
if rem > 0 then
local x, y = sb( raw, len+1 )
if rem > 1 then
y = sb( raw, len+2 )
end
encode_tail64( out, x, y )
end
end
--------------------------------------------------------------------------------
-- encode64_tostring
--
-- Convenience method that accepts a string value and returns the
-- encoded version of that string.
--
local function encode64_tostring(raw)
local sb={} -- table to build string
local function collection_predicate(v)
sb[#sb+1]=v
end
-- Test with an 818K string in memory. Result is 1.1M of data.
--
-- lua_base64 base64 (gnu 8.21)
-- 202ms 54ms
-- 203ms 48ms
-- 204ms 50ms
-- 203ms 42ms
-- 205ms 46ms
--
encode64_with_predicate( raw, collection_predicate )
return table.concat(sb)
end
--[[**************************************************************************]]
--[[****************************** Decoding **********************************]]
--[[**************************************************************************]]
-- Precomputed tables (compromise using more memory for speed)
local b64d -- ANSI 'char' to right shifted bit pattern
local b64d_a1 -- byte addend
local b64d_a2 -- byte addend
local b64d_b1 -- byte addend
local b64d_b2 -- byte addend
local b64d_c1 -- byte addend
local b64d_z -- zero
--------------------------------------------------------------------------------
-- d64
--
-- Helper function to convert four six bit values into three full eight
-- bit values. Input values are the integer expression of the six bit value
-- encoded in the original base64 encoded string.
--
-- d64( _ _1 1 1 1 1 1,
-- | _ _ 2 2 2 2 2 2,
-- | | _ _ 3 3 3 3 3 3,
-- | | | _ _ 4 4 4 4 4 4)
-- | | | |
-- return ', [1 1 1 1 1 1 2 2] | |
-- ', [2 2 2 2 3 3 3 3] |
-- ' [3 3 4 4 4 4 4 4]
--
local function d64( b1, b2, b3, b4 )
-- We can get away with addition instead of anding the values together
-- because there are no overlapping bit patterns.
--
return
b64d_a1[b1] + b64d_a2[b2],
b64d_b1[b2] + b64d_b2[b3],
b64d_c1[b3] + b64d[b4]
end
--------------------------------------------------------------------------------
-- decode_tail64
--
-- Send the end of stream bytes that didn't get decoded via the main loop.
--
local function decode_tail64( out, e1, e2 ,e3, e4 )
if tail_padd64[2] == "" or e4 == tail_padd64[2]:byte() then
local n3 = b64d_z
if e3 ~= nil and e3 ~= tail_padd64[2]:byte() then
n3 = e3
end
-- Unpack the six bit values into the 8 bit values
local b1, b2 = d64( e1, e2, n3, b64d_z )
-- And add them to the res table
if e3 ~= nil and e3 ~= tail_padd64[2]:byte() then
out( string.char( b1, b2 ) )
else
out( string.char( b1 ) )
end
end
end
--------------------------------------------------------------------------------
-- decode64_io_iterator
--
-- Create an io input iterator to read an input file and split values for
-- proper decoding.
--
local function decode64_io_iterator( file )
local ii = { }
-- An enumeration coroutine that handles the reading of an input file
-- to break data into proper pieces for building the original string.
--
local function enummerate( file )
local sc=string.char
local sb=string.byte
local ll="" -- last line storage
local len
local yield = coroutine.yield
-- Read a "reasonable amount" of data into the line buffer. Line by
-- line is not used so that a file with no line breaks doesn't
-- cause an inordinate amount of memory usage.
--
for cl in file:lines(2048) do
-- Reset the current line to contain valid chars and any previous
-- "leftover" bytes from the previous read
--
cl = ll .. cl:gsub(pattern_strip,"")
-- | |
-- | Remove "Invalid" chars (white space etc)
-- |
-- Left over from last line
--
len = (#cl-4)-(#cl%4)
-- see the comments in decode64_with_predicate for a rundown of
-- the results of this loop (sans the coroutine)
for i=1,len,4 do
yield( sc( d64( sb( cl, i, i+4 ) ) ) )
end
ll = cl:sub( len +1, #cl )
end
local l = #ll
if l >= 4 and ll:sub(-1) ~= tail_padd64[2] then
yield( sc( d64( sb( ll, 1, 4 ) ) ) )
l=l-4
end
if l > 0 then
local e1,e2,e3,e4 = ll:byte( 0 - l, -1 )
if e1 ~= nil then
decode_tail64( function(s) yield( s ) end, e1, e2, e3, e4 )
end
end
end
-- Returns an input iterator that is implemented as a coroutine. Each
-- yield of the co-routine sends reconstructed bytes to the loop handling
-- the iteration.
--
function ii.begin()
local co = coroutine.create( function() enummerate(file) end )
return function()
local code,res = coroutine.resume(co)
assert(code == true)
return res
end
end
return ii
end
--------------------------------------------------------------------------------
-- decode64_with_ii
--
-- Convert the value provided by a decode iterator that provides a begin
-- method, a tail method, and an iterator that returns four (usable!) bytes
-- for each call until at the end.
--
local function decode64_with_ii( ii, out )
-- Uses the iterator to pull values. Each reconstructed string
-- is sent to the output predicate.
--
for l in ii.begin() do out( l ) end
end
--------------------------------------------------------------------------------
-- decode64_with_predicate
--
-- Decode an entire base64 encoded string in memory using the predicate for
-- output.
--
local function decode64_with_predicate( raw, out )
-- Sanitize the input to strip characters that are not in the alphabet.
--
-- Note: This is a deviation from strict implementations where "bad data"
-- in the input stream is unsupported.
--
local san = raw:gsub(pattern_strip,"")
local len = #san-#san%4
local rem = #san-len
local sc = string.char
local sb = string.byte
if san:sub(-1,-1) == tail_padd64[2] then
rem = rem + 4
len = len - 4
end
for i=1,len,4 do
out( sc( d64( sb( san, i, i+4 ) ) ) )
end
if rem > 0 then
decode_tail64( out, sb( san, 0-rem, -1 ) )
end
end
--------------------------------------------------------------------------------
-- decode64_tostring
--
-- Takes a string that is encoded in base64 and returns the decoded value in
-- a new string.
--
local function decode64_tostring( raw )
local sb={} -- table to build string
local function collection_predicate(v)
sb[#sb+1]=v
end
decode64_with_predicate( raw, collection_predicate )
return table.concat(sb)
end
--------------------------------------------------------------------------------
-- set_and_get_alphabet
--
-- Sets and returns the encode / decode alphabet.
--
--
local function set_and_get_alphabet(alpha,term)
if alpha ~= nil then
local magic=
{
-- ["%"]="%%",
[" "]="% ",
["^"]="%^",
["$"]="%$",
["("]="%(",
[")"]="%)",
["."]="%.",
["["]="%[",
["]"]="%]",
["*"]="%*",
["+"]="%+",
["-"]="%-",
["?"]="%?",
}
c_alpha=known_base64_alphabets[alpha]
if c_alpha == nil then
c_alpha={ _alpha=alpha, _term=term }
end
assert( #c_alpha._alpha == 64, "The alphabet ~must~ be 64 unique values." )
assert( #c_alpha._term <= 1, "Specify zero or one termination character.")
b64d={} -- Decode table alpha -> right shifted int values
b64e={} -- Encode table 0-63 (6 bits) -> char table
local s=""
for i = 1,64 do
local byte = c_alpha._alpha:byte(i)
local str = string.char(byte)
b64e[i-1]=byte
assert( b64d[byte] == nil, "Duplicate value '"..str.."'" )
b64d[byte]=i-1
s=s..str
end
local ext --Alias for extraction routine that avoids extra table lookups
if bit32 then
ext = bit32.extract -- slight speed, vast visual (IMO)
elseif bit then
local band = bit.band
local rshift = bit.rshift
ext =
function(n, field, width)
width = width or 1
return band(rshift(n, field), 2^width-1)
end
else
error("Neither Lua 5.2 bit32 nor LuaJit bit library found!")
end
-- preload encode lookup tables
b64e_a = {}
b64e_a2 = {}
b64e_b1 = {}
b64e_b2 = {}
b64e_c1 = {}
b64e_c = {}
for f = 0,255 do
b64e_a [f]=b64e[ext(f,2,6)]
b64e_a2 [f]=ext(f,0,2)*16
b64e_b1 [f]=ext(f,4,4)
b64e_b2 [f]=ext(f,0,4)*4
b64e_c1 [f]=ext(f,6,2)
b64e_c [f]=b64e[ext(f,0,6)]
end
-- preload decode lookup tables
b64d_a1 = {}
b64d_a2 = {}
b64d_b1 = {}
b64d_b2 = {}
b64d_c1 = {}
b64d_z = b64e[0]
for k,v in pairs(b64d) do
-- Each comment shows the rough C expression that would be used to
-- generate the returned triple.
--
b64d_a1 [k] = v*4 -- ([b1] ) << 2
b64d_a2 [k] = math.floor( v / 16 ) -- ([b2] & 0x30) >> 4
b64d_b1 [k] = ext( v, 0, 4 ) * 16 -- ([b2] & 0x0F) << 4
b64d_b2 [k] = math.floor( v / 4 ) -- ([b3] & 0x3c) >> 2
b64d_c1 [k] = ext( v, 0, 2 ) * 64 -- ([b3] & 0x03) << 6
end
if c_alpha._term ~= "" then
tail_padd64[1]=string.char(c_alpha._term:byte(),c_alpha._term:byte())
tail_padd64[2]=string.char(c_alpha._term:byte())
else
tail_padd64[1]=""
tail_padd64[2]=""
end
local esc_term
if magic[c_alpha._term] ~= nil then
esc_term=c_alpha._term:gsub(magic[c_alpha._term],function (s) return magic[s] end)
elseif c_alpha._term == "%" then
esc_term = "%%"
else
esc_term=c_alpha._term
end
if not c_alpha._strip then
local p=s:gsub("%%",function (s) return "__unique__" end)
for k,v in pairs(magic)
do
p=p:gsub(v,function (s) return magic[s] end )
end
local mr=p:gsub("__unique__",function() return "%%" end)
c_alpha._strip = string.format("[^%s%s]",mr,esc_term)
end
assert( c_alpha._strip )
pattern_strip = c_alpha._strip
local c =0 for i in pairs(b64d) do c=c+1 end
assert( c_alpha._alpha == s, "Integrity error." )
assert( c == 64, "The alphabet must be 64 unique values." )
if esc_term ~= "" then
assert( not c_alpha._alpha:find(esc_term), "Tail characters must not exist in alphabet." )
end
if known_base64_alphabets[alpha] == nil then
known_base64_alphabets[alpha]=c_alpha
end
end
return c_alpha._alpha,c_alpha._term
end
--------------------------------------------------------------------------------
-- encode64
--
-- Entry point mode selector.
--
--
local function encode64(i,o)
local method
if o ~= nil and io.type(o) == "file" then
local file_out = o
o = function(s) file_out:write(s) end
end
if type(i) == "string" then
if type(o) == "function" then
method = encode64_with_predicate
else
assert( o == nil, "unsupported request")
method = encode64_tostring
end
elseif io.type(i) == "file" then
assert( type(o) == "function", "file source requires output predicate")
i = encode64_io_iterator(i)
method = encode64_with_ii
else
assert( false, "unsupported mode" )
end
return method(i,o)
end
--------------------------------------------------------------------------------
-- decode64
--
-- Entry point mode selector.
--
--
local function decode64(i,o)
local method
if o ~= nil and io.type(o) == "file" then
local file_out = o
o = function(s) file_out:write(s) end
end
if type(i) == "string" then
if type(o) == "function" then
method = decode64_with_predicate
else
assert( o == nil, "unsupported request")
method = decode64_tostring
end
elseif io.type(i) == "file" then
assert( type(o) == "function", "file source requires output predicate")
i = decode64_io_iterator(i)
method = decode64_with_ii
else
assert( false, "unsupported mode" )
end
return method(i,o)
end
set_and_get_alphabet("base64")
--[[**************************************************************************]]
--[[****************************** Module **********************************]]
--[[**************************************************************************]]
return
{
encode = encode64,
decode = decode64,
alpha = set_and_get_alphabet,
}
-- luacheck: pop