Module:TableTools: Difference between revisions

    From Nonbinary Wiki
    m>Mr. Stradivarius
    (add type-checking to removeDuplicates)
    m>Mr. Stradivarius
    (only allow two or more arguments for the set functions)
    Line 92: Line 92:
    function p.union(...)
    function p.union(...)
    local lim = select('#', ...)  
    local lim = select('#', ...)  
    if lim == 0 then
    if lim < 2 then
    error("no arguments passed to 'union'", 2)
    error(lim .. ' argument' .. (lim == 1 and '' or 's') .. " passed to 'union' (minimum is 2)", 2)
    end
    end
    local ret, trackArrays = {}, {}
    local ret, trackArrays = {}, {}
    Line 168: Line 168:
    function p.intersection(...)
    function p.intersection(...)
    local lim = select('#', ...)  
    local lim = select('#', ...)  
    if lim == 0 then
    if lim < 2 then
    error("no arguments passed to 'intersection'", 2)
    error(lim .. ' argument' .. (lim == 1 and '' or 's') .. " passed to 'intersection' (minimum is 2)", 2)
    end
    end
    local ret, track, pairCounts = {}, {}, {}
    local ret, track, pairCounts = {}, {}, {}
    Line 254: Line 254:
    function p.complement(...)
    function p.complement(...)
    local lim = select('#', ...)  
    local lim = select('#', ...)  
    if lim == 0 then
    if lim < 2 then
    error("no arguments passed to 'complement' (minimum is two)", 2)
    error(lim .. ' argument' .. (lim == 1 and '' or 's') .. " passed to 'complement' (minimum is 2)", 2)
    elseif lim == 1 then
    error("only one argument passed to 'complement' (minimum is two)", 2)
    end
    end
    --[[
    --[[

    Revision as of 23:46, 19 December 2013

    Documentation for this module may be created at Module:TableTools/doc

    --[[
    ------------------------------------------------------------------------------------
    --                               TableTools                                       --
    --                                                                                --
    -- This module includes a number of functions for dealing with Lua tables.        --
    -- It is a meta-module, meant to be called from other Lua modules, and should     --
    -- not be called directly from #invoke.                                           --
    ------------------------------------------------------------------------------------
    --]]
    
    -- libraryUtil provides checkType, which the functions below use to validate
    -- the types of their arguments.
    local libraryUtil = require('libraryUtil')
    
    -- Table holding every exported function; it is returned at the end of the module.
    local p = {}
    
    -- Define often-used variables and functions.
    local floor = math.floor
    local infinity = math.huge
    local checkType = libraryUtil.checkType
    
    --[[
    ------------------------------------------------------------------------------------
    -- isPositiveInteger
    --
    -- This function returns true if the given value is a positive integer, and false
    -- if not. Although it doesn't operate on tables, it is included here as it is
    -- useful for determining whether a given table key is in the array part or the
    -- hash part of a table.
    ------------------------------------------------------------------------------------
    --]]
    function p.isPositiveInteger(v)
    	-- True only for finite numbers that are at least 1 and have no fractional
    	-- part. Every operand of the chain is a comparison, so the result is
    	-- always a real boolean (never nil or the value itself).
    	return type(v) == 'number' and v >= 1 and floor(v) == v and v < infinity
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- isNan
    --
    -- This function returns true if the given number is a NaN value, and false
    -- if not. Although it doesn't operate on tables, it is included here as it is
    -- useful for determining whether a value can be a valid table key. Lua will
    -- generate an error if a NaN is used as a table key.
    ------------------------------------------------------------------------------------
    --]]
    function p.isNan(v)
    	-- NaN is the only number that compares unequal to itself. This test is
    	-- used instead of comparing tostring(v) with '-nan', because the string
    	-- form of a NaN depends on the platform and on the NaN's sign bit
    	-- (it may be 'nan', '-nan', or something else entirely).
    	-- The type check keeps non-numbers (for example tables with an __eq
    	-- metamethod) from ever being reported as NaN.
    	-- Returns true if v is a NaN number, and false otherwise.
    	return type(v) == 'number' and v ~= v
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- removeDuplicates
    --
    -- This removes duplicate values from an array. Non-positive-integer keys are
    -- ignored. The earliest value is kept, and all subsequent duplicate values are
    -- removed, but otherwise the array order is unchanged.
    ------------------------------------------------------------------------------------
    --]]
    function p.removeDuplicates(t)
    	checkType('removeDuplicates', 1, t, 'table')
    	local isNan = p.isNan
    	local unique = {} -- Output array, in order of first appearance.
    	local seen = {}   -- Set of the non-NaN values already copied to the output.
    	for _, value in ipairs(t) do
    		local nan = isNan(value)
    		-- NaNs are always kept: they are never equal to anything (so they are
    		-- never duplicates), and they cannot be used as keys in the seen set.
    		if nan or not seen[value] then
    			unique[#unique + 1] = value
    			if not nan then
    				seen[value] = true
    			end
    		end
    	end
    	return unique
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- union
    --
    -- This returns the union of the key/value pairs of n tables. If any of the tables
    -- contain different values for the same table key, the table value is converted
    -- to an array holding all of the different values.
    ------------------------------------------------------------------------------------
    --]]
    function p.union(...)
    	local argCount = select('#', ...)
    	if argCount < 2 then
    		error(argCount .. ' argument' .. (argCount == 1 and '' or 's') .. " passed to 'union' (minimum is 2)", 2)
    	end
    	local merged = {}      -- The result table.
    	local isCollected = {} -- Keys whose result value is an array built by this function.
    	for argIndex = 1, argCount do
    		local tbl = select(argIndex, ...)
    		checkType('union', argIndex, tbl, 'table')
    		for key, value in pairs(tbl) do
    			local current = merged[key]
    			if current == nil then
    				-- First time this key is seen: store the value as-is.
    				merged[key] = value
    			elseif current ~= value then
    				if not isCollected[key] then
    					-- Second distinct value for this key: start an array of values.
    					merged[key] = {current, value}
    					isCollected[key] = true
    				else
    					-- An array already exists for this key; append the value
    					-- only if the array does not contain it yet.
    					local found = false
    					for j = 1, #current do
    						if current[j] == value then
    							found = true
    							break
    						end
    					end
    					if not found then
    						current[#current + 1] = value
    					end
    				end
    			end
    		end
    	end
    	return merged
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- valueUnion
    --
    -- This returns the union of the values of n tables, as an array. For example, for
    -- the tables {1, 3, 4, 5, foo = 7} and {2, bar = 3, 5, 6}, valueUnion will return
    -- an array containing 1, 2, 3, 4, 5, 6 and 7 (the order is not guaranteed).
    ------------------------------------------------------------------------------------
    --]]
    function p.valueUnion(...)
    	local argCount = select('#', ...)
    	if argCount < 2 then
    		error(argCount .. ' argument' .. (argCount == 1 and '' or 's') .. " passed to 'valueUnion' (minimum is 2)", 2)
    	end
    	local isNan = p.isNan
    	local values = {} -- Output array of distinct values.
    	local seen = {}   -- Set of the non-NaN values already added to the output.
    	for argIndex = 1, argCount do
    		local tbl = select(argIndex, ...)
    		checkType('valueUnion', argIndex, tbl, 'table')
    		for _, value in pairs(tbl) do
    			local nan = isNan(value)
    			-- NaNs are always added: they cannot be table keys, so they are
    			-- never recorded in (or looked up from) the seen set.
    			if nan or not seen[value] then
    				values[#values + 1] = value
    				if not nan then
    					seen[value] = true
    				end
    			end
    		end
    	end
    	return values
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- intersection
    --
    -- This returns the intersection of the key/value pairs of n tables. Both the key
    -- and the value must match to be included in the resulting table.
    ------------------------------------------------------------------------------------
    --]]
    function p.intersection(...)
    	local argCount = select('#', ...)
    	if argCount < 2 then
    		error(argCount .. ' argument' .. (argCount == 1 and '' or 's') .. " passed to 'intersection' (minimum is 2)", 2)
    	end
    	local firstValue = {} -- For each key, the first value encountered for it.
    	local matches = {}    -- For each key, how many tables held that first value.
    	for argIndex = 1, argCount do
    		local tbl = select(argIndex, ...)
    		checkType('intersection', argIndex, tbl, 'table')
    		for key, value in pairs(tbl) do
    			local known = firstValue[key]
    			if known == nil then
    				firstValue[key] = value
    				matches[key] = 1
    			elseif known == value then
    				matches[key] = matches[key] + 1
    			end
    		end
    	end
    	-- A key/value pair is in the intersection only if it was matched once
    	-- per argument.
    	local common = {}
    	for key, value in pairs(firstValue) do
    		if matches[key] == argCount then
    			common[key] = value
    		end
    	end
    	return common
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- valueIntersection
    --
    -- This returns the intersection of the values of n tables, as an array. For
    -- example, for the tables {1, 3, 4, 5, foo = 7} and {2, bar = 3, 5, 6},
    -- valueIntersection will return an array containing 3 and 5 (the order is not
    -- guaranteed).
    ------------------------------------------------------------------------------------
    --]]
    function p.valueIntersection(...)
    	local lim = select('#', ...)
    	if lim < 2 then
    		error(lim .. ' argument' .. (lim == 1 and '' or 's') .. " passed to 'valueIntersection' (minimum is 2)", 2)
    	end
    	local isNan = p.isNan
    	-- vals[v] is the number of leading tables (1 .. vals[v]) that all contain v.
    	local vals, ret = {}, {}
    	local isSameTable = true -- Tracks table equality.
    	local tableTemp -- Used to store the table from the previous loop so that we can check table equality.
    	for i = 1, lim do
    		local t = select(i, ...)
    		checkType('valueIntersection', i, t, 'table')
    		if tableTemp and t ~= tableTemp then
    			isSameTable = false
    		end
    		tableTemp = t
    		for _, v in pairs(t) do
    			-- NaNs are never equal to any other value, so they can't be in the intersection.
    			-- Which is lucky, as they also can't be table keys.
    			--
    			-- A value's count only advances from i - 1 to i. This means that a
    			-- value repeated inside one table is counted once for that table, and
    			-- a value missing from an earlier table can never reach lim. Simply
    			-- counting every occurrence would let {1, 1} and {2} "intersect" on 1.
    			if not isNan(v) and (vals[v] or 0) == i - 1 then
    				vals[v] = i
    			end
    		end
    	end
    	if isSameTable then
    		-- If all the tables are equal, then the intersection is that table (including NaNs).
    		-- All we need to do is convert it to an array and remove duplicate values.
    		for _, v in pairs(tableTemp) do
    			ret[#ret + 1] = v
    		end
    		return p.removeDuplicates(ret)
    	end
    	-- Keep only the values that were found in every one of the lim tables.
    	for val, count in pairs(vals) do
    		if count == lim then
    			ret[#ret + 1] = val
    		end
    	end
    	return ret
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- complement
    --
    -- This returns the relative complement of t1, t2, ..., in tn. The complement
    -- is of key/value pairs. This is equivalent to all the key/value pairs that are in
    -- tn but are not in t1, t2, ... tn-1.
    ------------------------------------------------------------------------------------
    --]]
    function p.complement(...)
    	local argCount = select('#', ...)
    	if argCount < 2 then
    		error(argCount .. ' argument' .. (argCount == 1 and '' or 's') .. " passed to 'complement' (minimum is 2)", 2)
    	end
    	-- From here on there are at least two tables. The last one (tn) is the
    	-- table the complement is taken in. Its pairs are copied into a new table
    	-- rather than edited in place, so that the caller's table is left untouched.
    	local last = select(argCount, ...)
    	checkType('complement', argCount, last, 'table')
    	local remaining = {}
    	for key, value in pairs(last) do
    		remaining[key] = value
    	end
    	-- Strike out every key/value pair that also occurs in t1, t2, ..., tn-1.
    	for argIndex = 1, argCount - 1 do
    		local other = select(argIndex, ...)
    		checkType('complement', argIndex, other, 'table')
    		for key, value in pairs(other) do
    			if remaining[key] == value then
    				remaining[key] = nil
    			end
    		end
    	end
    	return remaining
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- numKeys
    --
    -- This takes a table and returns an array containing all of the table's positive
    -- integer keys that have non-nil values, sorted in numerical order.
    ------------------------------------------------------------------------------------
    --]]
    function p.numKeys(t)
    	checkType('numKeys', 1, t, 'table')
    	local isPositiveInteger = p.isPositiveInteger
    	-- Gather every key that is a positive integer; the values are not needed.
    	local keys = {}
    	for key in pairs(t) do
    		if isPositiveInteger(key) then
    			keys[#keys + 1] = key
    		end
    	end
    	-- pairs gives no ordering guarantee, so sort into ascending order.
    	table.sort(keys)
    	return keys
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- affixNums
    --
    -- This takes a table and returns an array containing the numbers of keys with the
    -- specified prefix and suffix. For example, for the table
    -- {a1 = 'foo', a3 = 'bar', a6 = 'baz'} and the prefix "a", affixNums will
    -- return {1, 3, 6}.
    ------------------------------------------------------------------------------------
    --]]
    function p.affixNums(t, prefix, suffix)
    	checkType('affixNums', 1, t, 'table')
    	checkType('affixNums', 2, prefix, 'string', true)
    	checkType('affixNums', 3, suffix, 'string', true)

    	-- Escapes every pattern magic character in s, so that s is matched
    	-- literally once it is spliced into a pattern. The outer parentheses drop
    	-- gsub's second return value (the substitution count).
    	local function escapePattern(s)
    		return (s:gsub('([%(%)%%%.%[%]%*%+%-%?%^%$])', '%%%1'))
    	end

    	-- prefix and suffix are optional and default to the empty string. They are
    	-- escaped because they are literal affixes, not patterns: without escaping,
    	-- an affix such as "a-" or "x.y" would match the wrong keys or raise a
    	-- malformed-pattern error.
    	prefix = escapePattern(prefix or '')
    	suffix = escapePattern(suffix or '')
    	-- The number must not start with 0, so keys like "a0" and "a01" are ignored.
    	local pattern = '^' .. prefix .. '([1-9]%d*)' .. suffix .. '$'
    	local nums = {}
    	for k in pairs(t) do
    		-- Only string keys can carry an affix; other key types are skipped.
    		if type(k) == 'string' then
    			local num = mw.ustring.match(k, pattern)
    			if num then
    				nums[#nums + 1] = tonumber(num)
    			end
    		end
    	end
    	-- pairs gives no ordering guarantee, so sort into ascending order.
    	table.sort(nums)
    	return nums
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- compressSparseArray
    --
    -- This takes an array with one or more nil values, and removes the nil values
    -- while preserving the order, so that the array can be safely traversed with
    -- ipairs.
    ------------------------------------------------------------------------------------
    --]]
    function p.compressSparseArray(t)
    	checkType('compressSparseArray', 1, t, 'table')
    	local packed = {}
    	-- numKeys returns the positive integer keys of t in ascending order, so
    	-- copying the values in that order closes the gaps while keeping the order.
    	local keys = p.numKeys(t)
    	for i = 1, #keys do
    		packed[#packed + 1] = t[keys[i]]
    	end
    	return packed
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- sparseIpairs
    --
    -- This is an iterator for sparse arrays. It can be used like ipairs, but can
    -- handle nil values.
    ------------------------------------------------------------------------------------
    --]]
    function p.sparseIpairs(t)
    	checkType('sparseIpairs', 1, t, 'table')
    	-- Take a sorted snapshot of the positive integer keys up front; the
    	-- iterator then walks through this list one key per call.
    	local keys = p.numKeys(t)
    	local total = #keys
    	local position = 0
    	return function ()
    		position = position + 1
    		if position > total then
    			-- Out of keys: returning no values ends the generic for loop.
    			return
    		end
    		local key = keys[position]
    		return key, t[key]
    	end
    end
    
    --[[
    ------------------------------------------------------------------------------------
    -- size
    --
    -- This returns the size of a key/value pair table. It will also work on arrays,
    -- but for arrays it is more efficient to use the # operator.
    ------------------------------------------------------------------------------------
    --]]
    function p.size(t)
    	checkType('size', 1, t, 'table')
    	-- Count one for every key/value pair that pairs visits.
    	local count = 0
    	for _ in pairs(t) do
    		count = count + 1
    	end
    	return count
    end
    
    -- Return the table of functions as the module's value.
    return p