Module:String: Difference between revisions
separate annotations for str.match from those for str._match
m (48 revisions imported from templatewiki:Module:String) |
(separate annotations for str.match from those for str._match) |
||
Line 1: | Line 1: | ||
--[[ | --[[ | ||
This module is intended to provide access to basic string functions. | This module is intended to provide access to basic string functions. | ||
Most of the functions provided here can be invoked with named parameters, | Most of the functions provided here can be invoked with named parameters, | ||
unnamed parameters, or a mixture. If named parameters are used, Mediawiki will | unnamed parameters, or a mixture. If named parameters are used, Mediawiki will | ||
automatically remove any leading or trailing whitespace from the parameter. | automatically remove any leading or trailing whitespace from the parameter. | ||
Depending on the intended use, it may be advantageous to either preserve or | Depending on the intended use, it may be advantageous to either preserve or | ||
remove such whitespace. | remove such whitespace. | ||
Global options | Global options | ||
ignore_errors: If set to 'true' or 1, any error condition will result in | ignore_errors: If set to 'true' or 1, any error condition will result in | ||
an empty string being returned rather than an error message. | an empty string being returned rather than an error message. | ||
error_category: If an error occurs, specifies the name of a category to | error_category: If an error occurs, specifies the name of a category to | ||
include with the error message. The default category is | include with the error message. The default category is | ||
[Category:Errors reported by Module String]. | [Category:Errors reported by Module String]. | ||
no_category: If set to 'true' or 1, no category will be added if an error | no_category: If set to 'true' or 1, no category will be added if an error | ||
is generated. | is generated. | ||
Unit tests for this module are available at Module:String/tests. | Unit tests for this module are available at Module:String/tests. | ||
]] | ]] | ||
Line 39: | Line 39: | ||
If invoked using named parameters, Mediawiki will automatically remove any leading or | If invoked using named parameters, Mediawiki will automatically remove any leading or | ||
trailing whitespace from the target string. | trailing whitespace from the target string. | ||
]] | ]] | ||
-- Entry point for #invoke:String|len.
-- Reads the target string from the named argument "s" (or, when that is
-- absent, from the first positional argument) and returns whatever
-- mw.ustring.len reports for it. A missing argument counts as ''.
function str.len( frame )
    local params = str._getParameters( frame.args, { 's' } )
    local target = params.s
    if not target then
        target = ''
    end
    return mw.ustring.len( target )
end
Line 61: | Line 61: | ||
i: The first index of the substring to return, defaults to 1. | i: The first index of the substring to return, defaults to 1. | ||
j: The last index of the string to return, defaults to the last character. | j: The last index of the string to return, defaults to the last character. | ||
The first character of the string is assigned an index of 1. If either i or j | The first character of the string is assigned an index of 1. If either i or j | ||
is a negative value, it is interpreted the same as selecting a character by | is a negative value, it is interpreted the same as selecting a character by | ||
counting from the end of the string. Hence, a value of -1 is the same as | counting from the end of the string. Hence, a value of -1 is the same as | ||
selecting the last character of the string. | selecting the last character of the string. | ||
If the requested indices are out of range for the given string, an error is | If the requested indices are out of range for the given string, an error is | ||
reported. | reported. | ||
]] | ]] | ||
-- Entry point for #invoke:String|sub.
-- Arguments (named or positional, in this order): s, i, j.
--   s: source string, '' when missing
--   i: first index, 1 when missing or not numeric
--   j: last index, -1 (last character) when missing or not numeric
-- Returns the substring s[i..j], or the result of str._error when the
-- indices fall outside the string or are given in the wrong order.
function str.sub( frame )
    local params = str._getParameters( frame.args, { 's', 'i', 'j' } )
    local text = params.s or ''
    local first = tonumber( params.i ) or 1
    local last = tonumber( params.j ) or -1
    local length = mw.ustring.len( text )

    -- Translate count-from-the-end indices into absolute positions so the
    -- range checks below only have to deal with one convention.
    if first < 0 then
        first = length + first + 1
    end
    if last < 0 then
        last = length + last + 1
    end

    -- The range check deliberately runs before the ordering check.
    local out_of_range = first > length or last > length or first < 1 or last < 1
    if out_of_range then
        return str._error( 'String subset index out of range' )
    end
    if last < first then
        return str._error( 'String subset indices out of order' )
    end

    return mw.ustring.sub( text, first, last )
end
Line 101: | Line 101: | ||
]] | ]] | ||
-- Entry point for #invoke:String|sublength.
-- Reads its arguments by name only, directly from frame.args:
--   s:   source string; a missing value is treated as '' (previously nil
--        was handed straight to mw.ustring.sub, which raised a script error
--        instead of behaving like the other functions of this module)
--   i:   zero-based offset of the first character to return, 0 when missing
--        or not numeric
--   len: number of characters to return; when missing or not numeric the
--        end index is left nil and mw.ustring.sub applies its own default
-- Returns the result of mw.ustring.sub( s, i + 1, i + len ).
function str.sublength( frame )
    local source = frame.args.s or ''
    local offset = tonumber( frame.args.i ) or 0
    local length = tonumber( frame.args.len )
    -- Yields nil (no explicit end index) when no usable length was supplied.
    local last = length and ( offset + length )
    return mw.ustring.sub( source, offset + 1, last )
end
--[[ | |||
_match | |||
This function returns a substring from the source string that matches a | |||
specified pattern. It is exported for use in other modules. | |||
Usage: | |||
strmatch = require("Module:String")._match | |||
sresult = strmatch( s, pattern, start, match, plain, nomatch ) | |||
Parameters | |||
s: The string to search | |||
pattern: The pattern or string to find within the string | |||
start: The index within the source string to start the search. The first | |||
character of the string has index 1. Defaults to 1. | |||
match: In some cases it may be possible to make multiple matches on a single | |||
string. This specifies which match to return, where the first match is | |||
match= 1. If a negative number is specified then a match is returned | |||
counting from the last match. Hence match = -1 is the same as requesting | |||
the last match. Defaults to 1. | |||
plain: A flag indicating that the pattern should be understood as plain | |||
text. Defaults to false. | |||
nomatch: If no match is found, output the "nomatch" value rather than an error. | |||
For information on constructing Lua patterns, a form of [regular expression], see: | |||
* http://www.lua.org/manual/5.1/manual.html#5.4.1 | |||
* http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns | |||
* http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns | |||
]] | |||
-- This sub-routine is exported for use in other modules | |||
-- Core of the "match" function, callable directly from other modules.
-- Parameters:
--   s:           string to search
--   pattern:     pattern (or plain text, see plain_flag) to look for
--   start:       index at which the search begins; converted with tonumber,
--                1 when missing or not numeric. Negative values count from
--                the end of the string.
--   match_index: which match to return (1 = first, -1 = last). Converted
--                with tonumber and floored; defaults to 1 when missing or
--                not numeric, as the documentation of this function
--                promises. 0 is rejected.
--   plain_flag:  when truthy the pattern is escaped and matched literally
--   nomatch:     value returned instead of an error when nothing matches
-- Returns the matched text, `nomatch`, or the result of str._error.
function str._match( s, pattern, start, match_index, plain_flag, nomatch )
    if s == '' then
        return str._error( 'Target string is empty' )
    end
    if pattern == '' then
        return str._error( 'Pattern string is empty' )
    end

    start = tonumber( start ) or 1
    if math.abs( start ) < 1 or math.abs( start ) > mw.ustring.len( s ) then
        return str._error( 'Requested start is out of range' )
    end

    -- Callers from other modules may omit match_index altogether; without
    -- this default the numeric comparisons further down would be applied
    -- to nil and raise an error.
    match_index = math.floor( tonumber( match_index ) or 1 )
    if match_index == 0 then
        return str._error( 'Match index is out of range' )
    end

    if plain_flag then
        pattern = str._escapePattern( pattern )
    end

    local result
    if match_index == 1 then
        -- Finding the first match is the simple case.
        result = mw.ustring.match( s, pattern, start )
    else
        -- gmatch has no start parameter, so cut the string down first.
        -- Testing "~= 1" rather than "> 1" makes a negative start (counted
        -- from the end) take effect here as well, in line with the
        -- first-match branch above.
        if start ~= 1 then
            s = mw.ustring.sub( s, start )
        end

        local iterator = mw.ustring.gmatch( s, pattern )
        if match_index > 0 then
            -- Forward search: count matches down until the wanted one.
            for w in iterator do
                match_index = match_index - 1
                if match_index == 0 then
                    result = w
                    break
                end
            end
        else
            -- Reverse search: collect every match, then index from the end.
            local result_table = {}
            local count = 1
            for w in iterator do
                result_table[count] = w
                count = count + 1
            end
            -- count is one past the last stored match, so -1 selects the last.
            result = result_table[ count + match_index ]
        end
    end

    if result == nil then
        if nomatch == nil then
            return str._error( 'Match not found' )
        else
            return nomatch
        end
    else
        return result
    end
end
Line 109: | Line 202: | ||
match | match | ||
This function returns a substring from the source string that matches a | This function returns a substring from the source string that matches a | ||
specified pattern. | specified pattern. | ||
Line 123: | Line 216: | ||
start: The index within the source string to start the search. The first | start: The index within the source string to start the search. The first | ||
character of the string has index 1. Defaults to 1. | character of the string has index 1. Defaults to 1. | ||
match: In some cases it may be possible to make multiple matches on a single | match: In some cases it may be possible to make multiple matches on a single | ||
string. This specifies which match to return, where the first match is | string. This specifies which match to return, where the first match is | ||
match= 1. If a negative number is specified then a match is returned | match= 1. If a negative number is specified then a match is returned | ||
counting from the last match. Hence match = -1 is the same as requesting | counting from the last match. Hence match = -1 is the same as requesting | ||
the last match. Defaults to 1. | the last match. Defaults to 1. | ||
Line 133: | Line 226: | ||
If invoked using named parameters, Mediawiki will automatically remove any leading or | If invoked using named parameters, Mediawiki will automatically remove any leading or | ||
trailing whitespace from each string. In some circumstances this is desirable, in | trailing whitespace from each string. In some circumstances this is desirable, in | ||
other cases one may want to preserve the whitespace. | other cases one may want to preserve the whitespace. | ||
If the match_number or start_index are out of range for the string being queried, then | If the match_number or start_index are out of range for the string being queried, then | ||
this function generates an error. An error is also generated if no match is found. | this function generates an error. An error is also generated if no match is found. | ||
If one adds the parameter ignore_errors=true, then the error will be suppressed and | If one adds the parameter ignore_errors=true, then the error will be suppressed and | ||
an empty string will be returned on any failure. | an empty string will be returned on any failure. | ||
For information on constructing Lua patterns, a form of [regular expression], see: | For information on constructing Lua patterns, a form of [regular expression], see: | ||
* | * http://www.lua.org/manual/5.1/manual.html#5.4.1 | ||
* | * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns | ||
* | * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns | ||
]] | ]] | ||
-- This is the entry point for #invoke:String|match | |||
-- Collects the template arguments (s, pattern, start, match, plain,
-- nomatch; named or positional in that order), applies the defaults and
-- delegates to str._match.
function str.match( frame )
    local params = str._getParameters(
        frame.args,
        { 's', 'pattern', 'start', 'match', 'plain', 'nomatch' }
    )
    return str._match(
        params.s or '',                              -- string to search
        params.pattern or '',                        -- pattern to look for
        tonumber( params.start ) or 1,               -- start index
        math.floor( tonumber( params.match ) or 1 ), -- which match to return
        str._getBoolean( params.plain or false ),    -- literal matching?
        params.nomatch                               -- fallback, may be nil
    )
end
Line 231: | Line 269: | ||
If invoked using named parameters, Mediawiki will automatically remove any leading or | If invoked using named parameters, Mediawiki will automatically remove any leading or | ||
trailing whitespace from the target string. In some circumstances this is desirable, in | trailing whitespace from the target string. In some circumstances this is desirable, in | ||
other cases one may want to preserve the whitespace. | other cases one may want to preserve the whitespace. | ||
The first character has an index value of 1. | The first character has an index value of 1. | ||
If one requests a negative value, this function will select a character by counting backwards | If one requests a negative value, this function will select a character by counting backwards | ||
from the end of the string. In other words pos = -1 is the same as asking for the last character. | from the end of the string. In other words pos = -1 is the same as asking for the last character. | ||
Line 242: | Line 280: | ||
]] | ]] | ||
-- Entry point for #invoke:String|pos.
-- Arguments (named or positional, in this order): target, pos.
-- Returns the single character of `target` at index `pos`, or the result of
-- str._error when pos is 0, missing, not numeric, or larger in magnitude
-- than the length of the string.
function str.pos( frame )
    local params = str._getParameters( frame.args, { 'target', 'pos' } )
    local text = params.target or ''
    local index = tonumber( params.pos ) or 0

    -- Index 0 never designates a character (a missing pos ends up here too).
    if index == 0 then
        return str._error( 'String index out of range' )
    end
    -- Neither does anything beyond either end of the string.
    if math.abs( index ) > mw.ustring.len( text ) then
        return str._error( 'String index out of range' )
    end

    return mw.ustring.sub( text, index, index )
end
Line 257: | Line 295: | ||
This function duplicates the behavior of {{str_find}}, including all of its quirks. | This function duplicates the behavior of {{str_find}}, including all of its quirks. | ||
This is provided in order to support existing templates, but is NOT RECOMMENDED for | This is provided in order to support existing templates, but is NOT RECOMMENDED for | ||
new code and templates. New code is recommended to use the "find" function instead. | new code and templates. New code is recommended to use the "find" function instead. | ||
Line 268: | Line 306: | ||
]] | ]] | ||
-- Entry point for #invoke:String|str_find.
-- Arguments (named or positional, in this order): source, target.
-- Returns 1 when target is empty or missing, the index of the first plain
-- text occurrence of target in source otherwise, and -1 when there is none.
function str.str_find( frame )
    local params = str._getParameters( frame.args, { 'source', 'target' } )
    local haystack = params.source or ''
    local needle = params.target or ''

    if needle == '' then
        return 1
    end

    -- The fourth argument requests a plain search; "or" keeps only the first
    -- result of find and substitutes -1 when that result is nil.
    return mw.ustring.find( haystack, needle, 1, true ) or -1
end
Line 303: | Line 341: | ||
If invoked using named parameters, Mediawiki will automatically remove any leading or | If invoked using named parameters, Mediawiki will automatically remove any leading or | ||
trailing whitespace from the parameter. In some circumstances this is desirable, in | trailing whitespace from the parameter. In some circumstances this is desirable, in | ||
other cases one may want to preserve the whitespace. | other cases one may want to preserve the whitespace. | ||
This function returns the first index >= "start" where "target" can be found | This function returns the first index >= "start" where "target" can be found | ||
within "source". Indices are 1-based. If "target" is not found, then this | within "source". Indices are 1-based. If "target" is not found, then this | ||
function returns 0. If either "source" or "target" is missing / empty, this | function returns 0. If either "source" or "target" is missing / empty, this | ||
function also returns 0. | function also returns 0. | ||
Line 314: | Line 352: | ||
]] | ]] | ||
-- Entry point for #invoke:String|find.
-- Arguments (named or positional, in this order): source, target, start,
-- plain.
--   start: index at which the search begins, 1 when missing or not numeric
--   plain: interpreted by str._getBoolean; treated as true when absent
-- Returns the index of the first match, or 0 when nothing is found or when
-- source or target is empty.
function str.find( frame )
    local params = str._getParameters( frame.args, { 'source', 'target', 'start', 'plain' } )
    local haystack = params.source or ''
    local needle = params.target or ''
    local init = tonumber( params.start ) or 1
    local plain = params.plain
    if not plain then
        plain = true
    end

    if haystack == '' or needle == '' then
        return 0
    end

    -- "or" keeps only the first result of find (the start index).
    return mw.ustring.find( haystack, needle, init, str._getBoolean( plain ) ) or 0
end
Line 352: | Line 390: | ||
count: The number of occurrences to replace, defaults to all. | count: The number of occurrences to replace, defaults to all. | ||
plain: Boolean flag indicating that pattern should be understood as plain | plain: Boolean flag indicating that pattern should be understood as plain | ||
text and not as a Lua style regular expression, defaults to true | text and not as a Lua style regular expression, defaults to true | ||
]] | ]] | ||
-- Entry point for #invoke:String|replace.
-- Arguments (named or positional, in this order): source, pattern, replace,
-- count, plain.
--   count: maximum number of replacements; no limit is passed on when it is
--          missing or not numeric
--   plain: interpreted by str._getBoolean; treated as true when absent
-- Returns the source string with the replacements applied. The source is
-- returned untouched when it or the pattern is empty.
function str.replace( frame )
    local params = str._getParameters( frame.args, { 'source', 'pattern', 'replace', 'count', 'plain' } )
    local subject = params.source or ''
    local pattern = params.pattern or ''
    local replacement = params.replace or ''
    local limit = tonumber( params.count )
    local plain = params.plain
    if not plain then
        plain = true
    end

    if subject == '' or pattern == '' then
        return subject
    end

    if str._getBoolean( plain ) then
        pattern = str._escapePattern( pattern )
        -- In the replacement text only '%' is special, so it is the only
        -- character that has to be escaped.
        replacement = mw.ustring.gsub( replacement, "%%", "%%%%" )
    end

    -- Single assignments keep only the first result of gsub (the new string).
    local result
    if limit == nil then
        result = mw.ustring.gsub( subject, pattern, replacement )
    else
        result = mw.ustring.gsub( subject, pattern, replacement, limit )
    end
    return result
end
--[[ | --[[ | ||
simple function to pipe string.rep to templates. | simple function to pipe string.rep to templates. | ||
]] | ]] | ||
-- Entry point for #invoke:String|rep.
-- Positional arguments: 1 = string to repeat ('' when missing),
-- 2 = number of repetitions.
-- Returns string.rep of the two, or the result of str._error when the
-- second argument cannot be converted to a number.
function str.rep( frame )
    local raw_count = frame.args[2]
    local count = tonumber( raw_count )
    if count then
        return string.rep( frame.args[1] or '', count )
    end
    return str._error( 'function rep expects a number as second parameter, received "' .. ( raw_count or '' ) .. '"' )
end
--[[ | |||
escapePattern | |||
This function escapes special characters from a Lua string pattern. See [1] | |||
for details on how patterns work. | |||
[1] https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns | |||
Usage: | |||
{{#invoke:String|escapePattern|pattern_string}} | |||
Parameters | |||
pattern_string: The pattern string to escape. | |||
]] | |||
-- Entry point for #invoke:String|escapePattern.
-- Positional argument 1 is the pattern string to escape.
-- Returns the escaped pattern, or the result of str._error when no pattern
-- string was supplied.
function str.escapePattern( frame )
    local raw_pattern = frame.args[1]
    if raw_pattern then
        -- The parentheses keep exactly one return value from the helper.
        return ( str._escapePattern( raw_pattern ) )
    end
    return str._error( 'No pattern string specified' )
end
--[[ | |||
count | |||
This function counts the number of occurrences of one string in another. | |||
]] | |||
-- Entry point for #invoke:String|count.
-- Arguments (named or positional, in this order): source, pattern, plain.
--   plain: interpreted by str._getBoolean; treated as true when absent
-- Returns the number of substitutions mw.ustring.gsub performs when every
-- occurrence of the pattern in source is replaced by the empty string.
function str.count(frame)
    local params = str._getParameters(frame.args, {'source', 'pattern', 'plain'})
    local haystack = params.source or ''
    local needle = params.pattern or ''

    if str._getBoolean(params.plain or true) then
        needle = str._escapePattern(needle)
    end

    -- gsub returns the new string first and the substitution count second;
    -- select( 2, ... ) picks the count and the parentheses drop the rest.
    return ( select( 2, mw.ustring.gsub(haystack, needle, '') ) )
end
--[[ | |||
endswith | |||
This function determines whether a string ends with another string. | |||
]] | |||
-- Entry point for #invoke:String|endswith.
-- Arguments (named or positional, in this order): source, pattern.
-- Returns "yes" when source ends with the literal text of pattern (always
-- the case for an empty pattern) and "" otherwise.
function str.endswith(frame)
    local params = str._getParameters(frame.args, {'source', 'pattern'})
    local text = params.source or ''
    local suffix = params.pattern or ''

    -- All strings end with the empty string.
    if suffix == '' then
        return "yes"
    end

    -- Compare the last len(suffix) characters of the text with the suffix.
    local tail = mw.ustring.sub(text, -mw.ustring.len(suffix), -1)
    if tail ~= suffix then
        return ""
    end
    return "yes"
end
function str. | --[[ | ||
join | |||
Join all non empty arguments together; the first argument is the separator. | |||
Usage: | |||
{{#invoke:String|join|sep|one|two|three}} | |||
]] | |||
-- Entry point for #invoke:String|join.
-- The first positional argument is the separator; every following
-- positional argument that is not the empty string is joined with it.
-- Returns the joined string ('' when nothing is left to join).
function str.join(frame)
    local pieces = {}
    local separator
    for _, value in ipairs( frame.args ) do
        if not separator then
            -- Nothing stored yet: this value is the separator.
            separator = value
        elseif value ~= '' then
            pieces[#pieces + 1] = value
        end
    end
    return table.concat( pieces, separator or '' )
end
Line 402: | Line 519: | ||
]] | ]] | ||
-- Resolves a list of expected argument names against the supplied arguments.
-- For each name in arg_list, in order, the named argument is used when it
-- is present; otherwise the next positional argument not yet consumed is
-- taken in its place. Positional slots are only consumed by names that had
-- no named value.
-- Returns a new table mapping each name to its resolved value (nil when
-- neither source provided one).
function str._getParameters( frame_args, arg_list )
    local resolved = {}
    local next_positional = 1
    for _, name in ipairs( arg_list ) do
        local value = frame_args[name]
        if value == nil then
            value = frame_args[next_positional]
            next_positional = next_positional + 1
        end
        resolved[name] = value
    end
    return resolved
end
--[[ | --[[ | ||
Line 422: | Line 539: | ||
]] | ]] | ||
-- Builds the value returned to the page when a function of this module
-- fails. The options are read from the arguments of the current frame:
--   ignore_errors:  when str._getBoolean accepts it as true, '' is returned
--   error_category: category added to the message, defaults to
--                   'Errors reported by Module String'; '' disables it
--   no_category:    when true, no category is added
-- Returns the error text wrapped in <strong class="error">, prefixed with
-- the category link unless that is suppressed.
function str._error( error_str )
    local args = mw.getCurrentFrame().args
    local category = args.error_category or 'Errors reported by Module String'
    local suppress = args.ignore_errors or false
    local uncategorized = args.no_category or false

    if str._getBoolean( suppress ) then
        return ''
    end

    local message = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'
    if category ~= '' and not str._getBoolean( uncategorized ) then
        message = '[[Category:' .. category .. ']]' .. message
    end
    return message
end
Line 443: | Line 560: | ||
]] | ]] | ||
-- Interprets a template argument as a boolean.
--   boolean: returned unchanged
--   string:  false for 'false', 'no', '0' and '' (compared after
--            lower-casing), true for every other string
--   other:   raises the error 'No boolean value found'
function str._getBoolean( boolean_str )
    local kind = type( boolean_str )
    if kind == 'boolean' then
        return boolean_str
    end
    if kind ~= 'string' then
        error( 'No boolean value found' )
    end

    local lowered = boolean_str:lower()
    local is_false = lowered == 'false' or lowered == 'no' or lowered == '0'
        or lowered == ''
    return not is_false
end
--[[ | --[[ | ||
Helper function that escapes all pattern characters so that they will be treated | Helper function that escapes all pattern characters so that they will be treated | ||
as plain text. | as plain text. | ||
]] | ]] | ||
-- Escapes every pattern magic character in pattern_str with '%' so that the
-- result matches the original text literally.
-- Returns the escaped string only. mw.ustring.gsub also yields the number
-- of substitutions as a second result; the surrounding parentheses discard
-- it so that it cannot leak into a caller's argument list or return values
-- (for example `return str._escapePattern( x )` or `f( str._escapePattern( x ) )`).
function str._escapePattern( pattern_str )
    return ( mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ) )
end
return str | return str |