Module:String: Difference between revisions
Created page with "-- <nowiki> --[[ Source: http://en.wikipedia.org/wiki/Module:String This module is intended to provide access to basic string functions. Most of the functions provided her..."
(update urls) |
imported>Unai01 (Created page with "-- <nowiki> --[[ Source: http://en.wikipedia.org/wiki/Module:String This module is intended to provide access to basic string functions. Most of the functions provided her...") |
||
Line 1: | Line 1: | ||
-- <nowiki> | |||
--[[ | --[[ | ||
Source: http://en.wikipedia.org/wiki/Module:String | |||
This module is intended to provide access to basic string functions. | This module is intended to provide access to basic string functions. | ||
Most of the functions provided here can be invoked with named parameters, | Most of the functions provided here can be invoked with named parameters, | ||
unnamed parameters, or a mixture. If named parameters are used, MediaWiki will | unnamed parameters, or a mixture. If named parameters are used, MediaWiki will | ||
Line 8: | Line 9: | ||
Depending on the intended use, it may be advantageous to either preserve or | Depending on the intended use, it may be advantageous to either preserve or | ||
remove such whitespace. | remove such whitespace. | ||
Global options | Global options | ||
ignore_errors: If set to 'true' or 1, any error condition will result in | ignore_errors: If set to 'true' or 1, any error condition will result in | ||
an empty string being returned rather than an error message. | an empty string being returned rather than an error message. | ||
error_category: If an error occurs, specifies the name of a category to | error_category: If an error occurs, specifies the name of a category to | ||
include with the error message. The default category is | include with the error message. The default category is | ||
[Category:Errors reported by Module String]. | [Category:Errors reported by Module String]. | ||
no_category: If set to 'true' or 1, no category will be added if an error | no_category: If set to 'true' or 1, no category will be added if an error | ||
is generated. | is generated. | ||
Unit tests for this module are available at Module:String/tests. | Unit tests for this module are available at Module:String/tests. | ||
]] | ]] | ||
local str = {} | local str = {} | ||
--[[ | --[[ | ||
len | len | ||
This function returns the length of the target string. | This function returns the length of the target string. | ||
Usage: | Usage: | ||
{{#invoke:String|len|target_string|}} | {{#invoke:String|len|target_string|}} | ||
OR | OR | ||
{{#invoke:String|len|s=target_string}} | {{#invoke:String|len|s=target_string}} | ||
Parameters | Parameters | ||
s: The string whose length to report | s: The string whose length to report | ||
If invoked using named parameters, MediaWiki will automatically remove any leading or | If invoked using named parameters, MediaWiki will automatically remove any leading or | ||
trailing whitespace from the target string. | trailing whitespace from the target string. | ||
Line 46: | Line 47: | ||
return mw.ustring.len( s ) | return mw.ustring.len( s ) | ||
end | end | ||
--[[ | --[[ | ||
sub | sub | ||
This function returns a substring of the target string at specified indices. | This function returns a substring of the target string at specified indices. | ||
Usage: | Usage: | ||
{{#invoke:String|sub|target_string|start_index|end_index}} | {{#invoke:String|sub|target_string|start_index|end_index}} | ||
OR | OR | ||
{{#invoke:String|sub|s=target_string|i=start_index|j=end_index}} | {{#invoke:String|sub|s=target_string|i=start_index|j=end_index}} | ||
Parameters | Parameters | ||
s: The string to return a subset of | s: The string to return a subset of | ||
i: The first index of the substring to return, defaults to 1. | i: The first index of the substring to return, defaults to 1. | ||
j: The last index of the string to return, defaults to the last character. | j: The last index of the string to return, defaults to the last character. | ||
The first character of the string is assigned an index of 1. If either i or j | The first character of the string is assigned an index of 1. If either i or j | ||
is a negative value, it is interpreted the same as selecting a character by | is a negative value, it is interpreted the same as selecting a character by | ||
counting from the end of the string. Hence, a value of -1 is the same as | counting from the end of the string. Hence, a value of -1 is the same as | ||
selecting the last character of the string. | selecting the last character of the string. | ||
If the requested indices are out of range for the given string, an error is | If the requested indices are out of range for the given string, an error is | ||
reported. | reported. | ||
Line 75: | Line 76: | ||
local i = tonumber( new_args['i'] ) or 1; | local i = tonumber( new_args['i'] ) or 1; | ||
local j = tonumber( new_args['j'] ) or -1; | local j = tonumber( new_args['j'] ) or -1; | ||
local len = mw.ustring.len( s ); | local len = mw.ustring.len( s ); | ||
-- Convert negatives for range checking | -- Convert negatives for range checking | ||
if i < 0 then | if i < 0 then | ||
Line 85: | Line 86: | ||
j = len + j + 1; | j = len + j + 1; | ||
end | end | ||
if i > len or j > len or i < 1 or j < 1 then | if i > len or j > len or i < 1 or j < 1 then | ||
return str._error( 'String subset index out of range' ); | return str._error( 'String subset index out of range' ); | ||
Line 92: | Line 93: | ||
return str._error( 'String subset indices out of order' ); | return str._error( 'String subset indices out of order' ); | ||
end | end | ||
return mw.ustring.sub( s, i, j ) | return mw.ustring.sub( s, i, j ) | ||
end | end | ||
--[[ | --[[ | ||
This function implements the features of {{str sub old}} and is kept in order | This function implements the features of {{str sub old}} and is kept in order | ||
Line 105: | Line 106: | ||
return mw.ustring.sub( frame.args.s, i + 1, len and ( i + len ) ) | return mw.ustring.sub( frame.args.s, i + 1, len and ( i + len ) ) | ||
end | end | ||
--[[ | --[[ | ||
match | match | ||
This function returns a substring from the source string that matches a | This function returns a substring from the source string that matches a | ||
specified pattern. | specified pattern. | ||
Usage: | Usage: | ||
{{#invoke:String|match|source_string|pattern_string|start_index|match_number|plain_flag|nomatch_output}} | {{#invoke:String|match|source_string|pattern_string|start_index|match_number|plain_flag|nomatch_output}} | ||
Line 117: | Line 118: | ||
{{#invoke:String|match|s=source_string|pattern=pattern_string|start=start_index | {{#invoke:String|match|s=source_string|pattern=pattern_string|start=start_index | ||
|match=match_number|plain=plain_flag|nomatch=nomatch_output}} | |match=match_number|plain=plain_flag|nomatch=nomatch_output}} | ||
Parameters | Parameters | ||
s: The string to search | s: The string to search | ||
Line 131: | Line 132: | ||
text. Defaults to false. | text. Defaults to false. | ||
nomatch: If no match is found, output the "nomatch" value rather than an error. | nomatch: If no match is found, output the "nomatch" value rather than an error. | ||
If invoked using named parameters, MediaWiki will automatically remove any leading or | If invoked using named parameters, MediaWiki will automatically remove any leading or | ||
trailing whitespace from each string. In some circumstances this is desirable, in | trailing whitespace from each string. In some circumstances this is desirable, in | ||
other cases one may want to preserve the whitespace. | other cases one may want to preserve the whitespace. | ||
If the match_number or start_index are out of range for the string being queried, then | If the match_number or start_index are out of range for the string being queried, then | ||
this function generates an error. An error is also generated if no match is found. | this function generates an error. An error is also generated if no match is found. | ||
If one adds the parameter ignore_errors=true, then the error will be suppressed and | If one adds the parameter ignore_errors=true, then the error will be suppressed and | ||
an empty string will be returned on any failure. | an empty string will be returned on any failure. | ||
For information on constructing Lua patterns, a form of [regular expression], see: | For information on constructing Lua patterns, a form of [regular expression], see: | ||
* | * http://www.lua.org/manual/5.1/manual.html#5.4.1 | ||
* | * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns | ||
* | * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns | ||
]] | ]] | ||
function str.match( frame ) | function str.match( frame ) | ||
Line 156: | Line 157: | ||
local match_index = math.floor( tonumber(new_args['match']) or 1 ); | local match_index = math.floor( tonumber(new_args['match']) or 1 ); | ||
local nomatch = new_args['nomatch']; | local nomatch = new_args['nomatch']; | ||
if s == '' then | if s == '' then | ||
return str._error( 'Target string is empty' ); | return str._error( 'Target string is empty' ); | ||
Line 172: | Line 173: | ||
pattern = str._escapePattern( pattern ); | pattern = str._escapePattern( pattern ); | ||
end | end | ||
local result | local result | ||
if match_index == 1 then | if match_index == 1 then | ||
Line 181: | Line 182: | ||
s = mw.ustring.sub( s, start ); | s = mw.ustring.sub( s, start ); | ||
end | end | ||
local iterator = mw.ustring.gmatch(s, pattern); | local iterator = mw.ustring.gmatch(s, pattern); | ||
if match_index > 0 then | if match_index > 0 then | ||
Line 200: | Line 201: | ||
count = count + 1; | count = count + 1; | ||
end | end | ||
result = result_table[ count + match_index ]; | result = result_table[ count + match_index ]; | ||
end | end | ||
end | end | ||
if result == nil then | if result == nil then | ||
if nomatch == nil then | if nomatch == nil then | ||
Line 215: | Line 216: | ||
end | end | ||
end | end | ||
--[[ | --[[ | ||
pos | pos | ||
This function returns a single character from the target string at position pos. | This function returns a single character from the target string at position pos. | ||
Usage: | Usage: | ||
{{#invoke:String|pos|target_string|index_value}} | {{#invoke:String|pos|target_string|index_value}} | ||
OR | OR | ||
{{#invoke:String|pos|target=target_string|pos=index_value}} | {{#invoke:String|pos|target=target_string|pos=index_value}} | ||
Parameters | Parameters | ||
target: The string to search | target: The string to search | ||
pos: The index for the character to return | pos: The index for the character to return | ||
If invoked using named parameters, MediaWiki will automatically remove any leading or | If invoked using named parameters, MediaWiki will automatically remove any leading or | ||
trailing whitespace from the target string. In some circumstances this is desirable, in | trailing whitespace from the target string. In some circumstances this is desirable, in | ||
other cases one may want to preserve the whitespace. | other cases one may want to preserve the whitespace. | ||
The first character has an index value of 1. | The first character has an index value of 1. | ||
If one requests a negative value, this function will select a character by counting backwards | If one requests a negative value, this function will select a character by counting backwards | ||
from the end of the string. In other words pos = -1 is the same as asking for the last character. | from the end of the string. In other words pos = -1 is the same as asking for the last character. | ||
A requested value of zero, or a value greater than the length of the string returns an error. | A requested value of zero, or a value greater than the length of the string returns an error. | ||
]] | ]] | ||
Line 245: | Line 246: | ||
local target_str = new_args['target'] or ''; | local target_str = new_args['target'] or ''; | ||
local pos = tonumber( new_args['pos'] ) or 0; | local pos = tonumber( new_args['pos'] ) or 0; | ||
if pos == 0 or math.abs(pos) > mw.ustring.len( target_str ) then | if pos == 0 or math.abs(pos) > mw.ustring.len( target_str ) then | ||
return str._error( 'String index out of range' ); | return str._error( 'String index out of range' ); | ||
end | end | ||
return mw.ustring.sub( target_str, pos, pos ); | return mw.ustring.sub( target_str, pos, pos ); | ||
end | end | ||
--[[ | --[[ | ||
str_find | str_find | ||
This function duplicates the behavior of {{str_find}}, including all of its quirks. | This function duplicates the behavior of {{str_find}}, including all of its quirks. | ||
This is provided in order to support existing templates, but is NOT RECOMMENDED for | This is provided in order to support existing templates, but is NOT RECOMMENDED for | ||
new code and templates. New code is recommended to use the "find" function instead. | new code and templates. New code is recommended to use the "find" function instead. | ||
Returns the first index in "source" that is a match to "target". Indexing is 1-based, | Returns the first index in "source" that is a match to "target". Indexing is 1-based, | ||
and the function returns -1 if the "target" string is not present in "source". | and the function returns -1 if the "target" string is not present in "source". | ||
Important Note: If the "target" string is empty / missing, this function returns a | Important Note: If the "target" string is empty / missing, this function returns a | ||
value of "1", which is generally unexpected behavior, and must be accounted for | value of "1", which is generally unexpected behavior, and must be accounted for | ||
Line 271: | Line 272: | ||
local source_str = new_args['source'] or ''; | local source_str = new_args['source'] or ''; | ||
local target_str = new_args['target'] or ''; | local target_str = new_args['target'] or ''; | ||
if target_str == '' then | if target_str == '' then | ||
return 1; | return 1; | ||
end | end | ||
local start = mw.ustring.find( source_str, target_str, 1, true ) | local start = mw.ustring.find( source_str, target_str, 1, true ) | ||
if start == nil then | if start == nil then | ||
start = -1 | start = -1 | ||
end | end | ||
return start | return start | ||
end | end | ||
--[[ | --[[ | ||
find | find | ||
This function allows one to search for a target string or pattern within another | This function allows one to search for a target string or pattern within another | ||
string. | string. | ||
Usage: | Usage: | ||
{{#invoke:String|find|source_str|target_string|start_index|plain_flag}} | {{#invoke:String|find|source_str|target_string|start_index|plain_flag}} | ||
OR | OR | ||
{{#invoke:String|find|source=source_str|target=target_str|start=start_index|plain=plain_flag}} | {{#invoke:String|find|source=source_str|target=target_str|start=start_index|plain=plain_flag}} | ||
Parameters | Parameters | ||
source: The string to search | source: The string to search | ||
Line 301: | Line 302: | ||
plain: Boolean flag indicating that target should be understood as plain | plain: Boolean flag indicating that target should be understood as plain | ||
text and not as a Lua style regular expression, defaults to true | text and not as a Lua style regular expression, defaults to true | ||
If invoked using named parameters, MediaWiki will automatically remove any leading or | If invoked using named parameters, MediaWiki will automatically remove any leading or | ||
trailing whitespace from the parameter. In some circumstances this is desirable, in | trailing whitespace from the parameter. In some circumstances this is desirable, in | ||
other cases one may want to preserve the whitespace. | other cases one may want to preserve the whitespace. | ||
This function returns the first index >= "start" where "target" can be found | This function returns the first index >= "start" where "target" can be found | ||
within "source". Indices are 1-based. If "target" is not found, then this | within "source". Indices are 1-based. If "target" is not found, then this | ||
function returns 0. If either "source" or "target" is missing or empty, this | function returns 0. If either "source" or "target" is missing or empty, this | ||
function also returns 0. | function also returns 0. | ||
This function should be safe for UTF-8 strings. | This function should be safe for UTF-8 strings. | ||
]] | ]] | ||
Line 319: | Line 320: | ||
local start_pos = tonumber(new_args['start']) or 1; | local start_pos = tonumber(new_args['start']) or 1; | ||
local plain = new_args['plain'] or true; | local plain = new_args['plain'] or true; | ||
if source_str == '' or pattern == '' then | if source_str == '' or pattern == '' then | ||
return 0; | return 0; | ||
end | end | ||
plain = str._getBoolean( plain ); | plain = str._getBoolean( plain ); | ||
local start = mw.ustring.find( source_str, pattern, start_pos, plain ) | local start = mw.ustring.find( source_str, pattern, start_pos, plain ) | ||
if start == nil then | if start == nil then | ||
start = 0 | start = 0 | ||
end | end | ||
return start | return start | ||
end | end | ||
--[[ | --[[ | ||
replace | replace | ||
This function allows one to replace a target string or pattern within another | This function allows one to replace a target string or pattern within another | ||
string. | string. | ||
Usage: | Usage: | ||
{{#invoke:String|replace|source_str|pattern_string|replace_string|replacement_count|plain_flag}} | {{#invoke:String|replace|source_str|pattern_string|replace_string|replacement_count|plain_flag}} | ||
Line 345: | Line 346: | ||
{{#invoke:String|replace|source=source_string|pattern=pattern_string|replace=replace_string| | {{#invoke:String|replace|source=source_string|pattern=pattern_string|replace=replace_string| | ||
count=replacement_count|plain=plain_flag}} | count=replacement_count|plain=plain_flag}} | ||
Parameters | Parameters | ||
source: The string to search | source: The string to search | ||
Line 361: | Line 362: | ||
local count = tonumber( new_args['count'] ); | local count = tonumber( new_args['count'] ); | ||
local plain = new_args['plain'] or true; | local plain = new_args['plain'] or true; | ||
if source_str == '' or pattern == '' then | if source_str == '' or pattern == '' then | ||
return source_str; | return source_str; | ||
end | end | ||
plain = str._getBoolean( plain ); | plain = str._getBoolean( plain ); | ||
if plain then | if plain then | ||
pattern = str._escapePattern( pattern ); | pattern = str._escapePattern( pattern ); | ||
replace = mw.ustring.gsub( replace, "%%", "%%%%" ); --Only need to escape replacement sequences. | replace = mw.ustring.gsub( replace, "%%", "%%%%" ); --Only need to escape replacement sequences. | ||
end | end | ||
local result; | local result; | ||
if count ~= nil then | if count ~= nil then | ||
result = mw.ustring.gsub( source_str, pattern, replace, count ); | result = mw.ustring.gsub( source_str, pattern, replace, count ); | ||
Line 379: | Line 380: | ||
result = mw.ustring.gsub( source_str, pattern, replace ); | result = mw.ustring.gsub( source_str, pattern, replace ); | ||
end | end | ||
return result; | return result; | ||
end | end | ||
--[[ | --[[ | ||
simple function to pipe string.rep to templates. | simple function to pipe string.rep to templates. | ||
]] | ]] | ||
function str.rep( frame ) | function str.rep( frame ) | ||
local repetitions = tonumber( frame.args[2] ) | local repetitions = tonumber( frame.args[2] ) | ||
Line 394: | Line 395: | ||
return string.rep( frame.args[1] or '', repetitions ) | return string.rep( frame.args[1] or '', repetitions ) | ||
end | end | ||
--[[ | --[[ | ||
Helper function that populates the argument list given that user may need to use a mix of | Helper function that populates the argument list given that user may need to use a mix of | ||
Line 405: | Line 406: | ||
local index = 1; | local index = 1; | ||
local value; | local value; | ||
for i,arg in ipairs( arg_list ) do | for i,arg in ipairs( arg_list ) do | ||
value = frame_args[arg] | value = frame_args[arg] | ||
Line 414: | Line 415: | ||
new_args[arg] = value; | new_args[arg] = value; | ||
end | end | ||
return new_args; | return new_args; | ||
end | end | ||
--[[ | --[[ | ||
Helper function to handle error messages. | Helper function to handle error messages. | ||
Line 426: | Line 427: | ||
local ignore_errors = frame.args.ignore_errors or false; | local ignore_errors = frame.args.ignore_errors or false; | ||
local no_category = frame.args.no_category or false; | local no_category = frame.args.no_category or false; | ||
if str._getBoolean(ignore_errors) then | if str._getBoolean(ignore_errors) then | ||
return ''; | return ''; | ||
end | end | ||
local error_str = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'; | local error_str = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'; | ||
if error_category ~= '' and not str._getBoolean( no_category ) then | if error_category ~= '' and not str._getBoolean( no_category ) then | ||
error_str = '[[Category:' .. error_category .. ']]' .. error_str; | error_str = '[[Category:' .. error_category .. ']]' .. error_str; | ||
end | end | ||
return error_str; | return error_str; | ||
end | end | ||
--[[ | --[[ | ||
Helper Function to interpret boolean strings | Helper Function to interpret boolean strings | ||
Line 444: | Line 445: | ||
function str._getBoolean( boolean_str ) | function str._getBoolean( boolean_str ) | ||
local boolean_value; | local boolean_value; | ||
if type( boolean_str ) == 'string' then | if type( boolean_str ) == 'string' then | ||
boolean_str = boolean_str:lower(); | boolean_str = boolean_str:lower(); | ||
Line 460: | Line 461: | ||
return boolean_value | return boolean_value | ||
end | end | ||
--[[ | --[[ | ||
Helper function that escapes all pattern characters so that they will be treated | Helper function that escapes all pattern characters so that they will be treated | ||
Line 468: | Line 469: | ||
return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ); | return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ); | ||
end | end | ||
return str | return str |