Module:String: Difference between revisions
improves commenting, adds whitespace and error handling to sub, improvements to error handling generally.
m>Dragons flight (add description and whitespace handling option for len) |
m>Dragons flight (improves commenting, adds whitespace and error handling to sub, improvements to error handling generally.) |
||
Line 1: | Line 1: | ||
--[[ | |||
This module is intended to provide access to basic string functions. | |||
Most of the functions provided here can be invoked with named parameters, | |||
unnamed parameters, or a mixture. If named parameters are used, Mediawiki will | |||
automatically remove any leading or trailing whitespace from the parameter. | |||
Depending on the intended use, it may be advantageous to either preserve or | |||
remove such whitespace. | |||
Global options | |||
ignore_errors: If set to 'true' or 1, any error condition will result in | |||
an empty string being returned rather than an error message. | |||
error_category: If an error occurs, specifies the name of a category to | |||
include with the error message. The default category is | |||
[Category:Errors reported by Module String]. If set to an empty | |||
string, no category will be added during an error. | |||
]] | |||
local str = {} | local str = {} | ||
--[ | --[[ | ||
len | len | ||
Line 9: | Line 29: | ||
{{#invoke:String|len|target_string|}} | {{#invoke:String|len|target_string|}} | ||
OR | OR | ||
{{#invoke:String| | {{#invoke:String|len|s=target_string}} | ||
Parameters | Parameters | ||
Line 16: | Line 36: | ||
If invoked using named parameters, Mediawiki will automatically remove any leading or | If invoked using named parameters, Mediawiki will automatically remove any leading or | ||
trailing whitespace from the target string. | trailing whitespace from the target string. | ||
] | ]] | ||
function str.len( frame ) | function str.len( frame ) | ||
local new_args = str._getParameters( frame.args, {'s'} ); | local new_args = str._getParameters( frame.args, {'s'} ); | ||
Line 23: | Line 43: | ||
end | end | ||
--[[
sub

This function returns a substring of the target string at specified indices.

Usage:
{{#invoke:String|sub|target_string|start_index|end_index}}
OR
{{#invoke:String|sub|s=target_string|i=start_index|j=end_index}}

Parameters
    s: The string to return a subset of
    i: The first index of the substring to return, defaults to 1.
    j: The last index of the substring to return, defaults to the last character.

The first character of the string is assigned an index of 1.  If either i or j
is a negative value, it is interpreted the same as selecting a character by
counting from the end of the string.  Hence, a value of -1 is the same as
selecting the last character of the string.

A value of i or j that cannot be converted to a number falls back to its
default (1 and -1 respectively).

If the requested indices are out of range for the given string, an error is
reported.  Because index 1 is already out of range for an empty string, an
empty target string always produces the out-of-range error.  If both indices
are in range but j comes before i, an "out of order" error is reported instead.

Returns the requested substring, or whatever str._error returns for the
error conditions described above.
]]
function str.sub( frame )
    local new_args = str._getParameters( frame.args, { 's', 'i', 'j' } );
    local s = new_args['s'] or '';
    local i = tonumber( new_args['i'] ) or 1;
    local j = tonumber( new_args['j'] ) or -1;
    local len = mw.ustring.len( s );

    -- Convert negatives for range checking
    if i < 0 then
        i = len + i + 1;
    end
    if j < 0 then
        j = len + j + 1;
    end

    -- Check the range before the ordering.  A negative index that reaches
    -- before the start of the string is still below 1 after conversion; if
    -- the ordering were tested first it would be misreported as "out of
    -- order" rather than "out of range".
    if i > len or j > len or i < 1 or j < 1 then
        return str._error( 'String subset index out of range' );
    end
    if j < i then
        return str._error( 'String subset indices out of order' );
    end

    return mw.ustring.sub( s, i, j )
end
Line 37: | Line 102: | ||
end | end | ||
--[ | --[[ | ||
pos | pos | ||
Line 61: | Line 126: | ||
A requested value of zero, or a value greater than the length of the string returns an error. | A requested value of zero, or a value greater than the length of the string returns an error. | ||
] | ]] | ||
function str.pos( frame ) | function str.pos( frame ) | ||
local new_args = str._getParameters( frame.args, {'target', 'pos'} ); | local new_args = str._getParameters( frame.args, {'target', 'pos'} ); | ||
Line 68: | Line 133: | ||
if pos == 0 or math.abs(pos) > mw.ustring.len( target_str ) then | if pos == 0 or math.abs(pos) > mw.ustring.len( target_str ) then | ||
return ' | return str._error( 'String index out of range' ); | ||
end | end | ||
Line 74: | Line 139: | ||
end | end | ||
--[ | --[[ | ||
str_find | str_find | ||
Line 87: | Line 152: | ||
value of "1", which is generally unexpected behavior, and must be accounted for | value of "1", which is generally unexpected behavior, and must be accounted for | ||
separately. | separately. | ||
] | ]] | ||
function str.str_find( frame ) | function str.str_find( frame ) | ||
local new_args = str._getParameters( frame.args, {'source', 'target'} ); | local new_args = str._getParameters( frame.args, {'source', 'target'} ); | ||
Line 105: | Line 170: | ||
end | end | ||
--[ | --[[ | ||
find | find | ||
Line 133: | Line 198: | ||
This function should be safe for UTF-8 strings. | This function should be safe for UTF-8 strings. | ||
] | ]] | ||
function str.find( frame ) | function str.find( frame ) | ||
local new_args = str._getParameters( frame.args, {'source', 'target', 'start', 'plain' } ); | local new_args = str._getParameters( frame.args, {'source', 'target', 'start', 'plain' } ); | ||
Line 155: | Line 220: | ||
end | end | ||
--[ | --[[ | ||
replace | replace | ||
Line 174: | Line 239: | ||
plain: Boolean flag indicating that pattern should be understood as plain | plain: Boolean flag indicating that pattern should be understood as plain | ||
text and not as a Lua style regular expression, defaults to true | text and not as a Lua style regular expression, defaults to true | ||
] | ]] | ||
function str.replace( frame ) | function str.replace( frame ) | ||
local new_args = str._getParameters( frame.args, {'source', 'pattern', 'replace', 'count', 'plain' } ); | local new_args = str._getParameters( frame.args, {'source', 'pattern', 'replace', 'count', 'plain' } ); | ||
Line 204: | Line 269: | ||
end | end | ||
--[ | --[[ | ||
Helper function that populates the argument list given that user may need to use a mix of | Helper function that populates the argument list given that user may need to use a mix of | ||
named and unnamed parameters. This is relevant because named parameters are not | named and unnamed parameters. This is relevant because named parameters are not | ||
identical to unnamed parameters due to string trimming, and when dealing with strings | identical to unnamed parameters due to string trimming, and when dealing with strings | ||
we sometimes want to either preserve or remove that whitespace depending on the application. | we sometimes want to either preserve or remove that whitespace depending on the application. | ||
] | ]] | ||
function str._getParameters( frame_args, arg_list ) | function str._getParameters( frame_args, arg_list ) | ||
local new_args = {}; | local new_args = {}; | ||
Line 227: | Line 292: | ||
end | end | ||
--[[
Helper function to handle error messages.

Builds the value that the public functions return when they detect an error.
The options are read from the arguments of the frame returned by
mw.getCurrentFrame():
    ignore_errors: when str._getBoolean treats it as true, an empty string
        is returned instead of a message.
    error_category: name of the category to attach to the message; defaults
        to 'Errors reported by Module String'.  An empty string disables
        the category link.

Otherwise the result is error_str wrapped in a <strong class="error"> element
with the prefix 'String Module Error: ', preceded by the category link when
one is requested.
]]
function str._error( error_str )
    local args = mw.getCurrentFrame().args;

    -- Errors can be silenced entirely by the caller.
    if str._getBoolean( args.ignore_errors or false ) then
        return '';
    end

    local message = '<strong class="error">String Module Error: ' .. error_str .. '</strong>';

    local category = args.error_category or 'Errors reported by Module String';
    if category == '' then
        -- An explicitly empty category means "do not categorise".
        return message;
    end

    return '[[Category:' .. category .. ']]' .. message;
end
--[[ | |||
Helper Function to interpret boolean strings | Helper Function to interpret boolean strings | ||
] | ]] | ||
function str._getBoolean( boolean_str ) | function str._getBoolean( boolean_str ) | ||
local boolean_value; | local boolean_value; | ||
Line 248: | Line 333: | ||
end | end | ||
--[ | --[[ | ||
Helper function that escapes all pattern characters so that they will be treated | Helper function that escapes all pattern characters so that they will be treated | ||
as plain text. | as plain text. | ||
] | ]] | ||
function str._escapePattern( pattern_str ) | function str._escapePattern( pattern_str ) | ||
return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ); | return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ); |