385
edits
m>Dragons flight (typo) |
m (21 revisions imported from wikipedia:Module:Citation/CS1: see Topic:Vtixlm0q28eo6jtf) |
||
(122 intermediate revisions by 16 users not shown) | |||
Line 1: | Line 1: | ||
function | require('Module:No globals'); | ||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | |||
each of these counts against the Lua upvalue limit | |||
]] | |||
local validation; -- functions in Module:Citation/CS1/Date_validation | |||
local utilities; -- functions in Module:Citation/CS1/Utilities | |||
local z ={}; -- table of tables in Module:Citation/CS1/Utilities | |||
local identifiers; -- functions and tables in Module:Citation/CS1/Identifiers | |||
local metadata; -- functions in Module:Citation/CS1/COinS | |||
local cfg = {}; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | |||
local whitelist = {}; -- table of tables listing valid template parameter names; defined in Module:Citation/CS1/Whitelist | |||
--[[------------------< P A G E S C O P E V A R I A B L E S >--------------- | |||
declare variables here that have page-wide scope that are not brought in from | |||
other modules; that are created here and used here | |||
]] | |||
local added_deprecated_cat; -- Boolean flag so that the category is added only once | |||
local added_discouraged_cat; -- Boolean flag so that the category is added only once | |||
local added_vanc_errs; -- Boolean flag so we only emit one Vancouver error / category | |||
local Frame; -- holds the module's frame table | |||
--[[--------------------------< F I R S T _ S E T >------------------------------------------------------------ | |||
Locates and returns the first set value in a table of values where the order established in the table, | |||
left-to-right (or top-to-bottom), is the order in which the values are evaluated. Returns nil if none are set. | |||
This version replaces the original 'for _, val in pairs do' and a similar version that used ipairs. With the pairs | |||
version the order of evaluation could not be guaranteed. With the ipairs version, a nil value would terminate | |||
the for-loop before it reached the actual end of the list. | |||
]] | |||
local function first_set (list, count)
	-- A counted loop is used (not pairs/ipairs) so that nil holes in <list> neither
	-- reorder the evaluation nor end it early; <count> is the caller-supplied slot count.
	for index = 1, count do
		local candidate = list[index];
		if utilities.is_set (candidate) then
			return candidate;													-- first member that is set wins
		end
	end
	-- no member was set: fall off the end, which yields nil to the caller
end
function | |||
--[[--------------------------< A D D _ V A N C _ E R R O R >---------------------------------------------------- | |||
Adds a single Vancouver system error message to the template's output regardless of how many errors actually exist.
To prevent duplication, added_vanc_errs is nil until an error message is emitted. | |||
added_vanc_errs is a Boolean declared in page scope variables above | |||
]] | |||
local function add_vanc_error (source, position)
	if not added_vanc_errs then													-- only the first Vancouver error on the page is reported
		added_vanc_errs = true;													-- remember that the message has been queued
		-- the call is the sole item of the table constructor so that every value it returns is kept
		table.insert (z.message_tail, { utilities.set_message ('err_vancouver', {source, position}, true) });
	end
end
-- | |||
--[[--------------------------< I S _ S C H E M E >------------------------------------------------------------ | |||
does this thing that purports to be a URI scheme seem to be a valid scheme? The scheme is checked to see if it | |||
is in agreement with http://tools.ietf.org/html/std66#section-3.1 which says: | |||
Scheme names consist of a sequence of characters beginning with a | |||
letter and followed by any combination of letters, digits, plus | |||
("+"), period ("."), or hyphen ("-"). | |||
returns true if it does, else false | |||
]] | |||
local function is_scheme (scheme)
	if not scheme then
		return scheme;															-- nothing to test; hand back the same falsy value
	end
	-- a letter, then any mix of letters, digits, '+', '.', '-', terminated by the colon;
	-- the result is the matched text (truthy) or nil
	return (scheme:match ('^%a[%a%d%+%.%-]*:'));
end
--[=[-------------------------< I S _ D O M A I N _ N A M E >-------------------------------------------------- | |||
Does this thing that purports to be a domain name seem to be a valid domain name? | |||
Syntax defined here: http://tools.ietf.org/html/rfc1034#section-3.5 | |||
BNF defined here: https://tools.ietf.org/html/rfc4234 | |||
Single character names are generally reserved; see https://tools.ietf.org/html/draft-ietf-dnsind-iana-dns-01#page-15; | |||
see also [[Single-letter second-level domain]] | |||
list of TLDs: https://www.iana.org/domains/root/db | |||
RFC 952 (modified by RFC 1123) requires the first and last character of a hostname to be a letter or a digit. Between | |||
the first and last characters the name may use letters, digits, and the hyphen. | |||
Also allowed are IPv4 addresses. IPv6 not supported | |||
domain is expected to be stripped of any path so that the last character is the last character of the TLD. tld
is two or more alpha characters. Any preceding '//' (from splitting a URL with a scheme) will be stripped | |||
here. Perhaps not necessary but retained in case it is necessary for IPv4 dot decimal. | |||
There are several tests: | |||
the first character of the whole domain name including subdomains must be a letter or a digit | |||
internationalized domain name (ASCII characters with .xn-- ASCII Compatible Encoding (ACE) prefix xn-- in the TLD) see https://tools.ietf.org/html/rfc3490 | |||
single-letter/digit second-level domains in the .cash, .company, .today, and .org TLDs
q, x, and z SL domains in the .com TLD | |||
i and q SL domains in the .net TLD | |||
single-letter SL domains in the ccTLDs (where the ccTLD is two letters) | |||
two-character SL domains in gTLDs (where the gTLD is two or more letters) | |||
three-plus-character SL domains in gTLDs (where the gTLD is two or more letters) | |||
IPv4 dot-decimal address format; TLD not allowed | |||
returns true if domain appears to be a proper name and TLD or IPv4 address, else false | |||
]=] | |||
local function is_domain_name (domain)
	-- Returns true when <domain> looks like hostname.tld or an IPv4 dot-decimal address, else false.
	-- <domain> may be nil and may carry a leading '//' left over from splitting a URL.
	if not domain then
		return false;															-- if not set, abandon
	end

	domain = domain:gsub ('^//', '');											-- strip '//' once here so the patterns below need not allow for it

	if not domain:match ('^[%w]') then											-- first character must be letter or digit
		return false;
	end

	if domain:match ('^%a+:') then												-- things like s:Page:Title (Wikisource namespace) are not domain names
		return false;
	end

	local patterns = {															-- patterns that look like URLs; hostname patterns are anchored to the end of <domain>
		'%f[%w][%w][%w%-]+[%w]%.%a%a+$',										-- three or more character hostname.hostname or hostname.tld
		'%f[%w][%w][%w%-]+[%w]%.xn%-%-[%w]+$',									-- internationalized domain name with ACE prefix
		'%f[%a][qxz]%.com$',													-- assigned one character .com hostname
		'%f[%a][iq]%.net$',														-- assigned one character .net hostname
		'%f[%w][%w]%.%a%a$',													-- one character hostname and ccTLD (2 chars)
		'%f[%w][%w][%w]%.%a%a+$',												-- two character hostname and TLD
		'^%d%d?%d?%.%d%d?%d?%.%d%d?%d?%.%d%d?%d?',								-- IPv4 address; deliberately not end-anchored so a trailing :port is tolerated
		}

	for _, pattern in ipairs (patterns) do										-- first matching pattern settles it
		if domain:match (pattern) then
			return true;
		end
	end

	-- single letter/digit second-level domains are accepted only in these TLDs; the '$'
	-- anchor keeps look-alikes such as 'a.orgy' or 'x.cashier' from being accepted
	for _, tld in ipairs ({'cash', 'company', 'today', 'org'}) do
		if domain:match ('%f[%w][%w]%.' .. tld .. '$') then
			return true;
		end
	end

	return false;																-- no matches, we don't know what this thing is
end
--[[--------------------------< I S _ U R L >------------------------------------------------------------------ | |||
returns true if the scheme and domain parts of a URL appear to be a valid URL; else false. | |||
This function is the last step in the validation process. This function is separate because there are cases that | |||
are not covered by split_url(), for example is_parameter_ext_wikilink() which is looking for bracketed external
wikilinks. | |||
]] | |||
local function is_url (scheme, domain)
	if not utilities.is_set (scheme) then
		return is_domain_name (domain);											-- protocol-relative URL: only the domain can be judged
	end
	return is_scheme (scheme) and is_domain_name (domain);						-- both parts must pass
end
--[[--------------------------< S P L I T _ U R L >------------------------------------------------------------ | |||
Split a URL into a scheme, authority indicator, and domain. | |||
First remove Fully Qualified Domain Name terminator (a dot following TLD) (if any) and any path(/), query(?) or fragment(#). | |||
If protocol-relative URL, return nil scheme and domain else return nil for both scheme and domain. | |||
When not protocol-relative, get scheme, authority indicator, and domain. If there is an authority indicator (one | |||
or more '/' characters immediately following the scheme's colon), make sure that there are only 2. | |||
Any URL that does not have news: scheme must have authority indicator (//). TODO: are there other common schemes | |||
like news: that don't use authority indicator? | |||
Strip off any port and path; | |||
]] | |||
local function split_url (url_str)
	local scheme, authority, domain;

	-- drop the FQDN terminator and any path(/), query(?), or fragment(#); the leading
	-- capture keeps the '//' of the authority indicator from being taken for a path
	url_str = url_str:gsub ('([%a%d])%.?[/%?#].*$', '%1');

	local relative_domain = url_str:match ('^//(%S*)');
	if relative_domain then														-- protocol-relative URL: there is no scheme to report
		domain = relative_domain;
	else
		scheme, authority, domain = url_str:match ('(%S-:)(/*)(%S+)');			-- scheme, authority indicator, domain; all nil when nothing URL-like is found
		if scheme then
			if utilities.is_set (authority) then
				authority = authority:gsub ('//', '', 1);						-- remove exactly one pair of '/'
				if utilities.is_set (authority) then							-- anything left means 1 or 3+ slashes
					return scheme;												-- scheme only; the nil domain makes the caller report an error
				end
			elseif not scheme:match ('^news:') then								-- only news: may omit the authority indicator
				return scheme;													-- scheme only; the nil domain makes the caller report an error
			end
			domain = domain:gsub ('(%a):%d+', '%1');							-- strip a port number that follows a letter
		end
	end

	return scheme, domain;
end
function | |||
--[[--------------------------< L I N K _ P A R A M _ O K >--------------------------------------------------- | |||
checks the content of |title-link=, |series-link=, |author-link=, etc. for properly formatted content: no wikilinks, no URLs | |||
Link parameters are to hold the title of a Wikipedia article, so none of the WP:TITLESPECIALCHARACTERS are allowed: | |||
# < > [ ] | { } _ | |||
except the underscore which is used as a space in wiki URLs and # which is used for section links | |||
returns false when the value contains any of these characters. | |||
When there are no illegal characters, this function returns TRUE if value DOES NOT appear to be a valid URL (the | |||
|<param>-link= parameter is ok); else false when value appears to be a valid URL (the |<param>-link= parameter is NOT ok). | |||
]] | |||
local function link_param_ok (value)
	if value:find ('[<>%[%]|{}]') then											-- characters that may not appear in an article title
		return false;
	end
	-- split_url() hands back scheme and domain (either may be nil); the parameter is
	-- acceptable only when those parts do NOT look like a URL
	return not is_url (split_url (value));
end
function | |||
--[[--------------------------< L I N K _ T I T L E _ O K >--------------------------------------------------- | |||
Use link_param_ok() to validate |<param>-link= value and its matching |<title>= value. | |||
|<title>= may be wiki-linked but not when |<param>-link= has a value. This function emits an error message when | |||
that condition exists | |||
check <link> for inter-language interwiki-link prefix. prefix must be a MediaWiki-recognized language | |||
code and must begin with a colon. | |||
]] | |||
local function link_title_ok (link, lorig, title, torig)
	if not utilities.is_set (link) then
		return link;															-- nothing to validate; return the value untouched
	end

	local culprit;																-- original name of the parameter that is at fault, if any

	if not link_param_ok (link) then											-- bad markup or a URL in |<param>-link=
		culprit = lorig;
	elseif title:find ('%[%[') then												-- |title= is wikilinked while |<param>-link= has a value
		culprit = torig;
	else
		local prefix = link:match ('^(%a+):');									-- what looks like an interwiki prefix without a leading colon
		if prefix and cfg.inter_wiki_map[prefix:lower()] then					-- a known prefix must be preceded by a colon
			culprit = lorig;
		end
	end

	if not utilities.is_set (culprit) then
		return link;															-- link is ok
	end

	table.insert (z.message_tail, { utilities.set_message ('err_bad_paramlink', culprit) });
	return '';																	-- unset the link when it, or its title, is malformed
end
function url( | --[[--------------------------< C H E C K _ U R L >------------------------------------------------------------ | ||
Determines whether a URL string appears to be valid. | |||
First we test for space characters. If any are found, return false. Then split the URL into scheme and domain | |||
portions, or for protocol-relative (//example.com) URLs, just the domain. Use is_url() to validate the two | |||
portions of the URL. If both are valid, or for protocol-relative if domain is valid, return true, else false. | |||
Because it is different from a standard URL, and because this module used external_link() to make external links | |||
that work for standard and news: links, we validate newsgroup names here. The specification for a newsgroup name | |||
is at https://tools.ietf.org/html/rfc5536#section-3.1.4 | |||
]] | |||
local function check_url( url_str )
	-- Returns a truthy value when <url_str> appears to be a valid URL; false or nil otherwise.
	if nil == url_str:match ("^%S+$") then										-- if there are any spaces in |url=value it can't be a proper URL
		return false;
	end

	local scheme, domain = split_url (url_str);									-- scheme or nil, and domain or nil

	if 'news:' == scheme then													-- special case for newsgroups
		if not domain then														-- split_url() returns the scheme alone when the authority indicator
			return false;														-- is malformed (news:/x); without this guard domain:match() raises an error
		end
		return domain:match('^[%a%d%+%-_]+%.[%a%d%+%-_%.]*[%a%d%+%-_]$');		-- newsgroup name: dot-separated components
	end

	return is_url (scheme, domain);												-- ordinary and protocol-relative URLs
end
--[=[-------------------------< I S _ P A R A M E T E R _ E X T _ W I K I L I N K >---------------------------- | |||
Return true if a parameter value has a string that begins and ends with square brackets [ and ] and the first | |||
non-space characters following the opening bracket appear to be a URL. The test will also find external wikilinks | |||
that use protocol-relative URLs. Also finds bare URLs. | |||
The frontier pattern prevents a match on interwiki-links which are similar to scheme:path URLs. The tests that | |||
find bracketed URLs are required because the parameters that call this test (currently |title=, |chapter=, |work=, | |||
and |publisher=) may have wikilinks and there are articles or redirects like '//Hus' so, while uncommon, |title=[[//Hus]] | |||
is possible as might be [[en://Hus]]. | |||
]=] | |||
local function is_parameter_ext_wikilink (value)
	-- tried in this order; each pattern captures the URL-like portion of <value>
	local url_patterns = {
		'%f[%[]%[(%a%S*:%S+).*%]',												-- ext. wikilink with scheme and domain: [xxxx://yyyyy.zzz]
		'%f[%[]%[(//%S+).*%]',													-- protocol-relative ext. wikilink: [//yyyyy.zzz]
		'(%a%S*:%S+)',															-- bare URL with scheme; may have leading or trailing plain text
		'(//%S+)',																-- protocol-relative bare URL; may have leading or trailing plain text
		};

	for _, pattern in ipairs (url_patterns) do
		local candidate = value:match (pattern);
		if candidate then														-- first pattern that finds something decides the outcome
			return is_url (split_url (candidate));								-- validate the scheme and domain parts
		end
	end

	return false;																-- didn't find anything that is obviously a URL
end
--[[-------------------------< C H E C K _ F O R _ U R L >----------------------------------------------------- | |||
loop through a list of parameters and their values. Look at the value and if it has an external link, emit an error message. | |||
]] | |||
local function check_for_url (parameter_list)
	local offenders = {};														-- '|name=' for every parameter whose value holds an external link

	for name, value in pairs (parameter_list) do
		if is_parameter_ext_wikilink (value) then
			offenders[#offenders + 1] = "|" .. name .. "=";
		end
	end

	local error_message = table.concat (offenders, ", ");						-- comma-space separated list; empty string when nothing failed
	if utilities.is_set (error_message) then									-- one message names all offending parameters
		table.insert (z.message_tail, { utilities.set_message ('err_param_has_ext_link', {error_message}, true) });
	end
end
function | |||
--[[--------------------------< S A F E _ F O R _ U R L >------------------------------------------------------ | |||
Escape sequences for content that will be used for URL descriptions | |||
]] | |||
local function safe_for_url( str )
	-- Makes <str> safe to use as the label of a [URL label] external link.
	-- Returns the escaped string followed by gsub's replacement count.
	if str:match( "%[%[.-%]%]" ) ~= nil then									-- a wikilink inside an external link's label cannot render
		table.insert( z.message_tail, { utilities.set_message ( 'err_wikilink_in_url', {}, true ) } );
	end

	-- square brackets would end or corrupt the external link markup, so they are replaced
	-- with HTML numeric character references; a newline would split the markup across lines
	return str:gsub( '[%[%]\n]', {
		['['] = '&#91;',
		[']'] = '&#93;',
		['\n'] = ' ' } );
end
function | |||
--[[--------------------------< E X T E R N A L _ L I N K >---------------------------------------------------- | |||
Format an external link with error checking | |||
]] | |||
local function external_link( URL, label, source, access)
	local error_str = "";

	if not utilities.is_set (label) then										-- no label: the bare URL will serve as its own label
		if not utilities.is_set (source) then
			error (cfg.messages["bare_url_no_origin"]);							-- cannot name the offending parameter; abort with a script error
		end
		label = URL;
		error_str = utilities.set_message ('err_bare_url_missing_title', { utilities.wrap_style ('parameter', source) }, false, " ");
	end

	if not check_url (URL) then													-- the bad-URL message goes ahead of any message already collected
		error_str = utilities.set_message ('err_bad_url', {utilities.wrap_style ('parameter', source)}, false, " ") .. error_str;
	end

	local domain, path = URL:match ('^([/%.%-%+:%a%d]+)([/%?#].*)$');			-- scheme plus domain, and everything from the path onward
	if path then
		path = path:gsub ('[%[%]]', {['['] = '%5b', [']'] = '%5d'});			-- percent-encode square brackets so they cannot close the link markup
		URL = domain .. path;													-- reassemble
	end

	local base_url = "[" .. URL .. " " .. (safe_for_url (label)) .. "]";		-- wiki-markup external link

	if utilities.is_set (access) then											-- access level (subscription, registration, limited)
		local signal = cfg.presentation[access];
		base_url = utilities.substitute (cfg.presentation['ext-link-access-signal'], {signal.class, signal.title, base_url});	-- add the appropriate icon
	end

	return base_url .. error_str;
end
--[[--------------------------< D E P R E C A T E D _ P A R A M E T E R >-------------------------------------- | |||
Categorize and emit an error message when the citation contains one or more deprecated parameters. The function includes the | |||
offending parameter name to the error message. Only one error message is emitted regardless of the number of deprecated | |||
parameters in the citation. | |||
added_deprecated_cat is a Boolean declared in page scope variables above | |||
]] | |||
local function deprecated_parameter(name)
	if added_deprecated_cat then
		return;																	-- already reported once; stay silent
	end
	added_deprecated_cat = true;												-- remember that the message has been queued
	table.insert (z.message_tail, { utilities.set_message ('err_deprecated_params', {name}, true) });	-- names the first offending parameter
end
--[[--------------------------< D I S C O U R A G E D _ P A R A M E T E R >------------------------------------ | |||
Categorize and emit a maintenance message when the citation contains one or more discouraged parameters. Only
one error message is emitted regardless of the number of discouraged parameters in the citation. | |||
added_discouraged_cat is a Boolean declared in page scope variables above | |||
]] | |||
local function discouraged_parameter(name)
	if added_discouraged_cat then
		return;																	-- already reported once; stay silent
	end
	added_discouraged_cat = true;												-- remember that the message has been queued
	table.insert (z.message_tail, { utilities.set_message ('maint_discouraged', {name}, true) });	-- names the first offending parameter
end
function | |||
--[=[-------------------------< K E R N _ Q U O T E S >-------------------------------------------------------- | |||
Apply kerning to open the space between the quote mark provided by the module and a leading or trailing quote | |||
mark contained in a |title= or |chapter= parameter's value. | |||
This function will positive kern either single or double quotes: | |||
"'Unkerned title with leading and trailing single quote marks'" | |||
" 'Kerned title with leading and trailing single quote marks' " (in real life the kerning isn't as wide as this example) | |||
Double single quotes (italic or bold wiki-markup) are not kerned. | |||
Replaces Unicode quote marks in plain text or in the label portion of a [[L|D]] style wikilink with typewriter | |||
quote marks regardless of the need for kerning. Unicode quote marks are not replaced in simple [[D]] wikilinks. | |||
Call this function for chapter titles, for website titles, etc.; not for book titles. | |||
]=] | |||
local function kern_quotes (str)
	local wl_type, label, link = utilities.is_wikilink (str);					-- wl_type is: 0, no wl (text in label variable); 1, [[D]]; 2, [[L|D]]

	if 1 == wl_type then														-- [[D]] simple wikilink: kern around the whole link, leave its text alone
		local kern_key;															-- which presentation template to apply, if any
		if mw.ustring.match (str, '%[%[[\"“”\'‘’].+[\"“”\'‘’]%]%]') then		-- leading and trailing quote marks
			kern_key = 'kern-wl-both';
		elseif mw.ustring.match (str, '%[%[[\"“”\'‘’].+%]%]') then				-- leading quote marks
			kern_key = 'kern-wl-left';
		elseif mw.ustring.match (str, '%[%[.+[\"“”\'‘’]%]%]') then				-- trailing quote marks
			kern_key = 'kern-wl-right';
		end
		if kern_key then
			str = utilities.substitute (cfg.presentation[kern_key], str);
		end
		return str;
	end

	-- plain text or [[L|D]]: work on the text held in <label>
	label = mw.ustring.gsub (label, '[“”]', '\"');								-- replace “” (U+201C & U+201D) with " (typewriter double quote mark)
	label = mw.ustring.gsub (label, '[‘’]', '\'');								-- replace ‘’ (U+2018 & U+2019) with ' (typewriter single quote mark)

	local lead_quote, lead_rest = mw.ustring.match (label, "^([\"\'])([^\'].+)");	-- leading quote mark, but not doubled single quotes (italic markup)
	if utilities.is_set (lead_quote) then
		label = utilities.substitute (cfg.presentation['kern-left'], {lead_quote, lead_rest});
	end

	local trail_rest, trail_quote = mw.ustring.match (label, "^(.+[^\'])([\"\'])$")	-- trailing quote mark, but not doubled single quotes (italic markup)
	if utilities.is_set (trail_rest) then
		label = utilities.substitute (cfg.presentation['kern-right'], {trail_rest, trail_quote});
	end

	if 2 == wl_type then
		str = utilities.make_wikilink (link, label);							-- reassemble the wikilink around the processed label
	else
		str = label;
	end
	return str;
end
function | |||
--[[--------------------------< F O R M A T _ S C R I P T _ V A L U E >---------------------------------------- | |||
|script-title= holds title parameters that are not written in Latin-based scripts: Chinese, Japanese, Arabic, Hebrew, etc. These scripts should | |||
not be italicized and may be written right-to-left. The value supplied by |script-title= is concatenated onto Title after Title has been wrapped | |||
in italic markup. | |||
Regardless of language, all values provided by |script-title= are wrapped in <bdi>...</bdi> tags to isolate RTL languages from the English left to right. | |||
|script-title= provides a unique feature. The value in |script-title= may be prefixed with a two-character ISO 639-1 language code and a colon: | |||
|script-title=ja:*** *** (where * represents a Japanese character) | |||
Spaces between the two-character code and the colon and the colon and the first script character are allowed: | |||
|script-title=ja : *** *** | |||
|script-title=ja: *** *** | |||
|script-title=ja :*** *** | |||
Spaces preceding the prefix are allowed: |script-title = ja:*** *** | |||
The prefix is checked for validity. If it is a valid ISO 639-1 language code, the lang attribute (lang="ja") is added to the <bdi> tag so that browsers can | |||
know the language the tag contains. This may help the browser render the script more correctly. If the prefix is invalid, the lang attribute | |||
is not added. At this time there is no error message for this condition. | |||
Supports |script-title=, |script-chapter=, |script-<periodical>= | |||
]] | |||
local function format_script_value (script_value, script_param)
	-- queue an error message for <script_param>; <reason> is the explanatory text
	local function report (reason)
		table.insert (z.message_tail, { utilities.set_message ('err_script_parameter', {script_param, reason}, true) });
	end

	local lang_attribute = '';													-- stays empty unless a recognized language prefix is found

	if not script_value:match ('^%l%l%l?%s*:') then								-- no two- or three-letter prefix followed by a colon
		report ('missing prefix');
	else
		local code = script_value:match ('^(%l%l%l?)%s*:%s*%S.*');				-- nil when nothing but the prefix was supplied
		if not utilities.is_set (code) then
			report ('missing title part');
			return '';															-- script_value was just the prefix so return empty string
		end

		-- if we get this far we have prefix and script
		local name = cfg.lang_code_remap[code] or mw.language.fetchLanguageName (code, cfg.this_wiki_code);	-- language name, used for categorization
		if utilities.is_set (name) then											-- the prefix is a language code known to MediaWiki
			script_value = script_value:gsub ('^%l+%s*:%s*', '');				-- strip prefix from script
			if utilities.in_array (code, cfg.script_lang_codes) then			-- one of the supported script-language codes?
				utilities.add_prop_cat ('script_with_name', {name, code})
			else
				report ('unknown language code');
			end
			lang_attribute = ' lang="' .. code .. '" ';							-- convert prefix into a lang attribute
		else
			report ('invalid language code');									-- lang_attribute remains empty
		end
	end

	script_value = utilities.substitute (cfg.presentation['bdi'], {lang_attribute, script_value});	-- isolate in case script is RTL
	return script_value;
end
--[[--------------------------< S C R I P T _ C O N C A T E N A T E >------------------------------------------ | |||
Initially for |title= and |script-title=, this function concatenates those two parameter values after the script value has been | |||
wrapped in <bdi> tags. | |||
]] | |||
local function script_concatenate (title, script, script_param)
	if not utilities.is_set (script) then
		return title;															-- no script value; title is returned unchanged
	end

	local wrapped = format_script_value (script, script_param);					-- <bdi> tags, lang attribute, categorization, etc.; empty string on error
	if not utilities.is_set (wrapped) then
		return title;															-- formatting failed; title is returned unchanged
	end

	return title .. ' ' .. wrapped;												-- title, a space, then the wrapped script value
end
function | |||
--[[--------------------------< W R A P _ M S G >-------------------------------------------------------------- | |||
Applies additional message text to various parameter values. Supplied string is wrapped using a message_list | |||
configuration taking one argument. Supports lower case text for {{citation}} templates. Additional text taken | |||
from citation_config.messages - the reason this function is similar to but separate from wrap_style(). | |||
]] | |||
local function wrap_msg (key, str, lower)
	if not utilities.is_set (str) then
		return "";																-- nothing to wrap
	end

	local template = cfg.messages[key];
	if true == lower then														-- only the Boolean true selects lower case
		template = template:lower();											-- lower-case the message before the parameter text is inserted
	end
	return utilities.substitute (template, str);
end
function | |||
--[[----------------< W I K I S O U R C E _ U R L _ M A K E >-------------------

Makes a Wikisource URL from a Wikisource interwiki link.

str is the value assigned to |chapter= (or aliases) or |title= or |title-link=

Returns three values:
	1. the URL, or nil when str does not name a Wikisource article
	2. the label for that URL, or nil when there is no URL
	3. the L value reported by utilities.is_wikilink(), or D when L is nil

The wikilink type, display text (D) and link target (L) all come from
utilities.is_wikilink():
	0 (not a wikilink; might be from |title-link=) and
	1 (simple wikilink: [[Wikisource:ws article]]):
		article name is taken from D and is also used as the label
	2 (not-so-simple wikilink: [[Wikisource:ws article|displayed text]]):
		article name is taken from L; D is used as the label

]]

local function wikisource_url_make (str)
	local wikisource_prefix = table.concat ({'https://', cfg.this_wiki_code, '.wikisource.org/wiki/'});
	local wl_type, D, L = utilities.is_wikilink (str);							-- wl_type is 0 (not a wikilink), 1 (simple wikilink), 2 (complex wikilink)

	local function strip_namespace (link)										-- article title from interwiki link with long-form or short-form namespace
		return link:match ('^[Ww]ikisource:(.+)') or link:match ('^[Ss]:(.+)');
	end

	local article;																-- article title without the namespace
	local label_when_found;														-- label to use if article turns out to be set
	if 0 == wl_type or 1 == wl_type then
		article = strip_namespace (D);
		label_when_found = article;												-- label is the article title itself
	elseif 2 == wl_type then
		article = strip_namespace (L);
		label_when_found = D;													-- label is the display portion of the interwiki link
	end

	local ws_url, ws_label;
	if utilities.is_set (article) then
		ws_label = label_when_found;
		ws_url = table.concat ({												-- build a Wikisource URL
			wikisource_prefix,													-- prefix
			article,															-- article title
			});
		ws_url = mw.uri.encode (ws_url, 'WIKI');								-- make a usable URL
		ws_url = ws_url:gsub ('%%23', '#');										-- undo percent-encoding of fragment marker
	end

	return ws_url, ws_label, L or D;											-- return proper URL or nil and a label or nil
end
--[[----------------< F O R M A T _ P E R I O D I C A L >-----------------------

Format the three periodical parameters: |script-<periodical>=, |<periodical>=,
and |trans-<periodical>= into a single Periodical meta-parameter.

	script_periodical:			value of |script-<periodical>=
	script_periodical_source:	name of the parameter that supplied script_periodical
	periodical:					value of |<periodical>=
	trans_periodical:			value of |trans-<periodical>=

Returns the assembled string.  When trans_periodical is set but neither a
periodical nor a script periodical produced any text, the
err_trans_missing_title message is appended to the result.

]]

local function format_periodical (script_periodical, script_periodical_source, periodical, trans_periodical)
	if utilities.is_set (periodical) then
		periodical = utilities.wrap_style ('italic-title', periodical);		-- style
	else
		periodical = '';														-- to be safe for concatenation
	end

	periodical = script_concatenate (periodical, script_periodical, script_periodical_source);	-- must be done after title is wrapped

	local error_text = '';
	if utilities.is_set (trans_periodical) then
		local styled_trans = utilities.wrap_style ('trans-italic-title', trans_periodical);
		if utilities.is_set (periodical) then
			periodical = periodical .. ' ' .. styled_trans;
		else																	-- here when trans-periodical without periodical or script-periodical
			periodical = styled_trans;
			error_text = ' ' .. utilities.set_message ('err_trans_missing_title', {'periodical'});
		end
	end

	return periodical .. error_text;
end
--[[------------------< F O R M A T _ C H A P T E R _ T I T L E >---------------

Format the four chapter parameters: |script-chapter=, |chapter=, |trans-chapter=,
and |chapter-url= into a single chapter meta-parameter (chapter_url_source is
used for error messages).

Processing order:
	1. when chapter is a Wikisource interwiki link, replace it with the link's
	   label (underscores become spaces) and remember the Wikisource URL
	2. when chapter is set and no_quotes is exactly false, kern and quote it
	3. append the script chapter
	4. link the result to chapter_url when that is set, else to the Wikisource
	   URL (with the interwiki icon) when there is one
	5. append the translated chapter; when there is no chapter text to attach it
	   to, append the err_trans_missing_title message

Returns the assembled string.

]]

local function format_chapter_title (script_chapter, script_chapter_source, chapter, chapter_source, trans_chapter, trans_chapter_source, chapter_url, chapter_url_source, no_quotes, access)
	local ws_url, ws_label, L = wikisource_url_make (chapter);					-- make a wikisource URL and label from a wikisource interwiki link
	if ws_url then
		ws_label = ws_label:gsub ('_', ' ');									-- replace underscore separators with space characters
		chapter = ws_label;
	end

	if utilities.is_set (chapter) then
		if false == no_quotes then
			local kerned = kern_quotes (chapter);								-- if necessary, separate chapter title's leading and trailing quote marks from module provided quote marks
			chapter = utilities.wrap_style ('quoted-title', kerned);
		end
	else
		chapter = '';															-- to be safe for concatenation
	end

	chapter = script_concatenate (chapter, script_chapter, script_chapter_source);	-- must be done after title is wrapped

	if utilities.is_set (chapter_url) then
		chapter = external_link (chapter_url, chapter, chapter_url_source, access);	-- adds bare_url_missing_title error if appropriate
	elseif ws_url then
		chapter = external_link (ws_url, chapter .. ' ', 'ws link in chapter');	-- adds bare_url_missing_title error if appropriate; space char to move icon away from chap text; TODO: better way to do this?
		chapter = utilities.substitute (cfg.presentation['interwiki-icon'], {cfg.presentation['class-wikisource'], L, chapter});
	end

	local error_text = '';
	if utilities.is_set (trans_chapter) then
		local styled_trans = utilities.wrap_style ('trans-quoted-title', trans_chapter);
		if utilities.is_set (chapter) then
			chapter = chapter .. ' ' .. styled_trans;
		else																	-- here when trans_chapter without chapter or script-chapter
			chapter = styled_trans;
			chapter_source = trans_chapter_source:match ('trans%-?(.+)');		-- when no chapter, get matching name from trans-<param>
			error_text = ' ' .. utilities.set_message ('err_trans_missing_title', {chapter_source});
		end
	end

	return chapter .. error_text;
end
--[[----------------< H A S _ I N V I S I B L E _ C H A R S >-------------------

Searches a parameter's value for non-printable or invisible characters.  The
search stops at the first match that is reported as an error.

	param:	name of the parameter being checked (used in the error message)
	v:		the parameter's value

Values made up only of ASCII alphanumerics, punctuation and space characters
are accepted immediately.  Otherwise each {name, pattern} pair in
cfg.invisible_chars is tried in turn:
	a zero-width joiner is tolerated when v matches cfg.indic_script or when the
		character following the joiner is listed in cfg.emoji
	nowiki and math stripmarkers are detected but are not errors; the same holds
		for templatestyles stripmarkers in |id= and |quote=
	once such a stripmarker has been seen, a later delete character is assumed to
		be one end of a stripmarker and is ignored
	anything else adds an err_invisible_char message to z.message_tail that
		identifies the character, Unicode group, or stripmarker along with its
		position in the parameter value

See also coins_cleanup().

Returns nothing.

]]

local function has_invisible_chars (param, v)
	local stripmarker;															-- set true when a permitted stripmarker is found; persists across patterns

	if v == string.match (v, '[%w%p ]*') then									-- simple ASCII text only; bypass the other tests
		return;
	end

	for _, invisible_char in ipairs (cfg.invisible_chars) do
		local char_name, pattern = invisible_char[1], invisible_char[2];		-- the character or group name, and the pattern used to find it
		local position, finish, capture = mw.ustring.find (v, pattern);			-- finish is not used; capture is whatever the pattern captures

		if position and (cfg.invisible_defs.zwj == capture) then				-- if we found a zero-width joiner character
			if mw.ustring.find (v, cfg.indic_script) or							-- it's ok if one of the Indic scripts
				cfg.emoji[mw.ustring.codepoint (v, position+1)] then			-- or if zwj is followed by a character listed in emoji{}
					position = nil;												-- unset position
			end
		end

		if position then
			local permitted_marker = 'nowiki' == capture or 'math' == capture or	-- nowiki and math stripmarkers (not an error condition)
				('templatestyles' == capture and utilities.in_array (param, {'id', 'quote'}));	-- templatestyles stripmarker allowed in these parameters

			if permitted_marker then
				stripmarker = true;												-- set a flag
			elseif not (true == stripmarker and cfg.invisible_defs.del == capture) then	-- a delete char after a permitted stripmarker is assumed to be the end of that marker and is skipped
				local err_msg;
				if capture and cfg.invisible_defs.del ~= capture and cfg.invisible_defs.zwj ~= capture then
					err_msg = capture .. ' ' .. char_name;
				else
					err_msg = char_name .. ' ' .. 'character';
				end

				table.insert (z.message_tail, {utilities.set_message ('err_invisible_char', {err_msg, utilities.wrap_style ('parameter', param), position}, true)});	-- add error message
				return;															-- and done with this parameter
			end
		end
	end
end
--[[-------------------< A R G U M E N T _ W R A P P E R >----------------------

Argument wrapper.  Provides support for the argument mapping defined in the
configuration file so that multiple parameter names can be transparently
aliased to a single internal variable.

	args:	table of template arguments

Returns a table whose fields are resolved on first access through
cfg.aliases[k]:
	when the alias entry is a table, utilities.select_one() picks the value and
		the name that supplied it
	when the alias entry is any other non-nil value, it is used directly as the
		key into args
	when there is no alias entry, an error is raised using
		cfg.messages['unknown_argument_map']
A value that resolves to nil is stored as an empty string.  Resolved values are
written into the table with rawset() so later reads do not go through the
metamethod.

The table also carries an ORIGIN method: tbl:ORIGIN (k) forces k to be resolved
and then returns the name recorded as the source of k's value (empty string
when none).

]]

local function argument_wrapper ( args )
	local origin = {};															-- meta-parameter name -> name of the parameter that supplied its value

	local function resolve (tbl, k)												-- __index handler
		if nil ~= origin[k] then
			return nil;
		end

		local list = cfg.aliases[k];
		local v;

		if 'table' == type (list) then
			v, origin[k] = utilities.select_one ( args, list, 'err_redundant_parameters' );
			if nil == origin[k] then
				origin[k] = '';													-- Empty string, not nil
			end
		elseif nil ~= list then
			v, origin[k] = args[list], list;
		else
			-- maybe let through instead of raising an error?
			-- v, origin[k] = args[k], k;
			error ( cfg.messages['unknown_argument_map'] .. ': ' .. k);
		end

		if nil == v then														-- Empty strings, not nil
			v = '';
			origin[k] = '';
		end

		rawset ( tbl, k, v );													-- remember the value so this handler is not needed for k again
		return v;
	end

	local wrapper = {
		ORIGIN = function ( self, k )
			local _ = self[k];													-- force the variable to be loaded.
			return origin[k];
		end
	};

	return setmetatable (wrapper, {__index = resolve});
end
--[[--------------------------< N O W R A P _ D A T E >-------------------------

Wraps all or part of a date in the nowrap presentation markup.

When date is YYYY-MM-DD format, the whole date is substituted into
cfg.presentation['nowrap1'].

When date is DD MMMM YYYY or is MMMM DD, YYYY, the date is split at the white
space preceding the four-digit year and both pieces are substituted into
cfg.presentation['nowrap2'].

Any other date is returned unmodified.  DOES NOT yet support MMMM YYYY or any
of the date ranges.

]]

local function nowrap_date (date)
	local is_ymd = date:match ("^%d%d%d%d%-%d%d%-%d%d$");

	if is_ymd then
		date = utilities.substitute (cfg.presentation['nowrap1'], date);
	elseif date:match ("^%a+%s*%d%d?,%s+%d%d%d%d$") or date:match ("^%d%d?%s*%a+%s+%d%d%d%d$") then
		local day_month, year = string.match (date, "^(.*)%s+(%d%d%d%d)$");	-- everything before the year, and the year
		date = utilities.substitute (cfg.presentation['nowrap2'], {day_month, year});
	end

	return date;
end
--[[--------------------------< S E T _ T I T L E T Y P E >---------------------

Chooses the title type to render.

	cite_class:	index into cfg.title_types for the template's default title type
	title_type:	value of |type=

When |type= is set: returns an empty string if cfg.keywords_xlate maps the value
to 'none' (so that the title type is omitted from the rendered citation), else
returns the value unchanged.

When |type= is not set: returns the default from cfg.title_types for cite_class,
or an empty string (safe for concatenation) when there is no default.

]]

local function set_titletype (cite_class, title_type)
	if not utilities.is_set (title_type) then
		return cfg.title_types [cite_class] or '';								-- template's default title type; else empty string for concatenation
	end

	if 'none' == cfg.keywords_xlate[title_type] then
		return '';																-- |type=none: type parameter not displayed
	end

	return title_type;															-- |type= has some other value; use that value
end
--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------

Converts a hyphen to a dash under certain conditions.  The hyphen must separate
like items; unlike items are returned unmodified.  These forms are modified:
	letter - letter (A - B)
	digit - digit (4-5)
	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
	letterdigit - letterdigit (A1-A5) (an optional separator between letter and
		digit is supported – a.1-a.5 or a-1-a-5)
	digitletter - digitletter (5a - 5d) (an optional separator between letter and
		digit is supported – 5.a-5.d or 5-a-5-d)

any other forms are returned unmodified.

str may be a comma- or semicolon-separated list; the returned list is always
joined with comma-space.

Before str is split, the HTML entities &ndash; and &mdash; are replaced with
their characters, &#45; with a hyphen, and &nbsp; with a space.  The entities
must be spelled out in the patterns below: their trailing semicolon would
otherwise be taken for a list separator by the split.

Items wrapped in accept-this-as-written markup are not modified.  When that
markup wraps the whole of the reassembled list, the entity-replaced input with
the markup removed is returned instead.

]]

local function hyphen_to_dash( str )
	if not utilities.is_set (str) then
		return str;
	end

	local accept;																-- Boolean

	str = str:gsub ('&[nm]dash;', {['&ndash;'] = '–', ['&mdash;'] = '—'});		-- replace &mdash; and &ndash; entities with their characters; semicolon mucks up the text.split
	str = str:gsub ('&#45;', '-');												-- replace HTML numeric entity with hyphen character
	str = str:gsub ('&nbsp;', ' ');												-- replace &nbsp; entity with generic keyboard space character

	local out = {};
	local list = mw.text.split (str, '%s*[,;]%s*');								-- split str at comma or semicolon separators if there are any

	for _, item in ipairs (list) do												-- for each item in the list
		item, accept = utilities.has_accept_as_written (item);					-- remove accept-this-as-written markup when it wraps all of item
		if not accept and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators
			if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or			-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or			-- digitletter hyphen digitletter (optional separator between digit and letter)
				item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or			-- digit separator digit hyphen digit separator digit
				item:match ('^%d+%s*%-%s*%d+$') or								-- digit hyphen digit
				item:match ('^%a+%s*%-%s*%a+$') then							-- letter hyphen letter
					item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2');	-- replace hyphen, remove extraneous space characters
			else
				item = mw.ustring.gsub (item, '%s*[–—]%s*', '–');				-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
			end
		end
		table.insert (out, item);												-- add the (possibly modified) item to the output table
	end

	local temp_str;
	temp_str, accept = utilities.has_accept_as_written (table.concat (out, ', '));	-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept then
		temp_str = utilities.has_accept_as_written (str);						-- when global markup removed, return original str; assignment drops the boolean second return value
	end

	return temp_str;
end
--[[--------------------------< S A F E _ J O I N >-----------------------------

Joins a sequence of strings together while checking for duplicate separation
characters.

	tbl:			sequence of strings to be joined, in order
	duplicate_char:	the separator character (typically sepc) that must not be doubled

Empty strings are skipped.  A segment is examined only when its first character
(after any HTML markup has been removed) is duplicate_char; segments often begin
with the terminal punctuation for the preceding segment.  In that case the end
of the text joined so far is inspected:
	ends with duplicate_char, or duplicate_char + space:
		the duplicate is removed from the joined text
	ends with duplicate_char + '' (italic markup):
		duplicate_char is removed from the joined text, the '' is kept
	ends with duplicate_char followed by ]]'' or ]'' or ]] or "] or ] or '']:
		the leading duplicate_char is removed from the segment instead

When duplicate_char is a single byte the string library is used; otherwise the
mw.ustring library is used so that positions count characters.

Returns the joined string.

]]

local function safe_join( tbl, duplicate_char )
	local lib = (1 == #duplicate_char) and string or mw.ustring;				-- single byte: string library; multi-byte: ustring library
	local sub, match, gsub = lib.sub, lib.match, lib.gsub;

	local joined = '';															-- the output string

	for _, segment in ipairs( tbl ) do
		if nil == segment then segment = ''; end

		if '' == joined then													-- nothing joined yet
			joined = segment;													-- segment becomes the start of the output
		elseif '' ~= segment then
			local bare = segment;												-- segment without HTML markup
			if '<' == segment:sub (1, 1) then									-- special case of values enclosed in spans and other markup
				bare = segment:gsub ( "%b<>", "" );								-- remove HTML markup (<span>string</span> -> string)
			end

			if sub (bare, 1, 1) == duplicate_char then							-- segment starts with the separator
				local drop_leading = false;										-- true when the separator is to be removed from segment
				local last = sub (joined, -1, -1);								-- last character of the output so far

				if last == duplicate_char then									-- output ends with separator
					joined = sub (joined, 1, -2);								-- remove it
				elseif "'" == last then											-- might be wiki-markup
					if sub (joined, -3, -1) == duplicate_char .. "''" then		-- sepc''
						joined = sub (joined, 1, -4) .. "''";					-- remove them and add back ''
					else
						drop_leading =
							sub (joined, -5, -1) == duplicate_char .. "]]''" or	-- sepc]]''
							sub (joined, -4, -1) == duplicate_char .. "]''";	-- sepc]''
					end
				elseif "]" == last then											-- might be wiki-markup
					drop_leading =
						sub (joined, -3, -1) == duplicate_char .. "]]" or		-- sepc]] wikilink
						sub (joined, -3, -1) == duplicate_char .. '"]' or		-- sepc"] quoted external link
						sub (joined, -2, -1) == duplicate_char .. "]" or		-- sepc] external link
						sub (joined, -4, -1) == duplicate_char .. "'']";		-- normal case when |url=something & |title=Title.
				elseif " " == last then											-- output ends with a space
					if sub (joined, -2, -1) == duplicate_char .. " " then		-- <sepc><space>
						joined = sub (joined, 1, -3);							-- remove them both
					end
				end

				if drop_leading then
					if segment ~= bare then										-- segment contains HTML markup
						local escaped = duplicate_char;
						if match (escaped, "%A" ) then escaped = "%" .. escaped; end	-- if duplicate_char not a letter then escape it
						segment = gsub (segment, "(%b<>)" .. escaped, "%1", 1 );	-- remove duplicate_char if it follows HTML markup
					else
						segment = sub (segment, 2, -1 );						-- remove duplicate_char when it is first character
					end
				end
			end

			joined = joined .. segment;											-- add it to the output string
		end
	end

	return joined;
end
--[[--------------------------< I S _ S U F F I X >-----------------------------

Returns true if suffix is a properly formed Jr, Sr, Jnr, Snr, or an ordinal in
the range 1–9 (1st, 2nd, 3rd, 4th ... 9th); false otherwise.  Punctuation is not
allowed.

Only 4–9 take the 'th' ending, so malformed ordinals such as 0th, 1th, 2th and
3th are rejected.

	suffix:	the candidate suffix; expected to be a string

]]

local function is_suffix (suffix)
	local fixed_forms = {														-- suffixes that are accepted verbatim
		['Jr'] = true, ['Sr'] = true, ['Jnr'] = true, ['Snr'] = true,
		['1st'] = true, ['2nd'] = true, ['3rd'] = true,
		};

	if fixed_forms[suffix] or suffix:match ('^[4-9]th$') then					-- 4th through 9th are the only single-digit 'th' ordinals
		return true;
	end
	return false;
end
--[[--------------------< I S _ G O O D _ V A N C _ N A M E >------------------- | |||
For Vancouver style, author/editor names are supposed to be rendered in Latin | |||
(read |