Module:Citation/CS1/Identifiers: Difference between revisions
sync from sandbox;
m (47 revisions imported from templatewiki:Module:Citation/CS1/Identifiers) |
(sync from sandbox;) |
||
Line 1: | Line 1: | ||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
Line 11: | Line 8: | ||
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | ||
Line 26: | Line 21: | ||
local url_string = options.id; | local url_string = options.id; | ||
local ext_link; | local ext_link; | ||
local this_wiki_code; | local this_wiki_code = cfg.this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org | ||
local wd_article; -- article title from wikidata | local wd_article; -- article title from wikidata | ||
Line 38: | Line 33: | ||
end | end | ||
-- this_wiki_code = mw.language.getContentLanguage():getCode(); -- get this wikipedia's language code (subdomain) | |||
if string.match (mw.site.server, 'wikidata') then | if string.match (mw.site.server, 'wikidata') then | ||
Line 44: | Line 39: | ||
end | end | ||
if is_set (options.q) and mw.wikibase then | if is_set (options.q) and mw.wikibase then -- wikibase test here avoids script errors in third party wikis that aren't using mw.wikibase extension | ||
wd_article = mw.wikibase.getEntity (options.q):getSitelink (this_wiki_code .. 'wiki'); -- fetch article title from wd | wd_article = mw.wikibase.getEntity (options.q):getSitelink (this_wiki_code .. 'wiki'); -- fetch article title from wd | ||
if wd_article then | if wd_article then | ||
Line 66: | Line 61: | ||
local function internal_link_id(options) | local function internal_link_id(options) | ||
local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9 | |||
return table.concat ( | return table.concat ( | ||
Line 75: | Line 71: | ||
{ | { | ||
options.prefix, | options.prefix, | ||
id, -- translated to western digits | |||
options.suffix or '' | options.suffix or '' | ||
}), | }), | ||
mw.text.nowiki (options.id) | substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent Latn script identifiers from being reversed at rtl language wikis | ||
); | ); -- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required? | ||
}); | }); | ||
end | end | ||
Line 285: | Line 281: | ||
1–4 must be digits and must represent a year in the range of 1000 – next year | 1–4 must be digits and must represent a year in the range of 1000 – next year | ||
5 must be a letter | 5 must be a letter | ||
6–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. ) | |||
9–18 must be letter, digit, or dot | 9–18 must be letter, digit, or dot | ||
19 must be a letter or dot | 19 must be a letter or dot | ||
Line 304: | Line 299: | ||
err_type = 'length'; | err_type = 'length'; | ||
else | else | ||
year = id:match ("^(%d%d%d%d)[%a][% | year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$") -- | ||
if not year then -- if nil then no pattern match | if not year then -- if nil then no pattern match | ||
err_type = 'value'; -- so value error | err_type = 'value'; -- so value error | ||
Line 395: | Line 390: | ||
if is_set(inactive) then | if is_set(inactive) then | ||
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date | local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date | ||
if is_set(inactive_year) then | local inactive_month, good; | ||
table.insert( z.error_categories, | |||
if is_set (inactive_year) then | |||
if 4 < inactive:len() then -- inactive date has more than just a year (could be anything) | |||
local lang_obj = mw.getContentLanguage(); -- get a language object for this wiki | |||
good, inactive_month = pcall (lang_obj.formatDate, lang_obj, 'F', inactive); -- try to get the month name from the inactive date | |||
if not good then | |||
inactive_month = nil; -- something went wrong so make sure this is unset | |||
end | |||
end | |||
else | |||
inactive_year = nil; -- |doi-broken= has something but it isn't a date | |||
end | |||
if is_set(inactive_year) and is_set (inactive_month) then | |||
table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year .. ' ' .. inactive_month); -- use inactive month in category name | |||
elseif is_set(inactive_year) then | |||
table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year); | |||
else | else | ||
table.insert( z.error_categories, | table.insert( z.error_categories, 'Pages with inactive DOIs'); -- when inactive doesn't contain a recognizable date | ||
end | end | ||
inactive = " (" .. cfg.messages['inactive'] .. | inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')'; | ||
end | end | ||
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '') | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '') | ||
-- if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") or -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma | |||
cat = ' ' .. set_error( 'bad_doi' ); | -- id: match ('^10.5555') then -- test doi will never resolve | ||
-- cat = ' ' .. set_error ('bad_doi'); | |||
-- end | |||
local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when doi has the proper basic form | |||
registrant_err_patterns = { -- these patterns are for code ranges that are not supported | |||
'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 | |||
'^[^1-3]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 40000+); accepts: 10000–39999 | |||
'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 | |||
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 | |||
'^%d%d%d%d%d%d+', -- 6 or more digits | |||
'^%d%d?%d?%.%d%d*$', -- less than 4 digits with subcode | |||
'^%d%d?%d?$', -- less than 4 digits without subcode | |||
'^5555$', -- test registrant will never resolve | |||
} | |||
if registrant then -- when doi has proper form | |||
for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns | |||
if registrant:match (pattern) then -- to validate registrant codes | |||
cat = ' ' .. set_error ('bad_doi'); -- when found, mark this doi as bad | |||
break; -- and done | |||
end | |||
end | |||
else | |||
cat = ' ' .. set_error ('bad_doi'); -- invalid directory or malformed | |||
end | end | ||
Line 471: | Line 508: | ||
if isbn_str:match( "^97[89]%d*$" ) == nil then | if isbn_str:match( "^97[89]%d*$" ) == nil then | ||
return false, 'invalid prefix'; -- fail when isbn13 does not begin with 978 or 979 | return false, 'invalid prefix'; -- fail when isbn13 does not begin with 978 or 979 | ||
end | |||
if isbn_str:match ('^9790') then | |||
return false, 'invalid group id'; -- group identifier '0' is reserved to ismn | |||
end | end | ||
return is_valid_isxn_13 (isbn_str), 'checksum'; | return is_valid_isxn_13 (isbn_str), 'checksum'; | ||
Line 732: | Line 772: | ||
--[[--------------------------< O C L C >---------------------------------------------------------------------- | --[[--------------------------< O C L C >---------------------------------------------------------------------- | ||
Validate and format an oclc id. https://www.oclc.org/batchload/controlnumber.en.html | Validate and format an oclc id. https://www.oclc.org/batchload/controlnumber.en.html {{dead link}} | ||
archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html | |||
]] | ]] | ||
Line 835: | Line 876: | ||
local function pmc(id, embargo) | local function pmc(id, embargo) | ||
local test_limit = | local test_limit = 7000000; -- update this value as PMCs approach | ||
local handler = cfg.id_handlers['PMC']; | local handler = cfg.id_handlers['PMC']; | ||
local err_cat = ''; -- presume that PMC is valid | local err_cat = ''; -- presume that PMC is valid | ||
Line 962: | Line 1,003: | ||
A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional | A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional | ||
format described here: http://emis.mi.sanu.ac.rs/ZMATH/zmath/en/help/search/ | |||
temporary format is apparently eight digits. Anything else is an error | |||
]] | ]] | ||
Line 967: | Line 1,012: | ||
local function zbl (id) | local function zbl (id) | ||
local handler = cfg.id_handlers['ZBL']; | local handler = cfg.id_handlers['ZBL']; | ||
local err_cat = ''; | local err_cat = ''; | ||
if id:match('^%d%d%d%d%d%d%d%d$') then -- is this identifier using temporary format? | |||
add_maint_cat ('zbl'); -- yes, add maint cat | |||
elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format? | |||
add_maint_cat (' | err_cat = ' ' .. set_error( 'bad_zbl' ); -- no, set an error message | ||
err_cat = ' ' .. set_error( 'bad_zbl' ); -- set an error message | |||
end | end | ||
Line 1,112: | Line 1,148: | ||
Parameters which have a predefined access level (e.g. arxiv) do not use this | Parameters which have a predefined access level (e.g. arxiv) do not use this | ||
function as they are directly rendered as free without using an additional parameter. | function as they are directly rendered as free without using an additional parameter. | ||
access-level values must match the case used in cfg.keywords_lists['id-access'] (lowercase unless there is some special reason for something else) | |||
]] | ]] | ||
Line 1,118: | Line 1,156: | ||
local id_accesses_list = {}; | local id_accesses_list = {}; | ||
for k, v in pairs( cfg.id_handlers ) do | for k, v in pairs( cfg.id_handlers ) do | ||
local access_param = v.custom_access; | local access_param = v.custom_access; -- name of identifier's access-level parameter | ||
if is_set(access_param) then | if is_set(access_param) then | ||
local access_level = args[access_param]; | local access_level = args[access_param]; -- get the assigned value if there is one | ||
if is_set(access_level) then | if is_set (access_level) then | ||
if not in_array (access_level | if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required | ||
table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } ); | table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } ); | ||
access_level = nil; | access_level = nil; -- invalid so unset | ||
end | end | ||
if not is_set(id_list[k]) then | if not is_set(id_list[k]) then -- identifier access-level must have a matching identifier | ||
table.insert( z.message_tail, { set_error( 'param_access_requires_param', { | table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k:lower()}, true ) } ); -- param name is uppercase in cfg.id_handlers (k); lowercase for error message | ||
end | end | ||
id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword | |||
id_accesses_list[k] = access_level; | |||
end | end | ||
end | end | ||
Line 1,161: | Line 1,195: | ||
end | end | ||
--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------ | |||
]] | |||
return { | return { |