Module:Citation/CS1/Identifiers: Difference between revisions

sync from sandbox;
m (47 revisions imported from templatewiki:Module:Citation/CS1/Identifiers)
(sync from sandbox;)
Line 1: Line 1:
local identifiers = {};


--[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
--[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
Line 11: Line 8:


local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
local wd_int_lang = (mw.site.server:match ('wikidata') and mw.getCurrentFrame():preprocess('{{int:lang}}')) or '';




Line 26: Line 21:
local url_string = options.id;
local url_string = options.id;
local ext_link;
local ext_link;
local this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org
local this_wiki_code = cfg.this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org
local wd_article; -- article title from wikidata
local wd_article; -- article title from wikidata
Line 38: Line 33:
end
end


this_wiki_code = mw.language.getContentLanguage():getCode(); -- get this wikipedia's language code (subdomain)
-- this_wiki_code = mw.language.getContentLanguage():getCode(); -- get this wikipedia's language code (subdomain)
if string.match (mw.site.server, 'wikidata') then
if string.match (mw.site.server, 'wikidata') then
Line 44: Line 39:
end
end
if is_set (options.q) and mw.wikibase then
if is_set (options.q) and mw.wikibase then -- wikibase test here avoids script errors in third party wikis that aren't using mw.wikibase extension
wd_article = mw.wikibase.getEntity (options.q):getSitelink (this_wiki_code .. 'wiki'); -- fetch article title from wd
wd_article = mw.wikibase.getEntity (options.q):getSitelink (this_wiki_code .. 'wiki'); -- fetch article title from wd
if wd_article then
if wd_article then
Line 66: Line 61:


local function internal_link_id(options)
local function internal_link_id(options)
local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9


return table.concat (
return table.concat (
Line 75: Line 71:
{
{
options.prefix,
options.prefix,
options.id,
id, -- translated to western digits
options.suffix or ''
options.suffix or ''
}),
}),
mw.text.nowiki (options.id)
substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent Latn script identifiers from being reversed at rtl language wikis
);
); -- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required?
});
});
end
end
Line 285: Line 281:
1–4 must be digits and must represent a year in the range of 1000 – next year
1–4 must be digits and must represent a year in the range of 1000 – next year
5 must be a letter
5 must be a letter
6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. )
6–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )
7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )
9–18 must be letter, digit, or dot
9–18 must be letter, digit, or dot
19 must be a letter or dot
19 must be a letter or dot
Line 304: Line 299:
err_type = 'length';
err_type = 'length';
else
else
year = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$") --  
year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$") --  
if not year then -- if nil then no pattern match
if not year then -- if nil then no pattern match
err_type = 'value'; -- so value error
err_type = 'value'; -- so value error
Line 395: Line 390:
if is_set(inactive) then
if is_set(inactive) then
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
if is_set(inactive_year) then
local inactive_month, good;
table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year );
 
if is_set (inactive_year) then
if 4 < inactive:len() then -- inactive date has more than just a year (could be anything)
local lang_obj = mw.getContentLanguage(); -- get a language object for this wiki
good, inactive_month = pcall (lang_obj.formatDate, lang_obj, 'F', inactive); -- try to get the month name from the inactive date
if not good then
inactive_month = nil; -- something went wrong so make sure this is unset
end
end
else
inactive_year = nil; -- |doi-broken= has something but it isn't a date
end
if is_set(inactive_year) and is_set (inactive_month) then
table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year .. ' ' .. inactive_month); -- use inactive month in category name
elseif is_set(inactive_year) then
table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year);
else
else
table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year
table.insert( z.error_categories, 'Pages with inactive DOIs'); -- when inactive doesn't contain a recognizable date
end
end
inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"
inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')';
end
end
text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '')
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '')


if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
-- if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") or -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
cat = ' ' .. set_error( 'bad_doi' );
-- id: match ('^10.5555') then -- test doi will never resolve
-- cat = ' ' .. set_error ('bad_doi');
-- end
local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when doi has the proper basic form
registrant_err_patterns = { -- these patterns are for code ranges that are not supported
'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
'^[^1-3]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 40000+); accepts: 10000–39999
'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
'^%d%d%d%d%d%d+', -- 6 or more digits
'^%d%d?%d?%.%d%d*$', -- less than 4 digits with subcode
'^%d%d?%d?$', -- less than 4 digits without subcode
'^5555$', -- test registrant will never resolve
}
if registrant then -- when doi has proper form
for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns
if registrant:match (pattern) then -- to validate registrant codes
cat = ' ' .. set_error ('bad_doi'); -- when found, mark this doi as bad
break; -- and done
end
end
else
cat = ' ' .. set_error ('bad_doi'); -- invalid directory or malformed
end
end


Line 471: Line 508:
if isbn_str:match( "^97[89]%d*$" ) == nil then
if isbn_str:match( "^97[89]%d*$" ) == nil then
return false, 'invalid prefix'; -- fail when isbn13 does not begin with 978 or 979
return false, 'invalid prefix'; -- fail when isbn13 does not begin with 978 or 979
end
if isbn_str:match ('^9790') then
return false, 'invalid group id'; -- group identifier '0' is reserved to ismn
end
end
return is_valid_isxn_13 (isbn_str), 'checksum';
return is_valid_isxn_13 (isbn_str), 'checksum';
Line 732: Line 772:
--[[--------------------------< O C L C >----------------------------------------------------------------------
--[[--------------------------< O C L C >----------------------------------------------------------------------


Validate and format an oclc id.  https://www.oclc.org/batchload/controlnumber.en.html
Validate and format an oclc id.  https://www.oclc.org/batchload/controlnumber.en.html {{dead link}}
archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html


]]
]]
Line 835: Line 876:


local function pmc(id, embargo)
local function pmc(id, embargo)
local test_limit = 6500000; -- update this value as PMCs approach
local test_limit = 7000000; -- update this value as PMCs approach
local handler = cfg.id_handlers['PMC'];
local handler = cfg.id_handlers['PMC'];
local err_cat = ''; -- presume that PMC is valid
local err_cat = ''; -- presume that PMC is valid
Line 962: Line 1,003:


A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional
A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional
format described here: http://emis.mi.sanu.ac.rs/ZMATH/zmath/en/help/search/
temporary format is apparently eight digits.  Anything else is an error


]]
]]
Line 967: Line 1,012:
local function zbl (id)
local function zbl (id)
local handler = cfg.id_handlers['ZBL'];
local handler = cfg.id_handlers['ZBL'];
local id_num;
local err_cat = '';
local err_cat = '';
id_num = id:match ('^[Zz][Bb][Ll](.*)$'); -- identifier with zbl prefix; extract identifier
if id:match('^%d%d%d%d%d%d%d%d$') then -- is this identifier using temporary format?
 
add_maint_cat ('zbl'); -- yes, add maint cat
if is_set (id_num) then
elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format?
add_maint_cat ('zbl_format');
err_cat = ' ' .. set_error( 'bad_zbl' ); -- no, set an error message
else -- plain number without zbl prefix
id_num = id; -- if here id does not have prefix
end
 
if id_num:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then
id = id_num; -- id matches pattern
else
err_cat = ' ' .. set_error( 'bad_zbl' ); -- set an error message
end
end
Line 1,112: Line 1,148:
Parameters which have a predefined access level (e.g. arxiv) do not use this
Parameters which have a predefined access level (e.g. arxiv) do not use this
function as they are directly rendered as free without using an additional parameter.
function as they are directly rendered as free without using an additional parameter.
access-level values must match the case used in cfg.keywords_lists['id-access'] (lowercase unless there is some special reason for something else)


]]
]]
Line 1,118: Line 1,156:
local id_accesses_list = {};
local id_accesses_list = {};
for k, v in pairs( cfg.id_handlers ) do
for k, v in pairs( cfg.id_handlers ) do
local access_param = v.custom_access;
local access_param = v.custom_access; -- name of identifier's access-level parameter
local k_lower = string.lower(k);
if is_set(access_param) then
if is_set(access_param) then
local access_level = args[access_param];
local access_level = args[access_param]; -- get the assigned value if there is one
if is_set(access_level) then
if is_set (access_level) then
if not in_array (access_level:lower(), cfg.keywords['id-access']) then
if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required
table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } );
table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } );
access_level = nil;
access_level = nil; -- invalid so unset
end
end
if not is_set(id_list[k]) then
if not is_set(id_list[k]) then -- identifier access-level must have a matching identifier
table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k_lower}, true ) } );
table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k:lower()}, true ) } ); -- param name is uppercase in cfg.id_handlers (k); lowercase for error message
end
end
if is_set(access_level) then
id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword
access_level = access_level:lower();
end
id_accesses_list[k] = access_level;
end
end
end
end
Line 1,161: Line 1,195:
end
end


--[[--------------------------< E X P O R T E D  F U N C T I O N S >------------------------------------------
]]


return {
return {