Module:Citation/CS1/Identifiers: Difference between revisions
sync from sandbox;
(trying to fix error with {{cite}} templates) |
(sync from sandbox;) |
||
Line 1: | Line 1: | ||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
Line 12: | Line 9: | ||
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | ||
--============================<< H E L P E R F U N C T I O N S >>============================================ | |||
--[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >---------------------------- | |||
as an aid to internationalizing identifier-label wikilinks, gets identifier article names from wikidata. | |||
returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else | |||
for identifiers that do not have q, returns nil | |||
for wikis that do not have mw.wikibase installed, returns nil | |||
]] | |||
local function wikidata_article_name_get (q) | |||
if not is_set (q) or (q and not mw.wikibase) then -- when no q number or when a q number but mw.wikibase not installed on this wiki | |||
return nil; -- abandon | |||
end | |||
local wd_article; | |||
local this_wiki_code = cfg.this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org | |||
wd_article = mw.wikibase.getSitelink (q, this_wiki_code .. 'wiki'); -- fetch article title from wd; nil when no title available at this wiki | |||
if wd_article then | |||
wd_article = table.concat ({':', this_wiki_code, ':', wd_article}); -- interwiki-style link without brackets if taken from wd; leading colon required | |||
end | |||
return wd_article; -- article title from wd; nil else | |||
end | |||
--[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- | --[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- | ||
Line 26: | Line 51: | ||
local url_string = options.id; | local url_string = options.id; | ||
local ext_link; | local ext_link; | ||
local this_wiki_code; | local this_wiki_code = cfg.this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org | ||
local wd_article; -- article title from wikidata | |||
if options.encode == true or options.encode == nil then | if options.encode == true or options.encode == nil then | ||
Line 37: | Line 63: | ||
end | end | ||
if not (cfg.use_identifier_redirects and is_set (options.redirect)) then -- redirect has priority so if enabled and available don't fetch from wikidata because expensive | |||
wd_article = wikidata_article_name_get (options.q); -- if wikidata has an article title for this wiki, get it; | |||
end | |||
local label_link = (cfg.use_identifier_redirects and is_set (options.redirect) and options.redirect) or wd_article or options.link; | |||
return table.concat ({ | return table.concat ({ | ||
make_wikilink ( | make_wikilink (label_link, options.label), -- redirect, wikidata link, or locally specified link (in that order) | ||
options.separator or ' ', | options.separator or ' ', | ||
ext_link | ext_link | ||
Line 58: | Line 84: | ||
local function internal_link_id(options) | local function internal_link_id(options) | ||
local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9 | |||
if not (cfg.use_identifier_redirects and is_set (options.redirect)) then -- redirect has priority so if enabled and available don't fetch from wikidata because expensive | |||
wd_article = wikidata_article_name_get (options.q); -- if wikidata has an article title for this wiki, get it; | |||
end | |||
local label_link = (cfg.use_identifier_redirects and is_set (options.redirect) and options.redirect) or wd_article or options.link; | |||
return table.concat ( | return table.concat ( | ||
{ | { | ||
make_wikilink ( | make_wikilink (label_link, options.label), -- wiki link the identifier label | ||
options.separator or ' ', | options.separator or ' ', -- add the separator | ||
make_wikilink ( | make_wikilink ( | ||
table.concat ( | table.concat ( | ||
{ | { | ||
options.prefix, | options.prefix, | ||
id, -- translated to western digits | |||
options.suffix or '' | options.suffix or '' | ||
}), | }), | ||
mw.text.nowiki (options.id) | substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent Latn script identifiers from being reversed at rtl language wikis | ||
); | ); -- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required? | ||
}); | }); | ||
end | end | ||
Line 101: | Line 134: | ||
end | end | ||
return ''; -- |embargo= not set return empty string | return ''; -- |embargo= not set return empty string | ||
end | |||
--[=[-------------------------< I S _ V A L I D _ B I O R X I V _ D A T E >------------------------------------ | |||
returns true if: | |||
2019-12-11T00:00Z <= biorxiv_date < today + 2 days | |||
The dated form of biorxiv identifier has a start date of 2019-12-11. The unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400 | |||
biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC | |||
today is the current date at time 00:00:00 UTC plus 48 hours | |||
if today is 2015-01-01T00:00:00 then | |||
adding 24 hours gives 2015-01-02T00:00:00 – one second more than today | |||
adding another 24 hours gives 2015-01-03T00:00:00 – one second more than tomorrow | |||
This function does not work if it is fed month names for languages other than English. Wikimedia #time: parser | |||
apparently doesn't understand non-English date month names. This function will always return false when the date | |||
contains a non-English month name because good1 is false after the call to lang.formatDate(). To get around that | |||
call this function with YYYY-MM-DD format dates. | |||
]=] | |||
local function is_valid_biorxiv_date (biorxiv_date) | |||
local good1, good2; | |||
local biorxiv_ts, tomorrow_ts; -- to hold unix time stamps representing the dates | |||
local lang_object = mw.getContentLanguage(); | |||
good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date ); -- convert biorxiv_date value to unix timestamp | |||
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow | |||
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand | |||
biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts); -- convert to numbers for the comparison; | |||
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); | |||
else | |||
return false; -- one or both failed to convert to unix time stamp | |||
end | |||
return ((1576022400 <= biorxiv_ts) and (biorxiv_ts < tomorrow_ts)) -- 2019-12-11T00:00Z <= biorxiv_date < tomorrow's date | |||
end | end | ||
Line 116: | Line 188: | ||
local function is_valid_isxn (isxn_str, len) | local function is_valid_isxn (isxn_str, len) | ||
local temp = 0; | local temp = 0; | ||
isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58 | isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58 | ||
len = len+1; | len = len+1; -- adjust to be a loop counter | ||
for i, v in ipairs( isxn_str ) do | for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum | ||
if v == string.byte( "X" ) then | if v == string.byte( "X" ) then -- if checkdigit is X (compares the byte value of 'X' which is 0x58) | ||
temp = temp + 10*( len - i ); -- it represents 10 decimal | temp = temp + 10*( len - i ); -- it represents 10 decimal | ||
else | else | ||
temp = temp + tonumber( string.char(v) )*(len-i); | temp = temp + tonumber( string.char(v) )*(len-i); | ||
end | end | ||
end | end | ||
return temp % 11 == 0; | return temp % 11 == 0; -- returns true if calculation result is zero | ||
end | end | ||
Line 250: | Line 322: | ||
err_cat = err_cat and table.concat ({' ', set_error ('bad_arxiv')}) or ''; -- set error message if flag is true | err_cat = err_cat and table.concat ({' ', set_error ('bad_arxiv')}) or ''; -- set error message if flag is true | ||
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | ||
Line 277: | Line 349: | ||
1–4 must be digits and must represent a year in the range of 1000 – next year | 1–4 must be digits and must represent a year in the range of 1000 – next year | ||
5 must be a letter | 5 must be a letter | ||
6–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. ) | |||
9–18 must be letter, digit, or dot | 9–18 must be letter, digit, or dot | ||
19 must be a letter or dot | 19 must be a letter or dot | ||
Line 289: | Line 360: | ||
local year; | local year; | ||
local text = external_link_id({link=handler.link, label=handler.label, q = handler.q, | local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | ||
access=access}); | access=access}); | ||
if 19 ~= id:len() then | if 19 ~= id:len() then | ||
err_type = | err_type = cfg.err_msg_supl.length; | ||
else | else | ||
year = id:match ("^(%d%d%d%d)[%a][% | year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$") -- | ||
if not year then -- if nil then no pattern match | if not year then -- if nil then no pattern match | ||
err_type = | err_type = cfg.err_msg_supl.value; -- so value error | ||
else | else | ||
local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year | local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year | ||
year = tonumber (year); -- convert year portion of bibcode to a number | year = tonumber (year); -- convert year portion of bibcode to a number | ||
if (1000 > year) or (year > next_year) then | if (1000 > year) or (year > next_year) then | ||
err_type = | err_type = cfg.err_msg_supl.year; -- year out of bounds | ||
end | end | ||
if id:find('&%.') then | if id:find('&%.') then | ||
err_type = | err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter) | ||
end | end | ||
end | end | ||
Line 320: | Line 391: | ||
--[[--------------------------< B I O R X I V >----------------------------------------------------------------- | --[[--------------------------< B I O R X I V >----------------------------------------------------------------- | ||
Format bioRxiv id and do simple error checking. | Format bioRxiv id and do simple error checking. Before 2019-12-11, bioRxiv ids were 10.1101/ followed by exactly | ||
The bioRxiv id is the | 6 digits. After 2019-12-11, bioRxiv ids retained the six-digit identifier but prefixed that with a yyyy.mm.dd. | ||
https://doi.org/10.1101/078733 -> 078733 | date and suffixed with an optional version identifier. | ||
The bioRxiv id is the string of characters: | |||
https://doi.org/10.1101/078733 -> 10.1101/078733 | |||
or a date followed by a six-digit number followed by an optional version indicator 'v' and one or more digits: | |||
https://www.biorxiv.org/content/10.1101/2019.12.11.123456v2 -> 10.1101/2019.12.11.123456v2 | |||
see https://www.biorxiv.org/about-biorxiv | |||
]] | ]] | ||
Line 328: | Line 406: | ||
local function biorxiv(id) | local function biorxiv(id) | ||
local handler = cfg.id_handlers['BIORXIV']; | local handler = cfg.id_handlers['BIORXIV']; | ||
local err_cat = | local err_cat = true; -- flag; assume that there will be an error | ||
local patterns = { | |||
'^10.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11) | |||
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11) | |||
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11) | |||
} | |||
return external_link_id({link = handler.link, label = handler.label, q = handler.q, | for _, pattern in ipairs (patterns) do -- spin through the patterns looking for a match | ||
if id:match (pattern) then | |||
local y, m, d = id:match (pattern); -- found a match, attempt to get year, month and date from the identifier | |||
if m then -- m is nil when id is the six-digit form | |||
if not is_valid_biorxiv_date (y .. '-' .. m .. '-' .. d) then -- validate the encoded date; TODO: don't ignore leapyear and actual month lengths ({{#time:}} is a poor date validator) | |||
break; -- date fail; break out early so we don't unset the error message | |||
end | |||
end | |||
err_cat = nil; -- we found a match so unset the error message | |||
break; -- and done | |||
end | |||
end -- err_cat remains set here when no match | |||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | |||
prefix=handler.prefix,id=id,separator=handler.separator, | prefix=handler.prefix,id=id,separator=handler.separator, | ||
encode=handler.encode, access=handler.access}) .. err_cat; | encode=handler.encode, access=handler.access}) .. (err_cat and (' ' .. set_error( 'bad_biorxiv')) or ''); | ||
end | end | ||
Line 351: | Line 445: | ||
local matched; | local matched; | ||
local text = external_link_id({link=handler.link, label=handler.label, q = handler.q, | local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | ||
access=handler.access}); | access=handler.access}); | ||
Line 387: | Line 481: | ||
if is_set(inactive) then | if is_set(inactive) then | ||
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date | local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date | ||
if is_set(inactive_year) then | local inactive_month, good; | ||
table.insert( z.error_categories, | |||
if is_set (inactive_year) then | |||
if 4 < inactive:len() then -- inactive date has more than just a year (could be anything) | |||
local lang_obj = mw.getContentLanguage(); -- get a language object for this wiki | |||
good, inactive_month = pcall (lang_obj.formatDate, lang_obj, 'F', inactive); -- try to get the month name from the inactive date | |||
if not good then | |||
inactive_month = nil; -- something went wrong so make sure this is unset | |||
end | |||
end | |||
else | |||
inactive_year = nil; -- |doi-broken= has something but it isn't a date | |||
end | |||
if is_set(inactive_year) and is_set (inactive_month) then | |||
table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year .. ' ' .. inactive_month); -- use inactive month in category name | |||
elseif is_set(inactive_year) then | |||
table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year); | |||
else | else | ||
table.insert( z.error_categories, | table.insert( z.error_categories, 'Pages with inactive DOIs'); -- when inactive doesn't contain a recognizable date | ||
end | end | ||
inactive = " (" .. cfg.messages['inactive'] .. | inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')'; | ||
end | end | ||
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | |||
text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | |||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '') | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '') | ||
local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when doi has the proper basic form | |||
cat = ' ' .. set_error( 'bad_doi' ); | |||
registrant_err_patterns = { -- these patterns are for code ranges that are not supported | |||
'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 | |||
'^[^1-4]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 50000+); accepts: 10000–49999 | |||
'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 | |||
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 | |||
'^%d%d%d%d%d%d+', -- 6 or more digits | |||
'^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate) | |||
'^5555$', -- test registrant will never resolve | |||
'%s', -- any space character in registrant | |||
} | |||
if registrant then -- when doi has proper form | |||
for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns | |||
if registrant:match (pattern) then -- to validate registrant codes | |||
cat = ' ' .. set_error ('bad_doi'); -- when found, mark this doi as bad | |||
break; -- and done | |||
end | |||
end | |||
else | |||
cat = ' ' .. set_error ('bad_doi'); -- invalid directory or malformed | |||
end | end | ||
Line 419: | Line 550: | ||
terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely | terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely | ||
if ever used in HDLs. | if ever used in HDLs. | ||
Query string parameters are named here: http://www.handle.net/proxy_servlet.html. query strings are not displayed | |||
but since '?' is an allowed character in an hdl, '?' followed by one of the query parameters is the only way we | |||
have to detect the query string so that it isn't url encoded with the rest of the identifier. | |||
]] | ]] | ||
Line 424: | Line 559: | ||
local function hdl(id, access) | local function hdl(id, access) | ||
local handler = cfg.id_handlers['HDL']; | local handler = cfg.id_handlers['HDL']; | ||
local query_params = { -- list of known query parameters from http://www.handle.net/proxy_servlet.html | |||
'noredirect', | |||
'ignore_aliases', | |||
'auth', | |||
'cert', | |||
'index', | |||
'type', | |||
'urlappend', | |||
'locatt', | |||
'action', | |||
} | |||
local text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | local hdl, suffix, param = id:match ('(.-)(%?(%a+).+)$'); -- look for query string | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) | local found; | ||
if hdl then -- when there are query strings, this is the handle identifier portion | |||
for _, q in ipairs (query_params) do -- spin through the list of query parameters | |||
if param:match ('^' .. q) then -- if the query string begins with one of the parameters | |||
found = true; -- announce a find | |||
break; -- and stop looking | |||
end | |||
end | |||
end | |||
if found then | |||
id = hdl; -- found so replace id with the handle portion; this will be url encoded, suffix will not | |||
else | |||
suffix = ''; -- make sure suffix is empty string for concatenation else | |||
end | |||
local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | |||
prefix=handler.prefix, id=id, suffix=suffix, separator=handler.separator, encode=handler.encode, access=access}) | |||
if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma | if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma | ||
Line 443: | Line 607: | ||
local function isbn( isbn_str ) | local function isbn( isbn_str ) | ||
if nil ~= isbn_str:match("[^%s-0-9X]") then | if nil ~= isbn_str:match("[^%s-0-9X]") then | ||
return false, | return false, cfg.err_msg_supl.char; -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X | ||
end | end | ||
isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces | isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces | ||
Line 449: | Line 613: | ||
if len ~= 10 and len ~= 13 then | if len ~= 10 and len ~= 13 then | ||
return false, | return false, cfg.err_msg_supl.length; -- fail if incorrect length | ||
end | end | ||
if len == 10 then | if len == 10 then | ||
if isbn_str:match( "^%d*X?$" ) == nil then -- fail if isbn_str has 'X' anywhere but last position | if isbn_str:match( "^%d*X?$" ) == nil then -- fail if isbn_str has 'X' anywhere but last position | ||
return false, | return false, cfg.err_msg_supl.form; | ||
end | end | ||
return is_valid_isxn(isbn_str, 10), | return is_valid_isxn(isbn_str, 10), cfg.err_msg_supl.check; | ||
else | else | ||
if isbn_str:match( "^%d+$" ) == nil then | if isbn_str:match( "^%d+$" ) == nil then | ||
return false, | return false, cfg.err_msg_supl.char; -- fail if isbn13 is not all digits | ||
end | end | ||
if isbn_str:match( "^97[89]%d*$" ) == nil then | if isbn_str:match( "^97[89]%d*$" ) == nil then | ||
return false, | return false, cfg.err_msg_supl.prefix; -- fail when isbn13 does not begin with 978 or 979 | ||
end | |||
if isbn_str:match ('^9790') then | |||
return false, cfg.err_msg_supl.group; -- group identifier '0' is reserved to ismn | |||
end | end | ||
return is_valid_isxn_13 (isbn_str), | return is_valid_isxn_13 (isbn_str), cfg.err_msg_supl.check; | ||
end | end | ||
end | end | ||
Line 504: | Line 671: | ||
end | end | ||
local handler = cfg.id_handlers['ASIN']; | local handler = cfg.id_handlers['ASIN']; | ||
return external_link_id({link=handler.link, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix .. domain .. "/dp/", | |||
id=id, encode=handler.encode, separator = handler.separator}) .. err_cat; | id=id, encode=handler.encode, separator = handler.separator}) .. err_cat; | ||
end | end | ||
Line 533: | Line 700: | ||
end | end | ||
-- text = internal_link_id({link = handler.link, label = handler.label, -- use this (or external version) when there is some place to link to | -- text = internal_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, -- use this (or external version) when there is some place to link to | ||
-- prefix=handler.prefix,id= | -- prefix=handler.prefix, id=id_copy, separator=handler.separator, encode=handler.encode}) | ||
text = table.concat ( | local label_link = (cfg.use_identifier_redirects and is_set (handler.redirect) and handler.redirect) or wd_article or handler.link; -- because no place to link to yet | ||
text = table.concat ( -- because no place to link to yet | |||
{ | { | ||
make_wikilink ( | make_wikilink (label_link, handler.label), | ||
handler.separator, | handler.separator, | ||
id_copy | id_copy | ||
}); | }); | ||
if false == valid_ismn then | if false == valid_ismn then | ||
Line 592: | Line 761: | ||
end | end | ||
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | ||
if false == valid_issn then | if false == valid_issn then | ||
text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' ) -- add an error message if the issn is invalid | text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' ) -- add an error message if the issn is invalid | ||
Line 628: | Line 797: | ||
end | end | ||
return external_link_id({link = handler.link, label = handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
end | end | ||
Line 685: | Line 854: | ||
end | end | ||
return external_link_id({link = handler.link, label = handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
end | end | ||
Line 717: | Line 886: | ||
end | end | ||
return external_link_id({link = handler.link, label = handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
end | end | ||
Line 724: | Line 893: | ||
--[[--------------------------< O C L C >---------------------------------------------------------------------- | --[[--------------------------< O C L C >---------------------------------------------------------------------- | ||
Validate and format an oclc id. https://www.oclc.org/batchload/controlnumber.en.html | Validate and format an oclc id. https://www.oclc.org/batchload/controlnumber.en.html {{dead link}} | ||
archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html | |||
]] | ]] | ||
Line 757: | Line 927: | ||
end | end | ||
local text = external_link_id({link=handler.link, label=handler.label, q = handler.q, | local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg; | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg; | ||
Line 778: | Line 948: | ||
if not is_set (ident) then -- if malformed return an error | if not is_set (ident) then -- if malformed return an error | ||
return external_link_id({link=handler.link, label=handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix .. 'OL', | prefix=handler.prefix .. 'OL', | ||
id=id, separator=handler.separator, encode = handler.encode, | id=id, separator=handler.separator, encode = handler.encode, | ||
Line 787: | Line 957: | ||
if ( code == "A" ) then | if ( code == "A" ) then | ||
return external_link_id({link=handler.link, label=handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix .. 'authors/OL', | prefix=handler.prefix .. 'authors/OL', | ||
id=id, separator=handler.separator, encode = handler.encode, | id=id, separator=handler.separator, encode = handler.encode, | ||
Line 794: | Line 964: | ||
if ( code == "M" ) then | if ( code == "M" ) then | ||
return external_link_id({link=handler.link, label=handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix .. 'books/OL', | prefix=handler.prefix .. 'books/OL', | ||
id=id, separator=handler.separator, encode = handler.encode, | id=id, separator=handler.separator, encode = handler.encode, | ||
Line 801: | Line 971: | ||
if ( code == "W" ) then | if ( code == "W" ) then | ||
return external_link_id({link=handler.link, label=handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix .. 'works/OL', | prefix=handler.prefix .. 'works/OL', | ||
id=id, separator=handler.separator, encode = handler.encode, | id=id, separator=handler.separator, encode = handler.encode, | ||
Line 827: | Line 997: | ||
local function pmc(id, embargo) | local function pmc(id, embargo) | ||
local handler = cfg.id_handlers['PMC']; | local handler = cfg.id_handlers['PMC']; | ||
local err_cat = ''; -- presume that PMC is valid | local err_cat = ''; -- presume that PMC is valid | ||
Line 843: | Line 1,012: | ||
if is_set (id_num) then -- id_num has a value so test it | if is_set (id_num) then -- id_num has a value so test it | ||
id_num = tonumber(id_num); -- convert id_num to a number for range testing | id_num = tonumber(id_num); -- convert id_num to a number for range testing | ||
if 1 > id_num or | if 1 > id_num or handler.id_limit < id_num then -- if PMC is outside test limit boundaries | ||
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message | err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message | ||
else | else | ||
Line 861: | Line 1,030: | ||
}); | }); | ||
else | else | ||
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, -- no embargo date or embargo has expired, ok to link to article | text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, -- no embargo date or embargo has expired, ok to link to article | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | ||
end | end | ||
Line 877: | Line 1,046: | ||
local function pmid(id) | local function pmid(id) | ||
local handler = cfg.id_handlers['PMID']; | local handler = cfg.id_handlers['PMID']; | ||
local err_cat = ''; -- presume that PMID is valid | local err_cat = ''; -- presume that PMID is valid | ||
Line 885: | Line 1,053: | ||
else -- PMID is only digits | else -- PMID is only digits | ||
local id_num = tonumber(id); -- convert id to a number for range testing | local id_num = tonumber(id); -- convert id to a number for range testing | ||
if 1 > id_num or | if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries | ||
err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message | err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message | ||
end | end | ||
end | end | ||
return external_link_id({link = handler.link, label = handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
end | |||
--[[--------------------------< S 2 C I D >--------------------------------------------------------------------

Format an S2CID (Semantic Scholar corpus identifier) and do simple error checking.

A valid S2CID is all digits, does not begin with 0, and is not greater than the upper limit held in
cfg.id_handlers['S2CID'].id_limit.  That limit lives in the configuration table, not in this function.

<id> is the identifier value as written in the citation; <access> is passed through unchanged to
external_link_id() as the identifier's access level.

Returns the rendered identifier link; when <id> fails either test, the 'bad_s2cid' error message is appended.

]]

local function s2cid (id, access)
	local handler = cfg.id_handlers['S2CID'];
	local digits = id:match ('^[1-9]%d*$');										-- nil unless id is all digits with no leading zero

	-- valid only when the format matched and the numeric value does not exceed the configured limit;
	-- the limit is not consulted when the format test has already failed
	local acceptable = is_set (digits) and not (handler.id_limit < tonumber (digits));

	local err_cat = '';															-- presume that S2CID is valid
	if not acceptable then
		err_cat = ' ' .. set_error( 'bad_s2cid' );								-- malformed or out of range: same error message for both
	end

	local link_id = id:gsub ('%.%a%a', '');										-- keep only gsub's first return value (the substituted string)

	return external_link_id ({
		link = handler.link,
		label = handler.label,
		q = handler.q,
		redirect = handler.redirect,
		prefix = handler.prefix,
		id = link_id,
		separator = handler.separator,
		encode = handler.encode,
		access = access,
		}) .. err_cat;
end
--[[--------------------------< S B N >------------------------------------------------------------------------

9-digit form of isbn10; uses same check-digit validation when sbn is prefixed with an additional '0' to make 10 digits

Returns two values:
	1. boolean: true when <id> is a well-formed sbn with a good check digit; false else
	2. supplementary error-message text taken from cfg.err_msg_supl (char, length, form, or check) that names
		the test that failed.  On the final (check-digit) path the 'check' text is returned even when the sbn
		is valid, so callers must look at the boolean first.

Tests are applied in this order: permitted characters, length after separators are removed, position of 'X',
check digit.

]]

local function sbn (id)
	-- the hyphen is escaped (%-) so that it is unambiguously a literal member of the set; the Lua manual leaves
	-- the interaction between classes (%s) and ranges undefined
	if nil ~= id:match ('[^%s%-0-9X]') then
		return false, cfg.err_msg_supl.char;									-- fail if sbn contains anything but digits, hyphens, or the uppercase X
	end

	id = id:gsub ('[%s%-]', '');												-- strip spaces and hyphens from the sbn (only gsub's first return value is kept)

	if 9 ~= id:len() then
		return false, cfg.err_msg_supl.length;									-- fail if incorrect length
	end

	if id:match ('^%d*X?$') == nil then											-- fail if sbn has 'X' anywhere but last position
		return false, cfg.err_msg_supl.form;
	end

	return is_valid_isxn ('0' .. id, 10), cfg.err_msg_supl.check;				-- prefix sbn with '0' and validate as isbn10
end
Line 906: | Line 1,137: | ||
local function ssrn (id) | local function ssrn (id) | ||
local handler = cfg.id_handlers['SSRN']; | local handler = cfg.id_handlers['SSRN']; | ||
local err_cat = ''; -- presume that SSRN is valid | local err_cat = ''; -- presume that SSRN is valid | ||
Line 916: | Line 1,146: | ||
if is_set (id_num) then -- id_num has a value so test it | if is_set (id_num) then -- id_num has a value so test it | ||
id_num = tonumber(id_num); -- convert id_num to a number for range testing | id_num = tonumber(id_num); -- convert id_num to a number for range testing | ||
if 100 > id_num or | if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries | ||
err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message | err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message | ||
end | end | ||
Line 923: | Line 1,153: | ||
end | end | ||
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | ||
Line 940: | Line 1,170: | ||
local handler = cfg.id_handlers['USENETID']; | local handler = cfg.id_handlers['USENETID']; | ||
local text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | ||
Line 954: | Line 1,184: | ||
A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional | A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional | ||
format described here: http://emis.mi.sanu.ac.rs/ZMATH/zmath/en/help/search/ | |||
temporary format is apparently eight digits. Anything else is an error | |||
]] | ]] | ||
Line 959: | Line 1,193: | ||
local function zbl (id)
	-- Render a Zbl identifier link.  Two formats are recognized:
	--	temporary: exactly eight digits; accepted, but the 'zbl' maintenance category is added
	--	normal: one to four digits, a dot, then five digits
	-- anything else gets the 'bad_zbl' error message appended to the rendered link.
	local handler = cfg.id_handlers['ZBL'];
	local err_cat = '';															-- presume that the identifier is valid

	local is_temporary = id:match ('^%d%d%d%d%d%d%d%d$');
	if is_temporary then
		add_maint_cat ('zbl');													-- temporary format: flag for maintenance, not an error
	else
		local is_normal = id:match ('^%d?%d?%d?%d%.%d%d%d%d%d$');
		if not is_normal then
			err_cat = ' ' .. set_error( 'bad_zbl' );							-- neither format: set an error message
		end
	end

	return external_link_id ({
		link = handler.link,
		label = handler.label,
		q = handler.q,
		redirect = handler.redirect,
		prefix = handler.prefix,
		id = id,
		separator = handler.separator,
		encode = handler.encode,
		}) .. err_cat;
end
Line 1,057: | Line 1,282: | ||
elseif k == 'PMID' then | elseif k == 'PMID' then | ||
table.insert( new_list, {handler.label, pmid( v ) } ); | table.insert( new_list, {handler.label, pmid( v ) } ); | ||
elseif k == 'S2CID' then | |||
table.insert( new_list, {handler.label, s2cid( v, handler.access ) } ); | |||
elseif k == 'SBN' then | |||
local SBN = internal_link_id (handler); | |||
local check; -- boolean validation result | |||
local err_type = ''; | |||
check, err_type = sbn (v); | |||
if not check then | |||
SBN = SBN .. set_error( 'bad_sbn', {err_type}, false, " ", "" ); -- display an error message | |||
end | |||
table.insert( new_list, {handler.label, SBN } ); | |||
elseif k == 'SSRN' then | elseif k == 'SSRN' then | ||
table.insert( new_list, {handler.label, ssrn( v ) } ); | table.insert( new_list, {handler.label, ssrn( v ) } ); | ||
Line 1,104: | Line 1,340: | ||
Parameters which have a predefined access level (e.g. arxiv) do not use this | Parameters which have a predefined access level (e.g. arxiv) do not use this | ||
function as they are directly rendered as free without using an additional parameter. | function as they are directly rendered as free without using an additional parameter. | ||
access-level values must match the case used in cfg.keywords_lists['id-access'] (lowercase unless there is some special reason for something else) | |||
]] | ]] | ||
Line 1,110: | Line 1,348: | ||
local id_accesses_list = {}; | local id_accesses_list = {}; | ||
for k, v in pairs( cfg.id_handlers ) do | for k, v in pairs( cfg.id_handlers ) do | ||
local access_param = v.custom_access; | local access_param = v.custom_access; -- name of identifier's access-level parameter | ||
if is_set(access_param) then | if is_set(access_param) then | ||
local access_level = args[access_param]; | local access_level = args[access_param]; -- get the assigned value if there is one | ||
if is_set(access_level) then | if is_set (access_level) then | ||
if not in_array (access_level | if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required | ||
table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } ); | table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } ); | ||
access_level = nil; | access_level = nil; -- invalid so unset | ||
end | end | ||
if not is_set(id_list[k]) then | if not is_set(id_list[k]) then -- identifer access-level must have a matching identifier | ||
table.insert( z.message_tail, { set_error( 'param_access_requires_param', { | table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k:lower()}, true ) } ); -- param name is uppercase in cfg.id_handlers (k); lowercase for error message | ||
end | end | ||
id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword | |||
id_accesses_list[k] = access_level; | |||
end | end | ||
end | end | ||
Line 1,153: | Line 1,387: | ||
end | end | ||
--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------ | |||
]] | |||
return { | return { |