Module:Citation/CS1/Identifiers: Difference between revisions
sync from sandbox;
(changed PMC test limit to 8000000, since PMCs greater than 7000000 have now been issued, see https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7003341/) |
(sync from sandbox;) |
||
Line 11: | Line 11: | ||
--============================<< H E L P E R F U N C T I O N S >>============================================ | --============================<< H E L P E R F U N C T I O N S >>============================================ | ||
--[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >---------------------------- | |||
as an aid to internationalizing identifier-label wikilinks, gets identifier article names from wikidata. | |||
returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else | |||
for identifiers that do not have q, returns nil | |||
for wikis that do not have mw.wikibase installed, returns nil | |||
The call to mw.wikibase.getEntity() bumps the expensive parser function count | |||
]] | |||
local function wikidata_article_name_get (q)
	-- Fetches from wikidata the title of this wiki's article about the identifier whose q number is <q>.
	--   q: wikidata q number for the identifier; may be nil or empty
	-- Returns ':<lang code>:<article title>' (interwiki-style, leading colon, no brackets) when wikidata
	-- has a sitelink for this wiki; returns nil when <q> is not set, when mw.wikibase is not installed,
	-- when <q> does not resolve to an entity, or when the entity has no sitelink for this wiki.
	if not is_set (q) or not mw.wikibase then						-- no q number, or mw.wikibase not installed on this wiki
		return nil;													-- abandon
	end

	local entity = mw.wikibase.getEntity (q);						-- bumps expensive parser function count
	if not entity then												-- no entity for this q number; indexing nil would raise a script error
		return nil;
	end

	local this_wiki_code = cfg.this_wiki_code;						-- wikipedia subdomain; 'en' for en.wikipedia.org
	local wd_article = entity:getSitelink (this_wiki_code .. 'wiki');	-- nil when no title available at this wiki
	if wd_article then
		wd_article = table.concat ({':', this_wiki_code, ':', wd_article});	-- leading colon required
	end

	return wd_article;												-- article title from wd; nil else
end
--[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- | --[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- | ||
Line 33: | Line 65: | ||
end | end | ||
if not (cfg.use_identifier_redirects and is_set (options.redirect)) then -- redirect has priority so if enabled and available don't fetch from wikidata because expensive | |||
wd_article = wikidata_article_name_get (options.q); -- if wikidata has an article title for this wiki, get it; | |||
end | end | ||
local label_link = (cfg.use_identifier_redirects and is_set (options.redirect) and options.redirect) or wd_article or options.link; | |||
return table.concat ({ | return table.concat ({ | ||
make_wikilink ( | make_wikilink (label_link, options.label), -- redirect, wikidata link, or locally specified link (in that order) | ||
options.separator or ' ', | options.separator or ' ', | ||
ext_link | ext_link | ||
Line 62: | Line 87: | ||
local function internal_link_id(options) | local function internal_link_id(options) | ||
local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9 | local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9 | ||
if not (cfg.use_identifier_redirects and is_set (options.redirect)) then -- redirect has priority so if enabled and available don't fetch from wikidata because expensive | |||
wd_article = wikidata_article_name_get (options.q); -- if wikidata has an article title for this wiki, get it; | |||
end | |||
local label_link = (cfg.use_identifier_redirects and is_set (options.redirect) and options.redirect) or wd_article or options.link; | |||
return table.concat ( | return table.concat ( | ||
{ | { | ||
make_wikilink ( | make_wikilink (label_link, options.label), -- wiki link the identifier label | ||
options.separator or ' ', | options.separator or ' ', -- add the separator | ||
make_wikilink ( | make_wikilink ( | ||
table.concat ( | table.concat ( | ||
Line 105: | Line 136: | ||
end | end | ||
return ''; -- |embargo= not set return empty string | return ''; -- |embargo= not set return empty string | ||
end | |||
--[=[-------------------------< I S _ V A L I D _ B I O R X I V _ D A T E >------------------------------------ | |||
returns true if: | |||
2019-12-11T00:00Z <= biorxiv_date < today + 2 days | |||
The dated form of biorxiv identifier has a start date of 2019-12-11. The unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400 | |||
biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC | |||
today is the current date at time 00:00:00 UTC plus 48 hours | |||
if today is 2015-01-01T00:00:00 then | |||
adding 24 hours gives 2015-01-02T00:00:00 – one second more than today | |||
adding 24 hours gives 2015-01-03T00:00:00 – one second more than tomorrow | |||
This function does not work if it is fed month names for languages other than English. Wikimedia #time: parser | |||
apparently doesn't understand non-English date month names. This function will always return false when the date | |||
contains a non-English month name because good1 is false after the call to lang.formatDate(). To get around that | |||
call this function with YYYY-MM-DD format dates. | |||
]=] | |||
local function is_valid_biorxiv_date (biorxiv_date)
	-- Returns true when 2019-12-11T00:00Z <= biorxiv_date < today + 2 days; false otherwise.
	--   biorxiv_date: date string handed to lang_object:formatDate() for conversion to a unix timestamp
	-- Returns false (never raises) when either date cannot be converted to a numeric timestamp.
	local first_dated_ts = 1576022400;								-- unix timestamp for 2019-12-11T00:00Z
	local lang_object = mw.getContentLanguage();

	local good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date);			-- convert biorxiv_date value to unix timestamp
	local good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days');	-- today midnight + 2 days is one second more than all day tomorrow

	if not (good1 and good2) then
		return false;												-- one or both failed to convert to unix time stamp
	end

	-- formatDate() returns a timestamp in the local script which tonumber() may not understand
	biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts);
	tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);

	if not (biorxiv_ts and tomorrow_ts) then
		return false;												-- neither conversion produced a number; comparing nil would raise an error
	end

	return ((first_dated_ts <= biorxiv_ts) and (biorxiv_ts < tomorrow_ts));	-- 2019-12-11T00:00Z <= biorxiv_date < tomorrow's date
end
Line 120: | Line 190: | ||
local function is_valid_isxn (isxn_str, len)
	-- Checksum test shared by the callers in this file (isbn-10 and sbn).
	--   isxn_str: string of digits, optionally with 'X' as a character; spaces and hyphens already removed by the caller
	--   len: number of characters of isxn_str to include in the calculation
	-- Each character is weighted by its position counted from the right (leftmost weight is len, rightmost is 1);
	-- 'X' stands for the value 10. Returns true when the weighted sum is an exact multiple of 11.
	local byte_X = string.byte ('X');								-- 0x58
	local bytes = { isxn_str:byte (1, len) };						-- byte values: '0' → 0x30 .. '9' → 0x39, 'X' → 0x58
	local weight_base = len + 1;									-- so that position 1 gets weight len
	local sum = 0;

	for pos = 1, #bytes do
		local value;
		if byte_X == bytes[pos] then
			value = 10;												-- X represents 10 decimal
		else
			value = tonumber (string.char (bytes[pos]));
		end
		sum = sum + value * (weight_base - pos);
	end

	return 0 == sum % 11;
end
Line 254: | Line 324: | ||
err_cat = err_cat and table.concat ({' ', set_error ('bad_arxiv')}) or ''; -- set error message if flag is true | err_cat = err_cat and table.concat ({' ', set_error ('bad_arxiv')}) or ''; -- set error message if flag is true | ||
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | ||
Line 292: | Line 362: | ||
local year; | local year; | ||
local text = external_link_id({link=handler.link, label=handler.label, q = handler.q, | local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | ||
access=access}); | access=access}); | ||
if 19 ~= id:len() then | if 19 ~= id:len() then | ||
err_type = | err_type = cfg.err_msg_supl.length; | ||
else | else | ||
year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$") -- | year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$") -- | ||
if not year then -- if nil then no pattern match | if not year then -- if nil then no pattern match | ||
err_type = | err_type = cfg.err_msg_supl.value; -- so value error | ||
else | else | ||
local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year | local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year | ||
year = tonumber (year); -- convert year portion of bibcode to a number | year = tonumber (year); -- convert year portion of bibcode to a number | ||
if (1000 > year) or (year > next_year) then | if (1000 > year) or (year > next_year) then | ||
err_type = | err_type = cfg.err_msg_supl.year; -- year out of bounds | ||
end | end | ||
if id:find('&%.') then | if id:find('&%.') then | ||
err_type = | err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does its missing a letter) | ||
end | end | ||
end | end | ||
Line 323: | Line 393: | ||
--[[--------------------------< B I O R X I V >----------------------------------------------------------------- | --[[--------------------------< B I O R X I V >----------------------------------------------------------------- | ||
Format bioRxiv id and do simple error checking. | Format bioRxiv id and do simple error checking. Before 2019-12-11, biorXiv ids were 10.1101/ followed by exactly | ||
The bioRxiv id is the | 6 digits. After 2019-12-11, biorXiv ids retained the six-digit identifier but prefixed that with a yyyy.mm.dd. | ||
https://doi.org/10.1101/078733 -> 078733 | date and suffixed with an optional version identifier. | ||
The bioRxiv id is the string of characters: | |||
https://doi.org/10.1101/078733 -> 10.1101/078733 | |||
or a date followed by a six-digit number followed by an optional version indicator 'v' and one or more digits: | |||
https://www.biorxiv.org/content/10.1101/2019.12.11.123456v2 -> 10.1101/2019.12.11.123456v2 | |||
see https://www.biorxiv.org/about-biorxiv | |||
]] | ]] | ||
Line 331: | Line 408: | ||
local function biorxiv(id) | local function biorxiv(id) | ||
local handler = cfg.id_handlers['BIORXIV']; | local handler = cfg.id_handlers['BIORXIV']; | ||
local err_cat = ''; | local err_cat = ' ' .. set_error( 'bad_biorxiv'); -- assume that there will be an error | ||
local patterns = { | |||
'^10.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11) | |||
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11) | |||
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11) | |||
} | |||
return external_link_id({link = handler.link, label = handler.label, q = handler.q, | for _, pattern in ipairs (patterns) do -- spin through the patterns looking for a match | ||
if id:match (pattern) then | |||
local y, m, d = id:match (pattern); -- found a match, attempt to get year, month and date from the identifier | |||
if m then -- m is nil when id is the six-digit form | |||
if not is_valid_biorxiv_date (y .. '-' .. m .. '-' .. d) then -- validate the encoded date; TODO: don't ignore leapyear and actual month lengths ({{#time:}} is a poor date validator) | |||
break; -- break out early so we don't unset the error message | |||
end | |||
end | |||
err_cat = ''; -- we found a match so unset the error message | |||
break; -- and done | |||
end | |||
end -- err_cat remains set here when no match | |||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | |||
prefix=handler.prefix,id=id,separator=handler.separator, | prefix=handler.prefix,id=id,separator=handler.separator, | ||
encode=handler.encode, access=handler.access}) .. err_cat; | encode=handler.encode, access=handler.access}) .. err_cat; | ||
Line 354: | Line 447: | ||
local matched; | local matched; | ||
local text = external_link_id({link=handler.link, label=handler.label, q = handler.q, | local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | ||
access=handler.access}); | access=handler.access}); | ||
Line 414: | Line 507: | ||
end | end | ||
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '') | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '') | ||
Line 427: | Line 520: | ||
'^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate) | '^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate) | ||
'^5555$', -- test registrant will never resolve | '^5555$', -- test registrant will never resolve | ||
'%s', -- any space character in registrant | |||
} | } | ||
Line 458: | Line 552: | ||
terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely | terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely | ||
if ever used in HDLs. | if ever used in HDLs. | ||
Query string parameters are named here: http://www.handle.net/proxy_servlet.html. query strings are not displayed | |||
but since '?' is an allowed character in an hdl, '?' followed by one of the query parameters is the only way we | |||
have to detect the query string so that it isn't url encoded with the rest of the identifier. | |||
]] | ]] | ||
Line 463: | Line 561: | ||
local function hdl(id, access) | local function hdl(id, access) | ||
local handler = cfg.id_handlers['HDL']; | local handler = cfg.id_handlers['HDL']; | ||
local query_params = { -- list of known query parameters from http://www.handle.net/proxy_servlet.html | |||
'noredirect', | |||
'ignore_aliases', | |||
'auth', | |||
'cert', | |||
'index', | |||
'type', | |||
'urlappend', | |||
'locatt', | |||
'action', | |||
} | |||
local text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | local hdl, suffix, param = id:match ('(.-)(%?(%a+).+)$'); -- look for query string | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) | local found; | ||
if hdl then -- when there are query strings, this is the handle identifier portion | |||
for _, q in ipairs (query_params) do -- spin through the list of query parameters | |||
if param:match ('^' .. q) then -- if the query string begins with one of the parameters | |||
found = true; -- announce a find | |||
break; -- and stop looking | |||
end | |||
end | |||
end | |||
if found then | |||
id = hdl; -- found so replace id with the handle portion; this will be url encoded, suffix will not | |||
else | |||
suffix = ''; -- make sure suffix is empty string for concatenation else | |||
end | |||
local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | |||
prefix=handler.prefix, id=id, suffix=suffix, separator=handler.separator, encode=handler.encode, access=access}) | |||
if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma | if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma | ||
Line 482: | Line 609: | ||
local function isbn( isbn_str ) | local function isbn( isbn_str ) | ||
if nil ~= isbn_str:match("[^%s-0-9X]") then | if nil ~= isbn_str:match("[^%s-0-9X]") then | ||
return false, | return false, cfg.err_msg_supl.char; -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X | ||
end | end | ||
isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces | isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces | ||
Line 488: | Line 615: | ||
if len ~= 10 and len ~= 13 then | if len ~= 10 and len ~= 13 then | ||
return false, | return false, cfg.err_msg_supl.length; -- fail if incorrect length | ||
end | end | ||
if len == 10 then | if len == 10 then | ||
if isbn_str:match( "^%d*X?$" ) == nil then -- fail if isbn_str has 'X' anywhere but last position | if isbn_str:match( "^%d*X?$" ) == nil then -- fail if isbn_str has 'X' anywhere but last position | ||
return false, | return false, cfg.err_msg_supl.form; | ||
end | end | ||
return is_valid_isxn(isbn_str, 10), | return is_valid_isxn(isbn_str, 10), cfg.err_msg_supl.check; | ||
else | else | ||
if isbn_str:match( "^%d+$" ) == nil then | if isbn_str:match( "^%d+$" ) == nil then | ||
return false, | return false, cfg.err_msg_supl.char; -- fail if isbn13 is not all digits | ||
end | end | ||
if isbn_str:match( "^97[89]%d*$" ) == nil then | if isbn_str:match( "^97[89]%d*$" ) == nil then | ||
return false, | return false, cfg.err_msg_supl.prefix; -- fail when isbn13 does not begin with 978 or 979 | ||
end | end | ||
if isbn_str:match ('^9790') then | if isbn_str:match ('^9790') then | ||
return false, | return false, cfg.err_msg_supl.group; -- group identifier '0' is reserved to ismn | ||
end | end | ||
return is_valid_isxn_13 (isbn_str), | return is_valid_isxn_13 (isbn_str), cfg.err_msg_supl.check; | ||
end | end | ||
end | end | ||
Line 546: | Line 673: | ||
end | end | ||
local handler = cfg.id_handlers['ASIN']; | local handler = cfg.id_handlers['ASIN']; | ||
return external_link_id({link=handler.link, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix .. domain .. "/dp/", | |||
id=id, encode=handler.encode, separator = handler.separator}) .. err_cat; | id=id, encode=handler.encode, separator = handler.separator}) .. err_cat; | ||
end | end | ||
Line 575: | Line 702: | ||
end | end | ||
-- text = internal_link_id({link = handler.link, label = handler.label, -- use this (or external version) when there is some place to link to | -- text = internal_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, -- use this (or external version) when there is some place to link to | ||
-- prefix=handler.prefix,id= | -- prefix=handler.prefix, id=id_copy, separator=handler.separator, encode=handler.encode}) | ||
text = table.concat ( | text = table.concat ( | ||
Line 634: | Line 761: | ||
end | end | ||
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | ||
if false == valid_issn then | if false == valid_issn then | ||
text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' ) -- add an error message if the issn is invalid | text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' ) -- add an error message if the issn is invalid | ||
Line 670: | Line 797: | ||
end | end | ||
return external_link_id({link = handler.link, label = handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
end | end | ||
Line 727: | Line 854: | ||
end | end | ||
return external_link_id({link = handler.link, label = handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
end | end | ||
Line 759: | Line 886: | ||
end | end | ||
return external_link_id({link = handler.link, label = handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
end | end | ||
Line 800: | Line 927: | ||
end | end | ||
local text = external_link_id({link=handler.link, label=handler.label, q = handler.q, | local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg; | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg; | ||
Line 821: | Line 948: | ||
if not is_set (ident) then -- if malformed return an error | if not is_set (ident) then -- if malformed return an error | ||
return external_link_id({link=handler.link, label=handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix .. 'OL', | prefix=handler.prefix .. 'OL', | ||
id=id, separator=handler.separator, encode = handler.encode, | id=id, separator=handler.separator, encode = handler.encode, | ||
Line 830: | Line 957: | ||
if ( code == "A" ) then | if ( code == "A" ) then | ||
return external_link_id({link=handler.link, label=handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix .. 'authors/OL', | prefix=handler.prefix .. 'authors/OL', | ||
id=id, separator=handler.separator, encode = handler.encode, | id=id, separator=handler.separator, encode = handler.encode, | ||
Line 837: | Line 964: | ||
if ( code == "M" ) then | if ( code == "M" ) then | ||
return external_link_id({link=handler.link, label=handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix .. 'books/OL', | prefix=handler.prefix .. 'books/OL', | ||
id=id, separator=handler.separator, encode = handler.encode, | id=id, separator=handler.separator, encode = handler.encode, | ||
Line 844: | Line 971: | ||
if ( code == "W" ) then | if ( code == "W" ) then | ||
return external_link_id({link=handler.link, label=handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix .. 'works/OL', | prefix=handler.prefix .. 'works/OL', | ||
id=id, separator=handler.separator, encode = handler.encode, | id=id, separator=handler.separator, encode = handler.encode, | ||
Line 870: | Line 997: | ||
local function pmc(id, embargo) | local function pmc(id, embargo) | ||
local test_limit = | local test_limit = cfg.id_limits.pmc; -- update this value as PMCs approach | ||
local handler = cfg.id_handlers['PMC']; | local handler = cfg.id_handlers['PMC']; | ||
local err_cat = ''; -- presume that PMC is valid | local err_cat = ''; -- presume that PMC is valid | ||
Line 904: | Line 1,031: | ||
}); | }); | ||
else | else | ||
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, -- no embargo date or embargo has expired, ok to link to article | text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, -- no embargo date or embargo has expired, ok to link to article | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | ||
end | end | ||
Line 920: | Line 1,047: | ||
local function pmid(id) | local function pmid(id) | ||
local test_limit = | local test_limit = cfg.id_limits.pmid; -- update this value as PMIDs approach | ||
local handler = cfg.id_handlers['PMID']; | local handler = cfg.id_handlers['PMID']; | ||
local err_cat = ''; -- presume that PMID is valid | local err_cat = ''; -- presume that PMID is valid | ||
Line 933: | Line 1,060: | ||
end | end | ||
return external_link_id({link = handler.link, label = handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
end | |||
--[[--------------------------< S 2 C I D >-------------------------------------------------------------------- | |||
Format an s2cid, do simple error checking | |||
S2CIDs are sequential numbers beginning at 1 and counting up. This code checks the s2cid to see that it is only | |||
digits and is less than test_limit; the value in local variable test_limit will need to be updated periodically | |||
as more S2CIDs are issued. | |||
]] | |||
local function s2cid (id, access)
	-- Formats an S2CID as an external link and does simple error checking.
	--   id: the identifier value as supplied
	--   access: access-level value passed through to external_link_id()
	-- Returns the rendered link text; the 'bad_s2cid' error message is appended when <id> is not all
	-- digits, begins with 0, or is greater than cfg.id_limits.s2cid.
	local handler = cfg.id_handlers['S2CID'];
	local test_limit = cfg.id_limits.s2cid;						-- update this value as S2CIDs approach
	local err_cat = '';											-- presume that S2CID is valid

	-- id must be all digits and must not begin with 0; %d* (not %d+) so that single-digit ids are accepted
	local id_num = id:match ('^[1-9]%d*$');

	if is_set (id_num) then										-- format is good so range test it
		if test_limit < tonumber (id_num) then					-- S2CID is outside test limit boundaries
			err_cat = ' ' .. set_error ('bad_s2cid');
		end
	else														-- when id format incorrect
		err_cat = ' ' .. set_error ('bad_s2cid');
	end

	local link_id = id:gsub ('%.%a%a', '');						-- remove any '.' + two letters from the link value; keep gsub's first return only

	return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
		prefix=handler.prefix, id=link_id, separator=handler.separator, encode=handler.encode, access=access}) .. err_cat;
end
--[[--------------------------< S B N >------------------------------------------------------------------------ | |||
9-digit form of isbn10; uses same check-digit validation when sbn is prefixed with an additional '0' to make 10 digits | |||
]] | |||
local function sbn (id)
	-- Validates a 9-digit SBN.
	--   id: the sbn as supplied; may contain spaces and hyphens
	-- Returns two values: a boolean validation result and one of the cfg.err_msg_supl strings
	-- (char, length, form, check) naming the test that decided the result.
	local supl = cfg.err_msg_supl;

	if id:find ("[^%s-0-9X]") then
		return false, supl.char;									-- something other than digits, spaces, hyphens, or the uppercase X
	end

	local stripped = id:gsub ("[%s-]", "");							-- strip spaces and hyphens; keep gsub's first return only

	if stripped:len() ~= 9 then
		return false, supl.length;									-- incorrect length
	end

	if not stripped:match ("^%d*X?$") then
		return false, supl.form;									-- 'X' anywhere but last position
	end

	return is_valid_isxn ('0' .. stripped, 10), supl.check;			-- prefix with '0' and validate as isbn10
end
Line 949: | Line 1,140: | ||
local function ssrn (id) | local function ssrn (id) | ||
local test_limit = | local test_limit = cfg.id_limits.ssrn; -- update this value as SSRNs approach | ||
local handler = cfg.id_handlers['SSRN']; | local handler = cfg.id_handlers['SSRN']; | ||
local err_cat = ''; -- presume that SSRN is valid | local err_cat = ''; -- presume that SSRN is valid | ||
Line 966: | Line 1,157: | ||
end | end | ||
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | ||
Line 983: | Line 1,174: | ||
local handler = cfg.id_handlers['USENETID']; | local handler = cfg.id_handlers['USENETID']; | ||
local text = external_link_id({link = handler.link, label = handler.label, q = handler.q, | local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | ||
Line 1,014: | Line 1,205: | ||
end | end | ||
return external_link_id({link = handler.link, label = handler.label, q = handler.q, | return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
end | end | ||
Line 1,095: | Line 1,286: | ||
elseif k == 'PMID' then | elseif k == 'PMID' then | ||
table.insert( new_list, {handler.label, pmid( v ) } ); | table.insert( new_list, {handler.label, pmid( v ) } ); | ||
elseif k == 'S2CID' then | |||
table.insert( new_list, {handler.label, s2cid( v, handler.access ) } ); | |||
elseif k == 'SBN' then | |||
local SBN = internal_link_id (handler); | |||
local check; -- boolean validation result | |||
local err_type = ''; | |||
check, err_type = sbn (v); | |||
if not check then | |||
SBN = SBN .. set_error( 'bad_sbn', {err_type}, false, " ", "" ); -- display an error message | |||
end | |||
table.insert( new_list, {handler.label, SBN } ); | |||
elseif k == 'SSRN' then | elseif k == 'SSRN' then | ||
table.insert( new_list, {handler.label, ssrn( v ) } ); | table.insert( new_list, {handler.label, ssrn( v ) } ); |