Editing Module:Citation/CS1/Identifiers
The edit can be undone. Please check the comparison below to verify that this is what you want to do, and then publish the changes below to finish undoing the edit.
Latest revision | Your text | ||
Line 77: | Line 77: | ||
Formats a wiki-style external link | Formats a wiki-style external link | ||
NB. Wikidata P1630 has a formatter URL with $1 placeholder for the ID which could be worked into our prefix/id/suffix | |||
string, either overriding local definitions (auto-update) or as fallback for identifiers without local definitions. | |||
But this is expensive and could be risky if WD gets vandalized. | |||
See Template_talk:Authority_control/Archive_8#Use_Wikidata_as_the_source_for_the_external_link | |||
]] | ]] | ||
Line 87: | Line 93: | ||
if options.encode == true or options.encode == nil then | if options.encode == true or options.encode == nil then | ||
url_string = mw.uri.encode (url_string | url_string = mw.uri.encode( url_string ); | ||
end | end | ||
Line 94: | Line 100: | ||
end | end | ||
ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki (options.id)); | ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id)); | ||
if is_set (options.access) then | if is_set(options.access) then | ||
ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link}); -- add the free-to-read / paywall lock | ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link}); -- add the free-to-read / paywall lock | ||
end | end | ||
Line 110: | Line 116: | ||
Formats a wiki-style internal link | Formats a wiki-style internal link | ||
]] | ]] | ||
local function internal_link_id (options) | local function internal_link_id(options) | ||
local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9 | local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9 | ||
Line 139: | Line 141: | ||
--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------ | --[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------ | ||
Determines if a PMC identifier's online version is embargoed. Compares the date in |pmc-embargo-date= against | Determines if a PMC identifier's online version is embargoed. Compares the date in |pmc-embargo-date= against today's date. If embargo date is | ||
today's date. If embargo date is in the future, returns the content of |pmc-embargo-date=; otherwise, returns | in the future, returns the content of |pmc-embargo-date=; otherwise, returns an empty string because the embargo has expired or because | ||
an empty string because the embargo has expired or because |pmc-embargo-date= was not set in this cite. | |pmc-embargo-date= was not set in this cite. | ||
]] | ]] | ||
Line 190: | Line 192: | ||
local lang_object = mw.getContentLanguage(); | local lang_object = mw.getContentLanguage(); | ||
good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date); -- convert biorxiv_date value to Unix timestamp | good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date ); -- convert biorxiv_date value to Unix timestamp | ||
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow | good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow | ||
Line 218: | Line 220: | ||
isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58 | isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58 | ||
len = len + 1; -- adjust to be a loop counter | len = len + 1; -- adjust to be a loop counter | ||
for i, v in ipairs (isxn_str) do -- loop through all of the bytes and calculate the checksum | for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum | ||
if v == string.byte ("X" ) then -- if checkdigit is X (compares the byte value of 'X' which is 0x58) | if v == string.byte( "X" ) then -- if checkdigit is X (compares the byte value of 'X' which is 0x58) | ||
temp = temp + 10 * (len - i); -- it represents 10 decimal | temp = temp + 10*( len - i ); -- it represents 10 decimal | ||
else | else | ||
temp = temp + tonumber (string.char (v) )*(len-i); | temp = temp + tonumber( string.char(v) )*(len-i); | ||
end | end | ||
end | end | ||
Line 241: | Line 243: | ||
isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 | isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 | ||
for i, v in ipairs (isxn_str) do | for i, v in ipairs( isxn_str ) do | ||
temp = temp + (3 - 2*(i % 2)) * tonumber (string.char (v) ); | temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit | ||
end | end | ||
return temp % 10 == 0; -- sum modulo 10 is zero when ISBN-13/ISMN is correct | return temp % 10 == 0; -- sum modulo 10 is zero when ISBN-13/ISMN is correct | ||
Line 313: | Line 315: | ||
]] | ]] | ||
local function arxiv ( | local function arxiv (id, class) | ||
local handler = cfg.id_handlers['ARXIV']; | |||
local handler = | |||
local year, month, version; | local year, month, version; | ||
local err_cat = false; -- assume no error message | local err_cat = false; -- assume no error message | ||
Line 323: | Line 323: | ||
if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9107-0703 format with or without version | if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9107-0703 format with or without version | ||
year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$"); | year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$"); | ||
year = tonumber (year); | year = tonumber(year); | ||
month = tonumber (month); | month = tonumber(month); | ||
if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month | if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month | ||
((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok? | ((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok? | ||
Line 332: | Line 332: | ||
elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 format with or without version | elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 format with or without version | ||
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$"); | year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$"); | ||
year = tonumber (year); | year = tonumber(year); | ||
month = tonumber (month); | month = tonumber(month); | ||
if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years) | if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years) | ||
((7 == year) and (4 > month)) then -- when year is 07, is month invalid (before April)? | ((7 == year) and (4 > month)) then -- when year is 07, is month invalid (before April)? | ||
Line 341: | Line 341: | ||
elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format with or without version | elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format with or without version | ||
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$"); | year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$"); | ||
year = tonumber (year); | year = tonumber(year); | ||
month = tonumber (month); | month = tonumber(month); | ||
if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years) | if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years) | ||
err_cat = true; -- flag for error message | err_cat = true; -- flag for error message | ||
Line 351: | Line 351: | ||
end | end | ||
err_cat = err_cat and table.concat ({' ', set_message ('err_bad_arxiv')}) or ''; -- set error message if flag is true | err_cat = err_cat and table.concat ({' ', set_message ('err_bad_arxiv')}) or ''; -- set error message if flag is true | ||
Line 390: | Line 386: | ||
]] | ]] | ||
local function bibcode ( | local function bibcode (id, access) | ||
local handler = cfg.id_handlers['BIBCODE']; | |||
local handler = | |||
local err_type; | local err_type; | ||
local year; | local year; | ||
Line 408: | Line 402: | ||
err_type = cfg.err_msg_supl.value; -- so value error | err_type = cfg.err_msg_supl.value; -- so value error | ||
else | else | ||
local next_year = tonumber (os.date ('%Y')) + 1; | local next_year = tonumber(os.date ('%Y')) + 1; -- get the current year as a number and add one for next year | ||
year = tonumber (year); -- convert year portion of bibcode to a number | year = tonumber (year); -- convert year portion of bibcode to a number | ||
if (1000 > year) or (year > next_year) then | if (1000 > year) or (year > next_year) then | ||
Line 420: | Line 414: | ||
if is_set (err_type) then -- if there was an error detected | if is_set (err_type) then -- if there was an error detected | ||
text = text .. ' ' .. set_message ('err_bad_bibcode', {err_type}); | text = text .. ' ' .. set_message( 'err_bad_bibcode', {err_type}); | ||
end | end | ||
return text; | return text; | ||
Line 443: | Line 435: | ||
]] | ]] | ||
local function biorxiv ( | local function biorxiv(id) | ||
local handler = cfg.id_handlers['BIORXIV']; | |||
local handler = | |||
local err_cat = true; -- flag; assume that there will be an error | local err_cat = true; -- flag; assume that there will be an error | ||
Line 468: | Line 459: | ||
end -- err_cat remains set here when no match | end -- err_cat remains set here when no match | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, | prefix = handler.prefix, id = id, separator = handler.separator, | ||
encode = handler.encode, access = handler.access}) .. (err_cat and (' ' .. set_message ('err_bad_biorxiv')) or ''); | encode = handler.encode, access = handler.access}) .. (err_cat and (' ' .. set_message( 'err_bad_biorxiv')) or ''); | ||
end | end | ||
Line 485: | Line 472: | ||
]] | ]] | ||
local function citeseerx ( | local function citeseerx (id) | ||
local handler = cfg.id_handlers['CITESEERX']; | |||
local handler = | |||
local matched; | local matched; | ||
Line 496: | Line 482: | ||
matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); | matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); | ||
if not matched then | if not matched then | ||
text = text .. ' ' .. set_message ('err_bad_citeseerx' ); | text = text .. ' ' .. set_message( 'err_bad_citeseerx' ); | ||
end | end | ||
return text; | return text; | ||
Line 520: | Line 505: | ||
]] | ]] | ||
local function doi ( | local function doi (id, inactive, access, ignore_invalid) | ||
local err_cat; | local err_cat; | ||
local handler = cfg.id_handlers['DOI']; | |||
local text; | local text; | ||
if is_set (inactive) then | if is_set (inactive) then | ||
Line 545: | Line 526: | ||
end | end | ||
if is_set (inactive_year) and is_set (inactive_month) then | if is_set(inactive_year) and is_set (inactive_month) then | ||
set_message ('maint_doi_inactive_dated', {inactive_year, inactive_month, ' '}); | set_message ('maint_doi_inactive_dated', {inactive_year, inactive_month, ' '}); | ||
elseif is_set (inactive_year) then | elseif is_set(inactive_year) then | ||
set_message ('maint_doi_inactive_dated', {inactive_year, '', ''}); | set_message ('maint_doi_inactive_dated', {inactive_year, '', ''}); | ||
else | else | ||
Line 559: | Line 540: | ||
local registrant_err_patterns = { -- these patterns are for code ranges that are not supported | local registrant_err_patterns = { -- these patterns are for code ranges that are not supported | ||
'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 | '^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 | ||
'^[^1- | '^[^1-4]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 50000+); accepts: 10000–49999 | ||
'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 | '^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 | ||
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 | '^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 | ||
Line 565: | Line 546: | ||
'^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate) | '^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate) | ||
'^5555$', -- test registrant will never resolve | '^5555$', -- test registrant will never resolve | ||
' | '%s', -- any space character in registrant | ||
} | } | ||
if not ignore_invalid then | if not ignore_invalid then | ||
if registrant then | if registrant then -- when DOI has proper form | ||
for i, pattern in ipairs (registrant_err_patterns) do | for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns | ||
if registrant:match (pattern) then | if registrant:match (pattern) then -- to validate registrant codes | ||
err_cat = ' ' .. set_message ('err_bad_doi'); | err_cat = ' ' .. set_message ('err_bad_doi'); -- when found, mark this DOI as bad | ||
break; | break; -- and done | ||
end | end | ||
end | end | ||
else | else | ||
err_cat = ' ' .. set_message ('err_bad_doi'); | err_cat = ' ' .. set_message ('err_bad_doi'); -- invalid directory or malformed | ||
end | end | ||
else | else | ||
Line 583: | Line 564: | ||
end | end | ||
text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access, | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access, | ||
Line 617: | Line 594: | ||
]] | ]] | ||
local function hdl ( | local function hdl(id, access) | ||
local handler = cfg.id_handlers['HDL']; | |||
local handler = | |||
local query_params = { -- list of known query parameters from http://www.handle.net/proxy_servlet.html | local query_params = { -- list of known query parameters from http://www.handle.net/proxy_servlet.html | ||
'noredirect', | 'noredirect', | ||
Line 655: | Line 630: | ||
if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- HDL must contain a forward slash, must not contain spaces or endashes, and must not end with a period or comma | if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- HDL must contain a forward slash, must not contain spaces or endashes, and must not end with a period or comma | ||
text = text .. ' ' .. set_message ('err_bad_hdl' ); | text = text .. ' ' .. set_message( 'err_bad_hdl' ); | ||
end | end | ||
return text; | return text; | ||
Line 668: | Line 642: | ||
]] | ]] | ||
local function isbn ( | local function isbn (isbn_str, ignore_invalid) | ||
local handler = cfg.id_handlers['ISBN']; | |||
local handler = | |||
local function return_result (check, err_type) -- local function to handle the various returns | local function return_result (check, err_type) -- local function to handle the various returns | ||
local ISBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect, | local ISBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect, | ||
Line 680: | Line 651: | ||
else -- here when not ignoring | else -- here when not ignoring | ||
if not check then -- and there is an error | if not check then -- and there is an error | ||
return ISBN .. set_message ('err_bad_isbn', {err_type}, false, ' '); -- display an error message | return ISBN .. set_message ('err_bad_isbn', {err_type}, false, ' '); -- display an error message | ||
end | end | ||
Line 703: | Line 673: | ||
return return_result (false, cfg.err_msg_supl.form); | return return_result (false, cfg.err_msg_supl.form); | ||
end | end | ||
return return_result (is_valid_isxn(id, 10), cfg.err_msg_supl.check); | |||
else | else | ||
if id:match ('^%d+$') == nil then | if id:match ('^%d+$') == nil then | ||
Line 732: | Line 696: | ||
Error message if not 10 characters, if not ISBN-10, if mixed and first character is a digit. | Error message if not 10 characters, if not ISBN-10, if mixed and first character is a digit. | ||
|asin=630....... | |asin=630....... is (apparently) not a legitimate ISBN though it checksums as one; these do not cause this | ||
do not cause this function to emit the maint_asin message | function to emit the maint_asin message | ||
This function is positioned here because it calls isbn() | This function is positioned here because it calls isbn() | ||
Line 739: | Line 703: | ||
]] | ]] | ||
local function asin ( | local function asin (id, domain) | ||
local err_cat = "" | |||
local err_cat = | |||
if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then | if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then | ||
Line 749: | Line 710: | ||
else | else | ||
if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) | if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) | ||
if | if isbn (id) then -- see if ASIN value is or validates as ISBN-10 | ||
if not id:find ('^ | if not id:find ('^630') then -- 630xxxxxxx is (apparently) not a valid isbn prefix but is used by amazon as a numeric identifier | ||
set_message ('maint_asin'); -- begins with something other than 630 so possibly an isbn | |||
end | end | ||
elseif not is_set (err_cat) then | elseif not is_set (err_cat) then | ||
Line 760: | Line 721: | ||
end | end | ||
end | end | ||
if | if not is_set(domain) then | ||
domain = "com"; | domain = "com"; | ||
elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom | elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom | ||
domain = "co." .. domain; | domain = "co." .. domain; | ||
elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico | |||
elseif in_array (domain, {'au', 'br', 'mx | |||
domain = "com." .. domain; | domain = "com." .. domain; | ||
end | end | ||
local handler = cfg.id_handlers['ASIN']; | |||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix .. domain .. "/dp/", | prefix = handler.prefix .. domain .. "/dp/", | ||
Line 790: | Line 740: | ||
same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf | same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf | ||
section 2, pages 9–12. | section 2, pages 9–12. | ||
]] | ]] | ||
local function ismn ( | local function ismn (id) | ||
local handler = cfg.id_handlers['ISMN']; | |||
local handler = | |||
local text; | local text; | ||
local valid_ismn = true; | local valid_ismn = true; | ||
Line 806: | Line 752: | ||
id = id:gsub ('[%s-]', ''); -- remove hyphens and white space | id = id:gsub ('[%s-]', ''); -- remove hyphens and white space | ||
if 13 ~= id:len() or id:match ("^9790%d*$" ) == nil then -- ISMN must be 13 digits and begin with 9790 | if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ISMN must be 13 digits and begin with 9790 | ||
valid_ismn = false; | valid_ismn = false; | ||
else | else | ||
Line 823: | Line 769: | ||
if false == valid_ismn then | if false == valid_ismn then | ||
text = text .. ' ' .. set_message( 'err_bad_ismn' ) -- add an error message if the ISMN is invalid | |||
text = text .. ' ' .. set_message ('err_bad_ismn' ) -- add an error message if the ISMN is invalid | |||
end | end | ||
Line 846: | Line 791: | ||
]] | ]] | ||
local function issn ( | local function issn (id, e, ignore_invalid) | ||
local issn_copy = id; -- save a copy of unadulterated ISSN; use this version for display if ISSN does not validate | local issn_copy = id; -- save a copy of unadulterated ISSN; use this version for display if ISSN does not validate | ||
local handler; | |||
local text; | local text; | ||
local valid_issn = true; | local valid_issn = true; | ||
handler = cfg.id_handlers[e and 'EISSN' or 'ISSN']; | |||
id = id:gsub ('[%s-]', ''); -- remove hyphens and whitespace | id = id:gsub ('[%s-]', ''); -- remove hyphens and whitespace | ||
if 8 ~= id:len() or nil == id:match ("^%d*X?$" ) then -- validate the ISSN: 8 digits long, containing only 0-9 or X in the last position | if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the ISSN: 8 digits long, containing only 0-9 or X in the last position | ||
valid_issn = false; -- wrong length or improper character | valid_issn = false; -- wrong length or improper character | ||
else | else | ||
valid_issn = is_valid_isxn (id, 8); -- validate ISSN | valid_issn = is_valid_isxn(id, 8); -- validate ISSN | ||
end | end | ||
if true == valid_issn then | if true == valid_issn then | ||
id = string.sub (id, 1, 4 ) .. "-" .. string.sub (id, 5 ); -- if valid, display correctly formatted version | id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version | ||
else | else | ||
id = issn_copy; -- if not valid, show the invalid ISSN with error message | id = issn_copy; -- if not valid, show the invalid ISSN with error message | ||
Line 876: | Line 820: | ||
else | else | ||
if false == valid_issn then | if false == valid_issn then | ||
text = text .. ' ' .. set_message ('err_bad_issn', e and 'e' or ''); -- add an error message if the ISSN is invalid | |||
text = text .. ' ' .. set_message ('err_bad_issn', | |||
end | end | ||
end | end | ||
Line 891: | Line 834: | ||
]] | ]] | ||
local function jfm ( | local function jfm (id) | ||
local handler = cfg.id_handlers['JFM']; | |||
local handler = | |||
local id_num; | local id_num; | ||
local err_cat = ''; | local err_cat = ''; | ||
Line 901: | Line 843: | ||
if is_set (id_num) then | if is_set (id_num) then | ||
set_message ('maint_jfm_format'); | set_message ('maint_jfm_format'); | ||
else -- plain number without | else -- plain number without JFM prefix | ||
id_num = id; -- if here id does not have prefix | id_num = id; -- if here id does not have prefix | ||
end | end | ||
Line 908: | Line 850: | ||
id = id_num; -- jfm matches pattern | id = id_num; -- jfm matches pattern | ||
else | else | ||
err_cat = ' ' .. set_message ('err_bad_jfm' ); -- set an error message | err_cat = ' ' .. set_message( 'err_bad_jfm' ); -- set an error message | ||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat; | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat; | ||
end | end | ||
Line 953: | Line 872: | ||
]] | ]] | ||
local function lccn ( | local function lccn(lccn) | ||
local handler = cfg.id_handlers['LCCN']; | |||
local handler = | |||
local err_cat = ''; -- presume that LCCN is valid | local err_cat = ''; -- presume that LCCN is valid | ||
local id = lccn; -- local copy of the LCCN | local id = lccn; -- local copy of the LCCN | ||
Line 990: | Line 908: | ||
if not is_set (err_cat) and nil ~= lccn:find ('%s') then | if not is_set (err_cat) and nil ~= lccn:find ('%s') then | ||
err_cat = ' ' .. set_message ('err_bad_lccn'); -- lccn contains a space, set an error message | err_cat = ' ' .. set_message ('err_bad_lccn'); -- lccn contains a space, set an error message | ||
end | end | ||
Line 1,007: | Line 921: | ||
]] | ]] | ||
local function mr ( | local function mr (id) | ||
local handler = cfg.id_handlers['MR']; | |||
local handler = | |||
local id_num; | local id_num; | ||
local id_len; | local id_len; | ||
Line 1,024: | Line 937: | ||
id_len = id_num and id_num:len() or 0; | id_len = id_num and id_num:len() or 0; | ||
if (7 >= id_len) and (0 ~= id_len) then | if (7 >= id_len) and (0 ~= id_len) then | ||
id = string.rep ('0', 7-id_len) .. id_num; -- zero-fill leading digits | id = string.rep ('0', 7-id_len ) .. id_num; -- zero-fill leading digits | ||
else | else | ||
err_cat = ' ' .. set_message ('err_bad_mr'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_mr'); -- set an error message | ||
end | end | ||
Line 1,042: | Line 954: | ||
]] | ]] | ||
local function oclc ( | local function oclc (id) | ||
local handler = cfg.id_handlers['OCLC']; | |||
local handler = | |||
local number; | local number; | ||
local err_msg = ''; -- empty string for concatenation | local err_msg = ''; -- empty string for concatenation | ||
Line 1,070: | Line 981: | ||
else | else | ||
err_msg = ' ' .. set_message ('err_bad_oclc') -- add an error message if the id is malformed | err_msg = ' ' .. set_message ('err_bad_oclc') -- add an error message if the id is malformed | ||
end | end | ||
Line 1,086: | Line 996: | ||
]] | ]] | ||
local function openlibrary ( | local function openlibrary(id, access) | ||
local handler = cfg.id_handlers['OL']; | |||
local handler = | |||
local ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- strip optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W'; | local ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- strip optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W'; | ||
local error_msg = ''; | local error_msg = ''; | ||
Line 1,103: | Line 1,011: | ||
ident = id; -- copy id to ident so that we display the flawed identifier | ident = id; -- copy id to ident so that we display the flawed identifier | ||
error_msg = ' ' .. set_message ('err_bad_ol'); | error_msg = ' ' .. set_message ('err_bad_ol'); | ||
end | end | ||
Line 1,115: | Line 1,017: | ||
id = ident, separator = handler.separator, encode = handler.encode, | id = ident, separator = handler.separator, encode = handler.encode, | ||
access = access}) .. error_msg; | access = access}) .. error_msg; | ||
end | end | ||
Line 1,167: | Line 1,037: | ||
]] | ]] | ||
local function pmc ( | local function pmc (id, embargo) | ||
local handler = cfg.id_handlers['PMC']; | |||
local handler = | |||
local err_cat; | local err_cat; | ||
local id_num; | local id_num; | ||
Line 1,184: | Line 1,052: | ||
if is_set (id_num) then -- id_num has a value so test it | if is_set (id_num) then -- id_num has a value so test it | ||
id_num = tonumber (id_num); -- convert id_num to a number for range testing | id_num = tonumber(id_num); -- convert id_num to a number for range testing | ||
if 1 > id_num or handler.id_limit < id_num then -- if PMC is outside test limit boundaries | if 1 > id_num or handler.id_limit < id_num then -- if PMC is outside test limit boundaries | ||
err_cat = ' ' .. set_message ('err_bad_pmc'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_pmc'); -- set an error message | ||
Line 1,205: | Line 1,073: | ||
text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- no embargo date or embargo has expired, ok to link to article | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- no embargo date or embargo has expired, ok to link to article | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access, | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access, | ||
auto_link = not err_cat and 'pmc' or nil | auto_link = not err_cat and 'pmc' or nil -- do not auto-link when PMC has error | ||
}) .. (err_cat and err_cat or ''); -- parentheses required | }) .. (err_cat and err_cat or ''); -- parentheses required | ||
end | end | ||
return text; | return text; | ||
end | end | ||
Line 1,225: | Line 1,088: | ||
]] | ]] | ||
local function pmid ( | local function pmid(id) | ||
local handler = cfg.id_handlers['PMID']; | |||
local handler = | |||
local err_cat = ''; -- presume that PMID is valid | local err_cat = ''; -- presume that PMID is valid | ||
if id:match("[^%d]") then -- if PMID has anything but digits | if id:match("[^%d]") then -- if PMID has anything but digits | ||
err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message | ||
else -- PMID is only digits | else -- PMID is only digits | ||
local id_num = tonumber (id); -- convert id to a number for range testing | local id_num = tonumber(id); -- convert id to a number for range testing | ||
if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries | if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries | ||
err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message | ||
end | end | ||
end | end | ||
Line 1,243: | Line 1,103: | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat; | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat; | ||
end | end | ||
Line 1,287: | Line 1,116: | ||
]] | ]] | ||
local function s2cid ( | local function s2cid (id, access) | ||
local handler = cfg.id_handlers['S2CID']; | |||
local handler = | |||
local err_cat = ''; -- presume that S2CID is valid | local err_cat = ''; -- presume that S2CID is valid | ||
local id_num; | local id_num; | ||
Line 1,298: | Line 1,125: | ||
if is_set (id_num) then -- id_num has a value so test it | if is_set (id_num) then -- id_num has a value so test it | ||
id_num = tonumber (id_num); -- convert id_num to a number for range testing | id_num = tonumber(id_num); -- convert id_num to a number for range testing | ||
if handler.id_limit < id_num then -- if S2CID is outside test limit boundaries | if handler.id_limit < id_num then -- if S2CID is outside test limit boundaries | ||
err_cat = ' ' .. set_message (' | err_cat = ' ' .. set_message ('bad_s2cid_err'); -- set an error message | ||
end | end | ||
else -- when id format incorrect | else -- when id format incorrect | ||
err_cat = ' ' .. set_message (' | err_cat = ' ' .. set_message ('bad_s2cid_err'); -- set an error message | ||
end | end | ||
text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access}) .. err_cat; | prefix = handler.prefix, id = id:gsub ('%.%a%a', ''), separator = handler.separator, encode = handler.encode, access = access}) .. err_cat; | ||
return text; | return text; | ||
Line 1,319: | Line 1,144: | ||
9-digit form of ISBN-10; uses same check-digit validation when SBN is prefixed with an additional '0' to make 10 digits | 9-digit form of ISBN-10; uses same check-digit validation when SBN is prefixed with an additional '0' to make 10 digits | ||
]] | ]] | ||
local function sbn ( | local function sbn (id, ignore_invalid) | ||
local handler = cfg.id_handlers['SBN']; | |||
local handler = | |||
local function return_result (check, err_type) -- local function to handle the various returns | local function return_result (check, err_type) -- local function to handle the various returns | ||
local SBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect, | local SBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect, | ||
Line 1,334: | Line 1,154: | ||
if not ignore_invalid then -- if not ignoring SBN errors | if not ignore_invalid then -- if not ignoring SBN errors | ||
if not check then | if not check then | ||
return SBN .. set_message ('err_bad_sbn', {err_type}, false, ' '); -- display an error message | return SBN .. set_message ('err_bad_sbn', {err_type}, false, ' '); -- display an error message | ||
end | end | ||
Line 1,347: | Line 1,166: | ||
end | end | ||
local ident = id:gsub ('[%s-]', ''); | local ident = id:gsub ('[%s-]', ''); -- remove hyphens and whitespace; they interfere with the rest of the tests | ||
if 9 ~= ident:len() then | if 9 ~= ident:len() then | ||
Line 1,371: | Line 1,190: | ||
]] | ]] | ||
local function ssrn ( | local function ssrn (id) | ||
local handler = cfg.id_handlers['SSRN']; | |||
local handler = | |||
local err_cat = ''; -- presume that SSRN is valid | local err_cat = ''; -- presume that SSRN is valid | ||
local id_num; | local id_num; | ||
Line 1,381: | Line 1,199: | ||
if is_set (id_num) then -- id_num has a value so test it | if is_set (id_num) then -- id_num has a value so test it | ||
id_num = tonumber (id_num); -- convert id_num to a number for range testing | id_num = tonumber(id_num); -- convert id_num to a number for range testing | ||
if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries | if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries | ||
err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message | ||
end | end | ||
else -- when id format incorrect | else -- when id format incorrect | ||
err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message | ||
end | end | ||
Line 1,405: | Line 1,221: | ||
]] | ]] | ||
local function usenet_id ( | local function usenet_id (id) | ||
local handler = cfg.id_handlers['USENETID']; | |||
local handler = | |||
local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
Line 1,414: | Line 1,229: | ||
if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$') then -- doesn't have '@' or has one or first or last character is '< or '>' | if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$') then -- doesn't have '@' or has one or first or last character is '< or '>' | ||
text = text .. ' ' .. set_message ('err_bad_usenet_id') -- add an error message if the message id is invalid | text = text .. ' ' .. set_message ('err_bad_usenet_id') -- add an error message if the message id is invalid | ||
end | end | ||
Line 1,431: | Line 1,245: | ||
]] | ]] | ||
local function zbl ( | local function zbl (id) | ||
local handler = cfg.id_handlers['ZBL']; | |||
local handler = | |||
local err_cat = ''; | local err_cat = ''; | ||
Line 1,440: | Line 1,253: | ||
elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format? | elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format? | ||
err_cat = ' ' .. set_message ('err_bad_zbl'); -- no, set an error message | err_cat = ' ' .. set_message ('err_bad_zbl'); -- no, set an error message | ||
end | end | ||
Line 1,449: | Line 1,261: | ||
--============================<< I N T E R F A C E F U N C T I O N S >>========================================== | --============================<< I N T E R F A C E F U N C T I O N S >>========================================== | ||
--[[--------------------------< M A N U A L _ I D _ R E N D E R E R S >----------------------------------------

Lookup table used by build_id_list() for identifiers whose handler mode is 'manual'.  Keys are the uppercase
identifier names used to index cfg.id_handlers; each value is a function that forwards to the identifier-specific
rendering function with the arguments that function expects.

every entry is called as: render (id, handler, options, accept) where:
	id – identifier value after has_accept_as_written() has processed it
	handler – per-identifier table made in build_id_list(); handler.access holds options.IdAccessLevels[<name>]
	options – the options table given to build_id_list()
	accept – second return value of has_accept_as_written()

]]

local manual_id_renderers = {
	ARXIV = function (id, handler, options, accept) return arxiv (id, options.Class); end,
	ASIN = function (id, handler, options, accept) return asin (id, options.ASINTLD); end,
	BIBCODE = function (id, handler, options, accept) return bibcode (id, handler.access); end,
	BIORXIV = function (id, handler, options, accept) return biorxiv (id); end,
	CITESEERX = function (id, handler, options, accept) return citeseerx (id); end,
	DOI = function (id, handler, options, accept) return doi (id, options.DoiBroken, handler.access, accept); end,
	EISSN = function (id, handler, options, accept) return issn (id, true, accept); end,	-- true distinguishes EISSN from ISSN
	HDL = function (id, handler, options, accept) return hdl (id, handler.access); end,
	ISBN = function (id, handler, options, accept) return isbn (id, (accept or options.IgnoreISBN)); end,
	ISMN = function (id, handler, options, accept) return ismn (id); end,
	ISSN = function (id, handler, options, accept) return issn (id, false, accept); end,
	JFM = function (id, handler, options, accept) return jfm (id); end,
	LCCN = function (id, handler, options, accept) return lccn (id); end,
	MR = function (id, handler, options, accept) return mr (id); end,
	OCLC = function (id, handler, options, accept) return oclc (id); end,
	OL = function (id, handler, options, accept) return openlibrary (id, handler.access); end,
	OLA = function (id, handler, options, accept) return openlibrary (id, handler.access); end,
	PMC = function (id, handler, options, accept) return pmc (id, options.Embargo); end,
	PMID = function (id, handler, options, accept) return pmid (id); end,
	S2CID = function (id, handler, options, accept) return s2cid (id, handler.access); end,
	SBN = function (id, handler, options, accept) return sbn (id, accept); end,
	SSRN = function (id, handler, options, accept) return ssrn (id); end,
	USENETID = function (id, handler, options, accept) return usenet_id (id); end,
	ZBL = function (id, handler, options, accept) return zbl (id); end,
};


--[[--------------------------< B U I L D _ I D _ L I S T >--------------------------------------------------------

Converts the identifier table made by extract_ids() into a sequence of rendered identifier strings, ordered by the
lowercase form of each identifier's label.

inputs:
	id_list – table of identifiers built by extract_ids(); keys are uppercase identifier names
	options – table of template parameter values that modify the rendering of some manually handled identifiers;
		options.IdAccessLevels is indexed by identifier name to get that identifier's access level

returns a sequence of rendered identifier strings

raises cfg.messages['unknown_ID_mode'] when a handler's mode is not 'external', 'internal', or 'manual';
raises cfg.messages['unknown_manual_ID'] when a 'manual' identifier has no entry in manual_id_renderers

]]

local function build_id_list( id_list, options )
	local rendered = {};																-- {label, rendered identifier} pairs; reduced to rendered identifiers before return

	for name, raw_value in pairs (id_list) do											-- name is uppercase identifier name as index to cfg.id_handlers
		local id, accept = has_accept_as_written (raw_value);							-- remove and note accept-as-written markup if present

		local handler = setmetatable (													-- local id and access; every other field falls back to read-only cfg
			{id = id, access = options.IdAccessLevels[name]},
			{__index = function (_, field) return cfg.id_handlers[name][field]; end}
			);

		local mode = handler.mode;
		local entry;

		if 'external' == mode then
			entry = {handler.label, external_link_id (handler)};
		elseif 'internal' == mode then
			entry = {handler.label, internal_link_id (handler)};
		elseif 'manual' ~= mode then
			error (cfg.messages['unknown_ID_mode']);
		else
			local render = manual_id_renderers[name];									-- identifier-specific rendering function
			if not render then
				error (cfg.messages['unknown_manual_ID']);
			end
			entry = {handler.label, render (id, handler, options, accept)};
		end

		rendered[#rendered + 1] = entry;
	end

	table.sort (rendered, function (left, right)										-- order by lowercase label
		return left[1]:lower() < right[1]:lower();
	end);

	for index, entry in ipairs (rendered) do											-- discard the labels; keep only the rendered identifiers
		rendered[index] = entry[2];
	end

	return rendered;
end
--[[--------------------------< E X T R A C T _ I D S >------------------------------------------------------------ | --[[--------------------------< E X T R A C T _ I D S >------------------------------------------------------------ | ||
Line 1,458: | Line 1,363: | ||
]] | ]] | ||
local function extract_ids (args) | local function extract_ids( args ) | ||
local id_list = {}; -- list of identifiers found in args | local id_list = {}; -- list of identifiers found in args | ||
for k, v in pairs (cfg.id_handlers) do -- k is uppercase identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table | for k, v in pairs( cfg.id_handlers ) do -- k is uppercase identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table | ||
v = select_one (args, v.parameters, 'err_redundant_parameters' ); | v = select_one( args, v.parameters, 'err_redundant_parameters' ); -- v.parameters is a table of aliases for k; here we pick one from args if present | ||
if is_set (v) then id_list[k] = v; end -- if found in args, add identifier to our list | if is_set(v) then id_list[k] = v; end -- if found in args, add identifier to our list | ||
end | end | ||
return id_list; | return id_list; | ||
Line 1,470: | Line 1,375: | ||
--[[--------------------------< E X T R A C T _ I D _ A C C E S S _ L E V E L S >-------------------------------------- | --[[--------------------------< E X T R A C T _ I D _ A C C E S S _ L E V E L S >-------------------------------------- | ||
Fetches custom id access levels from arguments using configuration settings. Parameters which have a predefined access | Fetches custom id access levels from arguments using configuration settings. | ||
level (e.g. arxiv) do not use this function as they are directly rendered as free without using an additional parameter. | Parameters which have a predefined access level (e.g. arxiv) do not use this | ||
function as they are directly rendered as free without using an additional parameter. | |||
access-level values must match the case used in cfg.keywords_lists['id-access'] (lowercase unless there is some special reason for something else) | access-level values must match the case used in cfg.keywords_lists['id-access'] (lowercase unless there is some special reason for something else) | ||
Line 1,479: | Line 1,383: | ||
]] | ]] | ||
local function extract_id_access_levels (args, id_list) | local function extract_id_access_levels( args, id_list ) | ||
local id_accesses_list = {}; | local id_accesses_list = {}; | ||
for k, v in pairs (cfg.id_handlers) do | for k, v in pairs( cfg.id_handlers ) do | ||
local access_param = v.custom_access; -- name of identifier's access-level parameter | local access_param = v.custom_access; -- name of identifier's access-level parameter | ||
if is_set (access_param) then | if is_set(access_param) then | ||
local access_level = args[access_param]; -- get the assigned value if there is one | local access_level = args[access_param]; -- get the assigned value if there is one | ||
if is_set (access_level) then | if is_set (access_level) then | ||
if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required | if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required | ||
table.insert (z.message_tail, { set_message (' | table.insert( z.message_tail, { set_message( 'invalid_param_val', {access_param, access_level}, true ) } ); | ||
access_level = nil; -- invalid so unset | access_level = nil; -- invalid so unset | ||
end | end | ||
if not is_set (id_list[k]) then -- identifier access-level must have a matching identifier | if not is_set(id_list[k]) then -- identifier access-level must have a matching identifier | ||
table.insert (z.message_tail, { set_message ('err_param_access_requires_param', {k:lower()}, true) } ); -- parameter name is uppercase in cfg.id_handlers (k); lowercase for error message | table.insert( z.message_tail, { set_message( 'err_param_access_requires_param', {k:lower()}, true ) } ); -- parameter name is uppercase in cfg.id_handlers (k); lowercase for error message | ||
end | end | ||
id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword | id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword | ||
Line 1,498: | Line 1,402: | ||
end | end | ||
return id_accesses_list; | return id_accesses_list; | ||
end | end | ||
Line 1,645: | Line 1,432: | ||
auto_link_urls = auto_link_urls, -- table of identifier URLs to be used when auto-linking |title= | auto_link_urls = auto_link_urls, -- table of identifier URLs to be used when auto-linking |title= | ||
build_id_list = build_id_list, | |||
extract_ids = extract_ids, | |||
extract_id_access_levels = extract_id_access_levels, | |||
is_embargoed = is_embargoed; | is_embargoed = is_embargoed; | ||
set_selected_modules = set_selected_modules; | set_selected_modules = set_selected_modules; | ||
} | } |