Module:Citation/CS1/Identifiers: Difference between revisions
sync from sandbox;
(sync from sandbox;) |
(sync from sandbox;) |
||
Line 3: | Line 3: | ||
]] | ]] | ||
local is_set, in_array, | local has_accept_as_written, is_set, in_array, set_message, select_one, -- functions in Module:Citation/CS1/Utilities | ||
substitute, make_wikilink; | |||
local z; -- table of tables defined in Module:Citation/CS1/Utilities | local z; -- table of tables defined in Module:Citation/CS1/Utilities | ||
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | ||
--[[--------------------------< P A G E S C O P E V A R I A B L E S >-------------------------------------- | |||
declare variables here that have page-wide scope that are not brought in from other modules; that are created here and used here | |||
]] | |||
local auto_link_urls = {}; -- holds identifier URLs for those identifiers that can auto-link |title= | |||
Line 14: | Line 24: | ||
--[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >---------------------------- | --[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >---------------------------- | ||
as an aid to internationalizing identifier-label wikilinks, gets identifier article names from | as an aid to internationalizing identifier-label wikilinks, gets identifier article names from Wikidata. | ||
returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else | returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else | ||
Line 30: | Line 40: | ||
local wd_article; | local wd_article; | ||
local this_wiki_code = cfg.this_wiki_code; -- | local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org | ||
wd_article = mw.wikibase.getSitelink (q, this_wiki_code .. 'wiki'); -- fetch article title from | wd_article = mw.wikibase.getSitelink (q, this_wiki_code .. 'wiki'); -- fetch article title from WD; nil when no title available at this wiki | ||
if wd_article then | if wd_article then | ||
wd_article = table.concat ({':', this_wiki_code, ':', wd_article}); -- interwiki-style link without brackets if taken from | wd_article = table.concat ({':', this_wiki_code, ':', wd_article}); -- interwiki-style link without brackets if taken from WD; leading colon required | ||
end | end | ||
return wd_article; -- article title from | return wd_article; -- article title from WD; nil else | ||
end | |||
--[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------ | |||
common function to create identifier link label from handler table or from Wikidata | |||
returns the first available of | |||
1. redirect from local wiki's handler table (if enabled) | |||
2. Wikidata (if there is a Wikidata entry for this identifier in the local wiki's language) | |||
3. label specified in the local wiki's handler table | |||
]] | |||
local function link_label_make (handler) | |||
local wd_article; | |||
if not (cfg.use_identifier_redirects and is_set (handler.redirect)) then -- redirect has priority so if enabled and available don't fetch from Wikidata because expensive | |||
wd_article = wikidata_article_name_get (handler.q); -- if Wikidata has an article title for this wiki, get it; | |||
end | |||
return (cfg.use_identifier_redirects and is_set (handler.redirect) and handler.redirect) or wd_article or handler.link; | |||
end | end | ||
Line 44: | Line 76: | ||
--[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- | --[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- | ||
Formats a wiki style external link | Formats a wiki-style external link | ||
NB. Wikidata P1630 has a formatter URL with $1 placeholder for the ID which could be worked into our prefix/id/suffix | |||
string, either overriding local definitions (auto-update) or as fallback for identifiers without local definitions. | |||
But is expensive and could be risky if WD gets vandalized. | |||
See Template_talk:Authority_control/Archive_8#Use_Wikidata_as_the_source_for_the_external_link | |||
]] | ]] | ||
local function external_link_id(options) | local function external_link_id (options) | ||
local url_string = options.id; | local url_string = options.id; | ||
local ext_link; | local ext_link; | ||
local this_wiki_code = cfg.this_wiki_code; -- | local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org | ||
local wd_article; -- article title from | local wd_article; -- article title from Wikidata | ||
if options.encode == true or options.encode == nil then | if options.encode == true or options.encode == nil then | ||
url_string = mw.uri.encode( url_string ); | url_string = mw.uri.encode( url_string ); | ||
end | |||
if options.auto_link and is_set (options.access) then | |||
auto_link_urls[options.auto_link] = table.concat ({options.prefix, url_string, options.suffix}); | |||
end | end | ||
Line 62: | Line 104: | ||
ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link}); -- add the free-to-read / paywall lock | ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link}); -- add the free-to-read / paywall lock | ||
end | end | ||
return table.concat ({ | return table.concat ({ | ||
make_wikilink ( | make_wikilink (link_label_make (options), options.label), -- redirect, Wikidata link, or locally specified link (in that order) | ||
options.separator or ' ', | options.separator or ' ', | ||
ext_link | ext_link | ||
Line 79: | Line 115: | ||
--[[--------------------------< I N T E R N A L _ L I N K _ I D >---------------------------------------------- | --[[--------------------------< I N T E R N A L _ L I N K _ I D >---------------------------------------------- | ||
Formats a wiki style internal link | Formats a wiki-style internal link | ||
]] | ]] | ||
Line 85: | Line 121: | ||
local function internal_link_id(options) | local function internal_link_id(options) | ||
local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9 | local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9 | ||
return table.concat ( | return table.concat ( | ||
{ | { | ||
make_wikilink ( | make_wikilink (link_label_make (options), options.label), -- wiki-link the identifier label | ||
options.separator or ' ', -- add the separator | options.separator or ' ', -- add the separator | ||
make_wikilink ( | make_wikilink ( | ||
Line 100: | Line 130: | ||
{ | { | ||
options.prefix, | options.prefix, | ||
id, -- translated to | id, -- translated to Western digits | ||
options.suffix or '' | options.suffix or '' | ||
}), | }), | ||
substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent | substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent Latin script identifiers from being reversed at RTL language wikis | ||
); -- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required? | ); -- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required? | ||
}); | }); | ||
Line 111: | Line 141: | ||
--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------ | --[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------ | ||
Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is | Determines if a PMC identifier's online version is embargoed. Compares the date in |pmc-embargo-date= against today's date. If embargo date is | ||
in the future, returns the content of |embargo=; otherwise, returns | in the future, returns the content of |pmc-embargo-date=; otherwise, returns an empty string because the embargo has expired or because | ||
|embargo= was not set in this cite. | |pmc-embargo-date= was not set in this cite. | ||
]] | ]] | ||
Line 120: | Line 150: | ||
if is_set (embargo) then | if is_set (embargo) then | ||
local lang = mw.getContentLanguage(); | local lang = mw.getContentLanguage(); | ||
local good1, embargo_date | local good1, embargo_date, todays_date; | ||
good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo ); | good1, embargo_date = pcall (lang.formatDate, lang, 'U', embargo); | ||
todays_date = lang:formatDate ('U'); | |||
if good1 | if good1 then -- if embargo date is a good date | ||
if tonumber( embargo_date ) >= tonumber( todays_date ) then -- is embargo date is in the future? | if tonumber (embargo_date) >= tonumber (todays_date) then -- is embargo date is in the future? | ||
return embargo; -- still embargoed | return embargo; -- still embargoed | ||
else | else | ||
set_message ('maint_pmc_embargo'); -- embargo has expired; add main cat | |||
return ''; -- unset because embargo has expired | return ''; -- unset because embargo has expired | ||
end | end | ||
end | end | ||
end | end | ||
return ''; -- |embargo= not set return empty string | return ''; -- |pmc-embargo-date= not set return empty string | ||
end | end | ||
Line 142: | Line 172: | ||
2019-12-11T00:00Z <= biorxiv_date < today + 2 days | 2019-12-11T00:00Z <= biorxiv_date < today + 2 days | ||
The dated form of biorxiv identifier has a start date of 2019-12-11. The | The dated form of biorxiv identifier has a start date of 2019-12-11. The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400 | ||
biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC | biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC | ||
Line 151: | Line 181: | ||
This function does not work if it is fed month names for languages other than English. Wikimedia #time: parser | This function does not work if it is fed month names for languages other than English. Wikimedia #time: parser | ||
apparently doesn't understand non- | apparently doesn't understand non-English date month names. This function will always return false when the date | ||
contains a non-English month name because good1 is false after the call to lang.formatDate(). To get around that | contains a non-English month name because good1 is false after the call to lang.formatDate(). To get around that | ||
call this function with YYYY-MM-DD format dates. | call this function with YYYY-MM-DD format dates. | ||
Line 159: | Line 189: | ||
local function is_valid_biorxiv_date (biorxiv_date) | local function is_valid_biorxiv_date (biorxiv_date) | ||
local good1, good2; | local good1, good2; | ||
local biorxiv_ts, tomorrow_ts; -- to hold | local biorxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates | ||
local lang_object = mw.getContentLanguage(); | local lang_object = mw.getContentLanguage(); | ||
good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date ); -- convert biorxiv_date value to | good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date ); -- convert biorxiv_date value to Unix timestamp | ||
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow | good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow | ||
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script | if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand | ||
biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts); -- convert to numbers for the comparison; | biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts); -- convert to numbers for the comparison; | ||
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); | tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); | ||
else | else | ||
return false; -- one or both failed to convert to | return false; -- one or both failed to convert to Unix timestamp | ||
end | end | ||
Line 178: | Line 208: | ||
--[[--------------------------< IS _ V A L I D _ I S X N >----------------------------------------------------- | --[[--------------------------< IS _ V A L I D _ I S X N >----------------------------------------------------- | ||
ISBN-10 and ISSN validator code calculates checksum across all | ISBN-10 and ISSN validator code calculates checksum across all ISBN/ISSN digits including the check digit. | ||
ISBN-13 is checked in isbn(). | ISBN-13 is checked in isbn(). | ||
If the number is valid the result will be 0. Before calling this function, | If the number is valid the result will be 0. Before calling this function, ISBN/ISSN must be checked for length | ||
and stripped of dashes, spaces and other non- | and stripped of dashes, spaces and other non-ISxN characters. | ||
]] | ]] | ||
Line 189: | Line 219: | ||
local temp = 0; | local temp = 0; | ||
isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58 | isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58 | ||
len = len+1; -- adjust to be a loop counter | len = len + 1; -- adjust to be a loop counter | ||
for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum | for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum | ||
if v == string.byte( "X" ) then -- if checkdigit is X (compares the byte value of 'X' which is 0x58) | if v == string.byte( "X" ) then -- if checkdigit is X (compares the byte value of 'X' which is 0x58) | ||
Line 203: | Line 233: | ||
--[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------- | --[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------- | ||
ISBN-13 and ISMN validator code calculates checksum across all 13 | ISBN-13 and ISMN validator code calculates checksum across all 13 ISBN/ISMN digits including the check digit. | ||
If the number is valid, the result will be 0. Before calling this function, | If the number is valid, the result will be 0. Before calling this function, ISBN-13/ISMN must be checked for length | ||
and stripped of dashes, spaces and other non- | and stripped of dashes, spaces and other non-ISxN-13 characters. | ||
]] | ]] | ||
Line 216: | Line 246: | ||
temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit | temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit | ||
end | end | ||
return temp % 10 == 0; -- sum modulo 10 is zero when | return temp % 10 == 0; -- sum modulo 10 is zero when ISBN-13/ISMN is correct | ||
end | end | ||
Line 222: | Line 252: | ||
--[[--------------------------< N O R M A L I Z E _ L C C N >-------------------------------------------------- | --[[--------------------------< N O R M A L I Z E _ L C C N >-------------------------------------------------- | ||
LCCN normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization) | |||
1. Remove all blanks. | 1. Remove all blanks. | ||
2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash. | 2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash. | ||
Line 231: | Line 261: | ||
2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six. | 2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six. | ||
Returns a normalized | Returns a normalized LCCN for lccn() to validate. There is no error checking (step 3.b.1) performed in this function. | ||
]] | ]] | ||
local function normalize_lccn (lccn) | local function normalize_lccn (lccn) | ||
lccn = lccn:gsub ("%s", ""); | lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace | ||
if nil ~= string.find (lccn,'/') then | if nil ~= string.find (lccn, '/') then | ||
lccn = lccn:match ("(.-)/"); | lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it | ||
end | end | ||
local prefix | local prefix | ||
local suffix | local suffix | ||
prefix, suffix = lccn:match ("(.+)%-(.+)"); | prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix | ||
if nil ~= suffix then | if nil ~= suffix then -- if there was a hyphen | ||
suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6 | suffix = string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6 | ||
lccn=prefix..suffix; | lccn = prefix..suffix; -- reassemble the LCCN | ||
end | end | ||
return lccn; | return lccn; | ||
end | end | ||
--============================<< I D E N T I F I E R F U N C T I O N S >>==================================== | --============================<< I D E N T I F I E R F U N C T I O N S >>==================================== | ||
Line 260: | Line 291: | ||
format and error check arXiv identifier. There are three valid forms of the identifier: | format and error check arXiv identifier. There are three valid forms of the identifier: | ||
the first form, valid only between date codes | the first form, valid only between date codes 9107 and 0703, is: | ||
arXiv:<archive>.<class>/<date code><number><version> | arXiv:<archive>.<class>/<date code><number><version> | ||
where: | where: | ||
Line 290: | Line 321: | ||
local text; -- output text | local text; -- output text | ||
if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the | if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9107-0703 format with or without version | ||
year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$"); | year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$"); | ||
year = tonumber(year); | year = tonumber(year); | ||
Line 299: | Line 330: | ||
end | end | ||
elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 | elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 with or without version | ||
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$"); | year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$"); | ||
year = tonumber(year); | year = tonumber(year); | ||
month = tonumber(month); | month = tonumber(month); | ||
if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years) | if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years) | ||
((7 == year) and (4 > month)) then | ((7 == year) and (4 > month)) then -- when year is 07, is month invalid (before April)? | ||
err_cat = true; -- flag for error message | err_cat = true; -- flag for error message | ||
end | end | ||
elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format | elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format with or without version | ||
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$"); | year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$"); | ||
year = tonumber(year); | year = tonumber(year); | ||
Line 320: | Line 351: | ||
end | end | ||
err_cat = err_cat and table.concat ({' ', | err_cat = err_cat and table.concat ({' ', set_message ('err_bad_arxiv')}) or ''; -- set error message if flag is true | ||
text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}) .. err_cat; | ||
if is_set (class) then | if is_set (class) then | ||
Line 329: | Line 360: | ||
text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink | text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink | ||
else | else | ||
text = table.concat ({text, ' ', | text = table.concat ({text, ' ', set_message ('err_class_ignored')}); | ||
end | end | ||
end | end | ||
Line 339: | Line 370: | ||
--[[--------------------------< B I B C O D E >-------------------------------------------------------------------- | --[[--------------------------< B I B C O D E >-------------------------------------------------------------------- | ||
Validates (sort of) and formats a bibcode | Validates (sort of) and formats a bibcode ID. | ||
Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes | Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes | ||
Line 360: | Line 391: | ||
local year; | local year; | ||
local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, | ||
access=access}); | access = access}); | ||
if 19 ~= id:len() then | if 19 ~= id:len() then | ||
err_type = cfg.err_msg_supl.length; | err_type = cfg.err_msg_supl.length; | ||
else | else | ||
year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$") | year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$"); | ||
if not year then -- if nil then no pattern match | if not year then -- if nil then no pattern match | ||
err_type = cfg.err_msg_supl.value; | err_type = cfg.err_msg_supl.value; -- so value error | ||
else | else | ||
local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year | local next_year = tonumber(os.date ('%Y')) + 1; -- get the current year as a number and add one for next year | ||
year = tonumber (year); -- convert year portion of bibcode to a number | year = tonumber (year); -- convert year portion of bibcode to a number | ||
if (1000 > year) or (year > next_year) then | if (1000 > year) or (year > next_year) then | ||
err_type = cfg.err_msg_supl.year; | err_type = cfg.err_msg_supl.year; -- year out of bounds | ||
end | end | ||
if id:find('&%.') then | if id:find('&%.') then | ||
err_type = cfg.err_msg_supl.journal; | err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter) | ||
end | end | ||
end | end | ||
Line 383: | Line 414: | ||
if is_set (err_type) then -- if there was an error detected | if is_set (err_type) then -- if there was an error detected | ||
text = text .. ' ' .. | text = text .. ' ' .. set_message( 'err_bad_bibcode', {err_type}); | ||
end | end | ||
return text; | return text; | ||
Line 391: | Line 422: | ||
--[[--------------------------< B I O R X I V >----------------------------------------------------------------- | --[[--------------------------< B I O R X I V >----------------------------------------------------------------- | ||
Format bioRxiv | Format bioRxiv ID and do simple error checking. Before 2019-12-11, bioRxiv IDs were 10.1101/ followed by exactly | ||
6 digits. After 2019-12-11, bioRxiv | 6 digits. After 2019-12-11, bioRxiv IDs retained the six-digit identifier but prefixed that with a yyyy.mm.dd. | ||
date and suffixed with an optional version identifier. | date and suffixed with an optional version identifier. | ||
The bioRxiv | The bioRxiv ID is the string of characters: | ||
https://doi.org/10.1101/078733 -> 10.1101/078733 | https://doi.org/10.1101/078733 -> 10.1101/078733 | ||
or a date followed by a six-digit number followed by an optional version indicator 'v' and one or more digits: | or a date followed by a six-digit number followed by an optional version indicator 'v' and one or more digits: | ||
Line 412: | Line 443: | ||
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11) | '^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11) | ||
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11) | '^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11) | ||
} | |||
for _, pattern in ipairs (patterns) do -- spin through the patterns looking for a match | for _, pattern in ipairs (patterns) do -- spin through the patterns looking for a match | ||
Line 419: | Line 450: | ||
if m then -- m is nil when id is the six-digit form | if m then -- m is nil when id is the six-digit form | ||
if not is_valid_biorxiv_date (y .. '-' .. m .. '-' .. d) then -- validate the encoded date; TODO: don't ignore | if not is_valid_biorxiv_date (y .. '-' .. m .. '-' .. d) then -- validate the encoded date; TODO: don't ignore leap-year and actual month lengths ({{#time:}} is a poor date validator) | ||
break; -- date fail; break out early so we don't unset the error message | break; -- date fail; break out early so we don't unset the error message | ||
end | end | ||
Line 428: | Line 459: | ||
end -- err_cat remains set here when no match | end -- err_cat remains set here when no match | ||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, | prefix = handler.prefix, id = id, separator = handler.separator, | ||
encode=handler.encode, access=handler.access}) .. (err_cat and (' ' .. | encode = handler.encode, access = handler.access}) .. (err_cat and (' ' .. set_message( 'err_bad_biorxiv')) or ''); | ||
end | end | ||
Line 445: | Line 476: | ||
local matched; | local matched; | ||
local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, | ||
access=handler.access}); | access = handler.access}); | ||
matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); | matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); | ||
if not matched then | if not matched then | ||
text = text .. ' ' .. | text = text .. ' ' .. set_message( 'err_bad_citeseerx' ); | ||
end | end | ||
return text; | return text; | ||
Line 465: | Line 496: | ||
Suffix: character string of any length chosen by the registrant | Suffix: character string of any length chosen by the registrant | ||
This function checks a DOI name for: prefix/suffix. If the | This function checks a DOI name for: prefix/suffix. If the DOI name contains spaces or endashes, or, if it ends | ||
with a period or a comma, this function will emit a bad_doi error message. | with a period or a comma, this function will emit a bad_doi error message. | ||
DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash, | DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash, | ||
and terminal punctuation may not be technically correct but it appears that in practice these characters are rarely | and terminal punctuation may not be technically correct but it appears that in practice these characters are rarely | ||
if ever used in | if ever used in DOI names. | ||
]] | ]] | ||
local function doi(id, inactive, access) | local function doi (id, inactive, access, ignore_invalid) | ||
local | local err_cat; | ||
local handler = cfg.id_handlers['DOI']; | local handler = cfg.id_handlers['DOI']; | ||
local text; | local text; | ||
if is_set(inactive) then | if is_set (inactive) then | ||
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date | local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date | ||
local inactive_month, good; | local inactive_month, good; | ||
Line 492: | Line 523: | ||
end | end | ||
else | else | ||
inactive_year = nil; -- |doi-broken= has something but it isn't a date | inactive_year = nil; -- |doi-broken-date= has something but it isn't a date | ||
end | end | ||
if is_set(inactive_year) and is_set (inactive_month) then | if is_set(inactive_year) and is_set (inactive_month) then | ||
set_message ('maint_doi_inactive_dated', {inactive_year, inactive_month, ' '}); | |||
elseif is_set(inactive_year) then | elseif is_set(inactive_year) then | ||
set_message ('maint_doi_inactive_dated', {inactive_year, '', ''}); | |||
else | else | ||
set_message ('maint_doi_inactive'); | |||
end | end | ||
inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')'; | inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')'; | ||
end | end | ||
local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when DOI has the proper basic form | |||
local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when | |||
registrant_err_patterns = { | local registrant_err_patterns = { -- these patterns are for code ranges that are not supported | ||
'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); | '^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 | ||
'^[^1-4]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 40000+); | '^[^1-4]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 40000+); accepts: 10000–49999 | ||
'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); | '^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 | ||
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); | '^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 | ||
'^%d%d%d%d%d%d+', -- 6 or more digits | '^%d%d%d%d%d%d+', -- 6 or more digits | ||
'^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate) | '^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate) | ||
Line 520: | Line 548: | ||
'%s', -- any space character in registrant | '%s', -- any space character in registrant | ||
} | } | ||
if not ignore_invalid then | |||
if registrant then -- when DOI has proper form | |||
for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns | |||
if registrant:match (pattern) then -- to validate registrant codes | |||
err_cat = ' ' .. set_message ('err_bad_doi'); -- when found, mark this DOI as bad | |||
break; -- and done | |||
end | |||
end | end | ||
else | |||
err_cat = ' ' .. set_message ('err_bad_doi'); -- invalid directory or malformed | |||
end | end | ||
else | else | ||
set_message ('maint_doi_ignore'); | |||
end | end | ||
return text .. | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access, | |||
auto_link = not (err_cat or is_set (inactive) or ignore_invalid) and 'doi' or nil -- do not auto-link when |doi-broken-date= has a value or when there is a DOI error or (to play it safe, after all, auto-linking is not essential) when invalid DOIs are ignored | |||
}) .. (inactive or ''); | |||
return text .. (err_cat and err_cat or ''); -- parentheses required | |||
end | end | ||
Line 552: | Line 589: | ||
Query string parameters are named here: http://www.handle.net/proxy_servlet.html. query strings are not displayed | Query string parameters are named here: http://www.handle.net/proxy_servlet.html. query strings are not displayed | ||
but since '?' is | but since '?' is an allowed character in an HDL, '?' followed by one of the query parameters is the only way we | ||
have to detect the query string so that it isn't | have to detect the query string so that it isn't URL-encoded with the rest of the identifier. | ||
]] | ]] | ||
Line 584: | Line 621: | ||
if found then | if found then | ||
id = hdl; -- found so replace id with the handle portion; this will be | id = hdl; -- found so replace id with the handle portion; this will be URL-encoded, suffix will not | ||
else | else | ||
suffix = ''; -- make sure suffix is empty string for concatenation else | suffix = ''; -- make sure suffix is empty string for concatenation else | ||
end | end | ||
local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix, id=id, suffix=suffix, separator=handler.separator, encode=handler.encode, access=access}) | prefix = handler.prefix, id = id, suffix = suffix, separator = handler.separator, encode = handler.encode, access = access}) | ||
if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- | if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- HDL must contain a forward slash, must not contain spaces, endashes, and must not end with period or comma | ||
text = text .. ' ' .. | text = text .. ' ' .. set_message( 'err_bad_hdl' ); | ||
end | end | ||
return text; | return text; | ||
Line 605: | Line 642: | ||
]] | ]] | ||
local function isbn( isbn_str ) | local function isbn (isbn_str, ignore_invalid) | ||
local handler = cfg.id_handlers['ISBN']; | |||
local function return_result (check, err_type) -- local function to handle the various returns | |||
local ISBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect, | |||
prefix = handler.prefix, id = isbn_str, separator = handler.separator}); | |||
if ignore_invalid then -- if ignoring ISBN errors | |||
set_message ('maint_isbn_ignore'); -- add a maint category even when there is no error | |||
else -- here when not ignoring | |||
if not check then -- and there is an error | |||
return ISBN .. set_message ('err_bad_isbn', {err_type}, false, ' '); -- display an error message | |||
end | |||
end | |||
return ISBN; | |||
end | end | ||
local len = | if nil ~= isbn_str:match ('[^%s-0-9X]') then | ||
return return_result (false, cfg.err_msg_supl.char); -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X | |||
end | |||
local id = isbn_str:gsub ('[%s-]', ''); -- remove hyphens and whitespace | |||
local len = id:len(); | |||
if len ~= 10 and len ~= 13 then | if len ~= 10 and len ~= 13 then | ||
return false, cfg.err_msg_supl.length; | return return_result (false, cfg.err_msg_supl.length); -- fail if incorrect length | ||
end | end | ||
if len == 10 then | if len == 10 then | ||
if | if id:match ('^%d*X?$') == nil then -- fail if isbn_str has 'X' anywhere but last position | ||
return false, cfg.err_msg_supl.form; | return return_result (false, cfg.err_msg_supl.form); | ||
end | end | ||
return is_valid_isxn( | return return_result (is_valid_isxn(id, 10), cfg.err_msg_supl.check); | ||
else | else | ||
if | if id:match ('^%d+$') == nil then | ||
return false, cfg.err_msg_supl.char; | return return_result (false, cfg.err_msg_supl.char); -- fail if ISBN-13 is not all digits | ||
end | end | ||
if | if id:match ('^97[89]%d*$') == nil then | ||
return false, cfg.err_msg_supl.prefix; | return return_result (false, cfg.err_msg_supl.prefix); -- fail when ISBN-13 does not begin with 978 or 979 | ||
end | end | ||
if | if id:match ('^9790') then | ||
return false, cfg.err_msg_supl.group; | return return_result (false, cfg.err_msg_supl.group); -- group identifier '0' is reserved to ISMN | ||
end | end | ||
return is_valid_isxn_13 ( | return return_result (is_valid_isxn_13 (id), cfg.err_msg_supl.check); | ||
end | end | ||
end | end | ||
--[[--------------------------< A | --[[--------------------------< A S I N >---------------------------------------------------------------------- | ||
Formats a link to Amazon. Do simple error checking: | Formats a link to Amazon. Do simple error checking: ASIN must be mix of 10 numeric or uppercase alpha | ||
characters. If a mix, first character must be uppercase alpha; if all numeric, | characters. If a mix, first character must be uppercase alpha; if all numeric, ASINs must be 10-digit | ||
ISBN. If 10-digit ISBN, add a maintenance category so a bot or AWB script can replace |asin= with |isbn=. | |||
Error message if not 10 characters, if not | Error message if not 10 characters, if not ISBN-10, if mixed and first character is a digit. | ||
|asin=630....... is (apparently) not a legitimate ISBN though it checksums as one; these do not cause this | |||
function to emit the maint_asin message | |||
This function is positioned here because it calls isbn() | This function is positioned here because it calls isbn() | ||
Line 647: | Line 703: | ||
]] | ]] | ||
local function asin(id, domain) | local function asin (id, domain) | ||
local err_cat = "" | local err_cat = "" | ||
if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then | if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_asin'); -- ASIN is not a mix of 10 uppercase alpha and numeric characters | ||
else | else | ||
if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) | if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) | ||
if isbn( id ) then -- see if | if isbn (id) then -- see if ASIN value is or validates as ISBN-10 | ||
if not id:find ('^630') then -- 630xxxxxxx is (apparently) not a valid ISBN prefix but is used by Amazon as a numeric identifier | |
set_message ('maint_asin'); -- begins with something other than 630 so possibly an ISBN | |
end | |||
elseif not is_set (err_cat) then | elseif not is_set (err_cat) then | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_asin'); -- ASIN is not ISBN-10 | ||
end | end | ||
elseif not id:match("^%u[%d%u]+$") then | elseif not id:match("^%u[%d%u]+$") then | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_asin'); -- asin doesn't begin with uppercase alpha | ||
end | end | ||
end | end | ||
if not is_set(domain) then | if not is_set(domain) then | ||
domain = "com"; | domain = "com"; | ||
elseif in_array (domain, {'jp', 'uk'}) then | elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom | ||
domain = "co." .. domain; | domain = "co." .. domain; | ||
elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico | elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico | ||
domain = "com." .. domain; | domain = "com." .. domain; | ||
end | end | ||
local handler = cfg.id_handlers['ASIN']; | local handler = cfg.id_handlers['ASIN']; | ||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix .. domain .. "/dp/", | prefix = handler.prefix .. domain .. "/dp/", | ||
id=id, encode=handler.encode, separator = handler.separator}) .. err_cat; | id = id, encode = handler.encode, separator = handler.separator}) .. err_cat; | ||
end | end | ||
Line 679: | Line 737: | ||
--[[--------------------------< I S M N >---------------------------------------------------------------------- | --[[--------------------------< I S M N >---------------------------------------------------------------------- | ||
Determines whether an ISMN string is valid. Similar to | Determines whether an ISMN string is valid. Similar to ISBN-13, ISMN is 13 digits beginning 979-0-... and uses the | ||
same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf | same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf | ||
section 2, pages 9–12. | section 2, pages 9–12. | ||
Line 692: | Line 750: | ||
id_copy = id; -- save a copy because this testing is destructive | id_copy = id; -- save a copy because this testing is destructive | ||
id=id:gsub( | id = id:gsub ('[%s-]', ''); -- remove hyphens and white space | ||
if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- | if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ISMN must be 13 digits and begin with 9790 | ||
valid_ismn = false; | valid_ismn = false; | ||
else | else | ||
valid_ismn=is_valid_isxn_13 (id); -- validate | valid_ismn=is_valid_isxn_13 (id); -- validate ISMN | ||
end | end | ||
-- text = internal_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, -- use this (or external version) when there is some place to link to | -- text = internal_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- use this (or external version) when there is some place to link to | ||
-- prefix=handler.prefix, id=id_copy, separator=handler.separator, encode=handler.encode}) | -- prefix = handler.prefix, id = id_copy, separator = handler.separator, encode = handler.encode}) | ||
text = table.concat ( -- because no place to link to yet | text = table.concat ( -- because no place to link to yet | ||
{ | { | ||
make_wikilink ( | make_wikilink (link_label_make (handler), handler.label), | ||
handler.separator, | handler.separator, | ||
id_copy | id_copy | ||
Line 713: | Line 769: | ||
if false == valid_ismn then | if false == valid_ismn then | ||
text = text .. ' ' .. | text = text .. ' ' .. set_message( 'err_bad_ismn' ) -- add an error message if the ISMN is invalid | ||
end | end | ||
Line 722: | Line 778: | ||
--[[--------------------------< I S S N >---------------------------------------------------------------------- | --[[--------------------------< I S S N >---------------------------------------------------------------------- | ||
Validate and format an | Validate and format an ISSN. This code fixes the case where an editor has included an ISSN in the citation but | ||
has separated the two groups of four digits with a space. When that condition occurred, the resulting link looked | has separated the two groups of four digits with a space. When that condition occurred, the resulting link looked | ||
like this: | like this: | ||
Line 728: | Line 784: | ||
|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link | |issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link | ||
This code now prevents that by inserting a hyphen at the | This code now prevents that by inserting a hyphen at the ISSN midpoint. It also validates the ISSN for length | ||
and makes sure that the checkdigit agrees with the calculated value. Incorrect length (anything other than 8 digits), characters | and makes sure that the checkdigit agrees with the calculated value. Incorrect length (anything other than 8 digits), characters | ||
other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check | other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check ISSN error message. The | ||
ISSN is always displayed with a hyphen, even if the ISSN was given as a single group of 8 digits. | |||
]] | ]] | ||
local function issn(id, e) | local function issn (id, e, ignore_invalid) | ||
local issn_copy = id; -- save a copy of unadulterated | local issn_copy = id; -- save a copy of unadulterated ISSN; use this version for display if ISSN does not validate | ||
local handler; | local handler; | ||
local text; | local text; | ||
local valid_issn = true; | local valid_issn = true; | ||
handler = cfg.id_handlers[e and 'EISSN' or 'ISSN']; | |||
id=id:gsub( | id = id:gsub ('[%s-]', ''); -- remove hyphens and whitespace | ||
if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the | if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the ISSN: 8 digits long, containing only 0-9 or X in the last position | ||
valid_issn=false; -- wrong length or improper character | valid_issn = false; -- wrong length or improper character | ||
else | else | ||
valid_issn=is_valid_isxn(id, 8); -- validate | valid_issn = is_valid_isxn(id, 8); -- validate ISSN | ||
end | end | ||
Line 758: | Line 810: | ||
id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version | id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version | ||
else | else | ||
id = issn_copy; -- if not valid, | id = issn_copy; -- if not valid, show the invalid ISSN with error message | ||
end | end | ||
if false == valid_issn then | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) | |||
end | |||
if ignore_invalid then | |||
set_message ('maint_issn_ignore'); | |||
else | |||
if false == valid_issn then | |||
text = text .. ' ' .. set_message ('err_bad_issn', e and 'e' or ''); -- add an error message if the ISSN is invalid | |||
end | |||
end | |||
return text | return text | ||
Line 786: | Line 842: | ||
if is_set (id_num) then | if is_set (id_num) then | ||
set_message ('maint_jfm_format'); | |||
else -- plain number without JFM prefix | else -- plain number without JFM prefix | ||
id_num = id; -- if here id does not have prefix | id_num = id; -- if here id does not have prefix | ||
Line 794: | Line 850: | ||
id = id_num; -- jfm matches pattern | id = id_num; -- jfm matches pattern | ||
else | else | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message( 'err_bad_jfm' ); -- set an error message | ||
end | end | ||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat; | ||
end | end | ||
Line 809: | Line 865: | ||
length = 8 then all digits | length = 8 then all digits | ||
length = 9 then lccn[1] is | length = 9 then lccn[1] is lowercase alpha | ||
length = 10 then lccn[1] and lccn[2] are both | length = 10 then lccn[1] and lccn[2] are both lowercase alpha or both digits | ||
length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both | length = 11 then lccn[1] is lowercase alpha, lccn[2] and lccn[3] are both lowercase alpha or both digits | ||
length = 12 then lccn[1] and lccn[2] are both | length = 12 then lccn[1] and lccn[2] are both lowercase alpha | ||
]] | ]] | ||
Line 819: | Line 875: | ||
local handler = cfg.id_handlers['LCCN']; | local handler = cfg.id_handlers['LCCN']; | ||
local err_cat = ''; -- presume that LCCN is valid | local err_cat = ''; -- presume that LCCN is valid | ||
local id = lccn; -- local copy of the | local id = lccn; -- local copy of the LCCN | ||
id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes) | id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes) | ||
local len = id:len(); -- get the length of the | local len = id:len(); -- get the length of the LCCN | ||
if 8 == len then | if 8 == len then | ||
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) | if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_lccn'); -- set an error message | ||
end | end | ||
elseif 9 == len then -- LCCN should be adddddddd | elseif 9 == len then -- LCCN should be adddddddd | ||
if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern? | if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern? | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_lccn'); -- set an error message | ||
end | end | ||
elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd | elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd | ||
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ... | if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ... | ||
if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern | if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_lccn'); -- no match, set an error message | ||
end | end | ||
end | end | ||
elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd | elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd | ||
if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns | if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_lccn'); -- no match, set an error message | ||
end | end | ||
elseif 12 == len then -- LCCN should be aadddddddddd | elseif 12 == len then -- LCCN should be aadddddddddd | ||
if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern | if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_lccn'); -- no match, set an error message | ||
end | end | ||
else | else | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_lccn'); -- wrong length, set an error message | ||
end | end | ||
if not is_set (err_cat) and nil ~= lccn:find ('%s') then | if not is_set (err_cat) and nil ~= lccn:find ('%s') then | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_lccn'); -- lccn contains a space, set an error message | ||
end | end | ||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}) .. err_cat; | ||
end | end | ||
Line 874: | Line 930: | ||
if is_set (id_num) then | if is_set (id_num) then | ||
set_message ('maint_mr_format'); -- add maint cat | |||
else -- plain number without mr prefix | else -- plain number without mr prefix | ||
id_num = id:match ('^%d+$'); -- if here id is all digits | id_num = id:match ('^%d+$'); -- if here id is all digits | ||
Line 883: | Line 939: | ||
id = string.rep ('0', 7-id_len ) .. id_num; -- zero-fill leading digits | id = string.rep ('0', 7-id_len ) .. id_num; -- zero-fill leading digits | ||
else | else | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_mr'); -- set an error message | ||
end | end | ||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat; | ||
end | end | ||
Line 893: | Line 949: | ||
--[[--------------------------< O C L C >---------------------------------------------------------------------- | --[[--------------------------< O C L C >---------------------------------------------------------------------- | ||
Validate and format an | Validate and format an OCLC ID. https://www.oclc.org/batchload/controlnumber.en.html {{dead link}} | ||
archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html | archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html | ||
Line 912: | Line 968: | ||
number = id:match('%(OCoLC%)([1-9]%d*)'); -- get the number | number = id:match('%(OCoLC%)([1-9]%d*)'); -- get the number | ||
if 9 < number:len() then | if 9 < number:len() then | ||
number = nil; -- | number = nil; -- constrain to 1 to 9 digits; change this when OCLC issues 10-digit numbers | ||
end | end | ||
elseif id:match('^%d+$') then -- no prefix | elseif id:match('^%d+$') then -- no prefix | ||
number = id; -- get the number | number = id; -- get the number | ||
if 10 < number:len() then | if 10 < number:len() then | ||
number = nil; -- | number = nil; -- constrain to 1 to 10 digits; change this when OCLC issues 11-digit numbers | ||
end | end | ||
end | end | ||
Line 924: | Line 980: | ||
id = number; -- exclude prefix, if any, from external link | id = number; -- exclude prefix, if any, from external link | ||
else | else | ||
err_msg = ' ' .. | err_msg = ' ' .. set_message ('err_bad_oclc') -- add an error message if the id is malformed | ||
end | end | ||
local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg; | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_msg; | ||
return text; | return text; | ||
Line 941: | Line 997: | ||
local function openlibrary(id, access) | local function openlibrary(id, access) | ||
local handler = cfg.id_handlers['OL']; | local handler = cfg.id_handlers['OL']; | ||
local | local ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- strip optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W'; | ||
local error_msg = ''; | |||
local prefix = { -- these are appended to the handler.prefix according to code | |||
['A']='authors/OL', | |||
['M']='books/OL', | |||
['W']='works/OL', | |||
['X']='OL' -- not a code; spoof when 'code' in id is invalid | |||
}; | |||
if not | if not ident then | ||
code = 'X'; -- no code or id completely invalid | |||
ident = id; -- copy id to ident so that we display the flawed identifier | |||
error_msg = ' ' .. set_message ('err_bad_ol'); | |||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | |||
prefix = handler.prefix .. prefix[code], | |||
id = ident, separator = handler.separator, encode = handler.encode, | |||
access = access}) .. error_msg; | |||
end | end | ||
Line 996: | Line 1,037: | ||
]] | ]] | ||
local function pmc(id, embargo) | local function pmc (id, embargo) | ||
local handler = cfg.id_handlers['PMC']; | local handler = cfg.id_handlers['PMC']; | ||
local err_cat | local err_cat; | ||
local id_num; | local id_num; | ||
local text; | local text; | ||
id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with | id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with PMC prefix | ||
if is_set (id_num) then | if is_set (id_num) then | ||
set_message ('maint_pmc_format'); | |||
else -- plain number without | else -- plain number without PMC prefix | ||
id_num = id:match ('^%d+$'); -- if here id is all digits | id_num = id:match ('^%d+$'); -- if here id is all digits | ||
end | end | ||
Line 1,013: | Line 1,054: | ||
id_num = tonumber(id_num); -- convert id_num to a number for range testing | id_num = tonumber(id_num); -- convert id_num to a number for range testing | ||
if 1 > id_num or handler.id_limit < id_num then -- if PMC is outside test limit boundaries | if 1 > id_num or handler.id_limit < id_num then -- if PMC is outside test limit boundaries | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_pmc'); -- set an error message | ||
else | else | ||
id = tostring (id_num); -- make sure id is a string | id = tostring (id_num); -- make sure id is a string | ||
end | end | ||
else -- when id format incorrect | else -- when id format incorrect | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_pmc'); -- set an error message | ||
end | end | ||
if is_set (embargo) then | if is_set (embargo) and is_set (is_embargoed (embargo)) then -- is PMC still embargoed? | ||
text = table.concat ( -- still embargoed so no external link | text = table.concat ( -- still embargoed so no external link | ||
{ | { | ||
make_wikilink (handler | make_wikilink (link_label_make (handler), handler.label), | ||
handler.separator, | handler.separator, | ||
id, | id, | ||
err_cat | (err_cat and err_cat or '') -- parens required | ||
}); | }); | ||
else | else | ||
text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, -- no embargo date or embargo has expired, ok to link to article | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- no embargo date or embargo has expired, ok to link to article | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access, | ||
auto_link = not err_cat and 'pmc' or nil -- do not auto-link when PMC has error | |||
}) .. (err_cat and err_cat or ''); -- parentheses required | |||
end | end | ||
return text; | return text; | ||
Line 1,050: | Line 1,093: | ||
if id:match("[^%d]") then -- if PMID has anything but digits | if id:match("[^%d]") then -- if PMID has anything but digits | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message | ||
else -- PMID is only digits | else -- PMID is only digits | ||
local id_num = tonumber(id); -- convert id to a number for range testing | local id_num = tonumber(id); -- convert id to a number for range testing | ||
if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries | if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message | ||
end | end | ||
end | end | ||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat; | ||
end | end | ||
Line 1,065: | Line 1,108: | ||
--[[--------------------------< S 2 C I D >-------------------------------------------------------------------- | --[[--------------------------< S 2 C I D >-------------------------------------------------------------------- | ||
Format an | Format an S2CID, do simple error checking | ||
S2CIDs are sequential numbers beginning at 1 and counting up. This code checks the | S2CIDs are sequential numbers beginning at 1 and counting up. This code checks the S2CID to see that it is only | ||
digits and is less than test_limit; the value in local variable test_limit will need to be updated periodically | digits and is less than test_limit; the value in local variable test_limit will need to be updated periodically | ||
as more S2CIDs are issued. | as more S2CIDs are issued. | ||
Line 1,084: | Line 1,127: | ||
id_num = tonumber(id_num); -- convert id_num to a number for range testing | id_num = tonumber(id_num); -- convert id_num to a number for range testing | ||
if handler.id_limit < id_num then -- if S2CID is outside test limit boundaries | if handler.id_limit < id_num then -- if S2CID is outside test limit boundaries | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('bad_s2cid_err'); -- set an error message | ||
end | end | ||
else -- when id format incorrect | else -- when id format incorrect | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('bad_s2cid_err'); -- set an error message | ||
end | end | ||
text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix, id=id:gsub ('%.%a%a', ''), separator=handler.separator, encode=handler.encode, access=access}) .. err_cat; | prefix = handler.prefix, id = id:gsub ('%.%a%a', ''), separator = handler.separator, encode = handler.encode, access = access}) .. err_cat; | ||
return text; | return text; | ||
Line 1,100: | Line 1,143: | ||
--[[--------------------------< S B N >------------------------------------------------------------------------ | --[[--------------------------< S B N >------------------------------------------------------------------------ | ||
9-digit form of | 9-digit form of ISBN-10; uses same check-digit validation when SBN is prefixed with an additional '0' to make 10 digits | ||
]] | ]] | ||
local function sbn (id) | local function sbn (id, ignore_invalid) | ||
local | local handler = cfg.id_handlers['SBN']; | ||
local err_type = ''; | local function return_result (check, err_type) -- local function to handle the various returns | ||
local SBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect, | |||
prefix = handler.prefix, id = id, separator = handler.separator}); | |||
if not ignore_invalid then -- if not ignoring SBN errors | |||
if not check then | |||
return SBN .. set_message ('err_bad_sbn', {err_type}, false, ' '); -- display an error message | |||
end | |||
else | |||
set_message ('maint_isbn_ignore'); -- add a maint category even when there is no error (ToDo: Possibly switch to separate message for SBNs only) | |||
end | |||
return SBN; | |||
end | |||
if | if id:match ('[^%s-0-9X]') then | ||
return false, cfg.err_msg_supl.char; | return return_result (false, cfg.err_msg_supl.char); -- fail if SBN contains anything but digits, hyphens, or the uppercase X | ||
end | end | ||
local ident = id:gsub ('[%s-]', ''); -- remove hyphens and whitespace; they interfere with the rest of the tests | |||
if 9 ~= | if 9 ~= ident:len() then | ||
return false, cfg.err_msg_supl.length; | return return_result (false, cfg.err_msg_supl.length); -- fail if incorrect length | ||
end | end | ||
if | if ident:match ('^%d*X?$') == nil then | ||
return return_result (false, cfg.err_msg_supl.form); -- fail if SBN has 'X' anywhere but last position | |||
end | end | ||
return is_valid_isxn('0' .. | return return_result (is_valid_isxn ('0' .. ident, 10), cfg.err_msg_supl.check); | ||
end | end | ||
Line 1,128: | Line 1,182: | ||
--[[--------------------------< S S R N >---------------------------------------------------------------------- | --[[--------------------------< S S R N >---------------------------------------------------------------------- | ||
Format an | Format an SSRN, do simple error checking | ||
SSRNs are sequential numbers beginning at 100? and counting up. This code checks the | SSRNs are sequential numbers beginning at 100? and counting up. This code checks the SSRN to see that it is | ||
only digits and is greater than 99 and less than test_limit; the value in local variable test_limit will need | only digits and is greater than 99 and less than test_limit; the value in local variable test_limit will need | ||
to be updated periodically as more SSRNs are issued. | to be updated periodically as more SSRNs are issued. | ||
Line 1,147: | Line 1,201: | ||
id_num = tonumber(id_num); -- convert id_num to a number for range testing | id_num = tonumber(id_num); -- convert id_num to a number for range testing | ||
if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries | if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message | ||
end | end | ||
else -- when id format incorrect | else -- when id format incorrect | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message | ||
end | end | ||
text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}) .. err_cat; | ||
return text; | return text; | ||
Line 1,170: | Line 1,224: | ||
local handler = cfg.id_handlers['USENETID']; | local handler = cfg.id_handlers['USENETID']; | ||
local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) | ||
if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$')then -- doesn't have '@' or has one or first or last character is '< or '>' | if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$') then -- doesn't have '@' or has one or first or last character is '< or '>' | ||
text = text .. ' ' .. | text = text .. ' ' .. set_message ('err_bad_usenet_id') -- add an error message if the message id is invalid | ||
end | end | ||
Line 1,196: | Line 1,250: | ||
if id:match('^%d%d%d%d%d%d%d%d$') then -- is this identifier using temporary format? | if id:match('^%d%d%d%d%d%d%d%d$') then -- is this identifier using temporary format? | ||
set_message ('maint_zbl'); -- yes, add maint cat | |||
elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format? | elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format? | ||
err_cat = ' ' .. | err_cat = ' ' .. set_message ('err_bad_zbl'); -- no, set an error message | ||
end | end | ||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat; | ||
end | end | ||
Line 1,220: | Line 1,274: | ||
local function build_id_list( id_list, options ) | local function build_id_list( id_list, options ) | ||
local new_list, handler = {}; | local new_list, handler = {}; | ||
local accept; | |||
local function fallback(k) return { __index = function(t,i) return cfg.id_handlers[k][i] end } end; | local function fallback(k) return { __index = function(t,i) return cfg.id_handlers[k][i] end } end; | ||
for k, v in pairs( id_list ) do | for k, v in pairs( id_list ) do | ||
v, accept = has_accept_as_written (v); -- remove and note accept-as-written markup if present | |||
-- k is uppercase identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table | |||
-- fallback to read-only cfg | -- fallback to read-only cfg | ||
handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) ); | handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) ); | ||
Line 1,244: | Line 1,302: | ||
table.insert( new_list, {handler.label, citeseerx( v ) } ); | table.insert( new_list, {handler.label, citeseerx( v ) } ); | ||
elseif k == 'DOI' then | elseif k == 'DOI' then | ||
table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } ); | table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access, accept) } ); | ||
elseif k == 'EISSN' then | elseif k == 'EISSN' then | ||
table.insert( new_list, {handler.label, issn( v, true ) } ); | table.insert( new_list, {handler.label, issn( v, true, accept) } ); -- true distinguishes EISSN from ISSN | ||
elseif k == 'HDL' then | elseif k == 'HDL' then | ||
table.insert( new_list, {handler.label, hdl( v, handler.access ) } ); | table.insert( new_list, {handler.label, hdl( v, handler.access ) } ); | ||
elseif k == 'ISBN' then | elseif k == 'ISBN' then | ||
table.insert( new_list, {handler.label, isbn (v, (accept or options.IgnoreISBN)) } ); | |||
elseif k == 'ISMN' then | elseif k == 'ISMN' then | ||
table.insert( new_list, {handler.label, ismn( v ) } ); | table.insert( new_list, {handler.label, ismn( v ) } ); | ||
elseif k == 'ISSN' then | elseif k == 'ISSN' then | ||
table.insert( new_list, {handler.label, issn( v ) } ); | table.insert( new_list, {handler.label, issn( v, false, accept) } ); | ||
elseif k == 'JFM' then | elseif k == 'JFM' then | ||
table.insert( new_list, {handler.label, jfm( v ) } ); | table.insert( new_list, {handler.label, jfm( v ) } ); | ||
Line 1,285: | Line 1,330: | ||
table.insert( new_list, {handler.label, s2cid( v, handler.access ) } ); | table.insert( new_list, {handler.label, s2cid( v, handler.access ) } ); | ||
elseif k == 'SBN' then | elseif k == 'SBN' then | ||
table.insert( new_list, {handler.label, sbn (v, accept) } ); | |||
table.insert( new_list, {handler.label, | |||
elseif k == 'SSRN' then | elseif k == 'SSRN' then | ||
table.insert( new_list, {handler.label, ssrn( v ) } ); | table.insert( new_list, {handler.label, ssrn( v ) } ); | ||
Line 1,321: | Line 1,359: | ||
Populates ID table from arguments using configuration settings. Loops through cfg.id_handlers and searches args for | Populates ID table from arguments using configuration settings. Loops through cfg.id_handlers and searches args for | ||
any of the parameters listed in each cfg.id_handlers['...'].parameters. If found, adds the parameter and value to | any of the parameters listed in each cfg.id_handlers['...'].parameters. If found, adds the parameter and value to | ||
the identifier list. Emits redundant error message | the identifier list. Emits redundant error message if more than one alias exists in args | ||
]] | ]] | ||
Line 1,327: | Line 1,365: | ||
local function extract_ids( args ) | local function extract_ids( args ) | ||
local id_list = {}; -- list of identifiers found in args | local id_list = {}; -- list of identifiers found in args | ||
for k, v in pairs( cfg.id_handlers ) do -- k is | for k, v in pairs( cfg.id_handlers ) do -- k is uppercase identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table | ||
v = select_one( args, v.parameters, 'redundant_parameters' ); -- v.parameters is a table of aliases for k; here we pick one from args if present | v = select_one( args, v.parameters, 'redundant_parameters' ); -- v.parameters is a table of aliases for k; here we pick one from args if present | ||
if is_set(v) then id_list[k] = v; end -- if found in args, add identifier to our list | if is_set(v) then id_list[k] = v; end -- if found in args, add identifier to our list | ||
Line 1,353: | Line 1,391: | ||
if is_set (access_level) then | if is_set (access_level) then | ||
if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required | if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required | ||
table.insert( z.message_tail, { | table.insert( z.message_tail, { set_message( 'invalid_param_val', {access_param, access_level}, true ) } ); | ||
access_level = nil; -- invalid so unset | access_level = nil; -- invalid so unset | ||
end | end | ||
if not is_set(id_list[k]) then -- | if not is_set(id_list[k]) then -- identifier access-level must have a matching identifier | ||
table.insert( z.message_tail, { | table.insert( z.message_tail, { set_message( 'err_param_access_requires_param', {k:lower()}, true ) } ); -- parameter name is uppercase in cfg.id_handlers (k); lowercase for error message | ||
end | end | ||
id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword | id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword | ||
Line 1,376: | Line 1,414: | ||
cfg = cfg_table_ptr; | cfg = cfg_table_ptr; | ||
has_accept_as_written = utilities_page_ptr.has_accept_as_written; -- import functions from select Module:Citation/CS1/Utilities module | |||
is_set = utilities_page_ptr.is_set; | |||
in_array = utilities_page_ptr.in_array; | in_array = utilities_page_ptr.in_array; | ||
set_message = utilities_page_ptr.set_message; | |||
select_one = utilities_page_ptr.select_one; | select_one = utilities_page_ptr.select_one; | ||
substitute = utilities_page_ptr.substitute; | substitute = utilities_page_ptr.substitute; | ||
make_wikilink = utilities_page_ptr.make_wikilink; | make_wikilink = utilities_page_ptr.make_wikilink; | ||
Line 1,392: | Line 1,430: | ||
return { | return { | ||
auto_link_urls = auto_link_urls, -- table of identifier URLs to be used when auto-linking |title= | |||
build_id_list = build_id_list, | build_id_list = build_id_list, | ||
extract_ids = extract_ids, | extract_ids = extract_ids, |