Module:Citation/CS1/Identifiers: Difference between revisions
synch from sandbox;
m>Trappist the monk m (ssrn limit -> 3500000;) |
m>Trappist the monk (synch from sandbox;) |
||
Line 6: | Line 6: | ||
]] | ]] | ||
local is_set, in_array, set_error, select_one, add_maint_cat, substitute; | local is_set, in_array, set_error, select_one, add_maint_cat, substitute, make_wikilink; -- functions in Module:Citation/CS1/Utilities | ||
local z; -- table of tables defined in Module:Citation/CS1/Utilities | local z; -- table of tables defined in Module:Citation/CS1/Utilities | ||
Line 12: | Line 12: | ||
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | ||
--============================<< H E L P E R F U N C T I O N S >>============================================ | |||
--[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- | --[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- | ||
Line 32: | Line 34: | ||
end | end | ||
return | return table.concat ({ | ||
make_wikilink (options.link, options.label), | |||
options.separator or ' ', | |||
ext_link | |||
}); | |||
end | end | ||
Line 43: | Line 49: | ||
local function internal_link_id(options)
	-- Formats a wiki-style internal link for an identifier:
	--   <label wikilink><separator><wikilink to prefix..id..suffix, displayed as the nowiki'd id>
	-- options fields read here: link, label, separator, prefix, id, suffix.
	local label_link = make_wikilink (options.link, options.label);				-- link to the article about the identifier type
	local target = table.concat ({options.prefix, options.id, options.suffix or ''});	-- page title that the identifier links to

	return table.concat ({
		label_link,
		options.separator or ' ',
		make_wikilink (target, mw.text.nowiki (options.id))						-- display text is the id with wiki markup neutralized
		});
end
--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------ | |||
Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is | |||
in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because | |||
|embargo= was not set in this cite. | |||
]] | |||
local function is_embargoed (embargo)
	-- Returns embargo unchanged while the embargo date is today or later; otherwise returns ''.
	-- An expired embargo additionally adds the 'embargo' maintenance category.
	if not is_set (embargo) then
		return '';																-- |embargo= not set
	end

	local content_lang = mw.getContentLanguage();
	-- 'U' formats as a Unix timestamp; pcall traps errors raised for unparsable dates
	local embargo_ok, embargo_stamp = pcall (content_lang.formatDate, content_lang, 'U', embargo);
	local today_ok, today_stamp = pcall (content_lang.formatDate, content_lang, 'U');

	if not (embargo_ok and today_ok) then
		return '';																-- one of the dates could not be formatted
	end

	if tonumber (embargo_stamp) >= tonumber (today_stamp) then
		return embargo;															-- still embargoed
	end

	add_maint_cat ('embargo')
	return '';																	-- embargo has expired
end
Line 54: | Line 98: | ||
ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit. | ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit. | ||
ISBN-13 is checked in | ISBN-13 is checked in isbn(). | ||
If the number is valid the result will be 0. Before calling this function, issbn/issn must be checked for length | If the number is valid the result will be 0. Before calling this function, issbn/issn must be checked for length | ||
Line 63: | Line 107: | ||
local function is_valid_isxn (isxn_str, len) | local function is_valid_isxn (isxn_str, len) | ||
local temp = 0; | local temp = 0; | ||
isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' | isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58 | ||
len = len+1; -- adjust to be a loop counter | len = len+1; -- adjust to be a loop counter | ||
for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum | for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum | ||
Line 76: | Line 120: | ||
--[[--------------------------< IS _ V A L I D _ I S X N | --[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------- | ||
ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit. | ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit. | ||
Line 87: | Line 131: | ||
local temp=0; | local temp=0; | ||
isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' | isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 | ||
for i, v in ipairs( isxn_str ) do | for i, v in ipairs( isxn_str ) do | ||
temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit | temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit | ||
Line 95: | Line 139: | ||
--[[--------------------------< C | --[[--------------------------< N O R M A L I Z E _ L C C N >-------------------------------------------------- | ||
lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization) | |||
1. Remove all blanks. | |||
2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash. | |||
3. If there is a hyphen in the string: | |||
a. Remove it. | |||
b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out): | |||
1. All these characters should be digits, and there should be six or less. (not done in this function) | |||
2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six. | |||
Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function. | |||
]] | ]] | ||
local function normalize_lccn (lccn)
	-- Returns lccn in normalized form: whitespace removed, everything from the first
	-- forward slash onward dropped, and, when a hyphen splits the value, the part to
	-- the right of the hyphen left-filled with zeros to six characters.
	-- No validation is done here.
	local normalized = lccn:gsub ("%s", "");									-- 1. strip whitespace (first return value only)

	local before_slash = normalized:match ("(.-)/");							-- 2. text to the left of the first forward slash, if any
	if nil ~= before_slash then
		normalized = before_slash;
	end

	local head, tail = normalized:match ("(.+)%-(.+)");							-- 3.a split around the hyphen
	if nil ~= tail then
		return head .. string.rep ("0", 6 - #tail) .. tail;						-- 3.b.2 zero-fill the suffix and reassemble
	end

	return normalized;
end
--============================<< I D E N T I F I E R F U N C T I O N S >>==================================== | |||
--[[--------------------------< A R X I V >-------------------------------------------------------------------- | --[[--------------------------< A R X I V >-------------------------------------------------------------------- | ||
Line 331: | Line 249: | ||
--[[--------------------------< | --[[--------------------------< A M A Z O N >------------------------------------------------------------------ | ||
Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha | |||
characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit | |||
isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=. | |||
Error message if not 10 characters, if not isbn10, if mixed and first character is a digit. | |||
]] | ]] | ||
local function asin(id, domain)
	-- Formats an Amazon (ASIN) link for id on the Amazon site selected by domain.
	-- A 'bad_asin' error message is appended when id is not ten digits / uppercase letters,
	-- when an all-numeric id (optionally ending in X) is not accepted by isbn(), or when a
	-- mixed id does not start with an uppercase letter. A numeric id that isbn() accepts
	-- adds the 'ASIN' maintenance category.
	local err_cat = ""

	if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
		err_cat = ' ' .. set_error ('bad_asin');								-- not a mix of 10 uppercase alpha and numeric characters
	elseif id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then							-- 10-digit numeric (or 9 digits with terminal X)
		if isbn( id ) then
			add_maint_cat ('ASIN');												-- asin value is an isbn10
		elseif not is_set (err_cat) then
			err_cat = ' ' .. set_error ('bad_asin');							-- asin is not isbn10
		end
	elseif not id:match("^%u[%d%u]+$") then
		err_cat = ' ' .. set_error ('bad_asin');								-- asin doesn't begin with uppercase alpha
	end

	-- map the short domain code onto the host-name suffix
	if not is_set(domain) then
		domain = "com";
	elseif in_array (domain, {'jp', 'uk'}) then									-- Japan, United Kingdom
		domain = "co." .. domain;
	elseif in_array (domain, {'au', 'br', 'mx'}) then							-- Australia, Brazil, Mexico
		domain = "com." .. domain;
	end

	local handler = cfg.id_handlers['ASIN'];
	local link_text = external_link_id({
		link = handler.link,
		label = handler.label,
		prefix = handler.prefix .. domain .. "/dp/",
		id = id,
		encode = handler.encode,
		separator = handler.separator,
		});
	return link_text .. err_cat;
end
--[[--------------------------< B I B C O D E >-------------------------------------------------------------------- | |||
Validates (sort of) and formats a bibcode id. | |||
Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes | |||
But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters | |||
and first four digits must be a year. This function makes these tests: | |||
length must be 19 characters | |||
characters in position | |||
1–4 must be digits and must represent a year in the range of 1000 – next year | |||
5 must be a letter | |||
6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. ) | |||
7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. ) | |||
9–18 must be letter, digit, or dot | |||
19 must be a letter or dot | |||
]] | ]] | ||
local function bibcode (id, access)
	-- Formats a bibcode link and appends a 'bad_bibcode' error message, parameterized with
	-- the kind of problem ('length', 'value', 'year', or 'journal'), when a check fails.
	local handler = cfg.id_handlers['BIBCODE'];

	local text = external_link_id({link=handler.link, label=handler.label,
		prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
		access=access});

	-- returns the error type for id, or nothing when every test passes
	local function find_error_type ()
		if 19 ~= #id then
			return 'length';
		end

		local year = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$")
		if not year then
			return 'value';														-- no pattern match
		end

		if id:find('&%.') then
			return 'journal';													-- journal abbreviation must not have '&.'; takes precedence over a year error
		end

		local next_year = tonumber(os.date ('%Y'))+1;							-- current year plus one
		year = tonumber (year);
		if (1000 > year) or (year > next_year) then
			return 'year';														-- year out of bounds
		end
	end

	local err_type = find_error_type ();
	if is_set (err_type) then													-- if there was an error detected
		text = text .. ' ' .. set_error( 'bad_bibcode', {err_type});
	end
	return text;
end
--[[--------------------------< | --[[--------------------------< B I O R X I V >----------------------------------------------------------------- | ||
Format | Format bioRxiv id and do simple error checking. BiorXiv ids are exactly 6 digits. | ||
The bioRxiv id is the number following the last slash in the bioRxiv-issued DOI: | |||
https://doi.org/10.1101/078733 -> 078733 | |||
]] | ]] | ||
local function biorxiv(id)
	-- Formats a bioRxiv link; appends a 'bad_biorxiv' error message unless id is exactly six digits.
	local handler = cfg.id_handlers['BIORXIV'];
	local six_digits = id:match("^%d%d%d%d%d%d$");
	local err_cat = '';															-- presume that bioRxiv id is valid

	if nil == six_digits then
		err_cat = ' ' .. set_error( 'bad_biorxiv');
	end

	local link_text = external_link_id({
		link = handler.link,
		label = handler.label,
		prefix = handler.prefix,
		id = id,
		separator = handler.separator,
		encode = handler.encode,
		access = handler.access,
		});
	return link_text .. err_cat;
end
--[[--------------------------< I S | --[[--------------------------< C I T E S E E R X >------------------------------------------------------------ | ||
CiteSeerX use their own notion of "doi" (not to be confused with the identifiers resolved via doi.org). | |||
The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure | |||
]] | ]] | ||
local function | local function citeseerx (id) | ||
local handler = cfg.id_handlers['CITESEERX']; | |||
local matched; | |||
local text = external_link_id({link=handler.link, label=handler.label, | |||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | |||
access=handler.access}); | |||
matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); | |||
if not matched then | |||
text = text .. ' ' .. set_error( 'bad_citeseerx' ); | |||
if | |||
end | end | ||
return text; | return text; | ||
Line 575: | Line 409: | ||
local text; | local text; | ||
if is_set(inactive) then | if is_set(inactive) then | ||
local inactive_year = inactive:match("%d%d%d%d") or ''; | local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date | ||
if is_set(inactive_year) then | if is_set(inactive_year) then | ||
table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year ); | table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year ); | ||
else | else | ||
table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year | table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year | ||
end | end | ||
inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")" | inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")" | ||
end | end | ||
text = external_link_id({link = handler.link, label = handler.label, | |||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '') | |||
if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma | if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma | ||
cat = ' ' .. set_error( 'bad_doi' ); | cat = ' ' .. set_error( 'bad_doi' ); | ||
end | end | ||
return text | |||
return text .. cat | |||
end | end | ||
Line 626: | Line 458: | ||
--[[--------------------------< | --[[--------------------------< I S B N >---------------------------------------------------------------------- | ||
Determines whether an ISBN string is valid | |||
]] | |||
local function isbn( isbn_str )
	-- Determines whether isbn_str is a valid ISBN-10 or ISBN-13.
	-- Returns two values: a validity flag and a short reason string. The reason is one of
	-- 'invalid character', 'length', 'invalid form', 'invalid prefix', or 'checksum'; note
	-- that 'checksum' is also the second value when the checksum test passes.
	--
	-- The hyphen is written as %- everywhere: the Lua manual leaves a bare '-' next to a
	-- class such as %s inside a set undefined, and '-' is a magic character outside a set.
	if nil ~= isbn_str:match("[^%s%-0-9X]") then
		return false, 'invalid character';										-- anything but digits, whitespace, hyphens, or the uppercase X
	end

	isbn_str = isbn_str:gsub( "%-", "" ):gsub( " ", "" );						-- remove hyphens and spaces
	local len = isbn_str:len();

	if len ~= 10 and len ~= 13 then
		return false, 'length';													-- fail if incorrect length
	end

	if len == 10 then
		if isbn_str:match( "^%d*X?$" ) == nil then								-- fail if isbn_str has 'X' anywhere but last position
			return false, 'invalid form';
		end
		return is_valid_isxn(isbn_str, 10), 'checksum';
	else
		if isbn_str:match( "^%d+$" ) == nil then
			return false, 'invalid character';									-- fail if isbn13 is not all digits
		end
		if isbn_str:match( "^97[89]%d*$" ) == nil then
			return false, 'invalid prefix';										-- fail when isbn13 does not begin with 978 or 979
		end
		return is_valid_isxn_13 (isbn_str), 'checksum';
	end
end
--[[--------------------------< I S M N >---------------------------------------------------------------------- | |||
Determines whether an ISMN string is valid. Similar to isbn-13, ismn is 13 digits beginning 979-0-... and uses the | |||
same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf | |||
section 2, pages 9–12. | |||
]] | ]] | ||
local function ismn (id)
	-- Formats an ISMN (label wikilink, separator, the id exactly as given) and appends a
	-- 'bad_ismn' error message when the id, after spaces, hyphens, and endashes are removed,
	-- is not 13 digits beginning 9790 or fails the 13-digit checksum test.
	local handler = cfg.id_handlers['ISMN'];
	local text;
	local valid_ismn = true;
	local id_copy;

	id_copy = id;																-- save a copy because this testing is destructive

	-- string.gsub works on bytes, so an endash inside a [...] set would match each of its
	-- UTF-8 bytes separately and strip those bytes out of unrelated characters; remove the
	-- endash as a whole sequence instead
	id = id:gsub( "[%s%-]", "" ):gsub( "–", "" );								-- strip spaces, hyphens, and endashes from the ismn

	if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then					-- ismn must be 13 digits and begin 9790
		valid_ismn = false;
	else
		valid_ismn=is_valid_isxn_13 (id);										-- validate ismn
	end

	-- no place to link to yet, so the id is shown as plain text; switch to internal_link_id()
	-- or external_link_id() when a link target exists
	text = table.concat (
		{
		make_wikilink (handler.link, handler.label),
		handler.separator,
		id_copy
		});

	if false == valid_ismn then
		text = text .. ' ' .. set_error( 'bad_ismn' )							-- add an error message if the ismn is invalid
	end

	return text;
end
--[[--------------------------< | --[[--------------------------< I S S N >---------------------------------------------------------------------- | ||
Validate and format a | Validate and format an issn. This code fixes the case where an editor has included an ISSN in the citation but | ||
has separated the two groups of four digits with a space. When that condition occurred, the resulting link looked | |||
like this: | |||
|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link | |||
This code now prevents that by inserting a hyphen at the issn midpoint. It also validates the issn for length | |||
and makes sure that the checkdigit agrees with the calculated value. Incorrect length (8 digits), characters | |||
other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn error message. The | |||
issn is always displayed with a hyphen, even if the issn was given as a single group of 8 digits. | |||
]] | ]] | ||
local function issn(id, e)
	-- Validates and formats an ISSN (or an eISSN when e is truthy).
	-- A valid value is displayed as nnnn-nnnn; an invalid value is displayed exactly as it
	-- was given, followed by a 'bad_issn' error message.
	local issn_copy = id;														-- unadulterated issn; used for display if issn does not validate
	local handler;
	local text;
	local valid_issn = true;

	if e then
		handler = cfg.id_handlers['EISSN'];
	else
		handler = cfg.id_handlers['ISSN'];
	end

	-- string.gsub works on bytes, so an endash inside a [...] set would match each of its
	-- UTF-8 bytes separately and strip those bytes out of unrelated characters; remove the
	-- endash as a whole sequence instead
	id = id:gsub( "[%s%-]", "" ):gsub( "–", "" );								-- strip spaces, hyphens, and endashes from the issn

	if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then						-- 8 characters: digits, with X allowed only in the last position
		valid_issn=false;														-- wrong length or improper character
	else
		valid_issn=is_valid_isxn(id, 8);										-- validate issn
	end

	if true == valid_issn then
		id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 );				-- if valid, display correctly formatted version
	else
		id = issn_copy;															-- if not valid, show the issn as given, with the error message
	end

	text = external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

	if false == valid_issn then
		text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' )			-- add an error message if the issn is invalid
	end

	return text
end
--[[--------------------------< J F M >----------------------------------------------------------------------- | |||
A numerical identifier in the form nn.nnnn.nn | |||
]] | |||
local function jfm (id)
	-- Formats a JFM link. An id carrying a jfm prefix (any letter case) adds the 'jfm_format'
	-- maintenance category and is linked without the prefix when the rest is well formed.
	-- Appends a 'bad_jfm' error message unless the identifier has the form nn.nnnn.nn.
	local handler = cfg.id_handlers['JFM'];
	local err_cat = '';

	local candidate = id:match ('^[Jj][Ff][Mm](.*)$');							-- text after a jfm prefix, if there is one
	if is_set (candidate) then
		add_maint_cat ('jfm_format');
	else
		candidate = id;															-- plain identifier without jfm prefix
	end

	local well_formed = candidate and candidate:match('^%d%d%.%d%d%d%d%.%d%d$');
	if well_formed then
		id = candidate;															-- link the identifier without any prefix
	else
		err_cat = ' ' .. set_error( 'bad_jfm' );								-- set an error message
	end

	return external_link_id({
		link = handler.link,
		label = handler.label,
		prefix = handler.prefix,
		id = id,
		separator = handler.separator,
		encode = handler.encode,
		}) .. err_cat;
end
--[[--------------------------< L C C N >---------------------------------------------------------------------- | |||
Format LCCN link and do simple error checking. LCCN is a character string 8-12 characters long. The length of | |||
the LCCN dictates the character type of the first 1-3 characters; the rightmost eight are always digits. | |||
http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/ | |||
length = 8 then all digits | |||
length = 9 then lccn[1] is lower case alpha | |||
length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits | |||
length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits | |||
length = 12 then lccn[1] and lccn[2] are both lower case alpha | |||
]] | |||
local function lccn(lccn)
	-- Formats an LCCN link and appends a 'bad_lccn' error message when the normalized value
	-- has the wrong length or shape for that length, or when the value as given contains
	-- whitespace. The link itself is built from the value as given, not the normalized form.
	local handler = cfg.id_handlers['LCCN'];
	local id = normalize_lccn (lccn);											-- canonical form (no whitespace, hyphens, forward slashes)
	local len = #id;
	local well_formed;															-- truthy when id has the shape required for its length

	if 8 == len then															-- dddddddd
		well_formed = not id:match("[^%d]");
	elseif 9 == len then														-- adddddddd
		well_formed = id:match("%l%d%d%d%d%d%d%d%d");
	elseif 10 == len then														-- dddddddddd or aadddddddd
		well_formed = not id:match("[^%d]") or id:match("^%l%l%d%d%d%d%d%d%d%d");
	elseif 11 == len then														-- aaadddddddd or adddddddddd
		well_formed = id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d");
	elseif 12 == len then														-- aadddddddddd
		well_formed = id:match("^%l%l%d%d%d%d%d%d%d%d%d%d");
	else
		well_formed = false;													-- wrong length
	end

	local err_cat = '';															-- presume that LCCN is valid
	if not well_formed then
		err_cat = ' ' .. set_error( 'bad_lccn' );
	end

	if not is_set (err_cat) and nil ~= lccn:find ('%s') then
		err_cat = ' ' .. set_error( 'bad_lccn' );								-- lccn as given contains whitespace
	end

	return external_link_id({link = handler.link, label = handler.label,
			prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
--[[--------------------------< M R >-------------------------------------------------------------------------- | |||
A seven digit number; if not seven digits, zero-fill leading digits to make seven digits. | |||
]] | |||
local function mr (id)
	-- Formats an MR link. One to seven digits are left-filled with zeros to seven digits;
	-- anything else gets a 'bad_mr' error message. An mr prefix (any letter case) in front
	-- of the digits adds the 'mr_format' maintenance category.
	local handler = cfg.id_handlers['MR'];
	local err_cat = '';

	local digits = id:match ('^[Mm][Rr](%d+)$');								-- identifier with mr prefix
	if is_set (digits) then
		add_maint_cat ('mr_format');
	else
		digits = id:match ('^%d+$');											-- plain number without mr prefix; nil when id is not all digits
	end

	local digit_count = digits and digits:len() or 0;
	if (0 ~= digit_count) and (7 >= digit_count) then
		id = string.rep ('0', 7-digit_count ) .. digits;						-- zero-fill leading digits
	else
		err_cat = ' ' .. set_error( 'bad_mr' );									-- set an error message
	end

	return external_link_id({
		link = handler.link,
		label = handler.label,
		prefix = handler.prefix,
		id = id,
		separator = handler.separator,
		encode = handler.encode,
		}) .. err_cat;
end
Line 723: | Line 746: | ||
--[[--------------------------< | --[[--------------------------< O P E N L I B R A R Y >-------------------------------------------------------- | ||
Formats an OpenLibrary link, and checks for associated errors. | |||
]] | |||
local function openlibrary(id, access)
	-- Formats an OpenLibrary link. A well-formed id is an optional OL prefix, digits, and a
	-- final 'A', 'M', or 'W' that selects the authors/, books/, or works/ path. A malformed
	-- id is linked as given under the bare OL prefix, followed by a 'bad_ol' error message.
	local handler = cfg.id_handlers['OL'];
	local path_for_code = {A = 'authors/OL', M = 'books/OL', W = 'works/OL'};

	-- builds the link; path_and_prefix is appended to the handler prefix
	local function make_link (path_and_prefix, link_id)
		return external_link_id({link=handler.link, label=handler.label,
			prefix=handler.prefix .. path_and_prefix,
			id=link_id, separator=handler.separator, encode = handler.encode,
			access = access})
	end

	local ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$");			-- remove optional OL prefix; capture whole ident and its type letter

	if not is_set (ident) then													-- if malformed return an error
		return make_link ('OL', id) .. ' ' .. set_error( 'bad_ol' );
	end

	local path = path_for_code[code];
	if not path then
		return;																	-- no recognized type letter: return nothing
	end
	return make_link (path, ident);												-- ident no longer carries the optional OL prefix
end
--[[--------------------------< P M C >------------------------------------------------------------------------ | |||
Format a PMC, do simple error checking, and check for embargoed articles. | |||
The embargo parameter takes a date for a value. If the embargo date is in the future the PMC identifier will not | |||
be linked to the article. If the embargo date is today or in the past, or if it is empty or omitted, then the | |||
PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix. | |||
PMC embargo date testing is done in function is_embargoed () which is called earlier because when the citation | |||
has |pmc=<value> but does not have a |url= then |title= is linked with the PMC link. Function is_embargoed () | |||
returns the embargo date if the PMC article is still embargoed, otherwise it returns an empty string. | |||
PMCs are sequential numbers beginning at 1 and counting up. This code checks the PMC to see that it contains only digits and is less | |||
and | than test_limit; the value in local variable test_limit will need to be updated periodically as more PMCs are issued. | ||
]] | ]] | ||
local function | local function pmc(id, embargo) | ||
local handler = cfg.id_handlers[' | local test_limit = 6000000; -- update this value as PMCs approach | ||
local | local handler = cfg.id_handlers['PMC']; | ||
local | local err_cat = ''; -- presume that PMC is valid | ||
local id_num; | |||
local text; | |||
id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with pmc prefix | |||
if is_set (id_num) then | |||
add_maint_cat ('pmc_format'); | |||
else -- plain number without pmc prefix | |||
id_num = id:match ('^%d+$'); -- if here id is all digits | |||
end | |||
if is_set (id_num) then -- id_num has a value so test it | |||
id_num = tonumber(id_num); -- convert id_num to a number for range testing | |||
if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries | |||
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message | |||
if | |||
if | |||
else | else | ||
id = tostring (id_num); -- make sure id is a string | |||
end | end | ||
else -- when id format incorrect | |||
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message | |||
end | end | ||
if is_set ( | if is_set (embargo) then -- is PMC is still embargoed? | ||
text = text .. | text = table.concat ( -- still embargoed so no external link | ||
{ | |||
make_wikilink (handler.link, handler.label), | |||
handler.separator, | |||
id, | |||
err_cat | |||
}); | |||
else | |||
text = external_link_id({link = handler.link, label = handler.label, -- no embargo date or embargo has expired, ok to link to article | |||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | |||
end | end | ||
return text; | return text; | ||
Line 776: | Line 850: | ||
--[[--------------------------< | --[[--------------------------< P M I D >---------------------------------------------------------------------- | ||
Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This | |||
code checks the PMID to see that it contains only digits and is less than test_limit; the value in local variable | |||
test_limit will need to be updated periodically as more PMIDs are issued. | |||
]] | ]] | ||
local function pmid(id)
	-- Formats a PMID link and appends a 'bad_pmid' error message unless id consists only of
	-- digits and its numeric value lies in the range 1 .. test_limit.
	local test_limit = 32000000;												-- update this value as PMIDs approach
	local handler = cfg.id_handlers['PMID'];
	local err_cat = '';															-- presume that PMID is valid

	-- require at least one digit: an empty id has no non-digit characters, but tonumber('')
	-- is nil and comparing nil with a number raises an error
	local id_num = id:match ('^%d+$') and tonumber (id);						-- number for range testing; nil when id is not all digits

	if not id_num or 1 > id_num or test_limit < id_num then						-- malformed or outside test limit boundaries
		err_cat = ' ' .. set_error( 'bad_pmid' );								-- set an error message
	end

	return external_link_id({link = handler.link, label = handler.label,
			prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
Line 812: | Line 890: | ||
local test_limit = 3500000; -- update this value as SSRNs approach | local test_limit = 3500000; -- update this value as SSRNs approach | ||
local handler = cfg.id_handlers['SSRN']; | local handler = cfg.id_handlers['SSRN']; | ||
local err_cat = | local err_cat = ''; -- presume that SSRN is valid | ||
local id_num; | local id_num; | ||
local text; | local text; | ||
Line 833: | Line 911: | ||
end | end | ||
--[[--------------------------< U S E N E T _ I D >------------------------------------------------------------ | |||
Validate and format a usenet message id. Simple error checking, looks for 'id-left@id-right' not enclosed in | |||
'<' and/or '>' angle brackets. | |||
]] | |||
-- id: the usenet message id as supplied by the editor (a string).
-- returns: the rendered external link; a 'bad_usenet_id' error message is appended when the id
-- fails either of the two shape checks below.
local function usenet_id (id)
	local h = cfg.id_handlers['USENETID'];
	local rendered = external_link_id ({
		link = h.link,
		label = h.label,
		prefix = h.prefix,
		id = id,
		separator = h.separator,
		encode = h.encode,
	});

	local has_both_sides = id:match ('^.+@.+$');									-- at least one character on each side of an '@'
	local not_bracketed = id:match ('^[^<].*[^>]$');								-- first character is not '<' and last character is not '>'

	if has_both_sides and not_bracketed then
		return rendered;															-- message id looks valid; link only
	end

	return rendered .. ' ' .. set_error ('bad_usenet_id');							-- add an error message if the message id is invalid
end
--[[--------------------------< Z B L >----------------------------------------------------------------------- | |||
A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional | |||
]] | |||
-- id: the Zbl identifier as supplied by the editor (a string), optionally carrying a leading
-- 'zbl' prefix in any letter case.
-- returns: the rendered external link; a 'bad_zbl' error message is appended when the identifier
-- (after any prefix is removed) is not one to four digits, a dot, and exactly five digits.
local function zbl (id)
	local h = cfg.id_handlers['ZBL'];

	local candidate = id:match ('^[Zz][Bb][Ll](.*)$');								-- text that follows a zbl prefix, when there is one
	if is_set (candidate) then
		add_maint_cat ('zbl_format');												-- prefix present; add the maintenance category
	else
		candidate = id;																-- no prefix (or nothing after it); test id as given
	end

	local err_cat = '';																-- presume that the identifier is valid
	if candidate:match ('^%d?%d?%d?%d%.%d%d%d%d%d$') then
		id = candidate;																-- well formed; link using the identifier without any prefix
	else
		err_cat = ' ' .. set_error ('bad_zbl');										-- malformed; id is linked exactly as supplied
	end

	return external_link_id ({
		link = h.link,
		label = h.label,
		prefix = h.prefix,
		id = id,
		separator = h.separator,
		encode = h.encode,
	}) .. err_cat;
end
--============================<< I N T E R F A C E F U N C T I O N S >>========================================== | |||
--[[--------------------------< B U I L D _ I D _ L I S T >-------------------------------------------------------- | --[[--------------------------< B U I L D _ I D _ L I S T >-------------------------------------------------------- | ||
Line 850: | Line 981: | ||
for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table | for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table | ||
-- fallback to read-only cfg | -- fallback to read-only cfg | ||
handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) ); | handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) ); | ||
Line 860: | Line 990: | ||
elseif handler.mode ~= 'manual' then | elseif handler.mode ~= 'manual' then | ||
error( cfg.messages['unknown_ID_mode'] ); | error( cfg.messages['unknown_ID_mode'] ); | ||
elseif k == 'ARXIV' then | |||
table.insert( new_list, {handler.label, arxiv( v, options.Class ) } ); | |||
elseif k == 'ASIN' then | |||
table.insert( new_list, {handler.label, asin( v, options.ASINTLD ) } ); | |||
elseif k == 'BIBCODE' then | elseif k == 'BIBCODE' then | ||
table.insert( new_list, {handler.label, bibcode( v, handler.access ) } ); | table.insert( new_list, {handler.label, bibcode( v, handler.access ) } ); | ||
Line 868: | Line 1,002: | ||
elseif k == 'DOI' then | elseif k == 'DOI' then | ||
table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } ); | table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } ); | ||
elseif k == 'EISSN' then | |||
table.insert( new_list, {handler.label, issn( v, true ) } ); -- true distinguishes eissn from issn | |||
elseif k == 'HDL' then | elseif k == 'HDL' then | ||
table.insert( new_list, {handler.label, hdl( v, handler.access ) } ); | table.insert( new_list, {handler.label, hdl( v, handler.access ) } ); | ||
elseif k == 'ISBN' then | elseif k == 'ISBN' then | ||
local ISBN = internal_link_id( handler ); | local ISBN = internal_link_id( handler ); | ||
local check; | local check; | ||
local err_type = ''; | local err_type = ''; | ||
check, err_type = | check, err_type = isbn( v ); | ||
if not check then | if not check then | ||
if is_set(options.IgnoreISBN) then -- ISBN is invalid; if |ignore-isbn-error= set | if is_set(options.IgnoreISBN) then -- ISBN is invalid; if |ignore-isbn-error= set | ||
Line 904: | Line 1,018: | ||
end | end | ||
elseif is_set(options.IgnoreISBN) then -- ISBN is OK; if |ignore-isbn-error= set | elseif is_set(options.IgnoreISBN) then -- ISBN is OK; if |ignore-isbn-error= set | ||
add_maint_cat ('ignore_isbn_err'); -- because |ignore-isbn-error= unnecessary | |||
end | end | ||
table.insert( new_list, {handler.label, ISBN } ); | table.insert( new_list, {handler.label, ISBN } ); | ||
elseif k == 'ISMN' then | |||
table.insert( new_list, {handler.label, ismn( v ) } ); | |||
elseif k == 'ISSN' then | |||
table.insert( new_list, {handler.label, issn( v ) } ); | |||
elseif k == 'JFM' then | |||
table.insert( new_list, {handler.label, jfm( v ) } ); | |||
elseif k == 'LCCN' then | |||
table.insert( new_list, {handler.label, lccn( v ) } ); | |||
elseif k == 'MR' then | |||
table.insert( new_list, {handler.label, mr( v ) } ); | |||
elseif k == 'OCLC' then | |||
table.insert( new_list, {handler.label, oclc( v ) } ); | |||
elseif k == 'OL' or k == 'OLA' then | |||
table.insert( new_list, {handler.label, openlibrary( v, handler.access ) } ); | |||
elseif k == 'PMC' then | |||
table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } ); | |||
elseif k == 'PMID' then | |||
table.insert( new_list, {handler.label, pmid( v ) } ); | |||
elseif k == 'SSRN' then | |||
table.insert( new_list, {handler.label, ssrn( v ) } ); | |||
elseif k == 'USENETID' then | elseif k == 'USENETID' then | ||
table.insert( new_list, {handler.label, | table.insert( new_list, {handler.label, usenet_id( v ) } ); | ||
elseif k == 'ZBL' then | |||
table.insert( new_list, {handler.label, zbl( v ) } ); | |||
else | else | ||
error( cfg.messages['unknown_manual_ID'] ); | error( cfg.messages['unknown_manual_ID'] ); | ||
Line 915: | Line 1,051: | ||
-- Ordering function for table.sort(): a and b are list entries whose first element is the
-- identifier label string. Labels are compared in lower case so that mixed-case labels sort
-- alphabetically instead of every upper-case label preceding every lower-case one; the comparison
-- is strict, so equal labels return false as table.sort() requires.
local function comp( a, b )															-- used in following table.sort()
	return a[1]:lower() < b[1]:lower();
end
Line 994: | Line 1,130: | ||
add_maint_cat = utilities_page_ptr.add_maint_cat; | add_maint_cat = utilities_page_ptr.add_maint_cat; | ||
substitute = utilities_page_ptr.substitute; | substitute = utilities_page_ptr.substitute; | ||
make_wikilink = utilities_page_ptr.make_wikilink; | |||
z = utilities_page_ptr.z; -- table of tables in Module:Citation/CS1/Utilities | z = utilities_page_ptr.z; -- table of tables in Module:Citation/CS1/Utilities |