Module:Citation/CS1/Identifiers: Difference between revisions

Line 3:

]]

local is_set, in_array, ~~set_error~~, select_one, ~~add_maint_cat, substitute, make_wikilink;~~ -- functions in Module:Citation/CS1/Utilities

local has_accept_as_written, is_set, in_array, set_message, select_one, -- functions in Module:Citation/CS1/Utilities

substitute, make_wikilink;

local z; -- table of tables defined in Module:Citation/CS1/Utilities

local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration

--[[--------------------------< P A G E S C O P E V A R I A B L E S >--------------------------------------

declare variables here that have page-wide scope that are not brought in from other modules; that are created here and used here

]]

local auto_link_urls = {}; -- holds identifier URLs for those identifiers that can auto-link |title=

Line 14:

Line 24:

--[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >----------------------------

as an aid to internationalizing identifier-label wikilinks, gets identifier article names from ~~wikidata~~.

as an aid to internationalizing identifier-label wikilinks, gets identifier article names from Wikidata.

returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else

Line 30:

Line 40:

local wd_article;

local this_wiki_code = cfg.this_wiki_code; -- ~~wikipedia~~ subdomain; 'en' for en.wikipedia.org

local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org

wd_article = mw.wikibase.getSitelink (q, this_wiki_code .. 'wiki'); -- fetch article title from wd; nil when no title available at this wiki

wd_article = mw.wikibase.getSitelink (q, this_wiki_code .. 'wiki'); -- fetch article title from WD; nil when no title available at this wiki

if wd_article then

wd_article = table.concat ({':', this_wiki_code, ':', wd_article}); -- interwiki-style link without brackets if taken from wd; leading colon required

wd_article = table.concat ({':', this_wiki_code, ':', wd_article}); -- interwiki-style link without brackets if taken from WD; leading colon required

end

return wd_article; -- article title from wd; nil else

return wd_article; -- article title from WD; nil else

end

--[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------

common function to create identifier link label from handler table or from Wikidata

returns the first available of

1. redirect from local wiki's handler table (if enabled)

2. Wikidata (if there is a Wikidata entry for this identifier in the local wiki's language)

3. label specified in the local wiki's handler table

]]

local function link_label_make (handler)
	-- the redirect is used only when redirects are enabled in cfg AND this handler has one
	local redirect_wanted = cfg.use_identifier_redirects and is_set (handler.redirect);
	local label = redirect_wanted and handler.redirect;

	if not redirect_wanted then									-- only query Wikidata when the redirect is not going to be used (the lookup is expensive)
		label = wikidata_article_name_get (handler.q);			-- nil when Wikidata has no article title for this wiki
	end

	return label or handler.link;								-- fall back to the link named in the handler table
end

Line 44:

Line 76:

--[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------

Formats a wiki style external link

Formats a wiki-style external link

NB. Wikidata P1630 has a formatter URL with $1 placeholder for the ID which could be worked into our prefix/id/suffix

string, either overriding local definitions (auto-update) or as fallback for identifiers without local definitions.

But is expensive and could be risky if WD gets vandalized.

See Template_talk:Authority_control/Archive_8#Use_Wikidata_as_the_source_for_the_external_link

]]

local function external_link_id(options)

local function external_link_id (options)

local url_string = options.id;

local ext_link;

local this_wiki_code = cfg.this_wiki_code; -- ~~wikipedia~~ subdomain; 'en' for en.wikipedia.org

local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org

local wd_article; -- article title from ~~wikidata~~

local wd_article; -- article title from Wikidata

if options.encode == true or options.encode == nil then

url_string = mw.uri.encode( url_string );

end

if options.auto_link and is_set (options.access) then

auto_link_urls[options.auto_link] = table.concat ({options.prefix, url_string, options.suffix});

end

Line 62:

Line 104:

ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link}); -- add the free-to-read / paywall lock

end

~~if not (cfg.use_identifier_redirects and is_set (options.redirect)) then -- redirect has priority so if enabled and available don't fetch from wikidata because expensive~~

~~wd_article = wikidata_article_name_get (options.q); -- if wikidata has an article title for this wiki, get it;~~

~~end~~

~~local label_link = (cfg.use_identifier_redirects and is_set (options.redirect) and options.redirect) or wd_article or options.link;~~

return table.concat ({

make_wikilink (~~label_link~~, options.label), -- redirect, ~~wikidata~~ link, or locally specified link (in that order)

make_wikilink (link_label_make (options), options.label), -- redirect, Wikidata link, or locally specified link (in that order)

options.separator or ' ',

ext_link

Line 79:

Line 115:

--[[--------------------------< I N T E R N A L _ L I N K _ I D >----------------------------------------------

Formats a wiki style internal link

Formats a wiki-style internal link

]]

Line 85:

Line 121:

local function internal_link_id(options)

local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9

~~if not (cfg.use_identifier_redirects and is_set (options.redirect)) then -- redirect has priority so if enabled and available don't fetch from wikidata because expensive~~

~~wd_article = wikidata_article_name_get (options.q); -- if wikidata has an article title for this wiki, get it;~~

~~end~~

~~local label_link = (cfg.use_identifier_redirects and is_set (options.redirect) and options.redirect) or wd_article or options.link;~~

return table.concat (

{

make_wikilink (~~label_link~~, options.label), -- wiki link the identifier label

make_wikilink (link_label_make (options), options.label), -- wiki-link the identifier label

options.separator or ' ', -- add the separator

make_wikilink (

Line 100:

Line 130:

{

options.prefix,

id, -- translated to ~~western~~ digits

id, -- translated to Western digits

options.suffix or ''

}),

substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent ~~Latn~~ script identifiers from being reversed at ~~rtl~~ language wikis

substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent Latin script identifiers from being reversed at RTL language wikis

); -- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required?

});

Line 111:

Line 141:

--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------

Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is

Determines if a PMC identifier's online version is embargoed. Compares the date in |pmc-embargo-date= against today's date. If embargo date is

in the future, returns the content of |embargo=; otherwise, returns ~~and~~ empty string because the embargo has expired or because

in the future, returns the content of |pmc-embargo-date=; otherwise, returns an empty string because the embargo has expired or because

|embargo= was not set in this cite.

|pmc-embargo-date= was not set in this cite.

]]

Line 120:

Line 150:

if is_set (embargo) then

local lang = mw.getContentLanguage();

local good1, embargo_date~~, good2~~, todays_date;

local good1, embargo_date, todays_date;

good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo );

good1, embargo_date = pcall (lang.formatDate, lang, 'U', embargo);

~~good2,~~ todays_date = ~~pcall(~~ lang.formatDate~~, lang,~~ 'U' );

todays_date = lang:formatDate ('U');

if good1 ~~and good2~~ then -- if embargo date ~~and today's~~ date ~~are good dates~~

if good1 then -- if embargo date is a good date

if tonumber( embargo_date ) >= tonumber( todays_date ) then -- is embargo date is in the future?

if tonumber (embargo_date) >= tonumber (todays_date) then -- is embargo date is in the future?

return embargo; -- still embargoed

else

~~add_maint_cat~~ ('~~embargo~~')

set_message ('maint_pmc_embargo'); -- embargo has expired; add main cat

return ''; -- unset because embargo has expired

end

return ''; -- |embargo= not set return empty string

return ''; -- |pmc-embargo-date= not set return empty string

end

Line 142:

Line 172:

2019-12-11T00:00Z <= biorxiv_date < today + 2 days

The dated form of biorxiv identifier has a start date of 2019-12-11. The ~~unix~~ timestamp for that date is {{#time:U|2019-12-11}} = 1576022400

The dated form of biorxiv identifier has a start date of 2019-12-11. The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400

biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC

Line 151:

Line 181:

This function does not work if it is fed month names for languages other than English. MediaWiki #time: parser

apparently doesn't understand non-~~Engish~~ date month names. This function will always return false when the date

apparently doesn't understand non-English date month names. This function will always return false when the date

contains a non-English month name because good1 is false after the call to lang.formatDate(). To get around that

call this function with YYYY-MM-DD format dates.

Line 159:

Line 189:

local function is_valid_biorxiv_date (biorxiv_date)

local good1, good2;

local biorxiv_ts, tomorrow_ts; -- to hold ~~unix time stamps~~ representing the dates

local biorxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates

local lang_object = mw.getContentLanguage();

good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date ); -- convert biorxiv_date value to ~~unix timesatmp~~

good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date ); -- convert biorxiv_date value to Unix timestamp

good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow

if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script ~~which~~ which tonumber() may not understand

if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand

biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts); -- convert to numbers for the comparison;

tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);

else

return false; -- one or both failed to convert to ~~unix time stamp~~

return false; -- one or both failed to convert to Unix timestamp

end

Line 178:

Line 208:

--[[--------------------------< IS _ V A L I D _ I S X N >-----------------------------------------------------

ISBN-10 and ISSN validator code calculates checksum across all ~~isbn~~/~~issn~~ digits including the check digit.

ISBN-10 and ISSN validator code calculates checksum across all ISBN/ISSN digits including the check digit.

ISBN-13 is checked in isbn().

If the number is valid the result will be 0. Before calling this function, ~~issbn~~/~~issn~~ must be checked for length

If the number is valid the result will be 0. Before calling this function, ISBN/ISSN must be checked for length

and stripped of dashes, spaces and other non-~~isxn~~ characters.

and stripped of dashes, spaces and other non-ISxN characters.

]]

Line 189:

Line 219:

local temp = 0;

isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58

len = len+1; -- adjust to be a loop counter

len = len + 1; -- adjust to be a loop counter

for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum

if v == string.byte( "X" ) then -- if checkdigit is X (compares the byte value of 'X' which is 0x58)

Line 203:

Line 233:

--[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >-----------------------------------------------

ISBN-13 and ISMN validator code calculates checksum across all 13 ~~isbn~~/~~ismn~~ digits including the check digit.

ISBN-13 and ISMN validator code calculates checksum across all 13 ISBN/ISMN digits including the check digit.

If the number is valid, the result will be 0. Before calling this function, ~~isbn~~-13/~~ismn~~ must be checked for length

If the number is valid, the result will be 0. Before calling this function, ISBN-13/ISMN must be checked for length

and stripped of dashes, spaces and other non-~~isxn~~-13 characters.

and stripped of dashes, spaces and other non-ISxN-13 characters.

]]

Line 216:

Line 246:

temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit

end

return temp % 10 == 0; -- sum modulo 10 is zero when ~~isbn~~-13/~~ismn~~ is correct

return temp % 10 == 0; -- sum modulo 10 is zero when ISBN-13/ISMN is correct

end

Line 222:

Line 252:

--[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------

~~lccn~~ normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)

LCCN normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)

1. Remove all blanks.

2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.

Line 231:

Line 261:

2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.

Returns a normalized ~~lccn~~ for lccn() to validate. There is no error checking (step 3.b.1) performed in this function.

Returns a normalized LCCN for lccn() to validate. There is no error checking (step 3.b.1) performed in this function.

]]

local function normalize_lccn (lccn)
	-- Normalizes an LCCN string following the Library of Congress steps listed in the
	-- header comment above.  Takes the raw LCCN string; returns the normalized string.
	-- No validation is done here; a value without a hyphen is returned with only
	-- whitespace and any '/…' tail removed.

	lccn = lccn:gsub ("%s", "");									-- 1. strip whitespace (only the first gsub result is kept)

	if nil ~= string.find (lccn, '/') then
		lccn = lccn:match ("(.-)/");								-- 2. remove forward slash and all characters to the right of it
	end

	local prefix
	local suffix
	prefix, suffix = lccn:match ("(.+)%-(.+)");					-- 3.a remove hyphen by splitting the string into prefix and suffix

	if nil ~= suffix then											-- if there was a hyphen
		suffix = string.rep("0", 6-string.len (suffix)) .. suffix;	-- 3.b.2 left fill the suffix with 0s if suffix length less than 6; string.rep() yields '' for counts <= 0
		lccn = prefix..suffix;										-- reassemble the LCCN
	end

	return lccn;
end

--============================<< I D E N T I F I E R F U N C T I O N S >>====================================

Line 260:

Line 291:

format and error check arXiv identifier. There are three valid forms of the identifier:

the first form, valid only between date codes ~~9108~~ and 0703 is:

the first form, valid only between date codes 9107 and 0703, is:

arXiv:<archive>.<class>/<date code><number><version>

where:

Line 290:

Line 321:

local text; -- output text

if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the ~~9108~~-0703 format ~~w/ & w/o~~ version

if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9107-0703 format with or without version

year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");

year = tonumber(year);

Line 299:

Line 330:

end

elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 ~~w/ & w/o~~ version

elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 with or without version

year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");

year = tonumber(year);

month = tonumber(month);

if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years)

((7 == year) and (4 > month)) then ~~--or~~ -- when year is 07, is month invalid (before April)?

((7 == year) and (4 > month)) then -- when year is 07, is month invalid (before April)?

err_cat = true; -- flag for error message

end

elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format ~~w/ & w/o~~ version

elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format with or without version

year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");

year = tonumber(year);

Line 320:

Line 351:

end

err_cat = err_cat and table.concat ({' ', ~~set_error~~ ('~~bad_arxiv~~')}) or ''; -- set error message if flag is true

err_cat = err_cat and table.concat ({' ', set_message ('err_bad_arxiv')}) or ''; -- set error message if flag is true

text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}) .. err_cat;

if is_set (class) then

Line 329:

Line 360:

text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink

else

text = table.concat ({text, ' ', ~~set_error~~ ('~~class_ignored~~')});

text = table.concat ({text, ' ', set_message ('err_class_ignored')});

end

Line 339:

Line 370:

--[[--------------------------< B I B C O D E >--------------------------------------------------------------------

Validates (sort of) and formats a bibcode id.

Validates (sort of) and formats a bibcode ID.

Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes

Line 360:

Line 391:

local year;

local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode,

access=access});

access = access});

if 19 ~= id:len() then

err_type = cfg.err_msg_supl.length;

else

year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$") --

year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$");

if not year then -- if nil then no pattern match

err_type = cfg.err_msg_supl.value; -- so value error

else

local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year

local next_year = tonumber(os.date ('%Y')) + 1; -- get the current year as a number and add one for next year

year = tonumber (year); -- convert year portion of bibcode to a number

if (1000 > year) or (year > next_year) then

err_type = cfg.err_msg_supl.year; -- year out of bounds

end

if id:find('&%.') then

err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does ~~its~~ missing a letter)

err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter)

end

Line 383:

Line 414:

if is_set (err_type) then -- if there was an error detected

text = text .. ' ' .. ~~set_error~~( '~~bad_bibcode~~', {err_type});

text = text .. ' ' .. set_message( 'err_bad_bibcode', {err_type});

end

return text;

Line 391:

Line 422:

--[[--------------------------< B I O R X I V >-----------------------------------------------------------------

Format bioRxiv id and do simple error checking. Before 2019-12-11, biorXiv ~~ids~~ were 10.1101/ followed by exactly

Format bioRxiv ID and do simple error checking. Before 2019-12-11, bioRxiv IDs were 10.1101/ followed by exactly

6 digits. After 2019-12-11, biorXiv ~~ids~~ retained the six-digit identifier but prefixed that with a yyyy.mm.dd.

6 digits. After 2019-12-11, bioRxiv IDs retained the six-digit identifier but prefixed that with a yyyy.mm.dd.

date and suffixed with an optional version identifier.

The bioRxiv id is the string of characters:

The bioRxiv ID is the string of characters:

https://doi.org/10.1101/078733 -> 10.1101/078733

or a date followed by a six-digit number followed by an optional version indicator 'v' and one or more digits:

Line 412:

Line 443:

'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)

'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)

}

for _, pattern in ipairs (patterns) do -- spin through the patterns looking for a match

Line 419:

Line 450:

if m then -- m is nil when id is the six-digit form

if not is_valid_biorxiv_date (y .. '-' .. m .. '-' .. d) then -- validate the encoded date; TODO: don't ignore ~~leapyear~~ and actual month lengths ({{#time:}} is a poor date validator)

if not is_valid_biorxiv_date (y .. '-' .. m .. '-' .. d) then -- validate the encoded date; TODO: don't ignore leap-year and actual month lengths ({{#time:}} is a poor date validator)

break; -- date fail; break out early so we don't unset the error message

end

Line 428:

Line 459:

end -- err_cat remains set here when no match

return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix,id=id,separator=handler.separator,

prefix = handler.prefix, id = id, separator = handler.separator,

encode=handler.encode, access=handler.access}) .. (err_cat and (' ' .. ~~set_error~~( '~~bad_biorxiv~~')) or '');

encode = handler.encode, access = handler.access}) .. (err_cat and (' ' .. set_message( 'err_bad_biorxiv')) or '');

end

Line 445:

Line 476:

local matched;

local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode,

access=handler.access});

access = handler.access});

matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$");

if not matched then

text = text .. ' ' .. ~~set_error~~( '~~bad_citeseerx~~' );

text = text .. ' ' .. set_message( 'err_bad_citeseerx' );

end

return text;

Line 465:

Line 496:

Suffix: character string of any length chosen by the registrant

This function checks a DOI name for: prefix/suffix. If the ~~doi~~ name contains spaces or endashes, or, if it ends

This function checks a DOI name for: prefix/suffix. If the DOI name contains spaces or endashes, or, if it ends

with a period or a comma, this function will emit a bad_doi error message.

DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,

and terminal punctuation may not be technically correct but it appears that in practice these characters are rarely

if ever used in ~~doi~~ names.

if ever used in DOI names.

]]

local function doi(id, inactive, access)

local function doi (id, inactive, access, ignore_invalid)

local ~~cat = ""~~

local err_cat;

local handler = cfg.id_handlers['DOI'];

local text;

if is_set(inactive) then

if is_set (inactive) then

local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date

local inactive_month, good;

Line 492:

Line 523:

end

else

inactive_year = nil; -- |doi-broken= has something but it isn't a date

inactive_year = nil; -- |doi-broken-date= has something but it isn't a date

end

if is_set(inactive_year) and is_set (inactive_month) then

~~table.insert~~( ~~z.error_categories,~~ '~~Pages with DOIs inactive as of~~ ' .. inactive_year .. ' ' ~~.. inactive_month~~); ~~-- use inactive month in category name~~

set_message ('maint_doi_inactive_dated', {inactive_year, inactive_month, ' '});

elseif is_set(inactive_year) then

~~table.insert~~( ~~z.error_categories~~, '~~Pages with DOIs inactive as of~~ ' ~~.. inactive_year~~);

set_message ('maint_doi_inactive_dated', {inactive_year, '', ''});

else

~~table.insert~~( ~~z.error_categories,~~ '~~Pages with inactive DOIs~~'); ~~-- when inactive doesn't contain a recognizable date~~

set_message ('maint_doi_inactive');

end

inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')';

end

~~text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,~~

local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when DOI has the proper basic form

~~prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '')~~

local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when ~~doi~~ has the proper basic form

registrant_err_patterns = { -- these patterns are for code ranges that are not supported

local registrant_err_patterns = { -- these patterns are for code ranges that are not supported

'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); ~~accecpts~~: 10000–39999

'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999

'^[^1-4]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 40000+); ~~accecpts~~: 10000–49999

'^[^1-4]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 40000+); accepts: 10000–49999

'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); ~~accecpts~~: 1000–9999

'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999

'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); ~~accecpts~~: 1000–9999

'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999

'^%d%d%d%d%d%d+', -- 6 or more digits

'^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate)

Line 520:

Line 548:

'%s', -- any space character in registrant

}

if registrant then -- when ~~doi~~ has proper form

if not ignore_invalid then

for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns

if registrant then -- when DOI has proper form

if registrant:match (pattern) then -- to validate registrant codes

for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns

~~cat~~ = ' ' .. ~~set_error~~ ('~~bad_doi~~'); -- when found, mark this ~~doi~~ as bad

if registrant:match (pattern) then -- to validate registrant codes

break; -- and done

err_cat = ' ' .. set_message ('err_bad_doi'); -- when found, mark this DOI as bad

break; -- and done

end

else

err_cat = ' ' .. set_message ('err_bad_doi'); -- invalid directory or malformed

end

else

~~cat = ' ' .. set_error~~ ('~~bad_doi~~'); ~~-- invalid directory or malformed~~

set_message ('maint_doi_ignore');

end

return text .. ~~cat~~

text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access,

auto_link = not (err_cat or is_set (inactive) or ignore_invalid) and 'doi' or nil -- do not auto-link when |doi-broken-date= has a value or when there is a DOI error or (to play it safe, after all, auto-linking is not essential) when invalid DOIs are ignored

}) .. (inactive or '');

return text .. (err_cat and err_cat or ''); -- parentheses required

end

Line 552:

Line 589:

Query string parameters are named here: http://www.handle.net/proxy_servlet.html. Query strings are not displayed

but since '?' is ~~anallowed~~ character in an ~~hdl~~, '?' followed by one of the query parameters is the only way we

but since '?' is an allowed character in an HDL, '?' followed by one of the query parameters is the only way we

have to detect the query string so that it isn't ~~url~~ encoded with the rest of the identifier.

have to detect the query string so that it isn't URL-encoded with the rest of the identifier.

]]

Line 584:

Line 621:

if found then

id = hdl; -- found so replace id with the handle portion; this will be ~~url~~ encoded, suffix will not

id = hdl; -- found so replace id with the handle portion; this will be URL-encoded, suffix will not

else

suffix = ''; -- make sure suffix is empty string for concatenation else

end

local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix, id=id, suffix=suffix, separator=handler.separator, encode=handler.encode, access=access})

prefix = handler.prefix, id = id, suffix = suffix, separator = handler.separator, encode = handler.encode, access = access})

if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- ~~hdl~~ must contain a ~~fwd~~ slash, must not contain spaces, endashes, and must not end with period or comma

if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- HDL must contain a forward slash, must not contain spaces, endashes, and must not end with period or comma

text = text .. ' ' .. ~~set_error~~( '~~bad_hdl~~' );

text = text .. ' ' .. set_message( 'err_bad_hdl' );

end

return text;

Line 605:

Line 642:

]]

local function isbn( isbn_str )

local function isbn (isbn_str, ignore_invalid)

~~if nil ~~~= ~~isbn_str:match~~(~~"[^%s~~-0-~~9X]") then~~

local handler = cfg.id_handlers['ISBN'];

~~return false~~, ~~cfg~~.~~err_msg_supl~~.~~char~~; -- ~~fail~~ if ~~isbn_str contains anything but digits~~, ~~hyphens~~, ~~or the uppercase X~~

local function return_result (check, err_type) -- local function to handle the various returns

local ISBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect,

prefix = handler.prefix, id = isbn_str, separator = handler.separator});

if ignore_invalid then -- if ignoring ISBN errors

set_message ('maint_isbn_ignore'); -- add a maint category even when there is no error

else -- here when not ignoring

if not check then -- and there is an error

return ISBN .. set_message ('err_bad_isbn', {err_type}, false, ' '); -- display an error message

end

return ISBN;

end

~~isbn_str~~ = isbn_str:~~gsub~~( "-", "" ):gsub( ~~" "~~, "" ); -- remove hyphens and ~~spaces~~

local len = ~~isbn_str~~:len();

if nil ~= isbn_str:match ('[^%s-0-9X]') then

return return_result (false, cfg.err_msg_supl.char); -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X

end

local id = isbn_str:gsub ('[%s-]', ''); -- remove hyphens and whitespace

local len = id:len();

if len ~= 10 and len ~= 13 then

return false, cfg.err_msg_supl.length; -- fail if incorrect length

return return_result (false, cfg.err_msg_supl.length); -- fail if incorrect length

end

if len == 10 then

if ~~isbn_str~~:match( "^%d*X?$" ) == nil then -- fail if isbn_str has 'X' anywhere but last position

if id:match ('^%d*X?$') == nil then -- fail if isbn_str has 'X' anywhere but last position

return false, cfg.err_msg_supl.form;

return return_result (false, cfg.err_msg_supl.form);

end

return is_valid_isxn(~~isbn_str~~, 10), cfg.err_msg_supl.check;

return return_result (is_valid_isxn(id, 10), cfg.err_msg_supl.check);

else

if ~~isbn_str~~:match( "^%d+$" ) == nil then

if id:match ('^%d+$') == nil then

return false, cfg.err_msg_supl.char; -- fail if ~~isbn13~~ is not all digits

return return_result (false, cfg.err_msg_supl.char); -- fail if ISBN-13 is not all digits

end

if ~~isbn_str~~:match( "^97[89]%d*$" ) == nil then

if id:match ('^97[89]%d*$') == nil then

return false, cfg.err_msg_supl.prefix; -- fail when ~~isbn13~~ does not begin with 978 or 979

return return_result (false, cfg.err_msg_supl.prefix); -- fail when ISBN-13 does not begin with 978 or 979

end

if ~~isbn_str~~:match ('^9790') then

if id:match ('^9790') then

return false, cfg.err_msg_supl.group; -- group identifier '0' is reserved to ~~ismn~~

return return_result (false, cfg.err_msg_supl.group); -- group identifier '0' is reserved to ISMN

end

return is_valid_isxn_13 (~~isbn_str~~), cfg.err_msg_supl.check;

return return_result (is_valid_isxn_13 (id), cfg.err_msg_supl.check);

end

--[[--------------------------< A ~~M A Z O~~ N >------------------------------------------------------------------

--[[--------------------------< A S I N >----------------------------------------------------------------------

Formats a link to Amazon. Do simple error checking: ~~asin~~ must be mix of 10 numeric or uppercase alpha

Formats a link to Amazon. Do simple error checking: ASIN must be mix of 10 numeric or uppercase alpha

characters. If a mix, first character must be uppercase alpha; if all numeric, ~~asins~~ must be 10-digit

characters. If a mix, first character must be uppercase alpha; if all numeric, ASINs must be 10-digit

~~isbn~~. If 10-digit ~~isbn~~, add a maintenance category so a bot or ~~awb~~ script can replace |asin= with |isbn=.

ISBN. If 10-digit ISBN, add a maintenance category so a bot or AWB script can replace |asin= with |isbn=.

Error message if not 10 characters, if not ~~isbn10~~, if mixed and first character is a digit.

Error message if not 10 characters, if not ISBN-10, if mixed and first character is a digit.

|asin=630....... is (apparently) not a legitimate ISBN though it checksums as one; these do not cause this

function to emit the maint_asin message

This function is positioned here because it calls isbn()

Line 647:

Line 703:

]]

local function asin(id, domain)

local function asin (id, domain)

local err_cat = ""

if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then

err_cat = ' ' .. ~~set_error~~ ('~~bad_asin~~'); -- ~~asin~~ is not a mix of 10 uppercase alpha and numeric characters

err_cat = ' ' .. set_message ('err_bad_asin'); -- ASIN is not a mix of 10 uppercase alpha and numeric characters

else

if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X)

if isbn( id ) then -- see if ~~asin~~ value is ~~isbn10~~

if isbn (id) then -- see if ASIN value is or validates as ISBN-10

~~add_maint_cat~~ ('~~ASIN~~');

if not id:find ('^630') then -- 630xxxxxxx is (apparently) not a valid ISBN prefix but is used by Amazon as a numeric identifier

set_message ('maint_asin'); -- begins with something other than 630 so possibly an ISBN

end

elseif not is_set (err_cat) then

err_cat = ' ' .. ~~set_error~~ ('~~bad_asin~~'); -- ~~asin~~ is not ~~isbn10~~

err_cat = ' ' .. set_message ('err_bad_asin'); -- ASIN is not ISBN-10

end

elseif not id:match("^%u[%d%u]+$") then

err_cat = ' ' .. ~~set_error~~ ('~~bad_asin~~'); -- asin doesn't begin with uppercase alpha

err_cat = ' ' .. set_message ('err_bad_asin'); -- ASIN doesn't begin with uppercase alpha

end

if not is_set(domain) then

domain = "com";

elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom

domain = "co." .. domain;

elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico

domain = "com." .. domain;

end

local handler = cfg.id_handlers['ASIN'];

return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix .. domain .. "/dp/",

prefix = handler.prefix .. domain .. "/dp/",

id=id, encode=handler.encode, separator = handler.separator}) .. err_cat;

id = id, encode = handler.encode, separator = handler.separator}) .. err_cat;

end

Line 679:

Line 737:

--[[--------------------------< I S M N >----------------------------------------------------------------------

Determines whether an ISMN string is valid. Similar to ~~isbn~~-13, ~~ismn~~ is 13 digits ~~begining~~ 979-0-... and uses the

Determines whether an ISMN string is valid. Similar to ISBN-13, ISMN is 13 digits beginning 979-0-... and uses the

same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf

section 2, pages 9–12.

Line 692:

Line 750:

id_copy = id; -- save a copy because this testing is destructive

id=id:gsub( "[%s-–]", "" ); -- ~~strip spaces,~~ hyphens, and ~~endashes from the ismn~~

id = id:gsub ('[%s-]', ''); -- remove hyphens and white space

if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ~~ismn~~ must be 13 digits and begin 9790

if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ISMN must be 13 digits and begin with 9790

valid_ismn = false;

else

valid_ismn=is_valid_isxn_13 (id); -- validate ~~ismn~~

valid_ismn=is_valid_isxn_13 (id); -- validate ISMN

end

-- text = internal_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, -- use this (or external version) when there is some place to link to

-- text = internal_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- use this (or external version) when there is some place to link to

-- prefix=handler.prefix, id=id_copy, separator=handler.separator, encode=handler.encode})

-- prefix = handler.prefix, id = id_copy, separator = handler.separator, encode = handler.encode})

~~local label_link = (cfg.use_identifier_redirects and is_set (handler.redirect) and handler.redirect) or wd_article or handler.link; -- because no place to link to yet~~

text = table.concat ( -- because no place to link to yet

{

make_wikilink (~~label_link~~, handler.label),

make_wikilink (link_label_make (handler), handler.label),

handler.separator,

id_copy

Line 713:

Line 769:

if false == valid_ismn then

text = text .. ' ' .. ~~set_error~~( '~~bad_ismn~~' ) -- add an error message if the ~~ismn~~ is invalid

text = text .. ' ' .. set_message( 'err_bad_ismn' ) -- add an error message if the ISMN is invalid

end

Line 722:

Line 778:

--[[--------------------------< I S S N >----------------------------------------------------------------------

Validate and format an ~~issn~~. This code fixes the case where an editor has included an ISSN in the citation but

Validate and format an ISSN. This code fixes the case where an editor has included an ISSN in the citation but

has separated the two groups of four digits with a space. When that condition occurred, the resulting link looked

like this:

Line 728:

Line 784:

|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link

This code now prevents that by inserting a hyphen at the ~~issn~~ midpoint. It also validates the ~~issn~~ for length

This code now prevents that by inserting a hyphen at the ISSN midpoint. It also validates the ISSN for length

and makes sure that the checkdigit agrees with the calculated value. Incorrect length (8 digits), characters

other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check ~~issn~~ error message. The

other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check ISSN error message. The

~~issn~~ is always displayed with a hyphen, even if the ~~issn~~ was given as a single group of 8 digits.

ISSN is always displayed with a hyphen, even if the ISSN was given as a single group of 8 digits.

]]

local function issn(id, e)

local function issn (id, e, ignore_invalid)

local issn_copy = id; -- save a copy of unadulterated ~~issn~~; use this version for display if ~~issn~~ does not validate

local issn_copy = id; -- save a copy of unadulterated ISSN; use this version for display if ISSN does not validate

local handler;

local text;

local valid_issn = true;

~~if e then~~

handler = cfg.id_handlers[e and 'EISSN' or 'ISSN'];

handler = cfg.id_handlers['EISSN'];

~~else~~

~~handler = cfg.id_handlers[~~'ISSN'];

~~end~~

id=id:gsub( "[%s-–]", "" ); -- ~~strip spaces,~~ hyphens, and ~~endashes from the issn~~

id = id:gsub ('[%s-]', ''); -- remove hyphens and whitespace

if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the ~~issn~~: 8 digits long, containing only 0-9 or X in the last position

if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the ISSN: 8 digits long, containing only 0-9 or X in the last position

valid_issn=false; -- wrong length or improper character

valid_issn = false; -- wrong length or improper character

else

valid_issn=is_valid_isxn(id, 8); -- validate ~~issn~~

valid_issn = is_valid_isxn(id, 8); -- validate ISSN

end

Line 758:

Line 810:

id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version

else

id = issn_copy; -- if not valid, ~~use the~~ show the invalid ~~issn~~ with error message

id = issn_copy; -- if not valid, show the invalid ISSN with error message

end

~~text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,~~

~~prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})~~

if false == valid_issn then

text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

text = text .. ' ' .. ~~set_error~~( '~~bad_issn~~', e and 'e' or '' ) -- add an error message if the ~~issn~~ is invalid

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode})

end

if ignore_invalid then

set_message ('maint_issn_ignore');

else

if false == valid_issn then

text = text .. ' ' .. set_message ('err_bad_issn', e and 'e' or ''); -- add an error message if the ISSN is invalid

end

return text

Line 786:

Line 842:

if is_set (id_num) then

~~add_maint_cat~~ ('~~jfm_format~~');

set_message ('maint_jfm_format');

else -- plain number without JFM prefix

id_num = id; -- if here id does not have prefix

Line 794:

Line 850:

id = id_num; -- jfm matches pattern

else

err_cat = ' ' .. ~~set_error~~( '~~bad_jfm~~' ); -- set an error message

err_cat = ' ' .. set_message( 'err_bad_jfm' ); -- set an error message

end

return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;

end

Line 809:

Line 865:

length = 8 then all digits

length = 9 then lccn[1] is ~~lower case~~ alpha

length = 9 then lccn[1] is lowercase alpha

length = 10 then lccn[1] and lccn[2] are both ~~lower case~~ alpha or both digits

length = 10 then lccn[1] and lccn[2] are both lowercase alpha or both digits

length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both ~~lower case~~ alpha or both digits

length = 11 then lccn[1] is lowercase alpha, lccn[2] and lccn[3] are both lowercase alpha or both digits

length = 12 then lccn[1] and lccn[2] are both ~~lower case~~ alpha

length = 12 then lccn[1] and lccn[2] are both lowercase alpha

]]

Line 819:

Line 875:

local handler = cfg.id_handlers['LCCN'];

local err_cat = ''; -- presume that LCCN is valid

local id = lccn; -- local copy of the ~~lccn~~

local id = lccn; -- local copy of the LCCN

id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes)

local len = id:len(); -- get the length of the ~~lccn~~

local len = id:len(); -- get the length of the LCCN

if 8 == len then

if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits)

err_cat = ' ' .. ~~set_error~~( '~~bad_lccn~~' ); -- set an error message

err_cat = ' ' .. set_message ('err_bad_lccn'); -- set an error message

end

elseif 9 == len then -- LCCN should be adddddddd

if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern?

err_cat = ' ' .. ~~set_error~~( '~~bad_lccn~~' ); -- set an error message

err_cat = ' ' .. set_message ('err_bad_lccn'); -- set an error message

end

elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd

if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ...

if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern

err_cat = ' ' .. ~~set_error~~( '~~bad_lccn~~' ); -- no match, set an error message

err_cat = ' ' .. set_message ('err_bad_lccn'); -- no match, set an error message

end

elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd

if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns

err_cat = ' ' .. ~~set_error~~( '~~bad_lccn~~' ); -- no match, set an error message

err_cat = ' ' .. set_message ('err_bad_lccn'); -- no match, set an error message

end

elseif 12 == len then -- LCCN should be aadddddddddd

if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern

err_cat = ' ' .. ~~set_error~~( '~~bad_lccn~~' ); -- no match, set an error message

err_cat = ' ' .. set_message ('err_bad_lccn'); -- no match, set an error message

end

else

err_cat = ' ' .. ~~set_error~~( '~~bad_lccn~~' ); -- wrong length, set an error message

err_cat = ' ' .. set_message ('err_bad_lccn'); -- wrong length, set an error message

end

if not is_set (err_cat) and nil ~= lccn:find ('%s') then

err_cat = ' ' .. ~~set_error~~( '~~bad_lccn~~' ); -- lccn contains a space, set an error message

err_cat = ' ' .. set_message ('err_bad_lccn'); -- lccn contains a space, set an error message

end

return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;

prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}) .. err_cat;

end

Line 874:

Line 930:

if is_set (id_num) then

~~add_maint_cat~~ ('~~mr_format~~');

set_message ('maint_mr_format'); -- add maint cat

else -- plain number without mr prefix

id_num = id:match ('^%d+$'); -- if here id is all digits

Line 883:

Line 939:

id = string.rep ('0', 7-id_len ) .. id_num; -- zero-fill leading digits

else

err_cat = ' ' .. ~~set_error~~( '~~bad_mr~~' ); -- set an error message

err_cat = ' ' .. set_message ('err_bad_mr'); -- set an error message

end

return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;

end

Line 893:

Line 949:

--[[--------------------------< O C L C >----------------------------------------------------------------------

Validate and format an ~~oclc id~~. https://www.oclc.org/batchload/controlnumber.en.html {{dead link}}

Validate and format an OCLC ID. https://www.oclc.org/batchload/controlnumber.en.html {{dead link}}

archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html

Line 912:

Line 968:

number = id:match('%(OCoLC%)([1-9]%d*)'); -- get the number

if 9 < number:len() then

number = nil; -- ~~contrain~~ to 1 to 9 digits; change this when ~~oclc~~ issues 10-digit numbers

number = nil; -- constrain to 1 to 9 digits; change this when OCLC issues 10-digit numbers

end

elseif id:match('^%d+$') then -- no prefix

number = id; -- get the number

if 10 < number:len() then

number = nil; -- ~~contrain~~ to 1 to 10 digits; change this when ~~oclc~~ issues 11-digit numbers

number = nil; -- constrain to 1 to 10 digits; change this when OCLC issues 11-digit numbers

end

Line 924:

Line 980:

id = number; -- exclude prefix, if any, from external link

else

err_msg = ' ' .. ~~set_error~~( '~~bad_oclc~~' ) -- add an error message if the id is malformed

err_msg = ' ' .. set_message ('err_bad_oclc') -- add an error message if the id is malformed

end

local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg;

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_msg;

return text;

Line 941:

Line 997:

local function openlibrary(id, access)

~~local code;~~

local handler = cfg.id_handlers['OL'];

local ~~ident;~~

local ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- strip optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W';

local error_msg = '';

ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W'; ~~remove~~ OL ~~prefix~~

local prefix = { -- these are appended to the handler.prefix according to code

['A']='authors/OL',

['M']='books/OL',

['W']='works/OL',

['X']='OL' -- not a code; spoof when 'code' in id is invalid

};

if not ~~is_set (~~ident) then ~~-- if malformed return an error~~

if not ident then

~~return external_link_id ({link=handler.link, label~~=~~handler.label, q=handler.q, redirect=handler.redirect,~~

code = 'X'; -- no code or id completely invalid

~~prefix=handler.prefix ..~~ 'OL',

ident = id; -- copy id to ident so that we display the flawed identifier

~~id=~~id~~, separator=handler.separator, encode = handler.encode,~~

error_msg = ' ' .. set_message ('err_bad_ol');

~~access~~ = ~~access}) .. ' ' .. set_error( 'bad_ol' );~~

~~end~~

id ~~= ident~~; -- ~~use~~ ident ~~without~~ the ~~optional OL prefix (it has been removed)~~

~~if ( code == "A" ) then~~

~~return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect~~=~~handler.redirect,~~

~~prefix=handler.prefix ..~~ '~~authors/OL~~',

~~id=id, separator=handler~~.~~separator, encode = handler~~.~~encode,~~

~~access = access})~~

~~end~~

~~if ( code == "M" ) then~~

~~return external_link_id~~ (~~{link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,~~

~~prefix=handler.prefix ..~~ '~~books/OL~~',

~~id=id, separator=handler.separator, encode = handler.encode,~~

~~access = access}~~)

end

~~if ( code == "W" ) then~~

return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

prefix = handler.prefix .. prefix[code],

prefix=handler.prefix .. ~~'works/OL'~~,

id = ident, separator = handler.separator, encode = handler.encode,

id=id, separator=handler.separator, encode = handler.encode,

access = access}) .. error_msg;

access = access})

~~end~~

end

Line 996:

Line 1,037:

]]

local function pmc(id, embargo)

local function pmc (id, embargo)

local handler = cfg.id_handlers['PMC'];

local err_cat ~~= ''~~; ~~-- presume that PMC is valid~~

local err_cat;

local id_num;

local text;

id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with ~~pmc~~ prefix

id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with PMC prefix

if is_set (id_num) then

~~add_maint_cat~~ ('~~pmc_format~~');

set_message ('maint_pmc_format');

else -- plain number without ~~pmc~~ prefix

else -- plain number without PMC prefix

id_num = id:match ('^%d+$'); -- if here id is all digits

end

Line 1,013:

Line 1,054:

id_num = tonumber(id_num); -- convert id_num to a number for range testing

if 1 > id_num or handler.id_limit < id_num then -- if PMC is outside test limit boundaries

err_cat = ' ' .. ~~set_error~~( '~~bad_pmc~~' ); -- set an error message

err_cat = ' ' .. set_message ('err_bad_pmc'); -- set an error message

else

id = tostring (id_num); -- make sure id is a string

end

else -- when id format incorrect

err_cat = ' ' .. ~~set_error~~( '~~bad_pmc~~' ); -- set an error message

err_cat = ' ' .. set_message ('err_bad_pmc'); -- set an error message

end

if is_set (embargo) then -- is PMC is still embargoed?

if is_set (embargo) and is_set (is_embargoed (embargo)) then -- is PMC still embargoed?

text = table.concat ( -- still embargoed so no external link

{

make_wikilink (handler~~.link~~, handler.label),

make_wikilink (link_label_make (handler), handler.label),

handler.separator,

id,

err_cat

(err_cat and err_cat or '') -- parens required

});

else

text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, -- no embargo date or embargo has expired, ok to link to article

text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- no embargo date or embargo has expired, ok to link to article

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access,

auto_link = not err_cat and 'pmc' or nil -- do not auto-link when PMC has error

}) .. (err_cat and err_cat or ''); -- parentheses required

end

return text;

Line 1,050:

Line 1,093:

if id:match("[^%d]") then -- if PMID has anything but digits

err_cat = ' ' .. ~~set_error~~( '~~bad_pmid~~' ); -- set an error message

err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message

else -- PMID is only digits

local id_num = tonumber(id); -- convert id to a number for range testing

if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries

err_cat = ' ' .. ~~set_error~~( '~~bad_pmid~~' ); -- set an error message

err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message

end

return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;

end

Line 1,065:

Line 1,108:

--[[--------------------------< S 2 C I D >--------------------------------------------------------------------

Format an ~~s2cid~~, do simple error checking

Format an S2CID, do simple error checking

S2CIDs are sequential numbers beginning at 1 and counting up. This code checks the ~~s2cid~~ to see that it is only

S2CIDs are sequential numbers beginning at 1 and counting up. This code checks the S2CID to see that it is only

digits and is not greater than handler.id_limit; the value of that limit will need to be updated periodically

as more S2CIDs are issued.

Line 1,084:

Line 1,127:

id_num = tonumber(id_num); -- convert id_num to a number for range testing

if handler.id_limit < id_num then -- if S2CID is outside test limit boundaries

err_cat = ' ' .. ~~set_error~~( '~~bad_s2cid~~' ); -- set an error message

err_cat = ' ' .. set_message ('bad_s2cid_err'); -- set an error message

end

else -- when id format incorrect

err_cat = ' ' .. ~~set_error~~( '~~bad_s2cid~~' ); -- set an error message

err_cat = ' ' .. set_message ('bad_s2cid_err'); -- set an error message

end

text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix, id=id:gsub ('%.%a%a', ''), separator=handler.separator, encode=handler.encode, access=access}) .. err_cat;

prefix = handler.prefix, id = id:gsub ('%.%a%a', ''), separator = handler.separator, encode = handler.encode, access = access}) .. err_cat;

return text;

Line 1,100:

Line 1,143:

--[[--------------------------< S B N >------------------------------------------------------------------------

9-digit form of ~~isbn10~~; uses same check-digit validation when ~~sbn~~ is prefixed with an additional '0' to make 10 digits

9-digit form of ISBN-10; uses same check-digit validation when SBN is prefixed with an additional '0' to make 10 digits

]]

local function sbn (id)

local function sbn (id, ignore_invalid)

local ~~check~~;

local handler = cfg.id_handlers['SBN'];

local err_type = '';

local function return_result (check, err_type) -- local function to handle the various returns

local SBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect,

prefix = handler.prefix, id = id, separator = handler.separator});

if not ignore_invalid then -- if not ignoring SBN errors

if not check then

return SBN .. set_message ('err_bad_sbn', {err_type}, false, ' '); -- display an error message

end

else

set_message ('maint_isbn_ignore'); -- add a maint category even when there is no error (ToDo: Possibly switch to separate message for SBNs only)

end

return SBN;

end

if ~~nil ~=~~ id:match("[^%s-0-9X]") then

if id:match ('[^%s-0-9X]') then

return false, cfg.err_msg_supl.char; -- fail if ~~sbn~~ contains anything but digits, hyphens, or the uppercase X

return return_result (false, cfg.err_msg_supl.char); -- fail if SBN contains anything but digits, hyphens, whitespace, or the uppercase X

end

id=id:gsub( "[%s-]", "" ); -- ~~strip spaces~~ and ~~hyphens from~~ the ~~sbn~~

local ident = id:gsub ('[%s-]', ''); -- remove hyphens and whitespace; they interfere with the rest of the tests

if 9 ~= id:len() then

if 9 ~= ident:len() then

return false, cfg.err_msg_supl.length; -- fail if incorrect length

return return_result (false, cfg.err_msg_supl.length); -- fail if incorrect length

end

if id:match( "^%d*X?$" ) == nil then -- fail if ~~sbn~~ has 'X' anywhere but last position

if ident:match ('^%d*X?$') == nil then

~~return false, cfg.err_msg_supl.form;~~

return return_result (false, cfg.err_msg_supl.form); -- fail if SBN has 'X' anywhere but last position

end

return is_valid_isxn('0' .. id, 10), cfg.err_msg_supl.check; ~~-- prefix sbn with '0' and validate as isbn10~~

return return_result (is_valid_isxn ('0' .. ident, 10), cfg.err_msg_supl.check);

end

Line 1,128:

Line 1,182:

--[[--------------------------< S S R N >----------------------------------------------------------------------

Format an ~~ssrn~~, do simple error checking

Format an SSRN, do simple error checking

SSRNs are sequential numbers beginning at 100? and counting up. This code checks the ~~ssrn~~ to see that it is

SSRNs are sequential numbers beginning at 100? and counting up. This code checks the SSRN to see that it is

only digits and is greater than 99 and not greater than handler.id_limit; the value of that limit will need

to be updated periodically as more SSRNs are issued.

Line 1,147:

Line 1,201:

id_num = tonumber(id_num); -- convert id_num to a number for range testing

if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries

err_cat = ' ' .. ~~set_error~~( '~~bad_ssrn~~' ); -- set an error message

err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message

end

else -- when id format incorrect

err_cat = ' ' .. ~~set_error~~( '~~bad_ssrn~~' ); -- set an error message

err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message

end

text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}) .. err_cat;

return text;

Line 1,170:

Line 1,224:

local handler = cfg.id_handlers['USENETID'];

local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode})

if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$')then -- doesn't have '@' or has one or first or last character is '< or '>'

if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$') then -- doesn't have '@' with text on both sides, or first character is '<', or last character is '>'

text = text .. ' ' .. ~~set_error~~( '~~bad_usenet_id~~' ) -- add an error message if the message id is invalid

text = text .. ' ' .. set_message ('err_bad_usenet_id') -- add an error message if the message id is invalid

end

Line 1,196:

Line 1,250:

if id:match('^%d%d%d%d%d%d%d%d$') then -- is this identifier using temporary format?

~~add_maint_cat~~ ('~~zbl~~'); -- yes, add maint cat

set_message ('maint_zbl'); -- yes, add maint cat

elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format?

err_cat = ' ' .. ~~set_error~~( '~~bad_zbl~~' ); -- no, set an error message

err_cat = ' ' .. set_message ('err_bad_zbl'); -- no, set an error message

end

return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,

return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;

prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;

end

Line 1,220:

Line 1,274:

local function build_id_list( id_list, options )

local new_list, handler = {};

local accept;

local function fallback(k) return { __index = function(t,i) return cfg.id_handlers[k][i] end } end;

for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table

for k, v in pairs( id_list ) do

v, accept = has_accept_as_written (v); -- remove and note accept-as-written markup if present

-- k is uppercase identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table

-- fallback to read-only cfg

handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) );

Line 1,244:

Line 1,302:

table.insert( new_list, {handler.label, citeseerx( v ) } );

elseif k == 'DOI' then

table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } );

table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access, accept) } );

elseif k == 'EISSN' then

table.insert( new_list, {handler.label, issn( v, true ) } ); -- true distinguishes ~~eissn~~ from ~~issn~~

table.insert( new_list, {handler.label, issn( v, true, accept) } ); -- true distinguishes EISSN from ISSN

elseif k == 'HDL' then

table.insert( new_list, {handler.label, hdl( v, handler.access ) } );

elseif k == 'ISBN' then

~~local ISBN = internal_link_id~~( handler );

table.insert( new_list, {handler.label, isbn (v, (accept or options.IgnoreISBN)) } );

~~local check;~~

~~local err_type = '';~~

~~check~~, ~~err_type =~~ isbn( v );

~~if not check then~~

~~if is_set(options.IgnoreISBN) then -- ISBN is invalid; if |ignore-isbn-error= set~~

~~add_maint_cat ('ignore_isbn_err'); -- ad a maint category~~

~~else~~

~~ISBN = ISBN .. set_error( 'bad_isbn'~~, ~~{err_type}, false, " ", "" ); -- else display an error message~~

~~end~~

~~elseif is_set~~(options.IgnoreISBN) ~~then -- ISBN is OK; if |ignore-isbn-error= set~~

~~add_maint_cat ('ignore_isbn_err'~~)~~; -- because |ignore-isbn-error= unnecessary~~

~~end~~

~~table.insert( new_list, {handler.label, ISBN~~ } );

elseif k == 'ISMN' then

table.insert( new_list, {handler.label, ismn( v ) } );

elseif k == 'ISSN' then

table.insert( new_list, {handler.label, issn( v ) } );

table.insert( new_list, {handler.label, issn( v, false, accept) } );

elseif k == 'JFM' then

table.insert( new_list, {handler.label, jfm( v ) } );

Line 1,285:

Line 1,330:

table.insert( new_list, {handler.label, s2cid( v, handler.access ) } );

elseif k == 'SBN' then

~~local SBN = internal_link_id (handler);~~

table.insert( new_list, {handler.label, sbn (v, accept) } );

~~local check; -- boolean validation result~~

~~local err_type = '';~~

~~check, err_type = sbn (v);~~

~~if not check then~~

~~SBN = SBN .. set_error( 'bad_sbn', {err_type}, false, " ", "" ); -- display an error message~~

~~end~~

table.insert( new_list, {handler.label, ~~SBN~~ } );

elseif k == 'SSRN' then

table.insert( new_list, {handler.label, ssrn( v ) } );

Line 1,321:

Line 1,359:

Populates ID table from arguments using configuration settings. Loops through cfg.id_handlers and searches args for

any of the parameters listed in each cfg.id_handlers['...'].parameters. If found, adds the parameter and value to

the identifier list. Emits redundant error message is more than one alias exists in args

the identifier list. Emits redundant error message if more than one alias exists in args

]]

Line 1,327:

Line 1,365:

local function extract_ids( args )

local id_list = {}; -- list of identifiers found in args

for k, v in pairs( cfg.id_handlers ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table

for k, v in pairs( cfg.id_handlers ) do -- k is uppercase identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table

v = select_one( args, v.parameters, 'redundant_parameters' ); -- v.parameters is a table of aliases for k; here we pick one from args if present

if is_set(v) then id_list[k] = v; end -- if found in args, add identifier to our list

Line 1,353:

Line 1,391:

if is_set (access_level) then

if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required

table.insert( z.message_tail, { ~~set_error~~( 'invalid_param_val', {access_param, access_level}, true ) } );

table.insert( z.message_tail, { set_message( 'invalid_param_val', {access_param, access_level}, true ) } );

access_level = nil; -- invalid so unset

end

if not is_set(id_list[k]) then -- ~~identifer~~ access-level must have a matching identifier

if not is_set(id_list[k]) then -- identifier access-level must have a matching identifier

table.insert( z.message_tail, { ~~set_error~~( '~~param_access_requires_param~~', {k:lower()}, true ) } ); -- ~~param~~ name is uppercase in cfg.id_handlers (k); lowercase for error message

table.insert( z.message_tail, { set_message( 'err_param_access_requires_param', {k:lower()}, true ) } ); -- parameter name is uppercase in cfg.id_handlers (k); lowercase for error message

end

id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword

Line 1,376:

Line 1,414:

cfg = cfg_table_ptr;

~~is_set~~ = utilities_page_ptr.~~is_set~~; -- import functions from select Module:Citation/CS1/Utilities module

has_accept_as_written = utilities_page_ptr.has_accept_as_written; -- import functions from select Module:Citation/CS1/Utilities module

is_set = utilities_page_ptr.is_set;

in_array = utilities_page_ptr.in_array;

~~set_error~~ = utilities_page_ptr.~~set_error~~;

set_message = utilities_page_ptr.set_message;

select_one = utilities_page_ptr.select_one;

~~add_maint_cat = utilities_page_ptr.add_maint_cat;~~

substitute = utilities_page_ptr.substitute;

make_wikilink = utilities_page_ptr.make_wikilink;

Line 1,392:

Line 1,430:

return {

auto_link_urls = auto_link_urls, -- table of identifier URLs to be used when auto-linking |title=

build_id_list = build_id_list,

extract_ids = extract_ids,