Module:Citation/CS1/Identifiers: Difference between revisions

    (sync from sandbox;)
    (sync from sandbox;)
    Line 3: Line 3:
    ]]
    ]]


    local is_set, in_array, set_error, select_one, add_maint_cat, substitute, make_wikilink; -- functions in Module:Citation/CS1/Utilities
    local has_accept_as_written, is_set, in_array, set_message, select_one, -- functions in Module:Citation/CS1/Utilities
    substitute, make_wikilink;


    local z; -- table of tables defined in Module:Citation/CS1/Utilities
    local z; -- table of tables defined in Module:Citation/CS1/Utilities


    local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
    local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
    --[[--------------------------< P A G E  S C O P E  V A R I A B L E S >--------------------------------------
    declare variables here that have page-wide scope that are not brought in from other modules; that are created here and used here
    ]]
    local auto_link_urls = {}; -- holds identifier URLs for those identifiers that can auto-link |title=




    Line 14: Line 24:
    --[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >----------------------------
    --[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >----------------------------


    as an aid to internationalizing identifier-label wikilinks, gets identifier article names from wikidata.
    as an aid to internationalizing identifier-label wikilinks, gets identifier article names from Wikidata.


    returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else
    returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else
    Line 30: Line 40:


    local wd_article;
    local wd_article;
    local this_wiki_code = cfg.this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org
    local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org


    wd_article = mw.wikibase.getSitelink (q, this_wiki_code .. 'wiki'); -- fetch article title from wd; nil when no title available at this wiki
    wd_article = mw.wikibase.getSitelink (q, this_wiki_code .. 'wiki'); -- fetch article title from WD; nil when no title available at this wiki


    if wd_article then
    if wd_article then
    wd_article = table.concat ({':', this_wiki_code, ':', wd_article}); -- interwiki-style link without brackets if taken from wd; leading colon required
    wd_article = table.concat ({':', this_wiki_code, ':', wd_article}); -- interwiki-style link without brackets if taken from WD; leading colon required
    end
    end


    return wd_article; -- article title from wd; nil else
    return wd_article; -- article title from WD; nil else
    end
     
     
    --[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------
     
    common function to create identifier link label from handler table or from Wikidata
     
    returns the first available of
    1. redirect from local wiki's handler table (if enabled)
    2. Wikidata (if there is a Wikidata entry for this identifier in the local wiki's language)
    3. label specified in the local wiki's handler table
    ]]
     
    local function link_label_make (handler)
        -- An enabled, set redirect outranks every other source; leaving here also skips the
        -- Wikidata lookup, which the module treats as expensive.
        if cfg.use_identifier_redirects and is_set (handler.redirect) then
            return handler.redirect or handler.link; -- a falsy redirect value still falls through to the handler's link
        end

        -- No usable redirect: prefer the Wikidata article title for this wiki, else the
        -- label link given in the handler table.
        return wikidata_article_name_get (handler.q) or handler.link;
    end
    end


    Line 44: Line 76:
    --[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------
    --[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------


    Formats a wiki style external link
    Formats a wiki-style external link
     
    NB. Wikidata P1630 has a formatter URL with a $1 placeholder for the ID which could be worked into our prefix/id/suffix
    string, either overriding local definitions (auto-update) or as a fallback for identifiers without local definitions.
    But doing so is expensive and could be risky if WD gets vandalized.
     
    See Template_talk:Authority_control/Archive_8#Use_Wikidata_as_the_source_for_the_external_link


    ]]
    ]]


    local function external_link_id(options)
    local function external_link_id (options)
    local url_string = options.id;
    local url_string = options.id;
    local ext_link;
    local ext_link;
    local this_wiki_code = cfg.this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org
    local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org
    local wd_article; -- article title from wikidata
    local wd_article; -- article title from Wikidata
    if options.encode == true or options.encode == nil then
    if options.encode == true or options.encode == nil then
    url_string = mw.uri.encode( url_string );
    url_string = mw.uri.encode( url_string );
    end
    if options.auto_link and is_set (options.access) then
    auto_link_urls[options.auto_link] = table.concat ({options.prefix, url_string, options.suffix});
    end
    end


    Line 62: Line 104:
    ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link}); -- add the free-to-read / paywall lock
    ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link}); -- add the free-to-read / paywall lock
    end
    end
    if not (cfg.use_identifier_redirects and is_set (options.redirect)) then -- redirect has priority so if enabled and available don't fetch from wikidata because expensive
    wd_article = wikidata_article_name_get (options.q); -- if wikidata has an article title for this wiki, get it;
    end
    local label_link = (cfg.use_identifier_redirects and is_set (options.redirect) and options.redirect) or wd_article or options.link;


    return table.concat ({
    return table.concat ({
    make_wikilink (label_link, options.label), -- redirect, wikidata link, or locally specified link (in that order)
    make_wikilink (link_label_make (options), options.label), -- redirect, Wikidata link, or locally specified link (in that order)
    options.separator or '&nbsp;',
    options.separator or '&nbsp;',
    ext_link
    ext_link
    Line 79: Line 115:
    --[[--------------------------< I N T E R N A L _ L I N K _ I D >----------------------------------------------
    --[[--------------------------< I N T E R N A L _ L I N K _ I D >----------------------------------------------


    Formats a wiki style internal link
    Formats a wiki-style internal link


    ]]
    ]]
    Line 85: Line 121:
    local function internal_link_id(options)
    local function internal_link_id(options)
    local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9
    local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9
    if not (cfg.use_identifier_redirects and is_set (options.redirect)) then -- redirect has priority so if enabled and available don't fetch from wikidata because expensive
    wd_article = wikidata_article_name_get (options.q); -- if wikidata has an article title for this wiki, get it;
    end
    local label_link = (cfg.use_identifier_redirects and is_set (options.redirect) and options.redirect) or wd_article or options.link;


    return table.concat (
    return table.concat (
    {
    {
    make_wikilink (label_link, options.label), -- wiki link the identifier label
    make_wikilink (link_label_make (options), options.label), -- wiki-link the identifier label
    options.separator or '&nbsp;', -- add the separator
    options.separator or '&nbsp;', -- add the separator
    make_wikilink (
    make_wikilink (
    Line 100: Line 130:
    {
    {
    options.prefix,
    options.prefix,
    id, -- translated to western digits
    id, -- translated to Western digits
    options.suffix or ''
    options.suffix or ''
    }),
    }),
    substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent Latn script identifiers from being reversed at rtl language wikis
    substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent Latin script identifiers from being reversed at RTL language wikis
    ); -- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required?
    ); -- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required?
    });
    });
    Line 111: Line 141:
    --[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------
    --[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------


    Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date.  If embargo date is
    Determines if a PMC identifier's online version is embargoed. Compares the date in |pmc-embargo-date= against today's date.  If embargo date is
    in the future, returns the content of |embargo=; otherwise, returns and empty string because the embargo has expired or because
    in the future, returns the content of |pmc-embargo-date=; otherwise, returns an empty string because the embargo has expired or because
    |embargo= was not set in this cite.
    |pmc-embargo-date= was not set in this cite.


    ]]
    ]]
    Line 120: Line 150:
    if is_set (embargo) then
    if is_set (embargo) then
    local lang = mw.getContentLanguage();
    local lang = mw.getContentLanguage();
    local good1, embargo_date, good2, todays_date;
    local good1, embargo_date, todays_date;
    good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo );
    good1, embargo_date = pcall (lang.formatDate, lang, 'U', embargo);
    good2, todays_date = pcall( lang.formatDate, lang, 'U' );
    todays_date = lang:formatDate ('U');
    if good1 and good2 then -- if embargo date and today's date are good dates
    if good1 then -- if embargo date is a good date
    if tonumber( embargo_date ) >= tonumber( todays_date ) then -- is embargo date is in the future?
    if tonumber (embargo_date) >= tonumber (todays_date) then -- is embargo date is in the future?
    return embargo; -- still embargoed
    return embargo; -- still embargoed
    else
    else
    add_maint_cat ('embargo')
    set_message ('maint_pmc_embargo'); -- embargo has expired; add main cat
    return ''; -- unset because embargo has expired
    return ''; -- unset because embargo has expired
    end
    end
    end
    end
    end
    end
    return ''; -- |embargo= not set return empty string
    return ''; -- |pmc-embargo-date= not set return empty string
    end
    end


    Line 142: Line 172:
    2019-12-11T00:00Z <= biorxiv_date < today + 2 days
    2019-12-11T00:00Z <= biorxiv_date < today + 2 days
    The dated form of biorxiv identifier has a start date of 2019-12-11.  The unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400
    The dated form of biorxiv identifier has a start date of 2019-12-11.  The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400


    biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC
    biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC
    Line 151: Line 181:


    This function does not work if it is fed month names for languages other than English.  Wikimedia #time: parser
    This function does not work if it is fed month names for languages other than English.  Wikimedia #time: parser
    apparently doesn't understand non-Engish date month names. This function will always return false when the date
    apparently doesn't understand non-English date month names. This function will always return false when the date
    contains a non-English month name because good1 is false after the call to lang.formatDate().  To get around that
    contains a non-English month name because good1 is false after the call to lang.formatDate().  To get around that
    call this function with YYYY-MM-DD format dates.
    call this function with YYYY-MM-DD format dates.
    Line 159: Line 189:
    local function is_valid_biorxiv_date (biorxiv_date)
    local function is_valid_biorxiv_date (biorxiv_date)
    local good1, good2;
    local good1, good2;
    local biorxiv_ts, tomorrow_ts; -- to hold unix time stamps representing the dates
    local biorxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates
    local lang_object = mw.getContentLanguage();
    local lang_object = mw.getContentLanguage();


    good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date ); -- convert biorxiv_date value to unix timesatmp
    good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date ); -- convert biorxiv_date value to Unix timestamp
    good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
    good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
    if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which which tonumber() may not understand
    if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
    biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts); -- convert to numbers for the comparison;
    biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts); -- convert to numbers for the comparison;
    tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
    tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
    else
    else
    return false; -- one or both failed to convert to unix time stamp
    return false; -- one or both failed to convert to Unix timestamp
    end
    end


    Line 178: Line 208:
    --[[--------------------------< IS _ V A L I D _ I S X N >-----------------------------------------------------
    --[[--------------------------< IS _ V A L I D _ I S X N >-----------------------------------------------------


    ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit.
    ISBN-10 and ISSN validator code calculates checksum across all ISBN/ISSN digits including the check digit.
    ISBN-13 is checked in isbn().
    ISBN-13 is checked in isbn().


    If the number is valid the result will be 0. Before calling this function, issbn/issn must be checked for length
    If the number is valid the result will be 0. Before calling this function, ISBN/ISSN must be checked for length
    and stripped of dashes, spaces and other non-isxn characters.
    and stripped of dashes, spaces and other non-ISxN characters.


    ]]
    ]]
    Line 189: Line 219:
    local temp = 0;
    local temp = 0;
    isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58
    isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58
    len = len+1; -- adjust to be a loop counter
    len = len + 1; -- adjust to be a loop counter
    for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum
    for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum
    if v == string.byte( "X" ) then -- if checkdigit is X (compares the byte value of 'X' which is 0x58)
    if v == string.byte( "X" ) then -- if checkdigit is X (compares the byte value of 'X' which is 0x58)
    Line 203: Line 233:
    --[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >-----------------------------------------------
    --[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >-----------------------------------------------


    ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit.
    ISBN-13 and ISMN validator code calculates checksum across all 13 ISBN/ISMN digits including the check digit.
    If the number is valid, the result will be 0. Before calling this function, isbn-13/ismn must be checked for length
    If the number is valid, the result will be 0. Before calling this function, ISBN-13/ISMN must be checked for length
    and stripped of dashes, spaces and other non-isxn-13 characters.
    and stripped of dashes, spaces and other non-ISxN-13 characters.


    ]]
    ]]
    Line 216: Line 246:
    temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit
    temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit
    end
    end
    return temp % 10 == 0; -- sum modulo 10 is zero when isbn-13/ismn is correct
    return temp % 10 == 0; -- sum modulo 10 is zero when ISBN-13/ISMN is correct
    end
    end


    Line 222: Line 252:
    --[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------
    --[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------


    lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
    LCCN normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
    1. Remove all blanks.
    1. Remove all blanks.
    2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.
    2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.
    Line 231: Line 261:
    2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.
    2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.


    Returns a normalized lccn for lccn() to validate.  There is no error checking (step 3.b.1) performed in this function.
    Returns a normalized LCCN for lccn() to validate.  There is no error checking (step 3.b.1) performed in this function.
    ]]
    ]]


    local function normalize_lccn (lccn)
    local function normalize_lccn (lccn)
    lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace
    lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace


    if nil ~= string.find (lccn,'/') then
    if nil ~= string.find (lccn, '/') then
    lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it
    lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it
    end
    end


    local prefix
    local prefix
    local suffix
    local suffix
    prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix
    prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix


    if nil ~= suffix then -- if there was a hyphen
    if nil ~= suffix then -- if there was a hyphen
    suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6
    suffix = string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6
    lccn=prefix..suffix; -- reassemble the lccn
    lccn = prefix..suffix; -- reassemble the LCCN
    end
    end
    return lccn;
    return lccn;
    end
    end


    --============================<< I D E N T I F I E R  F U N C T I O N S >>====================================
    --============================<< I D E N T I F I E R  F U N C T I O N S >>====================================
    Line 260: Line 291:


    format and error check arXiv identifier.  There are three valid forms of the identifier:
    format and error check arXiv identifier.  There are three valid forms of the identifier:
    the first form, valid only between date codes 9108 and 0703 is:
    the first form, valid only between date codes 9107 and 0703, is:
    arXiv:<archive>.<class>/<date code><number><version>
    arXiv:<archive>.<class>/<date code><number><version>
    where:
    where:
    Line 290: Line 321:
    local text; -- output text
    local text; -- output text
    if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9108-0703 format w/ & w/o version
    if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9107-0703 format with or without version
    year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");
    year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");
    year = tonumber(year);
    year = tonumber(year);
    Line 299: Line 330:
    end
    end


    elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 w/ & w/o version
    elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 with or without version
    year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
    year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
    year = tonumber(year);
    year = tonumber(year);
    month = tonumber(month);
    month = tonumber(month);
    if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years)
    if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years)
    ((7 == year) and (4 > month)) then --or -- when year is 07, is month invalid (before April)?
    ((7 == year) and (4 > month)) then -- when year is 07, is month invalid (before April)?
    err_cat = true; -- flag for error message
    err_cat = true; -- flag for error message
    end
    end


    elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version
    elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format with or without version
    year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
    year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
    year = tonumber(year);
    year = tonumber(year);
    Line 320: Line 351:
    end
    end


    err_cat = err_cat and table.concat ({' ', set_error ('bad_arxiv')}) or ''; -- set error message if flag is true
    err_cat = err_cat and table.concat ({' ', set_message ('err_bad_arxiv')}) or ''; -- set error message if flag is true
    text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}) .. err_cat;


    if is_set (class) then
    if is_set (class) then
    Line 329: Line 360:
    text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
    text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
    else
    else
    text = table.concat ({text, ' ', set_error ('class_ignored')});
    text = table.concat ({text, ' ', set_message ('err_class_ignored')});
    end
    end
    end
    end
    Line 339: Line 370:
    --[[--------------------------< B I B C O D E >--------------------------------------------------------------------
    --[[--------------------------< B I B C O D E >--------------------------------------------------------------------


    Validates (sort of) and formats a bibcode id.
    Validates (sort of) and formats a bibcode ID.


    Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes
    Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes
    Line 360: Line 391:
    local year;
    local year;


    local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode,
    access=access});
    access = access});
    if 19 ~= id:len() then
    if 19 ~= id:len() then
    err_type = cfg.err_msg_supl.length;
    err_type = cfg.err_msg_supl.length;
    else
    else
    year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$") --
    year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$");
    if not year then -- if nil then no pattern match
    if not year then -- if nil then no pattern match
    err_type = cfg.err_msg_supl.value; -- so value error
    err_type = cfg.err_msg_supl.value; -- so value error
    else
    else
    local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year
    local next_year = tonumber(os.date ('%Y')) + 1; -- get the current year as a number and add one for next year
    year = tonumber (year); -- convert year portion of bibcode to a number
    year = tonumber (year); -- convert year portion of bibcode to a number
    if (1000 > year) or (year > next_year) then
    if (1000 > year) or (year > next_year) then
    err_type = cfg.err_msg_supl.year; -- year out of bounds
    err_type = cfg.err_msg_supl.year; -- year out of bounds
    end
    end
    if id:find('&%.') then
    if id:find('&%.') then
    err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does its missing a letter)
    err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter)
    end
    end
    end
    end
    Line 383: Line 414:


    if is_set (err_type) then -- if there was an error detected
    if is_set (err_type) then -- if there was an error detected
    text = text .. ' ' .. set_error( 'bad_bibcode', {err_type});
    text = text .. ' ' .. set_message( 'err_bad_bibcode', {err_type});
    end
    end
    return text;
    return text;
    Line 391: Line 422:
    --[[--------------------------< B I O R X I V >-----------------------------------------------------------------
    --[[--------------------------< B I O R X I V >-----------------------------------------------------------------


    Format bioRxiv id and do simple error checking.  Before 2019-12-11, biorXiv ids were 10.1101/ followed by exactly
    Format bioRxiv ID and do simple error checking.  Before 2019-12-11, bioRxiv IDs were 10.1101/ followed by exactly
    6 digits.  After 2019-12-11, biorXiv ids retained the six-digit identifier but prefixed that with a yyyy.mm.dd.  
    6 digits.  After 2019-12-11, bioRxiv IDs retained the six-digit identifier but prefixed that with a yyyy.mm.dd.  
    date and suffixed with an optional version identifier.
    date and suffixed with an optional version identifier.


    The bioRxiv id is the string of characters:
    The bioRxiv ID is the string of characters:
    https://doi.org/10.1101/078733 -> 10.1101/078733
    https://doi.org/10.1101/078733 -> 10.1101/078733
    or a date followed by a six-digit number followed by an optional version indicator 'v' and one or more digits:
    or a date followed by a six-digit number followed by an optional version indicator 'v' and one or more digits:
    Line 412: Line 443:
    '^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
    '^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
    '^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)
    '^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)
    }
    }
    for _, pattern in ipairs (patterns) do -- spin through the patterns looking for a match
    for _, pattern in ipairs (patterns) do -- spin through the patterns looking for a match
    Line 419: Line 450:


    if m then -- m is nil when id is the six-digit form
    if m then -- m is nil when id is the six-digit form
    if not is_valid_biorxiv_date (y .. '-' .. m .. '-' .. d) then -- validate the encoded date; TODO: don't ignore leapyear and actual month lengths ({{#time:}} is a poor date validator)
    if not is_valid_biorxiv_date (y .. '-' .. m .. '-' .. d) then -- validate the encoded date; TODO: don't ignore leap-year and actual month lengths ({{#time:}} is a poor date validator)
    break; -- date fail; break out early so we don't unset the error message
    break; -- date fail; break out early so we don't unset the error message
    end
    end
    Line 428: Line 459:
    end -- err_cat remains set here when no match
    end -- err_cat remains set here when no match


    return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix,id=id,separator=handler.separator,
    prefix = handler.prefix, id = id, separator = handler.separator,
    encode=handler.encode, access=handler.access}) .. (err_cat and (' ' .. set_error( 'bad_biorxiv')) or '');
    encode = handler.encode, access = handler.access}) .. (err_cat and (' ' .. set_message( 'err_bad_biorxiv')) or '');
    end
    end


    Line 445: Line 476:
    local matched;
    local matched;
    local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode,
    access=handler.access});
    access = handler.access});
    matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$");
    matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$");
    if not matched then
    if not matched then
    text = text .. ' ' .. set_error( 'bad_citeseerx' );
    text = text .. ' ' .. set_message( 'err_bad_citeseerx' );
    end
    end
    return text;
    return text;
    Line 465: Line 496:
    Suffix: character string of any length chosen by the registrant
    Suffix: character string of any length chosen by the registrant


    This function checks a DOI name for: prefix/suffix.  If the doi name contains spaces or endashes, or, if it ends
    This function checks a DOI name for: prefix/suffix.  If the DOI name contains spaces or endashes, or, if it ends
    with a period or a comma, this function will emit a bad_doi error message.
    with a period or a comma, this function will emit a bad_doi error message.


    DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
    DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
    and terminal punctuation may not be technically correct, but it appears that in practice these characters are rarely
    and terminal punctuation may not be technically correct, but it appears that in practice these characters are rarely
    if ever used in doi names.
    if ever used in DOI names.


    ]]
    ]]


    local function doi(id, inactive, access)
    local function doi (id, inactive, access, ignore_invalid)
    local cat = ""
    local err_cat;
    local handler = cfg.id_handlers['DOI'];
    local handler = cfg.id_handlers['DOI'];
    local text;
    local text;
    if is_set(inactive) then
    if is_set (inactive) then
    local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
    local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
    local inactive_month, good;
    local inactive_month, good;
    Line 492: Line 523:
    end
    end
    else
    else
    inactive_year = nil; -- |doi-broken= has something but it isn't a date
    inactive_year = nil; -- |doi-broken-date= has something but it isn't a date
    end
    end
    if is_set(inactive_year) and is_set (inactive_month) then
    if is_set(inactive_year) and is_set (inactive_month) then
    table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year .. ' ' .. inactive_month); -- use inactive month in category name
    set_message ('maint_doi_inactive_dated', {inactive_year, inactive_month, ' '});
    elseif is_set(inactive_year) then
    elseif is_set(inactive_year) then
    table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year);
    set_message ('maint_doi_inactive_dated', {inactive_year, '', ''});
    else
    else
    table.insert( z.error_categories, 'Pages with inactive DOIs'); -- when inactive doesn't contain a recognizable date
    set_message ('maint_doi_inactive');
    end
    end
    inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')';
    inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')';
    end
    end


    text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when DOI has the proper basic form
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '')
     
    local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when doi has the proper basic form
    registrant_err_patterns = { -- these patterns are for code ranges that are not supported  
    local registrant_err_patterns = { -- these patterns are for code ranges that are not supported  
    '^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accecpts: 10000–39999
    '^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
    '^[^1-4]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 40000+); accecpts: 10000–49999
    '^[^1-4]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 40000+); accepts: 10000–49999
    '^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accecpts: 1000–9999
    '^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
    '^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accecpts: 1000–9999
    '^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
    '^%d%d%d%d%d%d+', -- 6 or more digits
    '^%d%d%d%d%d%d+', -- 6 or more digits
    '^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate)
    '^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate)
    Line 520: Line 548:
    '%s', -- any space character in registrant
    '%s', -- any space character in registrant
    }
    }
     
    if registrant then -- when doi has proper form
    if not ignore_invalid then
    for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns
    if registrant then -- when DOI has proper form
    if registrant:match (pattern) then -- to validate registrant codes
    for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns
    cat = ' ' .. set_error ('bad_doi'); -- when found, mark this doi as bad
    if registrant:match (pattern) then -- to validate registrant codes
    break; -- and done
    err_cat = ' ' .. set_message ('err_bad_doi'); -- when found, mark this DOI as bad
    break; -- and done
    end
    end
    end
    else
    err_cat = ' ' .. set_message ('err_bad_doi'); -- invalid directory or malformed
    end
    end
    else
    else
    cat = ' ' .. set_error ('bad_doi'); -- invalid directory or malformed
    set_message ('maint_doi_ignore');
    end
    end


    return text .. cat
    text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access,
    auto_link = not (err_cat or is_set (inactive) or ignore_invalid) and 'doi' or nil -- do not auto-link when |doi-broken-date= has a value or when there is a DOI error or (to play it safe, after all, auto-linking is not essential) when invalid DOIs are ignored
    }) .. (inactive or '');
     
    return text .. (err_cat and err_cat or ''); -- parentheses required
    end
    end


    Line 552: Line 589:


    Query string parameters are named here: http://www.handle.net/proxy_servlet.html.  query strings are not displayed
    Query string parameters are named here: http://www.handle.net/proxy_servlet.html.  query strings are not displayed
    but since '?' is anallowed character in an hdl, '?' followed by one of the query parameters is the only way we
    but since '?' is an allowed character in an HDL, '?' followed by one of the query parameters is the only way we
    have to detect the query string so that it isn't url encoded with the rest of the identifier.
    have to detect the query string so that it isn't URL-encoded with the rest of the identifier.


    ]]
    ]]
    Line 584: Line 621:


    if found then
    if found then
    id = hdl; -- found so replace id with the handle portion; this will be url encoded, suffix will not
    id = hdl; -- found so replace id with the handle portion; this will be URL-encoded, suffix will not
    else
    else
    suffix = ''; -- make sure suffix is empty string for concatenation else
    suffix = ''; -- make sure suffix is empty string for concatenation else
    end
    end


    local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix, id=id, suffix=suffix, separator=handler.separator, encode=handler.encode, access=access})
    prefix = handler.prefix, id = id, suffix = suffix, separator = handler.separator, encode = handler.encode, access = access})


    if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma
    if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- HDL must contain a forward slash, must not contain spaces, endashes, and must not end with period or comma
    text = text .. ' ' .. set_error( 'bad_hdl' );
    text = text .. ' ' .. set_message( 'err_bad_hdl' );
    end
    end
    return text;
    return text;
    Line 605: Line 642:
    ]]
    ]]


    local function isbn( isbn_str )
    local function isbn (isbn_str, ignore_invalid)
    if nil ~= isbn_str:match("[^%s-0-9X]") then
    local handler = cfg.id_handlers['ISBN'];
    return false, cfg.err_msg_supl.char; -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X
    local function return_result (check, err_type) -- local function to handle the various returns
    local ISBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect,
    prefix = handler.prefix, id = isbn_str, separator = handler.separator});
    if ignore_invalid then -- if ignoring ISBN errors
    set_message ('maint_isbn_ignore'); -- add a maint category even when there is no error
    else -- here when not ignoring
    if not check then -- and there is an error
    return ISBN .. set_message ('err_bad_isbn', {err_type}, false, ' '); -- display an error message
    end
    end
    return ISBN;
    end
    end
    isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces
     
    local len = isbn_str:len();
    if nil ~= isbn_str:match ('[^%s-0-9X]') then
    return return_result (false, cfg.err_msg_supl.char); -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X
    end
     
    local id = isbn_str:gsub ('[%s-]', ''); -- remove hyphens and whitespace
     
    local len = id:len();
       
       
    if len ~= 10 and len ~= 13 then
    if len ~= 10 and len ~= 13 then
    return false, cfg.err_msg_supl.length; -- fail if incorrect length
    return return_result (false, cfg.err_msg_supl.length); -- fail if incorrect length
    end
    end


    if len == 10 then
    if len == 10 then
    if isbn_str:match( "^%d*X?$" ) == nil then -- fail if isbn_str has 'X' anywhere but last position
    if id:match ('^%d*X?$') == nil then -- fail if isbn_str has 'X' anywhere but last position
    return false, cfg.err_msg_supl.form;
    return return_result (false, cfg.err_msg_supl.form);
    end
    end
    return is_valid_isxn(isbn_str, 10), cfg.err_msg_supl.check;
    return return_result (is_valid_isxn(id, 10), cfg.err_msg_supl.check);
    else
    else
    if isbn_str:match( "^%d+$" ) == nil then
    if id:match ('^%d+$') == nil then
    return false, cfg.err_msg_supl.char; -- fail if isbn13 is not all digits
    return return_result (false, cfg.err_msg_supl.char); -- fail if ISBN-13 is not all digits
    end
    end
    if isbn_str:match( "^97[89]%d*$" ) == nil then
    if id:match ('^97[89]%d*$') == nil then
    return false, cfg.err_msg_supl.prefix; -- fail when isbn13 does not begin with 978 or 979
    return return_result (false, cfg.err_msg_supl.prefix); -- fail when ISBN-13 does not begin with 978 or 979
    end
    end
    if isbn_str:match ('^9790') then
    if id:match ('^9790') then
    return false, cfg.err_msg_supl.group; -- group identifier '0' is reserved to ismn
    return return_result (false, cfg.err_msg_supl.group); -- group identifier '0' is reserved to ISMN
    end
    end
    return is_valid_isxn_13 (isbn_str), cfg.err_msg_supl.check;
    return return_result (is_valid_isxn_13 (id), cfg.err_msg_supl.check);
    end
    end
    end
    end




    --[[--------------------------< A M A Z O N >------------------------------------------------------------------
    --[[--------------------------< A S I N >----------------------------------------------------------------------


    Formats a link to Amazon.  Do simple error checking: asin must be mix of 10 numeric or uppercase alpha
    Formats a link to Amazon.  Do simple error checking: ASIN must be a mix of 10 numeric or uppercase alpha
    characters.  If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit
    characters.  If a mix, first character must be uppercase alpha; if all numeric, ASINs must be 10-digit
    isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.
    ISBN. If 10-digit ISBN, add a maintenance category so a bot or AWB script can replace |asin= with |isbn=.
    Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.
    Error message if not 10 characters, if not ISBN-10, if mixed and first character is a digit.
     
    |asin=630....... is (apparently) not a legitimate ISBN though it checksums as one; these do not cause this
    function to emit the maint_asin message


    This function is positioned here because it calls isbn()
    This function is positioned here because it calls isbn()
    Line 647: Line 703:
    ]]
    ]]


    local function asin(id, domain)
    local function asin (id, domain)
    local err_cat = ""
    local err_cat = ""


    if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
    if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
    err_cat = ' ' .. set_error ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters
    err_cat = ' ' .. set_message ('err_bad_asin'); -- ASIN is not a mix of 10 uppercase alpha and numeric characters
    else
    else
    if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X)
    if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X)
    if isbn( id ) then -- see if asin value is isbn10
    if isbn (id) then -- see if ASIN value is or validates as ISBN-10
    add_maint_cat ('ASIN');
    if not id:find ('^630') then -- 630xxxxxxx is (apparently) not a valid isbn prefix but is used by amazon as a numeric identifier
    set_message ('maint_asin'); -- begins with something other than 630 so possibly an isbn
    end
    elseif not is_set (err_cat) then
    elseif not is_set (err_cat) then
    err_cat = ' ' .. set_error ('bad_asin'); -- asin is not isbn10
    err_cat = ' ' .. set_message ('err_bad_asin'); -- ASIN is not ISBN-10
    end
    end
    elseif not id:match("^%u[%d%u]+$") then
    elseif not id:match("^%u[%d%u]+$") then
    err_cat = ' ' .. set_error ('bad_asin'); -- asin doesn't begin with uppercase alpha
    err_cat = ' ' .. set_message ('err_bad_asin'); -- asin doesn't begin with uppercase alpha
    end
    end
    end
    end
    if not is_set(domain) then  
    if not is_set(domain) then  
    domain = "com";
    domain = "com";
    elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom
    elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom
    domain = "co." .. domain;
    domain = "co." .. domain;
    elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico
    elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico
    domain = "com." .. domain;
    domain = "com." .. domain;
    end
    end
    local handler = cfg.id_handlers['ASIN'];
    local handler = cfg.id_handlers['ASIN'];
    return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix .. domain .. "/dp/",
    prefix = handler.prefix .. domain .. "/dp/",
    id=id, encode=handler.encode, separator = handler.separator}) .. err_cat;
    id = id, encode = handler.encode, separator = handler.separator}) .. err_cat;
    end
    end


    Line 679: Line 737:
    --[[--------------------------< I S M N >----------------------------------------------------------------------
    --[[--------------------------< I S M N >----------------------------------------------------------------------


    Determines whether an ISMN string is valid.  Similar to isbn-13, ismn is 13 digits begining 979-0-... and uses the
    Determines whether an ISMN string is valid.  Similar to ISBN-13, ISMN is 13 digits beginning 979-0-... and uses the
    same check digit calculations.  See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf
    same check digit calculations.  See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf
    section 2, pages 9–12.
    section 2, pages 9–12.
    Line 692: Line 750:


    id_copy = id; -- save a copy because this testing is destructive
    id_copy = id; -- save a copy because this testing is destructive
    id=id:gsub( "[%s-]", "" ); -- strip spaces, hyphens, and endashes from the ismn
    id = id:gsub ('[%s-]', ''); -- remove hyphens and white space


    if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ismn must be 13 digits and begin 9790
    if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ISMN must be 13 digits and begin with 9790
    valid_ismn = false;
    valid_ismn = false;
    else
    else
    valid_ismn=is_valid_isxn_13 (id); -- validate ismn
    valid_ismn=is_valid_isxn_13 (id); -- validate ISMN
    end
    end


    -- text = internal_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, -- use this (or external version) when there is some place to link to
    -- text = internal_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- use this (or external version) when there is some place to link to
    -- prefix=handler.prefix, id=id_copy, separator=handler.separator, encode=handler.encode})
    -- prefix = handler.prefix, id = id_copy, separator = handler.separator, encode = handler.encode})
    local label_link = (cfg.use_identifier_redirects and is_set (handler.redirect) and handler.redirect) or wd_article or handler.link; -- because no place to link to yet


    text = table.concat ( -- because no place to link to yet
    text = table.concat ( -- because no place to link to yet
    {
    {
    make_wikilink (label_link, handler.label),
    make_wikilink (link_label_make (handler), handler.label),
    handler.separator,
    handler.separator,
    id_copy
    id_copy
    Line 713: Line 769:


    if false == valid_ismn then
    if false == valid_ismn then
    text = text .. ' ' .. set_error( 'bad_ismn' ) -- add an error message if the ismn is invalid
    text = text .. ' ' .. set_message( 'err_bad_ismn' ) -- add an error message if the ISMN is invalid
    end  
    end  
    Line 722: Line 778:
    --[[--------------------------< I S S N >----------------------------------------------------------------------
    --[[--------------------------< I S S N >----------------------------------------------------------------------


    Validate and format an issn.  This code fixes the case where an editor has included an ISSN in the citation but
    Validate and format an ISSN.  This code fixes the case where an editor has included an ISSN in the citation but
    has separated the two groups of four digits with a space.  When that condition occurred, the resulting link looked
    has separated the two groups of four digits with a space.  When that condition occurred, the resulting link looked
    like this:
    like this:
    Line 728: Line 784:
    |issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link
    |issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link
    This code now prevents that by inserting a hyphen at the issn midpoint.  It also validates the issn for length
    This code now prevents that by inserting a hyphen at the ISSN midpoint.  It also validates the ISSN for length
    and makes sure that the check digit agrees with the calculated value.  Incorrect length (anything other than 8 digits), characters
    and makes sure that the check digit agrees with the calculated value.  Incorrect length (anything other than 8 digits), characters
    other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn error message.  The
    other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check ISSN error message.  The
    issn is always displayed with a hyphen, even if the issn was given as a single group of 8 digits.
    ISSN is always displayed with a hyphen, even if the ISSN was given as a single group of 8 digits.


    ]]
    ]]


    local function issn(id, e)
    local function issn (id, e, ignore_invalid)
    local issn_copy = id; -- save a copy of unadulterated issn; use this version for display if issn does not validate
    local issn_copy = id; -- save a copy of unadulterated ISSN; use this version for display if ISSN does not validate
    local handler;
    local handler;
    local text;
    local text;
    local valid_issn = true;
    local valid_issn = true;
    if e then
    handler = cfg.id_handlers[e and 'EISSN' or 'ISSN'];
    handler = cfg.id_handlers['EISSN'];
    else
    handler = cfg.id_handlers['ISSN'];
    end


    id=id:gsub( "[%s-]", "" ); -- strip spaces, hyphens, and endashes from the issn
    id = id:gsub ('[%s-]', ''); -- remove hyphens and whitespace


    if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the issn: 8 digits long, containing only 0-9 or X in the last position
    if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the ISSN: 8 digits long, containing only 0-9 or X in the last position
    valid_issn=false; -- wrong length or improper character
    valid_issn = false; -- wrong length or improper character
    else
    else
    valid_issn=is_valid_isxn(id, 8); -- validate issn
    valid_issn = is_valid_isxn(id, 8); -- validate ISSN
    end
    end


    Line 758: Line 810:
    id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version
    id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version
    else
    else
    id = issn_copy; -- if not valid, use the show the invalid issn with error message
    id = issn_copy; -- if not valid, show the invalid ISSN with error message
    end
    end
    text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})


    if false == valid_issn then
    text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' ) -- add an error message if the issn is invalid
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode})
    end  
     
    if ignore_invalid then
    set_message ('maint_issn_ignore');
    else
    if false == valid_issn then
    text = text .. ' ' .. set_message ('err_bad_issn', e and 'e' or ''); -- add an error message if the ISSN is invalid
    end
    end
    return text
    return text
    Line 786: Line 842:


    if is_set (id_num) then
    if is_set (id_num) then
    add_maint_cat ('jfm_format');
    set_message ('maint_jfm_format');
    else -- plain number without mr prefix
    else -- plain number without mr prefix
    id_num = id; -- if here id does not have prefix
    id_num = id; -- if here id does not have prefix
    Line 794: Line 850:
    id = id_num; -- jfm matches pattern
    id = id_num; -- jfm matches pattern
    else
    else
    err_cat = ' ' .. set_error( 'bad_jfm' ); -- set an error message
    err_cat = ' ' .. set_message( 'err_bad_jfm' ); -- set an error message
    end
    end
    return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;
    end
    end


    Line 809: Line 865:


    length = 8 then all digits
    length = 8 then all digits
    length = 9 then lccn[1] is lower case alpha
    length = 9 then lccn[1] is lowercase alpha
    length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits
    length = 10 then lccn[1] and lccn[2] are both lowercase alpha or both digits
    length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits
    length = 11 then lccn[1] is lowercase alpha, lccn[2] and lccn[3] are both lowercase alpha or both digits
    length = 12 then lccn[1] and lccn[2] are both lower case alpha
    length = 12 then lccn[1] and lccn[2] are both lowercase alpha


    ]]
    ]]
    Line 819: Line 875:
    local handler = cfg.id_handlers['LCCN'];
    local handler = cfg.id_handlers['LCCN'];
    local err_cat = ''; -- presume that LCCN is valid
    local err_cat = ''; -- presume that LCCN is valid
    local id = lccn; -- local copy of the lccn
    local id = lccn; -- local copy of the LCCN


    id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes)
    id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes)
    local len = id:len(); -- get the length of the lccn
    local len = id:len(); -- get the length of the LCCN


    if 8 == len then
    if 8 == len then
    if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits)
    if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits)
    err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message
    err_cat = ' ' .. set_message ('err_bad_lccn'); -- set an error message
    end
    end
    elseif 9 == len then -- LCCN should be adddddddd
    elseif 9 == len then -- LCCN should be adddddddd
    if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern?
    if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern?
    err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message
    err_cat = ' ' .. set_message ('err_bad_lccn'); -- set an error message
    end
    end
    elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd
    elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd
    if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ...
    if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ...
    if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern
    if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern
    err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
    err_cat = ' ' .. set_message ('err_bad_lccn'); -- no match, set an error message
    end
    end
    end
    end
    elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd
    elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd
    if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns
    if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns
    err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
    err_cat = ' ' .. set_message ('err_bad_lccn'); -- no match, set an error message
    end
    end
    elseif 12 == len then -- LCCN should be aadddddddddd
    elseif 12 == len then -- LCCN should be aadddddddddd
    if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern
    if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern
    err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
    err_cat = ' ' .. set_message ('err_bad_lccn'); -- no match, set an error message
    end
    end
    else
    else
    err_cat = ' ' .. set_error( 'bad_lccn' ); -- wrong length, set an error message
    err_cat = ' ' .. set_message ('err_bad_lccn'); -- wrong length, set an error message
    end
    end


    if not is_set (err_cat) and nil ~= lccn:find ('%s') then
    if not is_set (err_cat) and nil ~= lccn:find ('%s') then
    err_cat = ' ' .. set_error( 'bad_lccn' ); -- lccn contains a space, set an error message
    err_cat = ' ' .. set_message ('err_bad_lccn'); -- lccn contains a space, set an error message
    end
    end


    return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;
    prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}) .. err_cat;
    end
    end


    Line 874: Line 930:


    if is_set (id_num) then
    if is_set (id_num) then
    add_maint_cat ('mr_format');
    set_message ('maint_mr_format'); -- add maint cat
    else -- plain number without mr prefix
    else -- plain number without mr prefix
    id_num = id:match ('^%d+$'); -- if here id is all digits
    id_num = id:match ('^%d+$'); -- if here id is all digits
    Line 883: Line 939:
    id = string.rep ('0', 7-id_len ) .. id_num; -- zero-fill leading digits
    id = string.rep ('0', 7-id_len ) .. id_num; -- zero-fill leading digits
    else
    else
    err_cat = ' ' .. set_error( 'bad_mr' ); -- set an error message
    err_cat = ' ' .. set_message ('err_bad_mr'); -- set an error message
    end
    end
    return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;
    end
    end


    Line 893: Line 949:
    --[[--------------------------< O C L C >----------------------------------------------------------------------
    --[[--------------------------< O C L C >----------------------------------------------------------------------


    Validate and format an oclc id.  https://www.oclc.org/batchload/controlnumber.en.html {{dead link}}
    Validate and format an OCLC ID.  https://www.oclc.org/batchload/controlnumber.en.html {{dead link}}
    archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html
    archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html


    Line 912: Line 968:
    number = id:match('%(OCoLC%)([1-9]%d*)'); -- get the number
    number = id:match('%(OCoLC%)([1-9]%d*)'); -- get the number
    if 9 < number:len() then
    if 9 < number:len() then
    number = nil; -- contrain to 1 to 9 digits; change this when oclc issues 10-digit numbers
    number = nil; -- constrain to 1 to 9 digits; change this when OCLC issues 10-digit numbers
    end
    end
    elseif id:match('^%d+$') then -- no prefix
    elseif id:match('^%d+$') then -- no prefix
    number = id; -- get the number
    number = id; -- get the number
    if 10 < number:len() then
    if 10 < number:len() then
    number = nil; -- contrain to 1 to 10 digits; change this when oclc issues 11-digit numbers
    number = nil; -- constrain to 1 to 10 digits; change this when OCLC issues 11-digit numbers
    end
    end
    end
    end
    Line 924: Line 980:
    id = number; -- exclude prefix, if any, from external link
    id = number; -- exclude prefix, if any, from external link
    else
    else
    err_msg = ' ' .. set_error( 'bad_oclc' ) -- add an error message if the id is malformed
    err_msg = ' ' .. set_message ('err_bad_oclc') -- add an error message if the id is malformed
    end
    end
    local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg;
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_msg;


    return text;
    return text;
    Line 941: Line 997:


    local function openlibrary(id, access)
    local function openlibrary(id, access)
    local code;
    local handler = cfg.id_handlers['OL'];
    local handler = cfg.id_handlers['OL'];
    local ident;
    local ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- strip optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W';
    local error_msg = '';
    ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W'; remove OL prefix
    local prefix = { -- these are appended to the handler.prefix according to code
    ['A']='authors/OL',
    ['M']='books/OL',
    ['W']='works/OL',
    ['X']='OL' -- not a code; spoof when 'code' in id is invalid
    };


    if not is_set (ident) then -- if malformed return an error
    if not ident then
    return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    code = 'X'; -- no code or id completely invalid
    prefix=handler.prefix .. 'OL',
    ident = id; -- copy id to ident so that we display the flawed identifier
    id=id, separator=handler.separator, encode = handler.encode,
    error_msg = ' ' .. set_message ('err_bad_ol');
    access = access}) .. ' ' .. set_error( 'bad_ol' );
    end
    id = ident; -- use ident without the optional OL prefix (it has been removed)
    if ( code == "A" ) then
    return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    prefix=handler.prefix .. 'authors/OL',
    id=id, separator=handler.separator, encode = handler.encode,
    access = access})
    end
    if ( code == "M" ) then
    return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    prefix=handler.prefix .. 'books/OL',
    id=id, separator=handler.separator, encode = handler.encode,
    access = access})
    end
    end


    if ( code == "W" ) then
    return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    prefix = handler.prefix .. prefix[code],
    prefix=handler.prefix .. 'works/OL',
    id = ident, separator = handler.separator, encode = handler.encode,
    id=id, separator=handler.separator, encode = handler.encode,
    access = access}) .. error_msg;
    access = access})
    end
    end
    end


    Line 996: Line 1,037:
    ]]
    ]]


    local function pmc(id, embargo)
    local function pmc (id, embargo)
    local handler = cfg.id_handlers['PMC'];
    local handler = cfg.id_handlers['PMC'];
    local err_cat = ''; -- presume that PMC is valid
    local err_cat;
    local id_num;
    local id_num;
    local text;
    local text;
     
    id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with pmc prefix
    id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with PMC prefix


    if is_set (id_num) then
    if is_set (id_num) then
    add_maint_cat ('pmc_format');
    set_message ('maint_pmc_format');
    else -- plain number without pmc prefix
    else -- plain number without PMC prefix
    id_num = id:match ('^%d+$'); -- if here id is all digits
    id_num = id:match ('^%d+$'); -- if here id is all digits
    end
    end
    Line 1,013: Line 1,054:
    id_num = tonumber(id_num); -- convert id_num to a number for range testing
    id_num = tonumber(id_num); -- convert id_num to a number for range testing
    if 1 > id_num or handler.id_limit < id_num then -- if PMC is outside test limit boundaries
    if 1 > id_num or handler.id_limit < id_num then -- if PMC is outside test limit boundaries
    err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message
    err_cat = ' ' .. set_message ('err_bad_pmc'); -- set an error message
    else
    else
    id = tostring (id_num); -- make sure id is a string
    id = tostring (id_num); -- make sure id is a string
    end
    end
    else -- when id format incorrect
    else -- when id format incorrect
    err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message
    err_cat = ' ' .. set_message ('err_bad_pmc'); -- set an error message
    end
    end
    if is_set (embargo) then -- is PMC still embargoed?
    if is_set (embargo) and is_set (is_embargoed (embargo)) then -- is PMC still embargoed?
    text = table.concat ( -- still embargoed so no external link
    text = table.concat ( -- still embargoed so no external link
    {
    {
    make_wikilink (handler.link, handler.label),
    make_wikilink (link_label_make (handler), handler.label),
    handler.separator,
    handler.separator,
    id,
    id,
    err_cat
    (err_cat and err_cat or '') -- parens required
    });
    });
    else
    else
    text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, -- no embargo date or embargo has expired, ok to link to article
    text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- no embargo date or embargo has expired, ok to link to article
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access,
    auto_link = not err_cat and 'pmc' or nil -- do not auto-link when PMC has error
    }) .. (err_cat and err_cat or ''); -- parentheses required
    end
    end
    return text;
    return text;
    Line 1,050: Line 1,093:
    if id:match("[^%d]") then -- if PMID has anything but digits
    if id:match("[^%d]") then -- if PMID has anything but digits
    err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message
    err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message
    else -- PMID is only digits
    else -- PMID is only digits
    local id_num = tonumber(id); -- convert id to a number for range testing
    local id_num = tonumber(id); -- convert id to a number for range testing
    if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries
    if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries
    err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message
    err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message
    end
    end
    end
    end
    return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;
    end
    end


    Line 1,065: Line 1,108:
    --[[--------------------------< S 2 C I D >--------------------------------------------------------------------
    --[[--------------------------< S 2 C I D >--------------------------------------------------------------------


    Format an s2cid, do simple error checking
    Format an S2CID, do simple error checking


    S2CIDs are sequential numbers beginning at 1 and counting up.  This code checks the s2cid to see that it is only
    S2CIDs are sequential numbers beginning at 1 and counting up.  This code checks the S2CID to see that it is only
    digits and does not exceed handler.id_limit; the value in handler.id_limit will need to be updated periodically
    digits and does not exceed handler.id_limit; the value in handler.id_limit will need to be updated periodically
    as more S2CIDs are issued.
    as more S2CIDs are issued.
    Line 1,084: Line 1,127:
    id_num = tonumber(id_num); -- convert id_num to a number for range testing
    id_num = tonumber(id_num); -- convert id_num to a number for range testing
    if handler.id_limit < id_num then -- if S2CID is outside test limit boundaries
    if handler.id_limit < id_num then -- if S2CID is outside test limit boundaries
    err_cat = ' ' .. set_error( 'bad_s2cid' ); -- set an error message
    err_cat = ' ' .. set_message ('bad_s2cid_err'); -- set an error message
    end
    end


    else -- when id format incorrect
    else -- when id format incorrect
    err_cat = ' ' .. set_error( 'bad_s2cid' ); -- set an error message
    err_cat = ' ' .. set_message ('bad_s2cid_err'); -- set an error message
    end
    end


    text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix, id=id:gsub ('%.%a%a', ''), separator=handler.separator, encode=handler.encode, access=access}) .. err_cat;
    prefix = handler.prefix, id = id:gsub ('%.%a%a', ''), separator = handler.separator, encode = handler.encode, access = access}) .. err_cat;


    return text;
    return text;
    Line 1,100: Line 1,143:
    --[[--------------------------< S B N >------------------------------------------------------------------------
    --[[--------------------------< S B N >------------------------------------------------------------------------


    9-digit form of isbn10; uses same check-digit validation when sbn is prefixed with an additional '0' to make 10 digits
    9-digit form of ISBN-10; uses same check-digit validation when SBN is prefixed with an additional '0' to make 10 digits


    ]]
    ]]


    local function sbn (id)
    local function sbn (id, ignore_invalid)
    local check;
    local handler = cfg.id_handlers['SBN'];
    local err_type = '';
    local function return_result (check, err_type) -- local function to handle the various returns
    local SBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect,
    prefix = handler.prefix, id = id, separator = handler.separator});
    if not ignore_invalid then -- if not ignoring SBN errors
    if not check then
    return SBN .. set_message ('err_bad_sbn', {err_type}, false, ' '); -- display an error message
    end
    else
    set_message ('maint_isbn_ignore'); -- add a maint category even when there is no error (ToDo: Possibly switch to separate message for SBNs only)
    end
    return SBN;
    end


    if nil ~= id:match("[^%s-0-9X]") then
    if id:match ('[^%s-0-9X]') then
    return false, cfg.err_msg_supl.char; -- fail if sbn contains anything but digits, hyphens, or the uppercase X
    return return_result (false, cfg.err_msg_supl.char); -- fail if SBN contains anything but digits, hyphens, or the uppercase X
    end
    end


    id=id:gsub( "[%s-]", "" ); -- strip spaces and hyphens from the sbn
    local ident = id:gsub ('[%s-]', ''); -- remove hyphens and whitespace; they interfere with the rest of the tests


    if  9 ~= id:len() then
    if  9 ~= ident:len() then
    return false, cfg.err_msg_supl.length; -- fail if incorrect length
    return return_result (false, cfg.err_msg_supl.length); -- fail if incorrect length
    end
    end


    if id:match( "^%d*X?$" ) == nil then -- fail if sbn has 'X' anywhere but last position
    if ident:match ('^%d*X?$') == nil then
    return false, cfg.err_msg_supl.form;
    return return_result (false, cfg.err_msg_supl.form); -- fail if SBN has 'X' anywhere but last position
    end
    end


    return is_valid_isxn('0' .. id, 10), cfg.err_msg_supl.check; -- prefix sbn with '0' and validate as isbn10
    return return_result (is_valid_isxn ('0' .. ident, 10), cfg.err_msg_supl.check);
    end
    end


    Line 1,128: Line 1,182:
    --[[--------------------------< S S R N >----------------------------------------------------------------------
    --[[--------------------------< S S R N >----------------------------------------------------------------------


    Format an ssrn, do simple error checking
    Format an SSRN, do simple error checking


    SSRNs are sequential numbers beginning at 100? and counting up.  This code checks the ssrn to see that it is
    SSRNs are sequential numbers beginning at 100? and counting up.  This code checks the SSRN to see that it is
    only digits and is greater than 99 and does not exceed handler.id_limit; the value in handler.id_limit will need
    only digits and is greater than 99 and does not exceed handler.id_limit; the value in handler.id_limit will need
    to be updated periodically as more SSRNs are issued.
    to be updated periodically as more SSRNs are issued.
    Line 1,147: Line 1,201:
    id_num = tonumber(id_num); -- convert id_num to a number for range testing
    id_num = tonumber(id_num); -- convert id_num to a number for range testing
    if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries
    if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries
    err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message
    err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message
    end
    end
    else -- when id format incorrect
    else -- when id format incorrect
    err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message
    err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message
    end
    end
    text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}) .. err_cat;


    return text;
    return text;
    Line 1,170: Line 1,224:
    local handler = cfg.id_handlers['USENETID'];
    local handler = cfg.id_handlers['USENETID'];


    local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode})
       
       
    if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$')then -- no '@' with text on both sides, or first character is '<', or last character is '>'
    if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$') then -- no '@' with text on both sides, or first character is '<', or last character is '>'
    text = text .. ' ' .. set_error( 'bad_usenet_id' ) -- add an error message if the message id is invalid
    text = text .. ' ' .. set_message ('err_bad_usenet_id') -- add an error message if the message id is invalid
    end  
    end  
    Line 1,196: Line 1,250:
    if id:match('^%d%d%d%d%d%d%d%d$') then -- is this identifier using temporary format?
    if id:match('^%d%d%d%d%d%d%d%d$') then -- is this identifier using temporary format?
    add_maint_cat ('zbl'); -- yes, add maint cat
    set_message ('maint_zbl'); -- yes, add maint cat
    elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format?
    elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format?
    err_cat = ' ' .. set_error( 'bad_zbl' ); -- no, set an error message
    err_cat = ' ' .. set_message ('err_bad_zbl'); -- no, set an error message
    end
    end
    return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect,
    return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
    prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;
    end
    end


    Line 1,220: Line 1,274:
    local function build_id_list( id_list, options )
    local function build_id_list( id_list, options )
    local new_list, handler = {};
    local new_list, handler = {};
    local accept;


    local function fallback(k) return { __index = function(t,i) return cfg.id_handlers[k][i] end } end;
    local function fallback(k) return { __index = function(t,i) return cfg.id_handlers[k][i] end } end;
    for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table
    for k, v in pairs( id_list ) do
    v, accept = has_accept_as_written (v); -- remove and note accept-as-written markup if present
     
    -- k is uppercase identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table
    -- fallback to read-only cfg
    -- fallback to read-only cfg
    handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) );
    handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) );
    Line 1,244: Line 1,302:
    table.insert( new_list, {handler.label, citeseerx( v ) } );
    table.insert( new_list, {handler.label, citeseerx( v ) } );
    elseif k == 'DOI' then
    elseif k == 'DOI' then
    table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } );
    table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access, accept) } );
    elseif k == 'EISSN' then
    elseif k == 'EISSN' then
    table.insert( new_list, {handler.label, issn( v, true ) } ); -- true distinguishes eissn from issn
    table.insert( new_list, {handler.label, issn( v, true, accept) } ); -- true distinguishes EISSN from ISSN
    elseif k == 'HDL' then
    elseif k == 'HDL' then
    table.insert( new_list, {handler.label, hdl( v, handler.access ) } );
    table.insert( new_list, {handler.label, hdl( v, handler.access ) } );
    elseif k == 'ISBN' then
    elseif k == 'ISBN' then
    local ISBN = internal_link_id( handler );
    table.insert( new_list, {handler.label, isbn (v, (accept or options.IgnoreISBN)) } );
    local check;
    local err_type = '';
    check, err_type = isbn( v );
    if not check then
    if is_set(options.IgnoreISBN) then -- ISBN is invalid; if |ignore-isbn-error= set
    add_maint_cat ('ignore_isbn_err'); -- ad a maint category
    else
    ISBN = ISBN .. set_error( 'bad_isbn', {err_type}, false, " ", "" ); -- else display an error message
    end
    elseif is_set(options.IgnoreISBN) then -- ISBN is OK; if |ignore-isbn-error= set
    add_maint_cat ('ignore_isbn_err'); -- because |ignore-isbn-error= unnecessary
    end
    table.insert( new_list, {handler.label, ISBN } );
    elseif k == 'ISMN' then
    elseif k == 'ISMN' then
    table.insert( new_list, {handler.label, ismn( v ) } );
    table.insert( new_list, {handler.label, ismn( v ) } );
    elseif k == 'ISSN' then
    elseif k == 'ISSN' then
    table.insert( new_list, {handler.label, issn( v ) } );
    table.insert( new_list, {handler.label, issn( v, false, accept) } );
    elseif k == 'JFM' then
    elseif k == 'JFM' then
    table.insert( new_list, {handler.label, jfm( v ) } );
    table.insert( new_list, {handler.label, jfm( v ) } );
    Line 1,285: Line 1,330:
    table.insert( new_list, {handler.label, s2cid( v, handler.access ) } );
    table.insert( new_list, {handler.label, s2cid( v, handler.access ) } );
    elseif k == 'SBN' then
    elseif k == 'SBN' then
    local SBN = internal_link_id (handler);
    table.insert( new_list, {handler.label, sbn (v, accept) } );
    local check; -- boolean validation result
    local err_type = '';
    check, err_type = sbn (v);
    if not check then
    SBN = SBN .. set_error( 'bad_sbn', {err_type}, false, " ", "" ); -- display an error message
    end
    table.insert( new_list, {handler.label, SBN } );
    elseif k == 'SSRN' then
    elseif k == 'SSRN' then
    table.insert( new_list, {handler.label, ssrn( v ) } );
    table.insert( new_list, {handler.label, ssrn( v ) } );
    Line 1,321: Line 1,359:
    Populates ID table from arguments using configuration settings. Loops through cfg.id_handlers and searches args for
    Populates ID table from arguments using configuration settings. Loops through cfg.id_handlers and searches args for
    any of the parameters listed in each cfg.id_handlers['...'].parameters.  If found, adds the parameter and value to
    any of the parameters listed in each cfg.id_handlers['...'].parameters.  If found, adds the parameter and value to
    the identifier list.  Emits redundant error message is more than one alias exists in args
    the identifier list.  Emits redundant error message if more than one alias exists in args


    ]]
    ]]
    Line 1,327: Line 1,365:
    local function extract_ids( args )
    local function extract_ids( args )
    local id_list = {}; -- list of identifiers found in args
    local id_list = {}; -- list of identifiers found in args
    for k, v in pairs( cfg.id_handlers ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table
    for k, v in pairs( cfg.id_handlers ) do -- k is uppercase identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table
    v = select_one( args, v.parameters, 'redundant_parameters' ); -- v.parameters is a table of aliases for k; here we pick one from args if present
    v = select_one( args, v.parameters, 'redundant_parameters' ); -- v.parameters is a table of aliases for k; here we pick one from args if present
    if is_set(v) then id_list[k] = v; end -- if found in args, add identifier to our list
    if is_set(v) then id_list[k] = v; end -- if found in args, add identifier to our list
    Line 1,353: Line 1,391:
    if is_set (access_level) then
    if is_set (access_level) then
    if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required
    if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required
    table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } );
    table.insert( z.message_tail, { set_message( 'invalid_param_val', {access_param, access_level}, true ) } );
    access_level = nil; -- invalid so unset
    access_level = nil; -- invalid so unset
    end
    end
    if not is_set(id_list[k]) then -- identifer access-level must have a matching identifier
    if not is_set(id_list[k]) then -- identifier access-level must have a matching identifier
    table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k:lower()}, true ) } ); -- param name is uppercase in cfg.id_handlers (k); lowercase for error message
    table.insert( z.message_tail, { set_message( 'err_param_access_requires_param', {k:lower()}, true ) } ); -- parameter name is uppercase in cfg.id_handlers (k); lowercase for error message
    end
    end
    id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword
    id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword
    Line 1,376: Line 1,414:
    cfg = cfg_table_ptr;
    cfg = cfg_table_ptr;


    is_set = utilities_page_ptr.is_set; -- import functions from select Module:Citation/CS1/Utilities module
    has_accept_as_written = utilities_page_ptr.has_accept_as_written; -- import functions from select Module:Citation/CS1/Utilities module
    is_set = utilities_page_ptr.is_set;
    in_array = utilities_page_ptr.in_array;
    in_array = utilities_page_ptr.in_array;
    set_error = utilities_page_ptr.set_error;
    set_message = utilities_page_ptr.set_message;
    select_one = utilities_page_ptr.select_one;
    select_one = utilities_page_ptr.select_one;
    add_maint_cat = utilities_page_ptr.add_maint_cat;
    substitute = utilities_page_ptr.substitute;
    substitute = utilities_page_ptr.substitute;
    make_wikilink = utilities_page_ptr.make_wikilink;
    make_wikilink = utilities_page_ptr.make_wikilink;
    Line 1,392: Line 1,430:


    return {
    return {
    auto_link_urls = auto_link_urls, -- table of identifier URLs to be used when auto-linking |title=
    build_id_list = build_id_list,
    build_id_list = build_id_list,
    extract_ids = extract_ids,
    extract_ids = extract_ids,