Module:Citation/CS1/Identifiers: Difference between revisions

    m (47 revisions imported from templatewiki:Module:Citation/CS1/Identifiers)
    (sync from sandbox;)
    Line 1: Line 1:
    local identifiers = {};


    --[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
    --[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
    Line 11: Line 8:


    local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
    local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
    local wd_int_lang = (mw.site.server:match ('wikidata') and mw.getCurrentFrame():preprocess('{{int:lang}}')) or '';




    Line 26: Line 21:
    local url_string = options.id;
    local url_string = options.id;
    local ext_link;
    local ext_link;
    local this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org
    local this_wiki_code = cfg.this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org
    local wd_article; -- article title from wikidata
    local wd_article; -- article title from wikidata
    Line 38: Line 33:
    end
    end


    this_wiki_code = mw.language.getContentLanguage():getCode(); -- get this wikipedia's language code (subdomain)
    -- this_wiki_code = mw.language.getContentLanguage():getCode(); -- get this wikipedia's language code (subdomain)
    if string.match (mw.site.server, 'wikidata') then
    if string.match (mw.site.server, 'wikidata') then
    Line 44: Line 39:
    end
    end
    if is_set (options.q) and mw.wikibase then
    if is_set (options.q) and mw.wikibase then -- wikibase test here avoids script errors in third party wikis that aren't using mw.wikibase extension
    wd_article = mw.wikibase.getEntity (options.q):getSitelink (this_wiki_code .. 'wiki'); -- fetch article title from wd
    wd_article = mw.wikibase.getEntity (options.q):getSitelink (this_wiki_code .. 'wiki'); -- fetch article title from wd
    if wd_article then
    if wd_article then
    Line 66: Line 61:


    local function internal_link_id(options)
    local function internal_link_id(options)
    local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9


    return table.concat (
    return table.concat (
    Line 75: Line 71:
    {
    {
    options.prefix,
    options.prefix,
    options.id,
    id, -- translated to western digits
    options.suffix or ''
    options.suffix or ''
    }),
    }),
    mw.text.nowiki (options.id)
    substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent Latn script identifiers from being reversed at rtl language wikis
    );
    ); -- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required?
    });
    });
    end
    end
    Line 285: Line 281:
    1–4 must be digits and must represent a year in the range of 1000 – next year
    1–4 must be digits and must represent a year in the range of 1000 – next year
    5 must be a letter
    5 must be a letter
    6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. )
    6–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )
    7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )
    9–18 must be letter, digit, or dot
    9–18 must be letter, digit, or dot
    19 must be a letter or dot
    19 must be a letter or dot
    Line 304: Line 299:
    err_type = 'length';
    err_type = 'length';
    else
    else
    year = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$") --  
    year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$") --  
    if not year then -- if nil then no pattern match
    if not year then -- if nil then no pattern match
    err_type = 'value'; -- so value error
    err_type = 'value'; -- so value error
    Line 395: Line 390:
    if is_set(inactive) then
    if is_set(inactive) then
    local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
    local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
    if is_set(inactive_year) then
    local inactive_month, good;
    table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year );
     
    if is_set (inactive_year) then
    if 4 < inactive:len() then -- inactive date has more than just a year (could be anything)
    local lang_obj = mw.getContentLanguage(); -- get a language object for this wiki
    good, inactive_month = pcall (lang_obj.formatDate, lang_obj, 'F', inactive); -- try to get the month name from the inactive date
    if not good then
    inactive_month = nil; -- something went wrong so make sure this is unset
    end
    end
    else
    inactive_year = nil; -- |doi-broken= has something but it isn't a date
    end
    if is_set(inactive_year) and is_set (inactive_month) then
    table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year .. ' ' .. inactive_month); -- use inactive month in category name
    elseif is_set(inactive_year) then
    table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year);
    else
    else
    table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year
    table.insert( z.error_categories, 'Pages with inactive DOIs'); -- when inactive doesn't contain a recognizable date
    end
    end
    inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"
    inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')';
    end
    end
    text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
    text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '')
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '')


    if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
    -- if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") or -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
    cat = ' ' .. set_error( 'bad_doi' );
    -- id: match ('^10.5555') then -- test doi will never resolve
    -- cat = ' ' .. set_error ('bad_doi');
    -- end
    local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when doi has the proper basic form
    registrant_err_patterns = { -- these patterns are for code ranges that are not supported
    '^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
    '^[^1-3]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 40000+); accepts: 10000–39999
    '^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
    '^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
    '^%d%d%d%d%d%d+', -- 6 or more digits
    '^%d%d?%d?%.%d%d*$', -- less than 4 digits with subcode
    '^%d%d?%d?$', -- less than 4 digits without subcode
    '^5555$', -- test registrant will never resolve
    }
    if registrant then -- when doi has proper form
    for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns
    if registrant:match (pattern) then -- to validate registrant codes
    cat = ' ' .. set_error ('bad_doi'); -- when found, mark this doi as bad
    break; -- and done
    end
    end
    else
    cat = ' ' .. set_error ('bad_doi'); -- invalid directory or malformed
    end
    end


    Line 471: Line 508:
    if isbn_str:match( "^97[89]%d*$" ) == nil then
    if isbn_str:match( "^97[89]%d*$" ) == nil then
    return false, 'invalid prefix'; -- fail when isbn13 does not begin with 978 or 979
    return false, 'invalid prefix'; -- fail when isbn13 does not begin with 978 or 979
    end
    if isbn_str:match ('^9790') then
    return false, 'invalid group id'; -- group identifier '0' is reserved to ismn
    end
    end
    return is_valid_isxn_13 (isbn_str), 'checksum';
    return is_valid_isxn_13 (isbn_str), 'checksum';
    Line 732: Line 772:
    --[[--------------------------< O C L C >----------------------------------------------------------------------
    --[[--------------------------< O C L C >----------------------------------------------------------------------


    Validate and format an oclc id.  https://www.oclc.org/batchload/controlnumber.en.html
    Validate and format an oclc id.  https://www.oclc.org/batchload/controlnumber.en.html {{dead link}}
    archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html


    ]]
    ]]
    Line 835: Line 876:


    local function pmc(id, embargo)
    local function pmc(id, embargo)
    local test_limit = 6500000; -- update this value as PMCs approach
    local test_limit = 7000000; -- update this value as PMCs approach
    local handler = cfg.id_handlers['PMC'];
    local handler = cfg.id_handlers['PMC'];
    local err_cat = ''; -- presume that PMC is valid
    local err_cat = ''; -- presume that PMC is valid
    Line 962: Line 1,003:


    A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional
    A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional
    format described here: http://emis.mi.sanu.ac.rs/ZMATH/zmath/en/help/search/
    temporary format is apparently eight digits.  Anything else is an error


    ]]
    ]]
    Line 967: Line 1,012:
    local function zbl (id)
    local function zbl (id)
    local handler = cfg.id_handlers['ZBL'];
    local handler = cfg.id_handlers['ZBL'];
    local id_num;
    local err_cat = '';
    local err_cat = '';
    id_num = id:match ('^[Zz][Bb][Ll](.*)$'); -- identifier with zbl prefix; extract identifier
    if id:match('^%d%d%d%d%d%d%d%d$') then -- is this identifier using temporary format?
     
    add_maint_cat ('zbl'); -- yes, add maint cat
    if is_set (id_num) then
    elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format?
    add_maint_cat ('zbl_format');
    err_cat = ' ' .. set_error( 'bad_zbl' ); -- no, set an error message
    else -- plain number without zbl prefix
    id_num = id; -- if here id does not have prefix
    end
     
    if id_num:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then
    id = id_num; -- id matches pattern
    else
    err_cat = ' ' .. set_error( 'bad_zbl' ); -- set an error message
    end
    end
    Line 1,112: Line 1,148:
    Parameters which have a predefined access level (e.g. arxiv) do not use this
    Parameters which have a predefined access level (e.g. arxiv) do not use this
    function as they are directly rendered as free without using an additional parameter.
    function as they are directly rendered as free without using an additional parameter.
    access-level values must match the case used in cfg.keywords_lists['id-access'] (lowercase unless there is some special reason for something else)


    ]]
    ]]
    Line 1,118: Line 1,156:
    local id_accesses_list = {};
    local id_accesses_list = {};
    for k, v in pairs( cfg.id_handlers ) do
    for k, v in pairs( cfg.id_handlers ) do
    local access_param = v.custom_access;
    local access_param = v.custom_access; -- name of identifier's access-level parameter
    local k_lower = string.lower(k);
    if is_set(access_param) then
    if is_set(access_param) then
    local access_level = args[access_param];
    local access_level = args[access_param]; -- get the assigned value if there is one
    if is_set(access_level) then
    if is_set (access_level) then
    if not in_array (access_level:lower(), cfg.keywords['id-access']) then
    if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required
    table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } );
    table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } );
    access_level = nil;
    access_level = nil; -- invalid so unset
    end
    end
    if not is_set(id_list[k]) then
    if not is_set(id_list[k]) then -- identifier access-level must have a matching identifier
    table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k_lower}, true ) } );
    table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k:lower()}, true ) } ); -- param name is uppercase in cfg.id_handlers (k); lowercase for error message
    end
    end
    if is_set(access_level) then
    id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword
    access_level = access_level:lower();
    end
    id_accesses_list[k] = access_level;
    end
    end
    end
    end
    Line 1,161: Line 1,195:
    end
    end


    --[[--------------------------< E X P O R T E D  F U N C T I O N S >------------------------------------------
    ]]


    return {
    return {