Module:Citation/CS1/Identifiers: Difference between revisions

    (sync from sandbox;)
    m (47 revisions imported from templatewiki:Module:Citation/CS1/Identifiers)
    Line 1: Line 1:
    local identifiers = {};


    --[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
    --[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
    Line 8: Line 11:


    local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
    local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
    local wd_int_lang = (mw.site.server:match ('wikidata') and mw.getCurrentFrame():preprocess('{{int:lang}}')) or '';




    Line 39: Line 44:
    end
    end
    if is_set (options.q) and mw.wikibase then -- wikibase test here avoids script errors in third party wikis that aren't using mw.wikibase extension
    if is_set (options.q) and mw.wikibase then
    wd_article = mw.wikibase.getEntity (options.q):getSitelink (this_wiki_code .. 'wiki'); -- fetch article title from wd
    wd_article = mw.wikibase.getEntity (options.q):getSitelink (this_wiki_code .. 'wiki'); -- fetch article title from wd
    if wd_article then
    if wd_article then
    Line 61: Line 66:


    local function internal_link_id(options)
    local function internal_link_id(options)
    local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9


    return table.concat (
    return table.concat (
    Line 71: Line 75:
    {
    {
    options.prefix,
    options.prefix,
    id, -- translated to western digits
    options.id,
    options.suffix or ''
    options.suffix or ''
    }),
    }),
    substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent Latn script identifiers from being reversed at rtl language wikis
    mw.text.nowiki (options.id)
    ); -- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required?
    );
    });
    });
    end
    end
    Line 281: Line 285:
    1–4 must be digits and must represent a year in the range of 1000 – next year
    1–4 must be digits and must represent a year in the range of 1000 – next year
    5 must be a letter
    5 must be a letter
    6–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )
    6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. )
    7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )
    9–18 must be letter, digit, or dot
    9–18 must be letter, digit, or dot
    19 must be a letter or dot
    19 must be a letter or dot
    Line 299: Line 304:
    err_type = 'length';
    err_type = 'length';
    else
    else
    year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$") --  
    year = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$") --  
    if not year then -- if nil then no pattern match
    if not year then -- if nil then no pattern match
    err_type = 'value'; -- so value error
    err_type = 'value'; -- so value error
    Line 390: Line 395:
    if is_set(inactive) then
    if is_set(inactive) then
    local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
    local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
    local inactive_month, good;
    if is_set(inactive_year) then
     
    table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year );
    if is_set (inactive_year) then
    if 4 < inactive:len() then -- inactive date has more than just a year (could be anything)
    local lang_obj = mw.getContentLanguage(); -- get a language object for this wiki
    good, inactive_month = pcall (lang_obj.formatDate, lang_obj, 'F', inactive); -- try to get the month name from the inactive date
    if not good then
    inactive_month = nil; -- something went wrong so make sure this is unset
    end
    end
    else
    else
    inactive_year = nil; -- |doi-broken= has something but it isn't a date
    table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year
    end
    end
    inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"
    if is_set(inactive_year) and is_set (inactive_month) then
    table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year .. ' ' .. inactive_month); -- use inactive month in category name
    elseif is_set(inactive_year) then
    table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year);
    else
    table.insert( z.error_categories, 'Pages with inactive DOIs'); -- when inactive doesn't contain a recognizable date
    end
    inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')';
    end
    end
    text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
    text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '')
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '')


    if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") or -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
    if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
    id: match ('^10.5555') then -- test doi will never resolve
    cat = ' ' .. set_error( 'bad_doi' );
    cat = ' ' .. set_error ('bad_doi');
    end
    end


    Line 745: Line 732:
    --[[--------------------------< O C L C >----------------------------------------------------------------------
    --[[--------------------------< O C L C >----------------------------------------------------------------------


    Validate and format an oclc id.  https://www.oclc.org/batchload/controlnumber.en.html {{dead link}}
    Validate and format an oclc id.  https://www.oclc.org/batchload/controlnumber.en.html
    archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html


    ]]
    ]]
    Line 849: Line 835:


    local function pmc(id, embargo)
    local function pmc(id, embargo)
    local test_limit = 7000000; -- update this value as PMCs approach
    local test_limit = 6500000; -- update this value as PMCs approach
    local handler = cfg.id_handlers['PMC'];
    local handler = cfg.id_handlers['PMC'];
    local err_cat = ''; -- presume that PMC is valid
    local err_cat = ''; -- presume that PMC is valid
    Line 976: Line 962:


    A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional
    A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional
    format described here: http://emis.mi.sanu.ac.rs/ZMATH/zmath/en/help/search/
    temporary format is apparently eight digits.  Anything else is an error


    ]]
    ]]
    Line 985: Line 967:
    local function zbl (id)
    local function zbl (id)
    local handler = cfg.id_handlers['ZBL'];
    local handler = cfg.id_handlers['ZBL'];
    local id_num;
    local err_cat = '';
    local err_cat = '';
    if id:match('^%d%d%d%d%d%d%d%d$') then -- is this identifier using temporary format?
    id_num = id:match ('^[Zz][Bb][Ll](.*)$'); -- identifier with zbl prefix; extract identifier
    add_maint_cat ('zbl'); -- yes, add maint cat
     
    elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format?
    if is_set (id_num) then
    err_cat = ' ' .. set_error( 'bad_zbl' ); -- no, set an error message
    add_maint_cat ('zbl_format');
    else -- plain number without zbl prefix
    id_num = id; -- if here id does not have prefix
    end
     
    if id_num:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then
    id = id_num; -- id matches pattern
    else
    err_cat = ' ' .. set_error( 'bad_zbl' ); -- set an error message
    end
    end
    Line 1,121: Line 1,112:
    Parameters which have a predefined access level (e.g. arxiv) do not use this
    Parameters which have a predefined access level (e.g. arxiv) do not use this
    function as they are directly rendered as free without using an additional parameter.
    function as they are directly rendered as free without using an additional parameter.
    access-level values must match the case used in cfg.keywords['id-access'] (lowercase unless there is some special reason for something else)


    ]]
    ]]
    Line 1,129: Line 1,118:
    local id_accesses_list = {};
    local id_accesses_list = {};
    for k, v in pairs( cfg.id_handlers ) do
    for k, v in pairs( cfg.id_handlers ) do
    local access_param = v.custom_access; -- name of identifier's access-level parameter
    local access_param = v.custom_access;
    local k_lower = string.lower(k);
    if is_set(access_param) then
    if is_set(access_param) then
    local access_level = args[access_param]; -- get the assigned value if there is one
    local access_level = args[access_param];
    if is_set (access_level) then
    if is_set(access_level) then
    if not in_array (access_level, cfg.keywords['id-access']) then -- exact match required
    if not in_array (access_level:lower(), cfg.keywords['id-access']) then
    table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } );
    table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } );
    access_level = nil; -- invalid so unset
    access_level = nil;
    end
    end
    if not is_set(id_list[k]) then -- identifier access-level must have a matching identifier
    if not is_set(id_list[k]) then
    table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k:lower()}, true ) } ); -- param name is uppercase in cfg.id_handlers (k); lowercase for error message
    table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k_lower}, true ) } );
    end
    if is_set(access_level) then
    access_level = access_level:lower();
    end
    end
    id_accesses_list[k] = access_level;
    id_accesses_list[k] = access_level;
    Line 1,168: Line 1,161:
    end
    end


    --[[--------------------------< E X P O R T E D  F U N C T I O N S >------------------------------------------
    ]]


    return {
    return {