Module:Citation/CS1/Identifiers: Difference between revisions

    m>Nihiltres
    m (Bumped PMC limit from 5000000 to 5100000 as some valid PMCs are being flagged currently. Probably worth bumping further, but I don't know enough to judge.)
    m>Trappist the monk
    (Synch from sandbox;)
    Line 28: Line 28:
    ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id));
    ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id));
    if options.free then
    if is_set(options.access) then
    ext_link = substitute (cfg.presentation['free to read'], ext_link); -- add the free-to-read lock
    ext_link = substitute (cfg.presentation[options.access], ext_link); -- add the free-to-read / paywall lock
    end
    end
    Line 147: Line 147:
    local text;
    local text;
    local valid_ismn = true;
    local valid_ismn = true;
    local id_copy;


    id_copy = id; -- save a copy because this testing is destructive
    id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the ismn
    id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the ismn


    Line 159: Line 161:
    -- prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
    -- prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
       
       
    text="[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id; -- because no place to link to yet
    text="[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id_copy; -- because no place to link to yet


    if false == valid_ismn then
    if false == valid_ismn then
    Line 323: Line 325:


    text = external_link_id({link = handler.link, label = handler.label,
    text = external_link_id({link = handler.link, label = handler.label,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, free=handler.free}) .. err_cat;
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;


    if is_set (class) then
    if is_set (class) then
    Line 334: Line 336:
    end
    end


    --[[--------------------------< B I O R X I V >-----------------------------------------------------------------

    Renders a bioRxiv identifier as an external link and does simple validation of the id.

    A well-formed bioRxiv id is exactly six digits: the number that follows the last slash in the
    bioRxiv-issued DOI, e.g.
        http://dx.doi.org/10.1101/078733 -> 078733

    The link is always emitted.  When the id is anything other than six digits, the link is followed by a
    space and the 'bad_biorxiv' error message.

    ]]

    local function biorxiv(id)
        local handler = cfg.id_handlers['BIORXIV'];
        local error_suffix = '';                                        -- stays empty when the id is well formed

        local six_digits = id:match("^%d%d%d%d%d%d$");                  -- nil unless the whole id is exactly six digits
        if not six_digits then
            error_suffix = ' ' .. set_error( 'bad_biorxiv');            -- error message is raised before the link is built
        end

        local link = external_link_id({
            link = handler.link,
            label = handler.label,
            prefix = handler.prefix,
            id = id,
            separator = handler.separator,
            encode = handler.encode,
            access = handler.access,
        });

        return link .. error_suffix;
    end


    --[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------
    --[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------
    Line 499: Line 521:


    local function pmc(id, embargo)
    local function pmc(id, embargo)
    local test_limit = 5100000; -- update this value as PMCs approach
    local test_limit = 6000000; -- update this value as PMCs approach
    local handler = cfg.id_handlers['PMC'];
    local handler = cfg.id_handlers['PMC'];
    local err_cat =  ''; -- presume that PMC is valid
    local err_cat =  ''; -- presume that PMC is valid
    Line 515: Line 537:
    if is_set (embargo) then -- is PMC is still embargoed?
    if is_set (embargo) then -- is PMC is still embargoed?
    text="[[" .. handler.link .. "|" .. handler.label .. "]]:" .. handler.separator .. id .. err_cat; -- still embargoed so no external link
    text="[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id .. err_cat; -- still embargoed so no external link
    else
    else
    text = external_link_id({link = handler.link, label = handler.label, -- no embargo date or embargo has expired, ok to link to article
    text = external_link_id({link = handler.link, label = handler.label, -- no embargo date or embargo has expired, ok to link to article
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, free=handler.free}) .. err_cat;
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;
    end
    end
    return text;
    return text;
    Line 541: Line 563:
    ]]
    ]]


    local function doi(id, inactive)
    local function doi(id, inactive, access)
    local cat = ""
    local cat = ""
    local handler = cfg.id_handlers['DOI'];
    local handler = cfg.id_handlers['DOI'];
    Line 557: Line 579:
    else  
    else  
    text = external_link_id({link = handler.link, label = handler.label,
    text = external_link_id({link = handler.link, label = handler.label,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access})
    inactive = ""  
    inactive = ""  
    end
    end
    Line 585: Line 607:
    ]]
    ]]


    local function hdl(id)
    local function hdl(id, access)
    local handler = cfg.id_handlers['HDL'];
    local handler = cfg.id_handlers['HDL'];
    local text = external_link_id({link = handler.link, label = handler.label,
    local text = external_link_id({link = handler.link, label = handler.label,
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
    prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access})


    if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma
    if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma
    Line 604: Line 626:
    ]]
    ]]


    local function openlibrary(id)
    local function openlibrary(id, access)
    local code = id:match("^%d+([AMW])$"); -- only digits followed by 'A', 'M', or 'W'
    local code = id:match("^%d+([AMW])$"); -- only digits followed by 'A', 'M', or 'W'
    local handler = cfg.id_handlers['OL'];
    local handler = cfg.id_handlers['OL'];
    Line 611: Line 633:
    return external_link_id({link=handler.link, label=handler.label,
    return external_link_id({link=handler.link, label=handler.label,
    prefix=handler.prefix .. 'authors/OL',
    prefix=handler.prefix .. 'authors/OL',
    id=id, separator=handler.separator, encode = handler.encode})
    id=id, separator=handler.separator, encode = handler.encode,
    access = access})
    elseif ( code == "M" ) then
    elseif ( code == "M" ) then
    return external_link_id({link=handler.link, label=handler.label,
    return external_link_id({link=handler.link, label=handler.label,
    prefix=handler.prefix .. 'books/OL',
    prefix=handler.prefix .. 'books/OL',
    id=id, separator=handler.separator, encode = handler.encode})
    id=id, separator=handler.separator, encode = handler.encode,
    access = access})
    elseif ( code == "W" ) then
    elseif ( code == "W" ) then
    return external_link_id({link=handler.link, label=handler.label,
    return external_link_id({link=handler.link, label=handler.label,
    prefix=handler.prefix .. 'works/OL',
    prefix=handler.prefix .. 'works/OL',
    id=id, separator=handler.separator, encode = handler.encode})
    id=id, separator=handler.separator, encode = handler.encode,
    access = access})
    else
    else
    return external_link_id({link=handler.link, label=handler.label,
    return external_link_id({link=handler.link, label=handler.label,
    prefix=handler.prefix .. 'OL',
    prefix=handler.prefix .. 'OL',
    id=id, separator=handler.separator, encode = handler.encode}) .. ' ' .. set_error( 'bad_ol' );
    id=id, separator=handler.separator, encode = handler.encode,
    access = access}) .. ' ' .. set_error( 'bad_ol' );
    end
    end
    end
    end
    Line 710: Line 736:
    ]]
    ]]


    local function bibcode (id)
    local function bibcode (id, access)
    local handler = cfg.id_handlers['BIBCODE'];
    local handler = cfg.id_handlers['BIBCODE'];
    local err_type;
    local err_type;
    Line 716: Line 742:


    local text = external_link_id({link=handler.link, label=handler.label,
    local text = external_link_id({link=handler.link, label=handler.label,
    prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode});
    prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
    access=access});
    if 19 ~= id:len() then
    if 19 ~= id:len() then
    Line 742: Line 769:
    end
    end


    --[[--------------------------< C I T E S E E R X >------------------------------------------------------------

    Renders a CiteSeerX identifier as an external link and validates its shape.

    CiteSeerX uses its own notion of "doi" (not to be confused with the identifiers resolved via doi.org).
    The structure of this identifier is described at Help_talk:Citation_Style_1#CiteSeerX_id_structure.

    The id accepted here is the literal text '10.1.1.' followed by two dot-separated groups, each of one to
    four digits and each beginning with a non-zero digit.  Anything else still gets a link, followed by a
    space and the 'bad_citeseerx' error message.

    ]]

    local function citeseerx (id)
        local handler = cfg.id_handlers['CITESEERX'];

        local link = external_link_id({                                 -- link is built before the id is validated
            link = handler.link,
            label = handler.label,
            prefix = handler.prefix,
            id = id,
            separator = handler.separator,
            encode = handler.encode,
            access = handler.access,
        });

        if id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$") then -- well-formed id: nothing to add
            return link;
        end

        return link .. ' ' .. set_error( 'bad_citeseerx' );
    end


    --[[--------------------------< B U I L D _ I D _ L I S T >--------------------------------------------------------
    --[[--------------------------< B U I L D _ I D _ L I S T >--------------------------------------------------------
    Line 759: Line 807:
    for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table
    for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table
    -- fallback to read-only cfg
    -- fallback to read-only cfg
    handler = setmetatable( { ['id'] = v }, fallback(k) );
    handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) );


    if handler.mode == 'external' then
    if handler.mode == 'external' then
    Line 769: Line 818:
    error( cfg.messages['unknown_ID_mode'] );
    error( cfg.messages['unknown_ID_mode'] );
    elseif k == 'BIBCODE' then
    elseif k == 'BIBCODE' then
    table.insert( new_list, {handler.label, bibcode( v ) } );
    table.insert( new_list, {handler.label, bibcode( v, handler.access ) } );
    elseif k == 'BIORXIV' then
    table.insert( new_list, {handler.label, biorxiv( v ) } );
    elseif k == 'CITESEERX' then
    table.insert( new_list, {handler.label, citeseerx( v ) } );
    elseif k == 'DOI' then
    elseif k == 'DOI' then
    table.insert( new_list, {handler.label, doi( v, options.DoiBroken ) } );
    table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } );
    elseif k == 'HDL' then
    elseif k == 'HDL' then
    table.insert( new_list, {handler.label, hdl( v ) } );
    table.insert( new_list, {handler.label, hdl( v, handler.access ) } );
    elseif k == 'ARXIV' then
    elseif k == 'ARXIV' then
    table.insert( new_list, {handler.label, arxiv( v, options.Class ) } );  
    table.insert( new_list, {handler.label, arxiv( v, options.Class ) } );  
    Line 781: Line 834:
    table.insert( new_list, {handler.label, lccn( v ) } );
    table.insert( new_list, {handler.label, lccn( v ) } );
    elseif k == 'OL' or k == 'OLA' then
    elseif k == 'OL' or k == 'OLA' then
    table.insert( new_list, {handler.label, openlibrary( v ) } );
    table.insert( new_list, {handler.label, openlibrary( v, handler.access ) } );
    elseif k == 'PMC' then
    elseif k == 'PMC' then
    table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } );
    table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } );
    Line 847: Line 900:
    end
    end
    return id_list;
    return id_list;
    end
    --[[--------------------------< E X T R A C T _ I D _ A C C E S S _ L E V E L S >--------------------------------------

    Collects the access level that an editor supplied for each identifier whose handler in cfg.id_handlers
    names a custom access parameter (handler.custom_access).  Identifiers with a predefined access level
    (e.g. arxiv) have no such parameter and are not handled here; they are rendered as free directly.

    For every supplied access parameter:
        - a value that is not (case-insensitively) one of cfg.keywords['id-access'] adds an
          'invalid_param_val' message to z.message_tail and is discarded;
        - a missing companion identifier in id_list adds a 'param_access_requires_param' message
          to z.message_tail.

    Returns a table indexed by the upper-case identifier name (the cfg.id_handlers key) whose values are the
    lower-cased access levels.  Identifiers with no access parameter, or an invalid one, have no entry.

    ]]

    local function extract_id_access_levels( args, id_list )
        local levels = {};

        for id_name, handler in pairs( cfg.id_handlers ) do
            local id_name_lc = string.lower(id_name);                   -- lower-case identifier name for the error message
            local param_name = handler.custom_access;

            if is_set(param_name) then
                local supplied = args[param_name];

                if is_set(supplied) then
                    local level = supplied:lower();                     -- keywords are compared and stored in lower case

                    if not in_array (level, cfg.keywords['id-access']) then
                        -- report the value exactly as the editor wrote it, then drop it
                        table.insert( z.message_tail, { set_error( 'invalid_param_val', {param_name, supplied}, true ) } );
                        level = nil;
                    end

                    if not is_set(id_list[id_name]) then                -- access level given without the identifier itself
                        table.insert( z.message_tail, { set_error( 'param_access_requires_param', {id_name_lc}, true ) } );
                    end

                    levels[id_name] = level;
                end
            end
        end

        return levels;
    end
    end


    Line 874: Line 960:
    build_id_list = build_id_list,
    build_id_list = build_id_list,
    extract_ids = extract_ids,
    extract_ids = extract_ids,
    extract_id_access_levels = extract_id_access_levels,
    is_embargoed = is_embargoed;
    is_embargoed = is_embargoed;
    set_selected_modules = set_selected_modules;
    set_selected_modules = set_selected_modules;
    }
    }