Editing Module:Citation/CS1/Identifiers
The edit can be undone. Please check the comparison below to verify that this is what you want to do, and then publish the changes below to finish undoing the edit.
Latest revision | Your text | ||
Line 87: | Line 87: | ||
if options.encode == true or options.encode == nil then | if options.encode == true or options.encode == nil then | ||
url_string = mw.uri.encode (url_string | url_string = mw.uri.encode (url_string); | ||
end | end | ||
Line 242: | Line 242: | ||
isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 | isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 | ||
for i, v in ipairs (isxn_str) do | for i, v in ipairs (isxn_str) do | ||
temp = temp + (3 - 2*(i % 2)) * tonumber (string.char (v) ); | temp = temp + (3 - 2*(i % 2)) * tonumber (string.char (v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit | ||
end | end | ||
return temp % 10 == 0; -- sum modulo 10 is zero when ISBN-13/ISMN is correct | return temp % 10 == 0; -- sum modulo 10 is zero when ISBN-13/ISMN is correct | ||
Line 351: | Line 351: | ||
end | end | ||
err_cat = err_cat and table.concat ({' ', set_message ('err_bad_arxiv')}) or ''; -- set error message if flag is true | err_cat = err_cat and table.concat ({' ', set_message ('err_bad_arxiv')}) or ''; -- set error message if flag is true | ||
Line 408: | Line 404: | ||
err_type = cfg.err_msg_supl.value; -- so value error | err_type = cfg.err_msg_supl.value; -- so value error | ||
else | else | ||
local next_year = tonumber (os.date ('%Y')) + 1; | local next_year = tonumber (os.date ('%Y')) + 1; -- get the current year as a number and add one for next year | ||
year = tonumber (year); -- convert year portion of bibcode to a number | year = tonumber (year); -- convert year portion of bibcode to a number | ||
if (1000 > year) or (year > next_year) then | if (1000 > year) or (year > next_year) then | ||
Line 421: | Line 417: | ||
if is_set (err_type) then -- if there was an error detected | if is_set (err_type) then -- if there was an error detected | ||
text = text .. ' ' .. set_message ('err_bad_bibcode', {err_type}); | text = text .. ' ' .. set_message ('err_bad_bibcode', {err_type}); | ||
end | end | ||
return text; | return text; | ||
Line 468: | Line 462: | ||
end -- err_cat remains set here when no match | end -- err_cat remains set here when no match | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, | prefix = handler.prefix, id = id, separator = handler.separator, | ||
Line 497: | Line 487: | ||
if not matched then | if not matched then | ||
text = text .. ' ' .. set_message ('err_bad_citeseerx' ); | text = text .. ' ' .. set_message ('err_bad_citeseerx' ); | ||
end | end | ||
return text; | return text; | ||
Line 559: | Line 548: | ||
local registrant_err_patterns = { -- these patterns are for code ranges that are not supported | local registrant_err_patterns = { -- these patterns are for code ranges that are not supported | ||
'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 | '^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 | ||
'^[^1- | '^[^1-4]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 40000+); accepts: 10000–49999 | ||
'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 | '^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 | ||
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 | '^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 | ||
Line 569: | Line 558: | ||
if not ignore_invalid then | if not ignore_invalid then | ||
if registrant then | if registrant then -- when DOI has proper form | ||
for i, pattern in ipairs (registrant_err_patterns) do | for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns | ||
if registrant:match (pattern) then | if registrant:match (pattern) then -- to validate registrant codes | ||
err_cat = ' ' .. set_message ('err_bad_doi'); | err_cat = ' ' .. set_message ('err_bad_doi'); -- when found, mark this DOI as bad | ||
break; | break; -- and done | ||
end | end | ||
end | end | ||
else | else | ||
err_cat = ' ' .. set_message ('err_bad_doi'); | err_cat = ' ' .. set_message ('err_bad_doi'); -- invalid directory or malformed | ||
end | end | ||
else | else | ||
Line 583: | Line 572: | ||
end | end | ||
text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access, | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access, | ||
Line 656: | Line 641: | ||
if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- HDL must contain a forward slash, must not contain spaces or endashes, and must not end with a period or comma | if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- HDL must contain a forward slash, must not contain spaces or endashes, and must not end with a period or comma | ||
text = text .. ' ' .. set_message ('err_bad_hdl' ); | text = text .. ' ' .. set_message ('err_bad_hdl' ); | ||
end | end | ||
return text; | return text; | ||
Line 680: | Line 664: | ||
else -- here when not ignoring | else -- here when not ignoring | ||
if not check then -- and there is an error | if not check then -- and there is an error | ||
return ISBN .. set_message ('err_bad_isbn', {err_type}, false, ' '); -- display an error message | return ISBN .. set_message ('err_bad_isbn', {err_type}, false, ' '); -- display an error message | ||
end | end | ||
Line 703: | Line 686: | ||
return return_result (false, cfg.err_msg_supl.form); | return return_result (false, cfg.err_msg_supl.form); | ||
end | end | ||
return return_result (is_valid_isxn (id, 10), cfg.err_msg_supl.check); | |||
else | else | ||
if id:match ('^%d+$') == nil then | if id:match ('^%d+$') == nil then | ||
Line 732: | Line 709: | ||
Error message if not 10 characters, if not ISBN-10, if mixed and first character is a digit. | Error message if not 10 characters, if not ISBN-10, if mixed and first character is a digit. | ||
|asin=630....... | |asin=630....... is (apparently) not a legitimate ISBN though it checksums as one; these do not cause this | ||
do not cause this function to emit the maint_asin message | function to emit the maint_asin message | ||
This function is positioned here because it calls isbn() | This function is positioned here because it calls isbn() | ||
Line 743: | Line 720: | ||
local domain = options.ASINTLD; | local domain = options.ASINTLD; | ||
local err_cat = | local err_cat = "" | ||
if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then | if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then | ||
Line 749: | Line 726: | ||
else | else | ||
if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) | if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) | ||
if | if isbn (options) then -- see if ASIN value is or validates as ISBN-10 | ||
if not id:find ('^ | if not id:find ('^630') then -- 630xxxxxxx is (apparently) not a valid isbn prefix but is used by amazon as a numeric identifier | ||
set_message ('maint_asin'); -- begins with something other than 630 so possibly an isbn | |||
end | end | ||
elseif not is_set (err_cat) then | elseif not is_set (err_cat) then | ||
Line 760: | Line 737: | ||
end | end | ||
end | end | ||
if | if not is_set (domain) then | ||
domain = "com"; | domain = "com"; | ||
elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom | elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom | ||
domain = "co." .. domain; | domain = "co." .. domain; | ||
elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico | |||
elseif in_array (domain, {'au', 'br', 'mx | |||
domain = "com." .. domain; | domain = "com." .. domain; | ||
end | end | ||
local handler = options.handler; | local handler = options.handler; | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix .. domain .. "/dp/", | prefix = handler.prefix .. domain .. "/dp/", | ||
Line 790: | Line 757: | ||
same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf | same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf | ||
section 2, pages 9–12. | section 2, pages 9–12. | ||
]] | ]] | ||
Line 823: | Line 787: | ||
if false == valid_ismn then | if false == valid_ismn then | ||
text = text .. ' ' .. set_message ('err_bad_ismn' ) -- add an error message if the ISMN is invalid | text = text .. ' ' .. set_message ('err_bad_ismn' ) -- add an error message if the ISMN is invalid | ||
end | end | ||
Line 876: | Line 839: | ||
else | else | ||
if false == valid_issn then | if false == valid_issn then | ||
text = text .. ' ' .. set_message ('err_bad_issn', (options.hkey == 'EISSN') and 'e' or ''); -- add an error message if the ISSN is invalid | text = text .. ' ' .. set_message ('err_bad_issn', (options.hkey == 'EISSN') and 'e' or ''); -- add an error message if the ISSN is invalid | ||
end | end | ||
Line 909: | Line 871: | ||
else | else | ||
err_cat = ' ' .. set_message ('err_bad_jfm' ); -- set an error message | err_cat = ' ' .. set_message ('err_bad_jfm' ); -- set an error message | ||
end | end | ||
Line 931: | Line 892: | ||
if id:find ('[Jj][Ss][Tt][Oo][Rr]') or id:find ('^https?://') or id:find ('%s') then | if id:find ('[Jj][Ss][Tt][Oo][Rr]') or id:find ('^https?://') or id:find ('%s') then | ||
err_msg = ' ' .. set_message ('err_bad_jstor'); -- set an error message | err_msg = ' ' .. set_message ('err_bad_jstor'); -- set an error message | ||
end | end | ||
Line 990: | Line 950: | ||
if not is_set (err_cat) and nil ~= lccn:find ('%s') then | if not is_set (err_cat) and nil ~= lccn:find ('%s') then | ||
err_cat = ' ' .. set_message ('err_bad_lccn'); -- lccn contains a space, set an error message | err_cat = ' ' .. set_message ('err_bad_lccn'); -- lccn contains a space, set an error message | ||
end | end | ||
Line 1,027: | Line 983: | ||
else | else | ||
err_cat = ' ' .. set_message ('err_bad_mr'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_mr'); -- set an error message | ||
end | end | ||
Line 1,070: | Line 1,025: | ||
else | else | ||
err_msg = ' ' .. set_message ('err_bad_oclc') -- add an error message if the id is malformed | err_msg = ' ' .. set_message ('err_bad_oclc') -- add an error message if the id is malformed | ||
end | end | ||
Line 1,103: | Line 1,057: | ||
ident = id; -- copy id to ident so that we display the flawed identifier | ident = id; -- copy id to ident so that we display the flawed identifier | ||
error_msg = ' ' .. set_message ('err_bad_ol'); | error_msg = ' ' .. set_message ('err_bad_ol'); | ||
end | end | ||
Line 1,136: | Line 1,084: | ||
if id:match("[^%d]") then -- if OSTI has anything but digits | if id:match("[^%d]") then -- if OSTI has anything but digits | ||
err_cat = ' ' .. set_message ('err_bad_osti'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_osti'); -- set an error message | ||
else -- OSTI is only digits | else -- OSTI is only digits | ||
local id_num = tonumber (id); -- convert id to a number for range testing | local id_num = tonumber (id); -- convert id to a number for range testing | ||
if 1018 > id_num or handler.id_limit < id_num then -- if OSTI is outside test limit boundaries | if 1018 > id_num or handler.id_limit < id_num then -- if OSTI is outside test limit boundaries | ||
err_cat = ' ' .. set_message ('err_bad_osti'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_osti'); -- set an error message | ||
end | end | ||
end | end | ||
Line 1,205: | Line 1,151: | ||
text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- no embargo date or embargo has expired, ok to link to article | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- no embargo date or embargo has expired, ok to link to article | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access, | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access, | ||
auto_link = not err_cat and 'pmc' or nil | auto_link = not err_cat and 'pmc' or nil -- do not auto-link when PMC has error | ||
}) .. (err_cat and err_cat or ''); -- parentheses required | }) .. (err_cat and err_cat or ''); -- parentheses required | ||
end | end | ||
return text; | return text; | ||
end | end | ||
Line 1,232: | Line 1,173: | ||
if id:match("[^%d]") then -- if PMID has anything but digits | if id:match("[^%d]") then -- if PMID has anything but digits | ||
err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message | ||
else -- PMID is only digits | else -- PMID is only digits | ||
local id_num = tonumber (id); -- convert id to a number for range testing | local id_num = tonumber (id); -- convert id to a number for range testing | ||
if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries | if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries | ||
err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_pmid'); -- set an error message | ||
end | end | ||
end | end | ||
Line 1,263: | Line 1,202: | ||
if id:match("[^%d]") then -- if RFC has anything but digits | if id:match("[^%d]") then -- if RFC has anything but digits | ||
err_cat = ' ' .. set_message ('err_bad_rfc'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_rfc'); -- set an error message | ||
else -- RFC is only digits | else -- RFC is only digits | ||
local id_num = tonumber (id); -- convert id to a number for range testing | local id_num = tonumber (id); -- convert id to a number for range testing | ||
if 1 > id_num or handler.id_limit < id_num then -- if RFC is outside test limit boundaries | if 1 > id_num or handler.id_limit < id_num then -- if RFC is outside test limit boundaries | ||
err_cat = ' ' .. set_message ('err_bad_rfc'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_rfc'); -- set an error message | ||
end | end | ||
end | end | ||
Line 1,301: | Line 1,238: | ||
if handler.id_limit < id_num then -- if S2CID is outside test limit boundaries | if handler.id_limit < id_num then -- if S2CID is outside test limit boundaries | ||
err_cat = ' ' .. set_message ('err_bad_s2cid'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_s2cid'); -- set an error message | ||
end | end | ||
else -- when id format incorrect | else -- when id format incorrect | ||
err_cat = ' ' .. set_message ('err_bad_s2cid'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_s2cid'); -- set an error message | ||
end | end | ||
Line 1,319: | Line 1,254: | ||
9-digit form of ISBN-10; uses same check-digit validation when SBN is prefixed with an additional '0' to make 10 digits | 9-digit form of ISBN-10; uses same check-digit validation when SBN is prefixed with an additional '0' to make 10 digits | ||
]] | ]] | ||
Line 1,334: | Line 1,266: | ||
if not ignore_invalid then -- if not ignoring SBN errors | if not ignore_invalid then -- if not ignoring SBN errors | ||
if not check then | if not check then | ||
return SBN .. set_message ('err_bad_sbn', {err_type}, false, ' '); -- display an error message | return SBN .. set_message ('err_bad_sbn', {err_type}, false, ' '); -- display an error message | ||
end | end | ||
Line 1,347: | Line 1,278: | ||
end | end | ||
local ident = id:gsub ('[%s-]', ''); | local ident = id:gsub ('[%s-]', ''); -- remove hyphens and whitespace; they interfere with the rest of the tests | ||
if 9 ~= ident:len() then | if 9 ~= ident:len() then | ||
Line 1,384: | Line 1,315: | ||
if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries | if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries | ||
err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message | ||
end | end | ||
else -- when id format incorrect | else -- when id format incorrect | ||
err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message | err_cat = ' ' .. set_message ('err_bad_ssrn'); -- set an error message | ||
end | end | ||
Line 1,414: | Line 1,343: | ||
if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$') then -- doesn't have '@' with text on both sides, or first character is '<', or last character is '>' | if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$') then -- doesn't have '@' with text on both sides, or first character is '<', or last character is '>' | ||
text = text .. ' ' .. set_message ('err_bad_usenet_id') -- add an error message if the message id is invalid | text = text .. ' ' .. set_message ('err_bad_usenet_id') -- add an error message if the message id is invalid | ||
end | end | ||
Line 1,440: | Line 1,368: | ||
elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format? | elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format? | ||
err_cat = ' ' .. set_message ('err_bad_zbl'); -- no, set an error message | err_cat = ' ' .. set_message ('err_bad_zbl'); -- no, set an error message | ||
end | end | ||
Line 1,487: | Line 1,414: | ||
if is_set (access_level) then | if is_set (access_level) then | ||
if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required | if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required | ||
table.insert (z.message_tail, { set_message (' | table.insert (z.message_tail, { set_message ('invalid_param_val', {access_param, access_level}, true) } ); | ||
access_level = nil; -- invalid so unset | access_level = nil; -- invalid so unset | ||
end | end | ||
Line 1,505: | Line 1,432: | ||
render the identifiers into a sorted sequence table | render the identifiers into a sorted sequence table | ||
< | <id_list_t> is a table of k/v pairs where k is same as key in cfg.id_handlers and v is the assigned value | ||
<options_t> is a table of various k/v option pairs provided in the call to new_build_id_list(); | <options_t> is a table of various k/v option pairs provided in the call to new_build_id_list(); | ||
modified by this function and passed to all identifier rendering functions | modified by this function and passed to all identifier rendering functions | ||
<access_levels_t> is a table of k/v pairs where k is same as key in cfg.id_handlers and v is the assigned value (if valid) | <access_levels_t> is a table of k/v pairs where k is same as key in cfg.id_handlers and v is the assigned value (if valid) | ||
returns a sequence table of sorted (by hkey | returns a sequence table of sorted (by hkey) rendered identifier strings | ||
]] | ]] | ||
local function build_id_list ( | local function build_id_list (id_list_t, options_t, access_levels_t) | ||
local | local new_list_t = {}; | ||
local accept; | local accept; | ||
local func_map = { --function map points to functions associated with hkey identifier | local func_map = { --function map points to functions associated with hkey identifier | ||
Line 1,546: | Line 1,473: | ||
} | } | ||
for hkey, v in pairs ( | for hkey, v in pairs (id_list_t) do | ||
v, accept = has_accept_as_written (v); -- remove accept-as-written markup if present; accept is boolean true when markup removed; false else | v, accept = has_accept_as_written (v); -- remove accept-as-written markup if present; accept is boolean true when markup removed; false else | ||
-- every function gets the options table with value v and accept boolean | -- every function gets the options table with value v and accept boolean | ||
Line 1,554: | Line 1,481: | ||
options_t.access = access_levels_t[hkey]; -- add the access level for those that have an |<identifier-access= parameter | options_t.access = access_levels_t[hkey]; -- add the access level for those that have an |<identifier-access= parameter | ||
options_t.handler = cfg.id_handlers[hkey]; | options_t.handler = cfg.id_handlers[hkey]; | ||
if func_map[hkey] then | if func_map[hkey] then | ||
table.insert ( | table.insert (new_list_t, {hkey, func_map[hkey] (options_t)}); -- call the function and add the results to the output sequence table | ||
-- TODO: also retrieve identifier validity status, OL A/M/W/X type and ASIN TLD info from the corresponding function call for improved metadata generation in COinS() in ~/COinS | |||
else | else | ||
error (cfg.messages['unknown_ID_key'] .. ' ' .. hkey); -- here when func_map doesn't have a function for hkey | error (cfg.messages['unknown_ID_key'] .. ' ' .. hkey); -- here when func_map doesn't have a function for hkey | ||
Line 1,567: | Line 1,495: | ||
end | end | ||
table.sort ( | table.sort (new_list_t, comp); -- sequence table of tables sort | ||
for k, v in ipairs ( | for k, v in ipairs (new_list_t) do -- convert sequence table of tables to simple sequence table of strings | ||
new_list_t[k] = v[2]; | |||
end | end | ||
return | return new_list_t; | ||
end | end | ||
Line 1,608: | Line 1,512: | ||
]] | ]] | ||
local function identifier_lists_get (args, options_t | local function identifier_lists_get (args, options_t) | ||
local ID_list_coins_t = extract_ids (args); -- get a table of identifiers and their values for use locally and for use in COinS | local ID_list_coins_t = extract_ids (args); -- get a table of identifiers and their values for use locally and for use in COinS | ||
local ID_access_levels_t = extract_id_access_levels (args, ID_list_coins_t); -- get a table of identifier access levels | local ID_access_levels_t = extract_id_access_levels (args, ID_list_coins_t); -- get a table of identifier access levels | ||
local ID_list_t = build_id_list (ID_list_coins_t, options_t, ID_access_levels_t); -- get a sequence table of rendered identifier strings | local ID_list_t = build_id_list (ID_list_coins_t, options_t, ID_access_levels_t); -- get a sequence table of rendered identifier strings | ||
-- TODO: add code to retrieve identifier validity status, OL A/M/W/X type and ASIN TLD info from ID_list_t and add this to ID_list_coins_t for improved metadata generation in COinS() in ~/COinS | |||
return ID_list_t, ID_list_coins_t; -- return the tables | return ID_list_t, ID_list_coins_t; -- return the tables |