Module:Citation/CS1: Difference between revisions
Synch from sandbox;
m>Trappist the monk (revised archive url check) |
m>Trappist the monk (Synch from sandbox;) |
||
Line 5: | Line 5: | ||
]] | ]] | ||
local dates, year_date_check, reformat_dates | local dates, year_date_check, reformat_dates, date_hyphen_to_dash -- functions in Module:Citation/CS1/Date_validation | ||
local is_set, in_array, substitute, error_comment, set_error, select_one, -- functions in Module:Citation/CS1/Utilities | local is_set, in_array, substitute, error_comment, set_error, select_one, -- functions in Module:Citation/CS1/Utilities | ||
Line 18: | Line 18: | ||
local cfg = {}; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | local cfg = {}; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | ||
local whitelist = {}; -- table of tables listing valid template parameter names; defined in Module:Citation/CS1/Whitelist | local whitelist = {}; -- table of tables listing valid template parameter names; defined in Module:Citation/CS1/Whitelist | ||
--[[--------------------------< P A G E S C O P E V A R I A B L E S >-------------------------------------- | |||
declare variables here that have page-wide scope | |||
]] | |||
local Preview_mode = false; -- flag indicating that we are rendering a preview page (Show preview button) | |||
--[[--------------------------< F I R S T _ S E T >------------------------------------------------------------ | --[[--------------------------< F I R S T _ S E T >------------------------------------------------------------ | ||
Line 62: | Line 71: | ||
local added_vanc_errs; -- flag so we only emit one Vancouver error / category | local added_vanc_errs; -- flag so we only emit one Vancouver error / category | ||
local function add_vanc_error () | local function add_vanc_error (source) | ||
if not added_vanc_errs then | if not added_vanc_errs then | ||
added_vanc_errs = true; -- note that we've added this category | added_vanc_errs = true; -- note that we've added this category | ||
table.insert( z.message_tail, { set_error( 'vancouver', {}, true ) } ); | table.insert( z.message_tail, { set_error( 'vancouver', {source}, true ) } ); | ||
end | end | ||
end | end | ||
Line 475: | Line 484: | ||
script_value = script_value:gsub ('^%l%l%s*:%s*', ''); -- strip prefix from script | script_value = script_value:gsub ('^%l%l%s*:%s*', ''); -- strip prefix from script | ||
-- is prefix one of these language codes? | -- is prefix one of these language codes? | ||
if in_array (lang, | if in_array (lang, cfg.script_lang_codes) then | ||
add_prop_cat ('script_with_name', {name, lang}) | add_prop_cat ('script_with_name', {name, lang}) | ||
else | else | ||
Line 841: | Line 850: | ||
return str; | return str; | ||
end | end | ||
--[[--------------------------< I S _ S U F F I X >------------------------------------------------------------ | |||
returns true if suffix is a properly formed Jr, Sr, or ordinal in the range 2–9. Punctuation not allowed. | |||
]] | |||
local function is_suffix (suffix)
	-- Purpose: report whether suffix is a properly formed generational suffix in Vancouver form.
	-- Accepted values are exactly: Jr, Sr, 2nd, 3rd, and 4th through 9th.  No punctuation, no
	-- surrounding whitespace, no other ordinals.
	--
	-- suffix: the candidate suffix string
	-- returns: true when suffix is one of the accepted values; false otherwise

	if 'string' ~= type (suffix) then								-- nil or other non-string can never be a suffix; avoids a script error on suffix:match()
		return false;
	end

	if in_array (suffix, {'Jr', 'Sr', '2nd', '3rd'}) then			-- the fixed-form suffixes
		return true;
	end

	-- only 4–9 take the 'th' ending; a bare %d here would also accept the malformed ordinals 0th, 1th, 2th, 3th
	return nil ~= suffix:match ('^[4-9]th$');
end
--[[--------------------------< I S _ G O O D _ V A N C _ N A M E >-------------------------------------------- | --[[--------------------------< I S _ G O O D _ V A N C _ N A M E >-------------------------------------------- | ||
Line 858: | Line 881: | ||
|firstn= also allowed to contain hyphens, spaces, apostrophes, and periods | |firstn= also allowed to contain hyphens, spaces, apostrophes, and periods | ||
This original test: | |||
because the code editor gets confused between character insertion point and cursor position. | if nil == mw.ustring.find (last, "^[A-Za-zÀ-ÖØ-öø-ƿDŽ-ɏ%-%s%']*$") or nil == mw.ustring.find (first, "^[A-Za-zÀ-ÖØ-öø-ƿDŽ-ɏ%-%s%'%.]+[2-6%a]*$") then | ||
was written outside of the code editor and pasted here because the code editor gets confused between character insertion point and cursor position. | |||
The test has been rewritten to use decimal character escape sequence for the individual bytes of the unicode characters so that it is not necessary | |||
to use an external editor to maintain this code. | |||
\195\128-\195\150 – À-Ö | |||
\195\152-\195\182 – Ø-ö | |||
\195\184-\198\191 – ø-ƿ | |||
\199\132-\201\143 – DŽ-ɏ | |||
]] | ]] | ||
local function is_good_vanc_name (last, first) | local function is_good_vanc_name (last, first) | ||
if nil == mw.ustring.find (last, "^[A-Za- | local first, suffix = first:match ('(.-),?%s*([%dJS][%drndth]+)%.?$') or first; -- if first has something that looks like a generational suffix, get it | ||
if is_set (suffix) then | |||
if not is_suffix (suffix) then | |||
add_vanc_error ('suffix'); | |||
return false; -- not a name with an appropriate suffix | |||
end | |||
end | |||
if nil == mw.ustring.find (last, "^[A-Za-z\195\128-\195\150\195\152-\195\182\195\184-\198\191\199\132-\201\143%-%s%']*$") or | |||
nil == mw.ustring.find (first, "^[A-Za-z\195\128-\195\150\195\152-\195\182\195\184-\198\191\199\132-\201\143%-%s%'%.]*$") then | |||
add_vanc_error ('non-Latin character'); | |||
return false; -- not a string of latin characters; Vancouver requires Romanization | |||
end; | end; | ||
return true; | return true; | ||
Line 877: | Line 917: | ||
Names in |firstn= may be separated by spaces or hyphens, or for initials, a period. See http://www.ncbi.nlm.nih.gov/books/NBK7271/box/A35062/. | Names in |firstn= may be separated by spaces or hyphens, or for initials, a period. See http://www.ncbi.nlm.nih.gov/books/NBK7271/box/A35062/. | ||
Vancouver style requires family rank designations (Jr, II, III, etc) to be rendered as Jr, 2nd, 3rd, etc. | Vancouver style requires family rank designations (Jr, II, III, etc) to be rendered as Jr, 2nd, 3rd, etc. See http://www.ncbi.nlm.nih.gov/books/NBK7271/box/A35085/. | ||
This code only accepts and understands generational suffixes in the Vancouver format because Roman numerals look like, and can be mistaken for, initials. | |||
This function uses ustring functions because firstname initials may be any of the unicode Latin characters accepted by is_good_vanc_name (). | This function uses ustring functions because firstname initials may be any of the unicode Latin characters accepted by is_good_vanc_name (). | ||
Line 885: | Line 925: | ||
-- Reduce the value of a first-name parameter to Vancouver-style initials.
--
-- first: the first-name text; may be full names, initials, and may end with a generational
--        suffix in Vancouver form (Jr, 2nd, 3rd, ...), optionally followed by a dot
-- returns: first unchanged when it is already one or two upper-case initials (with or without a
--        suffix); otherwise the first character of each of the first two names, concatenated
--        without spaces or dots, followed by ' ' .. suffix when a legitimate suffix is found
-- side effect: calls add_vanc_error ('suffix') when one or two initials are followed by
--        something that looks like a suffix but is not accepted by is_suffix()
local function reduce_to_initials(first)
	local name, suffix = mw.ustring.match(first, "^(%u+) ([%dJS][%drndth]+)$");	-- initials followed by something that looks like a suffix?

	if not name then														-- if not initials and a suffix
		name = mw.ustring.match(first, "^(%u+)$");							-- is it just initials?
	end

	if name then															-- if first is initials with or without suffix
		-- count characters, not bytes: %u in ustring patterns matches non-ASCII upper-case letters,
		-- which occupy more than one byte, so name:len() would miscount names such as 'ÉA'
		if 3 > mw.ustring.len (name) then									-- if one or two initials
			if suffix and not is_suffix (suffix) then						-- there is a suffix but it is not legitimate
				add_vanc_error ('suffix');									-- so emit the error message
			end
			return first;													-- in every one-or-two-initials case return first unmolested
		end
	end																		-- if here then name has 3 or more uppercase letters so treat them as a word

	local initials = {};													-- holds the initials and, when present, the suffix
	local names = mw.text.split (first, '[%s,]+');							-- split into a table of names and possible suffix

	for i, part in ipairs (names) do										-- loop through the table
		if 1 < i and part:match ('[%dJS][%drndth]+%.?$') then				-- if not the first name, and looks like a suffix (may have trailing dot)
			part = part:gsub ('%.', '');									-- remove dots; only the first gsub() return value is kept
			if is_suffix (part) then										-- if a legitimate suffix
				initials[#initials + 1] = ' ' .. part;						-- add a separator space, insert at end of initials table
				break;														-- and done because suffix must fall at the end of a name
			end																-- no error message if not a suffix; possibly because of Romanization
		end
		if 3 > i then														-- only two initials allowed in Vancouver system
			initials[#initials + 1] = mw.ustring.sub (part, 1, 1);			-- insert the initial at end of initials table
		end
	end

	-- legacy implementation retained for reference:
--	for word in mw.ustring.gmatch(first, "[^%s%.%-]+") do					-- names separated by spaces, hyphens, or periods
--		table.insert(initials, mw.ustring.sub(word,1,1))					-- Vancouver format does not include full stops.
--		i = i + 1;															-- bump the counter
--		if 2 <= i then break; end											-- only two initials allowed in Vancouver system; if 2, quit
--	end

	return table.concat(initials)											-- Vancouver format does not include spaces.
end
Line 1,408: | Line 1,486: | ||
may sometimes be required and because such names will often fail the is_good_vanc_name() and other format compliance | may sometimes be required and because such names will often fail the is_good_vanc_name() and other format compliance | ||
tests, are wrapped in doubled paranethese ((corporate name)) to suppress the format tests. | tests, are wrapped in doubled paranethese ((corporate name)) to suppress the format tests. | ||
Supports generational suffixes Jr, Sr, 2nd, 3rd, and 4th–9th. | |||
This function sets the vancouver error when a reqired comma is missing and when there is a space between an author's initials. | This function sets the vancouver error when a reqired comma is missing and when there is a space between an author's initials. | ||
]] | ]] | ||
Line 1,419: | Line 1,497: | ||
local v_name_table = {}; | local v_name_table = {}; | ||
local etal = false; -- return value set to true when we find some form of et al. vauthors parameter | local etal = false; -- return value set to true when we find some form of et al. vauthors parameter | ||
local last, first, link, mask; | local last, first, link, mask, suffix; | ||
local corporate = false; | local corporate = false; | ||
vparam, etal = name_has_etal (vparam, etal, true); -- find and remove variations on et al. do not categorize (do it here because et al. might have a period) | vparam, etal = name_has_etal (vparam, etal, true); -- find and remove variations on et al. do not categorize (do it here because et al. might have a period) | ||
if vparam:find ('%[%[') or vparam:find ('%]%]') then -- no wikilinking vauthors names | if vparam:find ('%[%[') or vparam:find ('%]%]') then -- no wikilinking vauthors names | ||
add_vanc_error (); | add_vanc_error ('wikilink'); | ||
end | end | ||
v_name_table = mw.text.split(vparam, "%s*,%s*") -- names are separated by commas | v_name_table = mw.text.split(vparam, "%s*,%s*") -- names are separated by commas | ||
for i, v_name in ipairs(v_name_table) do | for i, v_name in ipairs(v_name_table) do | ||
if v_name:match ('^%(%(.+%)%)$') then -- corporate authors are wrapped in doubled | if v_name:match ('^%(%(.+%)%)$') then -- corporate authors are wrapped in doubled parentheses to supress vanc formatting and error detection | ||
first = ''; -- set to empty string for concatenation and because it may have been set for previous author/editor | first = ''; -- set to empty string for concatenation and because it may have been set for previous author/editor | ||
last = v_name:match ('^%(%((.+)%)%)$') | last = v_name:match ('^%(%((.+)%)%)$') -- remove doubled parntheses | ||
corporate = true; | corporate = true; -- flag used in list_people() | ||
elseif string.find(v_name, "%s") then | elseif string.find(v_name, "%s") then | ||
if v_name:find('[;%.]') then -- look for commonly occurring punctuation characters; | |||
add_vanc_error ('punctuation'); | |||
end | |||
local lastfirstTable = {} | local lastfirstTable = {} | ||
lastfirstTable = mw.text.split(v_name, "%s") | lastfirstTable = mw.text.split(v_name, "%s") | ||
first = table.remove(lastfirstTable); -- removes and returns value of last element in table which should be author intials | first = table.remove(lastfirstTable); -- removes and returns value of last element in table which should be author intials | ||
last | if is_suffix (first) then -- if a valid suffix | ||
if mw.ustring.match (last, '%a+%s+%u+%s+%a+') | suffix = first -- save it as a suffix and | ||
add_vanc_error (); | first = table.remove(lastfirstTable); -- get what should be the initials from the table | ||
end -- no suffix error message here because letter combination may be result of Romanization; check for digits? | |||
last = table.concat(lastfirstTable, " ") -- returns a string that is the concatenation of all other names that are not initials | |||
if mw.ustring.match (last, '%a+%s+%u+%s+%a+') then | |||
add_vanc_error ('missing comma'); -- matches last II last; the case when a comma is missing | |||
end | |||
if mw.ustring.match (v_name, ' %u %u$') then -- this test is in the wrong place TODO: move or replace with a more appropriate test | |||
add_vanc_error ('name'); -- matches a space between two intiials | |||
end | end | ||
else | else | ||
Line 1,446: | Line 1,534: | ||
end | end | ||
if is_set (first) | if is_set (first) then | ||
if not mw.ustring.match (first, "^%u?%u$") then -- first shall contain one or two upper-case letters, nothing else | |||
add_vanc_error ('initials'); -- too many initials; mixed case initials (which may be ok Romanization); hyphenated initials | |||
end | |||
is_good_vanc_name (last, first); -- check first and last before restoring the suffix which may have a non-Latin digit | |||
if is_set (suffix) then | |||
first = first .. ' ' .. suffix; -- if there was a suffix concatenate with the initials | |||
end | |||
else | |||
is_good_vanc_name (last, ''); | |||
end | end | ||
-- this from extract_names () | -- this from extract_names () | ||
link = select_one( args, cfg.aliases[list_name .. '-Link'], 'redundant_parameters', i ); | link = select_one( args, cfg.aliases[list_name .. '-Link'], 'redundant_parameters', i ); | ||
Line 1,657: | Line 1,755: | ||
Check archive.org urls to make sure they at least look like they are pointing at valid archives and not to the | Check archive.org urls to make sure they at least look like they are pointing at valid archives and not to the | ||
save snapshot url. When the archive url is 'https://web.archive.org/save/' (or http://...) archive.org saves a snapshot | save snapshot url or to calendar pages. When the archive url is 'https://web.archive.org/save/' (or http://...) | ||
of the target page in the url. That is something that Wikipedia should not allow unwitting readers to do. | archive.org saves a snapshot of the target page in the url. That is something that Wikipedia should not allow | ||
unwitting readers to do. | |||
When the archive.org url does not have a complete timestamp, archive.org chooses a snapshot according to its own | When the archive.org url does not have a complete timestamp, archive.org chooses a snapshot according to its own | ||
algorithm or provides a 'search' result. [[WP:ELNO]] discourages links to search results. | algorithm or provides a calendar 'search' result. [[WP:ELNO]] discourages links to search results. | ||
This function looks at the value assigned to |archive-url= and returns empty strings for |archive-url= and | This function looks at the value assigned to |archive-url= and returns empty strings for |archive-url= and | ||
|archive-date= and an error message when: | |archive-date= and an error message when: | ||
|archive-url= holds an archive.org save command url | |archive-url= holds an archive.org save command url | ||
|archive-url= is an archive.org url that does not have a complete timestamp (YYYYMMDDhhmmss 14 digits) in the correct place | |archive-url= is an archive.org url that does not have a complete timestamp (YYYYMMDDhhmmss 14 digits) in the | ||
correct place | |||
otherwise returns |archive-url= and |archive-date= | otherwise returns |archive-url= and |archive-date= | ||
Line 1,676: | Line 1,776: | ||
('id_', 'js_', 'cs_', 'im_') but since archive.org ignores others following the same form (two letters and an underscore) | ('id_', 'js_', 'cs_', 'im_') but since archive.org ignores others following the same form (two letters and an underscore) | ||
we don't check for these specific flags but we do check the form. | we don't check for these specific flags but we do check the form. | ||
This function supports a preview mode. When the article is rendered in preview mode, this function may return a modified | |||
archive url: | |||
for save command errors, return undated wildcard (/*/) | |||
for timestamp errors when the timestamp has a wildcard, return the url unmodified | |||
for timestamp errors when the timestamp does not have a wildcard, return with timestamp limited to six digits plus wildcard (/yyyymm*/) | |||
]=] | ]=] | ||
Line 1,684: | Line 1,790: | ||
if not url:match('//web%.archive%.org/') then | if not url:match('//web%.archive%.org/') then | ||
return url, date; -- not an archive.org archive, return | return url, date; -- not an archive.org archive, return ArchiveURL and ArchiveDate | ||
end | end | ||
if url:match('//web%.archive%.org/save/') then -- if a save command url, we don't want to allow saving of the target page | if url:match('//web%.archive%.org/save/') then -- if a save command url, we don't want to allow saving of the target page | ||
table.insert( z.message_tail, { set_error( 'archive_url', {'save command'}, true ) } ); -- add error message | table.insert( z.message_tail, { set_error( 'archive_url', {'save command'}, true ) } ); -- add error message | ||
return '', ''; | if Preview_mode then | ||
return url:gsub ('(//web%.archive%.org)/save/', '%1/*/', 1), date; -- preview mode: modify and return ArchiveURL and ArchiveDate | |||
else | |||
return '', ''; -- return empty strings for archiveURL and ArchiveDate | |||
end | |||
end | end | ||
-- if url:match('//web%.archive%.org/web/%*/') or url:match('//web%.archive%.org/%*/') then -- wildcard with or without 'web/' path element | |||
-- table.insert( z.message_tail, { set_error( 'archive_url', {'wildcard'}, true ) } ); -- add error message and | |||
-- return '', ''; -- return empty strings for archiveURL and ArchiveDate | |||
-- end | |||
path, timestamp, flag = url:match('//web%.archive%.org/([^%d]*)(%d+)([^/]*)/'); -- split out some of the url parts for evaluation | |||
if not is_set(timestamp) or 14 ~= timestamp:len() then -- path and flag optional, must have 14-digit timestamp here | |||
if not is_set(timestamp) or 14 ~= timestamp:len() then | |||
err_msg = 'timestamp'; | err_msg = 'timestamp'; | ||
if '*' ~= flag then | |||
url=url:gsub ('(//web%.archive%.org/[^%d]*%d?%d?%d?%d?%d?%d?)[^/]*', '%1*', 1) -- for preview, modify ts to be yearmo* max (0-6 digits plus splat) | |||
end | |||
elseif is_set(path) and 'web/' ~= path then -- older archive urls do not have the extra 'web/' path element | elseif is_set(path) and 'web/' ~= path then -- older archive urls do not have the extra 'web/' path element | ||
err_msg = 'path'; | err_msg = 'path'; | ||
Line 1,712: | Line 1,825: | ||
-- if here something not right so | -- if here something not right so | ||
table.insert( z.message_tail, { set_error( 'archive_url', {err_msg}, true ) } ); -- add error message and | table.insert( z.message_tail, { set_error( 'archive_url', {err_msg}, true ) } ); -- add error message and | ||
return '', ''; | if Preview_mode then | ||
return url, date; -- preview mode so return archiveURL and ArchiveDate | |||
else | |||
return '', ''; -- return empty strings for archiveURL and ArchiveDate | |||
end | |||
end | end | ||
Line 1,857: | Line 1,974: | ||
ArchiveURL, ArchiveDate = archive_url_check (A['ArchiveURL'], A['ArchiveDate']) | ArchiveURL, ArchiveDate = archive_url_check (A['ArchiveURL'], A['ArchiveDate']) | ||
local DeadURL = A['DeadURL'] | local DeadURL = A['DeadURL'] | ||
if not is_valid_parameter_value (DeadURL, 'dead-url', cfg.keywords ['deadurl']) then -- set in config.defaults to 'yes' | if not is_valid_parameter_value (DeadURL, 'dead-url', cfg.keywords ['deadurl']) then -- set in config.defaults to 'yes' | ||
Line 2,296: | Line 2,406: | ||
end | end | ||
end | end | ||
if not is_set(error_message) then -- error free dates only | |||
local modified = false; -- flag | |||
if is_set (DF) then -- if we need to reformat dates | |||
modified = reformat_dates (date_parameters_list, DF, false); -- reformat to DF format, use long month names if appropriate | |||
end | |||
if true == date_hyphen_to_dash (date_parameters_list) then -- convert hyphens to dashes where appropriate | |||
modified = true; | |||
add_maint_cat ('date_format'); -- hyphens were converted so add maint category | |||
if | end | ||
AccessDate = date_parameters_list['access-date']; -- overwrite date holding parameters with | |||
if modified then -- if the date_parameters_list values were modified | |||
AccessDate = date_parameters_list['access-date']; -- overwrite date holding parameters with modified values | |||
ArchiveDate = date_parameters_list['archive-date']; | ArchiveDate = date_parameters_list['archive-date']; | ||
Date = date_parameters_list['date']; | Date = date_parameters_list['date']; | ||
Line 2,308: | Line 2,426: | ||
PublicationDate = date_parameters_list['publication-date']; | PublicationDate = date_parameters_list['publication-date']; | ||
end | end | ||
else | |||
table.insert( z.message_tail, { set_error( 'bad_date', {error_message}, true ) } ); -- add this error message | |||
end | end | ||
-- if is_set(error_message) then | |||
-- table.insert( z.message_tail, { set_error( 'bad_date', {error_message}, true ) } ); -- add this error message | |||
-- elseif is_set (DF) then | |||
-- if reformat_dates (date_parameters_list, DF, false) then -- reformat to DF format, use long month names if appropriate | |||
-- AccessDate = date_parameters_list['access-date']; -- overwrite date holding parameters with reformatted values | |||
-- ArchiveDate = date_parameters_list['archive-date']; | |||
-- Date = date_parameters_list['date']; | |||
-- DoiBroken = date_parameters_list['doi-broken-date']; | |||
-- LayDate = date_parameters_list['lay-date']; | |||
-- PublicationDate = date_parameters_list['publication-date']; | |||
-- end | |||
-- end | |||
end -- end of do | end -- end of do | ||
Line 2,688: | Line 2,821: | ||
if is_set (Translators) then | if is_set (Translators) then | ||
-- Others = sepc .. ' Translated by ' .. Translators .. Others; | |||
Others = sepc .. ' ' .. wrap_msg ('translated', Translators, use_lowercase) .. Others; | |||
end | end | ||
Line 2,993: | Line 3,127: | ||
text = safe_join( {Editors, Date, Chapter, Place, tcommon, pgtext, idcommon}, sepc ); | text = safe_join( {Editors, Date, Chapter, Place, tcommon, pgtext, idcommon}, sepc ); | ||
else | else | ||
if config.CitationClass | if in_array(config.CitationClass, {"journal","citation"}) and is_set(Periodical) then | ||
text = safe_join( {Chapter, Place, tcommon, pgtext, Date, idcommon}, sepc ); | text = safe_join( {Chapter, Place, tcommon, pgtext, Date, idcommon}, sepc ); | ||
else | else | ||
Line 3,030: | Line 3,164: | ||
namelist = e; | namelist = e; | ||
end | end | ||
id = anchor_id (namelist, year); | if #namelist > 0 then -- if there are names in namelist | ||
id = anchor_id (namelist, year); -- go make the CITEREF anchor | |||
else | |||
id = ''; -- unset | |||
end | |||
end | end | ||
options.id = id; | options.id = id; | ||
Line 3,121: | Line 3,259: | ||
year_date_check = validation.year_date_check; | year_date_check = validation.year_date_check; | ||
reformat_dates = validation.reformat_dates; | reformat_dates = validation.reformat_dates; | ||
date_hyphen_to_dash = validation.date_hyphen_to_dash; | |||
is_set = utilities.is_set; -- imported functions from Module:Citation/CS1/Utilities | is_set = utilities.is_set; -- imported functions from Module:Citation/CS1/Utilities | ||
Line 3,143: | Line 3,282: | ||
COinS = metadata.COinS; | COinS = metadata.COinS; | ||
Preview_mode = frame:preprocess('{{REVISIONID}}'); -- use magic word to get revision id | |||
if is_set (Preview_mode) then -- if there is a value then this is not a preiview | |||
Preview_mode = false; | |||
else | |||
Preview_mode = true; -- no value (nil or empty string) so this is a preview | |||
end | |||
local args = {}; | local args = {}; |