@@ Line 5: / Line 5: @@
 ]]
-local dates, year_date_check, reformat_dates									-- functions in Module:Citation/CS1/Date_validation
+local dates, year_date_check, reformat_dates, date_hyphen_to_dash				-- functions in Module:Citation/CS1/Date_validation
 local is_set, in_array, substitute, error_comment, set_error, select_one,		-- functions in Module:Citation/CS1/Utilities
@@ Line 18: / Line 18: @@
 local cfg = {};																	-- table of configuration tables that are defined in Module:Citation/CS1/Configuration
 local whitelist = {};															-- table of tables listing valid template parameter names; defined in Module:Citation/CS1/Whitelist
+--[[--------------------------< P A G E   S C O P E   V A R I A B L E S >--------------------------------------
+declare variables here that have page-wide scope
+]]
+local Preview_mode = false;														-- flag indicating that we are rendering a preview page (Show preview button)
 --[[--------------------------< F I R S T _ S E T >------------------------------------------------------------
@@ Line 62: / Line 71: @@
 local added_vanc_errs;															-- flag so we only emit one Vancouver error / category
-local function add_vanc_error ()
+local function add_vanc_error (source)
 	if not added_vanc_errs then
 		added_vanc_errs = true;													-- note that we've added this category
-		table.insert( z.message_tail, { set_error( 'vancouver', {}, true ) } );
+		table.insert( z.message_tail, { set_error( 'vancouver', {source}, true ) } );
 	end
 end
@@ Line 475: / Line 484: @@
 			script_value = script_value:gsub ('^%l%l%s*:%s*', '');				-- strip prefix from script
 																				-- is prefix one of these language codes?
-			if in_array (lang, {'am', 'ar', 'be', 'bg', 'bs', 'dv', 'el', 'fa', 'he', 'hy', 'ja', 'ka', 'ko', 'ku', 'mk', 'ml', 'ps', 'ru', 'sd', 'sr', 'th', 'uk', 'ug', 'yi', 'zh'}) then
+			if in_array (lang, cfg.script_lang_codes) then
 				add_prop_cat ('script_with_name', {name, lang})
 			else
@@ Line 841: / Line 850: @@
 	return str;
 end
+--[[--------------------------< I S _ S U F F I X >------------------------------------------------------------
+returns true if suffix is properly formed Jr, Sr, or ordinal in the range 2–9.  Punctuation not allowed.
+]]
+local function is_suffix (suffix)
+	if in_array (suffix, {'Jr', 'Sr', '2nd', '3rd'}) or suffix:match ('^%dth$') then
+		return true;
+	end
+	return false;
+end
 --[[--------------------------< I S _ G O O D _ V A N C _ N A M E >--------------------------------------------
@@ Line 858: / Line 881: @@
 |firstn= also allowed to contain hyphens, spaces, apostrophes, and periods
-At the time of this writing, I had to write the 'if nil == mw.ustring.find ...' test ouside of the code editor and paste it here
+This original test:
-because the code editor gets confused between character insertion point and cursor position.
+	if nil == mw.ustring.find (last, "^[A-Za-zÀ-ÖØ-öø-ƿǄ-ɏ%-%s%']*$") or nil == mw.ustring.find (first, "^[A-Za-zÀ-ÖØ-öø-ƿǄ-ɏ%-%s%'%.]+[2-6%a]*$") then
+was written outside of the code editor and pasted here because the code editor gets confused between character insertion point and cursor position.
+The test has been rewritten to use decimal character escape sequences for the individual bytes of the unicode characters so that it is not necessary
+to use an external editor to maintain this code.
+	\195\128-\195\150 – À-Ö
+	\195\152-\195\182 – Ø-ö
+	\195\184-\198\191 – ø-ƿ
+	\199\132-\201\143 – Ǆ-ɏ%
 ]]
 local function is_good_vanc_name (last, first)
-	if nil == mw.ustring.find (last, "^[A-Za-zÀ-ÖØ-öø-ƿǄ-ɏ%-%s%']*$") or nil == mw.ustring.find (first, "^[A-Za-zÀ-ÖØ-öø-ƿǄ-ɏ%-%s%'%.]*$") then
+	local first, suffix = first:match ('(.-),?%s*([%dJS][%drndth]+)%.?$') or first;		-- if first has something that looks like a generational suffix, get it
-		add_vanc_error ();
-		return false;															-- not a string of latin characters; Vancouver required Romanization
+	if is_set (suffix) then
+		if not is_suffix (suffix) then
+			add_vanc_error ('suffix');
+			return false;														-- not a name with an appropriate suffix
+		end
+	end
+	if nil == mw.ustring.find (last, "^[A-Za-z\195\128-\195\150\195\152-\195\182\195\184-\198\191\199\132-\201\143%-%s%']*$") or
+		nil == mw.ustring.find (first, "^[A-Za-z\195\128-\195\150\195\152-\195\182\195\184-\198\191\199\132-\201\143%-%s%'%.]*$") then
+			add_vanc_error ('non-Latin character');
+			return false;														-- not a string of latin characters; Vancouver requires Romanization
 	end;
 	return true;
@@ Line 877: / Line 917: @@
 Names in |firstn= may be separated by spaces or hyphens, or for initials, a period. See http://www.ncbi.nlm.nih.gov/books/NBK7271/box/A35062/.
-Vancouver style requires family rank designations (Jr, II, III, etc) to be rendered as Jr, 2nd, 3rd, etc.  This form is not
+Vancouver style requires family rank designations (Jr, II, III, etc) to be rendered as Jr, 2nd, 3rd, etc.  See http://www.ncbi.nlm.nih.gov/books/NBK7271/box/A35085/.
-currently supported by this code so correctly formed names like Smith JL 2nd are converted to Smith J2. See http://www.ncbi.nlm.nih.gov/books/NBK7271/box/A35085/.
+This code only accepts and understands generational suffixes in the Vancouver format because Roman numerals look like, and can be mistaken for, initials.
 This function uses ustring functions because firstname initials may be any of the unicode Latin characters accepted by is_good_vanc_name ().
@@ Line 885: / Line 925: @@
 local function reduce_to_initials(first)
-	if mw.ustring.match(first, "^%u%u$") then return first end;					-- when first contains just two upper-case letters, nothing to do
+	local name, suffix = mw.ustring.match(first, "^(%u+) ([%dJS][%drndth]+)$");
-	local initials = {}
-	local i = 0;																-- counter for number of initials
+	if not name then															-- if not initials and a suffix
-	for word in mw.ustring.gmatch(first, "[^%s%.%-]+") do						-- names separated by spaces, hyphens, or periods
+		name = mw.ustring.match(first, "^(%u+)$");								-- is it just initials?
-		table.insert(initials, mw.ustring.sub(word,1,1))						-- Vancouver format does not include full stops.
-		i = i + 1;																-- bump the counter
-		if 2 <= i then break; end												-- only two initials allowed in Vancouver system; if 2, quit
 	end
+	if name then																-- if first is initials with or without suffix
+		if 3 > name:len() then													-- if one or two initials
+			if suffix then														-- if there is a suffix
+				if is_suffix (suffix) then										-- is it legitimate?
+					return first;												-- one or two initials and a valid suffix so nothing to do
+				else
+					add_vanc_error ('suffix');									-- one or two initials with invalid suffix so error message
+					return first;												-- and return first unmolested
+				end
+			else
+				return first;													-- one or two initials without suffix; nothing to do
+			end
+		end
+	end																			-- if here then name has 3 or more uppercase letters so treat them as a word
+	local initials, names = {}, {};												-- tables to hold name parts and initials
+	local i = 1;																-- counter for number of initials
+	names = mw.text.split (first, '[%s,]+');									-- split into a table of names and possible suffix
+	while names[i] do															-- loop through the table
+		if 1 < i and names[i]:match ('[%dJS][%drndth]+%.?$') then				-- if not the first name, and looks like a suffix (may have trailing dot)
+			names[i] = names[i]:gsub ('%.', '');								-- remove terminal dot if present
+			if is_suffix (names[i]) then										-- if a legitimate suffix
+				table.insert (initials, ' ' .. names[i]);						-- add a separator space, insert at end of initials table
+				break;															-- and done because suffix must fall at the end of a name
+			end																	-- no error message if not a suffix; possibly because of Romanization
+		end
+		if 3 > i then
+			table.insert (initials, mw.ustring.sub(names[i],1,1));				-- insert the initial at end of initials table
+		end
+		i = i+1;																-- bump the counter
+	end
+--	for word in mw.ustring.gmatch(first, "[^%s%.%-]+") do						-- names separated by spaces, hyphens, or periods
+--		table.insert(initials, mw.ustring.sub(word,1,1))						-- Vancouver format does not include full stops.
+--		i = i + 1;																-- bump the counter
+--		if 2 <= i then break; end												-- only two initials allowed in Vancouver system; if 2, quit
+--	end
 	return table.concat(initials)												-- Vancouver format does not include spaces.
 end
@@ Line 1,408: / Line 1,486: @@
 may sometimes be required and because such names will often fail the is_good_vanc_name() and other format compliance
 tests, are wrapped in doubled parentheses ((corporate name)) to suppress the format tests.
+Supports generational suffixes Jr, 2nd, 3rd, 4th–6th.
 This function sets the vancouver error when a required comma is missing and when there is a space between an author's initials.
-TODO: check for names like Coon V JS (Coon JS 5th at PMID 25205766, John S. Coon V at doi:10.1093/humupd/dmu048)?
 ]]
@@ Line 1,419: / Line 1,497: @@
 	local v_name_table = {};
 	local etal = false;															-- return value set to true when we find some form of et al. vauthors parameter
-	local last, first, link, mask;
+	local last, first, link, mask, suffix;
 	local corporate = false;
 	vparam, etal = name_has_etal (vparam, etal, true);							-- find and remove variations on et al. do not categorize (do it here because et al. might have a period)
 	if vparam:find ('%[%[') or vparam:find ('%]%]')	then						-- no wikilinking vauthors names
-		add_vanc_error ();
+		add_vanc_error ('wikilink');
 	end
 	v_name_table = mw.text.split(vparam, "%s*,%s*")								-- names are separated by commas
 	for i, v_name in ipairs(v_name_table) do
-		if v_name:match ('^%(%(.+%)%)$') then									-- corporate authors are wrapped in doubled parenthese to supress vanc formatting and error detection
+		if v_name:match ('^%(%(.+%)%)$') then									-- corporate authors are wrapped in doubled parentheses to suppress vanc formatting and error detection
 			first = '';															-- set to empty string for concatenation and because it may have been set for previous author/editor
-			last = v_name:match ('^%(%((.+)%)%)$')
+			last = v_name:match ('^%(%((.+)%)%)$')								-- remove doubled parentheses
-			corporate = true;
+			corporate = true;													-- flag used in list_people()
 		elseif string.find(v_name, "%s") then
+			if v_name:find('[;%.]') then										-- look for commonly occurring punctuation characters;
+				add_vanc_error ('punctuation');
+			end
 			local lastfirstTable = {}
 			lastfirstTable = mw.text.split(v_name, "%s")
 			first = table.remove(lastfirstTable);								-- removes and returns value of last element in table which should be author initials
-			last  = table.concat(lastfirstTable, " ")							-- returns a string that is the concatenation of all other names that are not initials
+			if is_suffix (first) then											-- if a valid suffix
-			if mw.ustring.match (last, '%a+%s+%u+%s+%a+') or mw.ustring.match (v_name, ' %u %u$') then
+				suffix = first													-- save it as a suffix and
-				add_vanc_error ();												-- matches last II last; the case when a comma is missing or a space between two intiials
+				first = table.remove(lastfirstTable);							-- get what should be the initials from the table
+			end																	-- no suffix error message here because letter combination may be result of Romanization; check for digits?
+			last = table.concat(lastfirstTable, " ")							-- returns a string that is the concatenation of all other names that are not initials
+			if mw.ustring.match (last, '%a+%s+%u+%s+%a+') then
+				add_vanc_error ('missing comma');								-- matches last II last; the case when a comma is missing
+			end
+			if mw.ustring.match (v_name, ' %u %u$') then						-- this test is in the wrong place TODO: move or replace with a more appropriate test
+				add_vanc_error ('name');										-- matches a space between two initials
 			end
 		else
@@ Line 1,446: / Line 1,534: @@
 		end
-		if is_set (first) and not mw.ustring.match (first, "^%u?%u$") then		-- first shall contain one or two upper-case letters, nothing else
+		if is_set (first) then
-			add_vanc_error ();
+			if not mw.ustring.match (first, "^%u?%u$") then						-- first shall contain one or two upper-case letters, nothing else
+				add_vanc_error ('initials');									-- too many initials; mixed case initials (which may be ok Romanization); hyphenated initials
+			end
+			is_good_vanc_name (last, first);									-- check first and last before restoring the suffix which may have a non-Latin digit
+			if is_set (suffix) then
+				first = first .. ' ' .. suffix;									-- if there was a suffix concatenate with the initials
+			end
+		else
+			is_good_vanc_name (last, '');
 		end
 																				-- this from extract_names ()
 		link = select_one( args, cfg.aliases[list_name .. '-Link'], 'redundant_parameters', i );
@@ Line 1,657: / Line 1,755: @@
 Check archive.org urls to make sure they at least look like they are pointing at valid archives and not to the
-save snapshot url.  When the archive url is 'https://web.archive.org/save/' (or http://...) archive.org saves a snapshot
+save snapshot url or to calendar pages.  When the archive url is 'https://web.archive.org/save/' (or http://...)
-of the target page in the url.  That is something that Wikipedia should not allow unwitting readers to do.
+archive.org saves a snapshot of the target page in the url.  That is something that Wikipedia should not allow
+unwitting readers to do.
 When the archive.org url does not have a complete timestamp, archive.org chooses a snapshot according to its own
-algorithm or provides a 'search' result.  [[WP:ELNO]] discourages links to search results.
+algorithm or provides a calendar 'search' result.  [[WP:ELNO]] discourages links to search results.
 This function looks at the value assigned to |archive-url= and returns empty strings for |archive-url= and
 |archive-date= and an error message when:
 	|archive-url= holds an archive.org save command url
-	|archive-url= is an archive.org url that does not have a complete timestamp (YYYYMMDDhhmmss 14 digits) in the correct place
+	|archive-url= is an archive.org url that does not have a complete timestamp (YYYYMMDDhhmmss 14 digits) in the
+		correct place
 otherwise returns |archive-url= and |archive-date=
@@ Line 1,676: / Line 1,776: @@
 ('id_', 'js_', 'cs_', 'im_') but since archive.org ignores others following the same form (two letters and an underscore)
 we don't check for these specific flags but we do check the form.
+This function supports a preview mode.  When the article is rendered in preview mode, this function may return a modified
+archive url:
+	for save command errors, return undated wildcard (/*/)
+	for timestamp errors when the timestamp has a wildcard, return the url unmodified
+	for timestamp errors when the timestamp does not have a wildcard, return with timestamp limited to six digits plus wildcard (/yyyymm*/)
 ]=]
@@ Line 1,684: / Line 1,790: @@
 	if not url:match('//web%.archive%.org/') then
-		return url, date;														-- not an archive.org archive, return archiveURL and ArchiveDate
+		return url, date;														-- not an archive.org archive, return ArchiveURL and ArchiveDate
 	end
 	if url:match('//web%.archive%.org/save/') then								-- if a save command url, we don't want to allow saving of the target page
 		table.insert( z.message_tail, { set_error( 'archive_url', {'save command'}, true ) } );	-- add error message
-		return '', '';															-- return empty strings for archiveURL and ArchiveDate
+		if Preview_mode then
+			return url:gsub ('(//web%.archive%.org)/save/', '%1/*/', 1), date;	-- preview mode: modify and return ArchiveURL and ArchiveDate
+		else
+			return '', '';														-- return empty strings for archiveURL and ArchiveDate
+		end
 	end
-	if url:match('//web%.archive%.org/web/%*/') or url:match('//web%.archive%.org/%*/') then	-- wildcard with or without 'web/' path element
+--	if url:match('//web%.archive%.org/web/%*/') or url:match('//web%.archive%.org/%*/') then	-- wildcard with or without 'web/' path element
-		table.insert( z.message_tail, { set_error( 'archive_url', {'wildcard'}, true ) } );		-- add error message and
+--		table.insert( z.message_tail, { set_error( 'archive_url', {'wildcard'}, true ) } );		-- add error message and
-		return '', '';																			-- return empty strings for archiveURL and ArchiveDate
+--		return '', '';																			-- return empty strings for archiveURL and ArchiveDate
-	end
+--	end
+	path, timestamp, flag = url:match('//web%.archive%.org/([^%d]*)(%d+)([^/]*)/');		-- split out some of the url parts for evaluation
-	path, timestamp, flag = url:match('//web%.archive%.org/([^%d]*)(%d+)([^/]*)/');			-- split out some of the url parts for evaluation
+	if not is_set(timestamp) or 14 ~= timestamp:len() then								-- path and flag optional, must have 14-digit timestamp here
-	if not is_set(timestamp) or 14 ~= timestamp:len() then						-- path and flag optional, must have 14-digit timestamp here
 		err_msg = 'timestamp';
+		if '*' ~= flag then
+			url=url:gsub ('(//web%.archive%.org/[^%d]*%d?%d?%d?%d?%d?%d?)[^/]*', '%1*', 1)	-- for preview, modify ts to be yearmo* max (0-6 digits plus splat)
+		end
 	elseif is_set(path) and 'web/' ~= path then									-- older archive urls do not have the extra 'web/' path element
 		err_msg = 'path';
@@ Line 1,712: / Line 1,825: @@
 																				-- if here something not right so
 	table.insert( z.message_tail, { set_error( 'archive_url', {err_msg}, true ) } );	-- add error message and
-	return '', '';																-- return empty strings for archiveURL and ArchiveDate
+	if Preview_mode then
+		return url, date;														-- preview mode so return archiveURL and ArchiveDate
+	else
+		return '', '';															-- return empty strings for archiveURL and ArchiveDate
+	end
 end
@@ Line 1,857: / Line 1,974: @@
 	ArchiveURL, ArchiveDate = archive_url_check (A['ArchiveURL'], A['ArchiveDate'])
---	local ArchiveDate = A['ArchiveDate'];
---	local ArchiveURL = A['ArchiveURL'];
---		if ArchiveURL:match('//web%.archive%.org/save/') then				-- if an archive.org save command url, we don't want to save target page ...
---			ArchiveURL = '';													-- every time a reader clicks the link so
---			ArchiveDate = '';													-- unset these
---			table.insert( z.message_tail, { set_error( 'archive_save', {}, true ) } );	-- and add error message
---		end
 	local DeadURL = A['DeadURL']
 		if not is_valid_parameter_value (DeadURL, 'dead-url', cfg.keywords ['deadurl']) then	-- set in config.defaults to 'yes'
@@ Line 2,296: / Line 2,406: @@
 			end
 		end
+		if not is_set(error_message) then										-- error free dates only
+			local modified = false;												-- flag
+			if is_set (DF) then													-- if we need to reformat dates
+				modified = reformat_dates (date_parameters_list, DF, false);	-- reformat to DF format, use long month names if appropriate
+			end
-		if is_set(error_message) then
+			if true == date_hyphen_to_dash (date_parameters_list) then					-- convert hyphens to dashes where appropriate
-			table.insert( z.message_tail, { set_error( 'bad_date', {error_message}, true ) } );	-- add this error message
+				modified = true;
-		elseif is_set (DF) then
+				add_maint_cat ('date_format');									-- hyphens were converted so add maint category
-			if reformat_dates (date_parameters_list, DF, false) then			-- reformat to DF format, use long month names if appropriate
+			end
-				AccessDate = date_parameters_list['access-date'];				-- overwrite date holding parameters with reformatted values
+			if modified then													-- if the date_parameters_list values were modified
+				AccessDate = date_parameters_list['access-date'];				-- overwrite date holding parameters with modified values
 				ArchiveDate = date_parameters_list['archive-date'];
 				Date = date_parameters_list['date'];
@@ Line 2,308: / Line 2,426: @@
 				PublicationDate = date_parameters_list['publication-date'];
 			end
+		else
+			table.insert( z.message_tail, { set_error( 'bad_date', {error_message}, true ) } );	-- add this error message
 		end
+--		if is_set(error_message) then
+--			table.insert( z.message_tail, { set_error( 'bad_date', {error_message}, true ) } );	-- add this error message
+--		elseif is_set (DF) then
+--			if reformat_dates (date_parameters_list, DF, false) then			-- reformat to DF format, use long month names if appropriate
+--				AccessDate = date_parameters_list['access-date'];				-- overwrite date holding parameters with reformatted values
+--				ArchiveDate = date_parameters_list['archive-date'];
+--				Date = date_parameters_list['date'];
+--				DoiBroken = date_parameters_list['doi-broken-date'];
+--				LayDate = date_parameters_list['lay-date'];
+--				PublicationDate = date_parameters_list['publication-date'];
+--			end
+--		end
 	end	-- end of do
@@ Line 2,688: / Line 2,821: @@
 	if is_set (Translators) then
-		Others = sepc .. ' Translated by ' .. Translators .. Others;
+--		Others = sepc .. ' Translated by ' .. Translators .. Others;
+		Others = sepc .. ' ' .. wrap_msg ('translated', Translators, use_lowercase) .. Others;
 	end
@@ Line 2,993: / Line 3,127: @@
 		text = safe_join( {Editors, Date, Chapter, Place, tcommon, pgtext, idcommon}, sepc );
 	else
-		if config.CitationClass=="journal" and is_set(Periodical) then
+		if in_array(config.CitationClass, {"journal","citation"}) and is_set(Periodical) then
 			text = safe_join( {Chapter, Place, tcommon, pgtext, Date, idcommon}, sepc );
 		else
@@ Line 3,030: / Line 3,164: @@
 				namelist = e;
 			end
-			id = anchor_id (namelist, year);									-- go make the CITEREF anchor
+			if #namelist > 0 then												-- if there are names in namelist
+				id = anchor_id (namelist, year);								-- go make the CITEREF anchor
+			else
+				id = '';														-- unset
+			end
 		end
 		options.id = id;
@@ Line 3,121: / Line 3,259: @@
 	year_date_check = validation.year_date_check;
 	reformat_dates = validation.reformat_dates;
+	date_hyphen_to_dash = validation.date_hyphen_to_dash;
 	is_set = utilities.is_set;													-- imported functions from Module:Citation/CS1/Utilities
@@ Line 3,143: / Line 3,282: @@
 	COinS = metadata.COinS;
+	Preview_mode = frame:preprocess('{{REVISIONID}}');							-- use magic word to get revision id
+	if is_set (Preview_mode) then												-- if there is a value then this is not a preview
+		Preview_mode = false;
+	else
+		Preview_mode = true;													-- no value (nil or empty string) so this is a preview
+	end
 	local args = {};

Module:Citation/CS1: Difference between revisions

Module:Citation/CS1 (edit)

Revision as of 17:42, 4 June 2016