Module:Citation/CS1/Date validation: Difference between revisions
sync from sandbox;
m (24 revisions imported from templatewiki:Module:Citation/CS1/Date_validation) |
(sync from sandbox;) |
||
Line 1: | Line 1: | ||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
Line 7: | Line 4: | ||
local is_set, in_array; -- imported functions from selected Module:Citation/CS1/Utilities | local is_set, in_array; -- imported functions from selected Module:Citation/CS1/Utilities | ||
local cfg; -- table of tables imported from | local cfg; -- table of tables imported from selected Module:Citation/CS1/Configuration | ||
--[=[-------------------------< I S _ V A L I D _ A C C E S S D A T E >---------------------------------------- | --[=[-------------------------< I S _ V A L I D _ A C C E S S D A T E >---------------------------------------- | ||
Line 96: | Line 94: | ||
37-39 = Quadrimester 1, Quadrimester 2, Quadrimester 3 (4 months each) | 37-39 = Quadrimester 1, Quadrimester 2, Quadrimester 3 (4 months each) | ||
40-41 = Semestral 1, Semestral-2 (6 months each) | 40-41 = Semestral 1, Semestral-2 (6 months each) | ||
]] | ]] | ||
Line 105: | Line 102: | ||
0; -- not a recognized season name | 0; -- not a recognized season name | ||
end | end | ||
--[[--------------------------< I S _ P R O P E R _ N A M E >-------------------------------------------------- | --[[--------------------------< I S _ P R O P E R _ N A M E >-------------------------------------------------- | ||
Line 117: | Line 115: | ||
0; -- not a recognized named date | 0; -- not a recognized named date | ||
end | end | ||
--[[--------------------------< I S _ V A L I D _ M O N T H _ O R _ S E A S O N >------------------------------ | --[[--------------------------< I S _ V A L I D _ M O N T H _ O R _ S E A S O N >------------------------------ | ||
Line 125: | Line 124: | ||
local function is_valid_month_or_season (month_season) | local function is_valid_month_or_season (month_season) | ||
if 0 == get_month_number (month_season) then | if 0 == get_month_number (month_season) then -- if month text isn't one of the twelve months, might be a season | ||
if 0 == get_season_number (month_season) then -- not a month, is it a season? | if 0 == get_season_number (month_season) then -- not a month, is it a season? | ||
return false; | return false; -- return false not a month or one of the five seasons | ||
end | end | ||
end | end | ||
return true; | return true; | ||
end | end | ||
--[[--------------------------< I S _ V A L I D _ Y E A R >---------------------------------------------------- | --[[--------------------------< I S _ V A L I D _ Y E A R >---------------------------------------------------- | ||
Line 141: | Line 141: | ||
local function is_valid_year(year) | local function is_valid_year(year) | ||
if not is_set(year_limit) then | if not is_set(year_limit) then | ||
year_limit = tonumber(os.date("%Y"))+1; | year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once | ||
end | end | ||
return tonumber(year) <= year_limit; | return tonumber(year) <= year_limit; -- false if year is in the future more than one year | ||
end | end | ||
--[[--------------------------< I S _ V A L I D _ D A T E >---------------------------------------------------- | --[[--------------------------< I S _ V A L I D _ D A T E >---------------------------------------------------- | ||
Line 168: | Line 169: | ||
month_length = 28; -- then 28 days unless | month_length = 28; -- then 28 days unless | ||
if 1582 > tonumber(year) then -- Julian calendar | if 1582 > tonumber(year) then -- Julian calendar | ||
if 0==(year%4) then | if 0==(year%4) then -- is a leap year? | ||
month_length = 29; | month_length = 29; -- if leap year then 29 days in February | ||
end | end | ||
else -- Gregorian calendar | else -- Gregorian calendar | ||
Line 185: | Line 186: | ||
return true; | return true; | ||
end | end | ||
--[[--------------------------< I S _ V A L I D _ M O N T H _ R A N G E _ S T Y L E >-------------------------- | --[[--------------------------< I S _ V A L I D _ M O N T H _ R A N G E _ S T Y L E >-------------------------- | ||
Line 338: | Line 340: | ||
return; | return; | ||
end | end | ||
--[[--------------------------< P A T T E R N S >-------------------------------------------------------------- | |||
this is the list of patterns for date formats that this module recognizes. Approximately the first half of these | |||
patterns represent formats that might be reformatted into another format. Those that might be reformatted have | |||
'indicator' letters that identify the content of the matching capture: 'd' (day), 'm' (month), 'a' (anchor year), | |||
'y' (year); second day, month, year have a '2' suffix. | |||
These patterns are used for both date validation and for reformatting. This table should not be moved to ~/Configuration | |||
because changes to this table require changes to check_date() and to reformatter() and reformat_dates() | |||
]] | |||
local patterns = { | |||
-- year-initial numerical year-month-day | |||
['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, | |||
-- month-initial: month day, year | |||
['Mdy'] = {'^(%D-) +([1-9]%d?), +((%d%d%d%d?)%a?)$', 'm', 'd', 'a', 'y'}, | |||
-- month-initial day range: month day–day, year; days are separated by endash | |||
['Md-dy'] = {'^(%D-) +([1-9]%d?)[%-–]([1-9]%d?), +((%d%d%d%d)%a?)$', 'm', 'd', 'd2', 'a', 'y'}, | |||
-- day-initial: day month year | |||
['dMy'] = {'^([1-9]%d?) *(%D-) +((%d%d%d%d?)%a?)$', 'd', 'm', 'a', 'y'}, | |||
-- year-initial: year month day; day: 1 or 2 digits, leading zero allowed; not supported at en.wiki | |||
-- ['yMd'] = {'^((%d%d%d%d?)%a?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'}, | |||
-- day-range-initial: day–day month year; days are separated by endash | |||
['d-dMy'] = {'^([1-9]%d?)[%-–]([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'd2', 'm', 'a', 'y'}, | |||
-- day initial month-day-range: day month - day month year; uses spaced endash | |||
['dM-dMy'] = {'^([1-9]%d?) +(%D-) +[%-–] +([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'm', 'd2', 'm2', 'a', 'y'}, | |||
-- month initial month-day-range: month day – month day, year; uses spaced endash | |||
['Md-Mdy'] = {'^(%D-) +([1-9]%d?) +[%-–] +(%D-) +([1-9]%d?), +((%d%d%d%d)%a?)$','m', 'd', 'm2', 'd2', 'a', 'y'}, | |||
-- day initial month-day-year-range: day month year - day month year; uses spaced endash | |||
['dMy-dMy'] = {'^([1-9]%d?) +(%D-) +(%d%d%d%d) +[%-–] +([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'm', 'y', 'd2', 'm2', 'a', 'y2'}, | |||
-- month initial month-day-year-range: month day, year – month day, year; uses spaced endash | |||
['Mdy-Mdy'] = {'^(%D-) +([1-9]%d?), +(%d%d%d%d) +[%-–] +(%D-) +([1-9]%d?), +((%d%d%d%d)%a?)$', 'm', 'd', 'y', 'm2', 'd2', 'a', 'y2'}, | |||
-- these date formats cannot be converted, per se, but month name can be rendered short or long | |||
-- month/season year - month/season year; separated by spaced endash | |||
['My-My'] = {'^(%D-) +(%d%d%d%d) +[%-–] +(%D-) +((%d%d%d%d)%a?)$', 'm', 'y', 'm2', 'a', 'y2'}, | |||
-- month/season range year; months separated by endash | |||
['M-My'] = {'^(%D-)[%-–](%D-) +((%d%d%d%d)%a?)$', 'm', 'm2', 'a', 'y'}, | |||
-- month/season year or proper-name year | |||
['My'] = {'^([^%d–]-) +((%d%d%d%d)%a?)$', 'm', 'a', 'y'}, -- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't | |||
-- these date formats cannot be converted | |||
['Sy4-y2'] = {'^(%D-) +((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash | |||
['Sy-y'] = {'^(%D-) +(%d%d%d%d)[%-–]((%d%d%d%d)%a?)$'}, -- special case Winter/Summer year-year; year separated with unspaced endash | |||
['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | |||
['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash | |||
['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY | |||
} | |||
Line 372: | Line 424: | ||
local coins_date; | local coins_date; | ||
if date_string:match( | if date_string:match (patterns['ymd'][1]) then -- year-initial numerical year month day format | ||
year, month, day= | year, month, day=date_string:match (patterns['ymd'][1]); | ||
if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or 0 == tonumber(day) then return false; end -- month or day number not valid or not Gregorian calendar | if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or 0 == tonumber(day) then return false; end -- month or day number not valid or not Gregorian calendar | ||
anchor_year = year; | anchor_year = year; | ||
elseif mw.ustring.match(date_string, | |||
month, day, anchor_year, year=mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year | ||
month, day, anchor_year, year=mw.ustring.match(date_string, patterns['Mdy'][1]); | |||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Md-dy'][1]) then -- month-initial day range: month day–day, year; days are separated by endash | ||
month, day, day2, anchor_year, year=mw.ustring.match(date_string, | month, day, day2, anchor_year, year=mw.ustring.match(date_string, patterns['Md-dy'][1]); | ||
if tonumber(day) >= tonumber(day2) then return false; end | if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; | ||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
month2=month; | month2=month; -- for metadata | ||
year2=year; | year2=year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['dMy'][1]) then -- day-initial: day month year | ||
day, month, anchor_year, year=mw.ustring.match(date_string, | day, month, anchor_year, year=mw.ustring.match(date_string, patterns['dMy'][1]); | ||
month = get_month_number (month); | |||
if 0 == month then return false; end -- return false if month text isn't one of the twelve months | |||
--[[ NOT supported at en.wiki | |||
elseif mw.ustring.match(date_string, patterns['yMd'][1]) then -- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed | |||
anchor_year, year, month, day=mw.ustring.match(date_string, patterns['yMd'][1]); | |||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
-- end NOT supported at en.wiki ]] | |||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['d-dMy'][1]) then -- day-range-initial: day–day month year; days are separated by endash | ||
day, day2, month, anchor_year, year=mw.ustring.match(date_string, | day, day2, month, anchor_year, year=mw.ustring.match(date_string, patterns['d-dMy'][1]); | ||
if tonumber(day) >= tonumber(day2) then return false; end | if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; | ||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
month2=month; | month2=month; -- for metadata | ||
year2=year; | year2=year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['dM-dMy'][1]) then -- day initial month-day-range: day month - day month year; uses spaced endash | ||
day, month, day2, month2, anchor_year, year=mw.ustring.match(date_string, | day, month, day2, month2, anchor_year, year=mw.ustring.match(date_string, patterns['dM-dMy'][1]); | ||
if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later; | if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later; | ||
month = get_month_number (month); | month = get_month_number (month); -- for metadata | ||
month2 = get_month_number (month2); | month2 = get_month_number (month2); | ||
year2=year; | year2=year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Md-Mdy'][1]) then -- month initial month-day-range: month day – month day, year; uses spaced endash | ||
month, day, month2, day2, anchor_year, year=mw.ustring.match(date_string, | month, day, month2, day2, anchor_year, year=mw.ustring.match(date_string, patterns['Md-Mdy'][1]); | ||
if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end | if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end | ||
month = get_month_number (month); | month = get_month_number (month); -- for metadata | ||
month2 = get_month_number (month2); | month2 = get_month_number (month2); | ||
year2=year; | year2=year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['dMy-dMy'][1]) then -- day initial month-day-year-range: day month year - day month year; uses spaced endash | ||
day, month, year, day2, month2, anchor_year, year2=mw.ustring.match(date_string, | day, month, year, day2, month2, anchor_year, year2=mw.ustring.match(date_string, patterns['dMy-dMy'][1]); | ||
if tonumber(year2) <= tonumber(year) then return false; end | if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | ||
month = get_month_number (month); | month = get_month_number (month); -- for metadata | ||
month2 = get_month_number (month2); | month2 = get_month_number (month2); | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Mdy-Mdy'][1]) then -- month initial month-day-year-range: month day, year – month day, year; uses spaced endash | ||
month, day, year, month2, day2, anchor_year, year2=mw.ustring.match(date_string, | month, day, year, month2, day2, anchor_year, year2=mw.ustring.match(date_string, patterns['Mdy-Mdy'][1]); | ||
if tonumber(year2) <= tonumber(year) then return false; end | if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | ||
month = get_month_number (month); | month = get_month_number (month); -- for metadata | ||
month2 = get_month_number (month2); | month2 = get_month_number (month2); | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Sy4-y2'][1]) then -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash | ||
local century; | local century; | ||
month, year, century, anchor_year, year2=mw.ustring.match(date_string, | month, year, century, anchor_year, year2=mw.ustring.match(date_string, patterns['Sy4-y2'][1]); | ||
if 'Winter' ~= month and 'Summer' ~= month then return false end; | if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer | ||
anchor_year=year..'–'..anchor_year; | anchor_year=year..'–'..anchor_year; -- assemble anchor_year from both years | ||
year2 = century..year2; | year2 = century..year2; -- add the century to year2 for comparisons | ||
if 1 ~= tonumber(year2) - tonumber(year) then return false; end | if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) then return false; end | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
month = get_season_number (month); | month = get_season_number (month); | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Sy-y'][1]) then -- special case Winter/Summer year-year; year separated with unspaced endash | ||
month, year, anchor_year, year2=mw.ustring.match(date_string, | month, year, anchor_year, year2=mw.ustring.match(date_string, patterns['Sy-y'][1]); | ||
if 'Winter' ~= month and 'Summer' ~= month then return false end; | if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer | ||
anchor_year=year..'–'..anchor_year; | anchor_year=year..'–'..anchor_year; -- assemble anchor_year from both years | ||
if 1 ~= tonumber(year2) - tonumber(year) then return false; end | if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) then return false; end | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
month = get_season_number (month); | month = get_season_number (month); -- for metadata | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash | ||
month, year, month2, anchor_year, year2=mw.ustring.match(date_string, | month, year, month2, anchor_year, year2=mw.ustring.match(date_string, patterns['My-My'][1]); | ||
anchor_year=year..'–'..anchor_year; | anchor_year=year..'–'..anchor_year; -- assemble anchor_year from both years | ||
if tonumber(year) >= tonumber(year2) then return false; end | if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | ||
if not is_valid_year(year2) then return false; end | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
if 0 ~= get_month_number(month) and 0 ~= get_month_number(month2) and is_valid_month_range_style(month, month2) then -- both must be month year, same month style | if 0 ~= get_month_number(month) and 0 ~= get_month_number(month2) and is_valid_month_range_style(month, month2) then -- both must be month year, same month style | ||
month = get_month_number(month); | month = get_month_number(month); | ||
month2 = get_month_number(month2); | month2 = get_month_number(month2); | ||
elseif 0 ~= get_season_number(month) and 0 ~= get_season_number(month2) then | elseif 0 ~= get_season_number(month) and 0 ~= get_season_number(month2) then -- both must be or season year, not mixed | ||
month = get_season_number(month); | month = get_season_number(month); | ||
month2 = get_season_number(month2); | month2 = get_season_number(month2); | ||
Line 463: | Line 523: | ||
end | end | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['M-My'][1]) then -- month/season range year; months separated by endash | ||
month, month2, anchor_year, year=mw.ustring.match(date_string, | month, month2, anchor_year, year=mw.ustring.match(date_string, patterns['M-My'][1]); | ||
if (not is_valid_month_season_range(month, month2)) or (not is_valid_year(year)) then return false; end | if (not is_valid_month_season_range(month, month2)) or (not is_valid_year(year)) then return false; end | ||
if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season | if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season | ||
Line 475: | Line 535: | ||
year2=year; | year2=year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['My'][1]) then -- month/season year or proper-name year | ||
month, anchor_year, year=mw.ustring.match(date_string, | month, anchor_year, year=mw.ustring.match(date_string, patterns['My'][1]); | ||
if not is_valid_year(year) then return false; end | if not is_valid_year(year) then return false; end | ||
if not is_valid_month_or_season (month) and 0 == is_proper_name (month) then return false; end | if not is_valid_month_or_season (month) and 0 == is_proper_name (month) then return false; end | ||
Line 487: | Line 547: | ||
end | end | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ||
year, anchor_year, year2=mw.ustring.match(date_string, | year, anchor_year, year2=mw.ustring.match(date_string, patterns['y-y'][1]); | ||
anchor_year=year..'–'..anchor_year; -- assemble anchor year from both years | anchor_year=year..'–'..anchor_year; -- assemble anchor year from both years | ||
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['y4-y2'][1]) then -- Year range: YYYY–YY; separated by unspaced endash | ||
local century; | local century; | ||
year, century, anchor_year, year2=mw.ustring.match(date_string, | year, century, anchor_year, year2=mw.ustring.match(date_string, patterns['y4-y2'][1]); | ||
anchor_year=year..'–'..anchor_year; -- assemble anchor year from both years | anchor_year=year..'–'..anchor_year; -- assemble anchor year from both years | ||
if 13 > tonumber(year2) then return false; end -- don't allow 2003-05 which might be May 2003 | if 13 > tonumber(year2) then return false; end -- don't allow 2003-05 which might be May 2003 | ||
Line 502: | Line 562: | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
elseif | elseif mw.ustring.match (date_string, patterns['y'][1]) then -- year; here accept either YYY or YYYY | ||
anchor_year, year= | anchor_year, year=mw.ustring.match (date_string, patterns['y'][1]); | ||
if false == is_valid_year(year) then | if false == is_valid_year(year) then | ||
return false; | return false; | ||
Line 569: | Line 629: | ||
for k, v in pairs(date_parameters_list) do -- for each date-holding parameter in the list | for k, v in pairs(date_parameters_list) do -- for each date-holding parameter in the list | ||
if is_set(v.val) then | if is_set(v.val) then -- if the parameter has a value | ||
v.val = mw.ustring.gsub (v.val, '%d', cfg.date_names.local_digits); | v.val = mw.ustring.gsub (v.val, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9 | ||
if v.val:match("^c%. [1-9]%d%d%d?%a?$") then | if v.val:match("^c%. [1-9]%d%d%d?%a?$") then -- special case for c. year or with or without CITEREF disambiguator - only |date= and |year= | ||
local year = v.val:match("c%. ([1-9]%d%d%d?)%a?"); | local year = v.val:match("c%. ([1-9]%d%d%d?)%a?"); -- get the year portion so it can be tested | ||
if 'date'==k then | if 'date'==k then | ||
anchor_year, COinS_date = v.val:match("((c%. [1-9]%d%d%d?)%a?)"); -- anchor year and COinS_date only from |date= parameter | anchor_year, COinS_date = v.val:match("((c%. [1-9]%d%d%d?)%a?)"); -- anchor year and COinS_date only from |date= parameter | ||
Line 580: | Line 640: | ||
end | end | ||
elseif 'date'==k then -- if the parameter is |date= | elseif 'date'==k then -- if the parameter is |date= | ||
if v.val:match("^n%.d%.%a?$") then | if v.val:match("^n%.d%.%a?$") then -- if |date=n.d. with or without a CITEREF disambiguator | ||
good_date, anchor_year, COinS_date = true, v.val:match("((n%.d%.)%a?)"); --"n.d."; no error when date parameter is set to no date | good_date, anchor_year, COinS_date = true, v.val:match("((n%.d%.)%a?)"); --"n.d."; no error when date parameter is set to no date | ||
elseif v.val:match("^nd%a?$") then | elseif v.val:match("^nd%a?$") then -- if |date=nd with or without a CITEREF disambiguator | ||
good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date | good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date | ||
else | else | ||
Line 588: | Line 648: | ||
end | end | ||
elseif 'year'==k then -- if the parameter is |year= it should hold only a year value | elseif 'year'==k then -- if the parameter is |year= it should hold only a year value | ||
if v.val:match("^[1-9]%d%d%d?%a?$") then | if v.val:match("^[1-9]%d%d%d?%a?$") then -- if |year= 3 or 4 digits only with or without a CITEREF disambiguator | ||
good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)"); | good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)"); | ||
end | end | ||
elseif 'access-date'==k then -- if the parameter is |date= | elseif 'access-date'==k then -- if the parameter is |date= | ||
good_date = check_date (v.val, nil, true); | good_date = check_date (v.val, nil, true); -- go test the date; nil is a placeholder; true is the test_accessdate flag | ||
elseif 'embargo'==k then -- if the parameter is |embargo= | elseif 'embargo'==k then -- if the parameter is |embargo= | ||
good_date = check_date (v.val); | good_date = check_date (v.val); -- go test the date | ||
if true == good_date then -- if the date is a valid date | if true == good_date then -- if the date is a valid date | ||
good_date, embargo_date = is_valid_embargo_date (v.val); | good_date, embargo_date = is_valid_embargo_date (v.val); -- is |embargo= date a single dmy, mdy, or ymd formatted date? yes:returns embargo; no: returns 9999 | ||
end | end | ||
else -- any other date-holding parameter | else -- any other date-holding parameter | ||
good_date = check_date (v.val); | good_date = check_date (v.val); -- go test the date | ||
end | end | ||
if false==good_date then -- assemble one error message so we don't add the tracking category multiple times | if false==good_date then -- assemble one error message so we don't add the tracking category multiple times | ||
Line 609: | Line 669: | ||
end | end | ||
end | end | ||
return anchor_year, embargo_date, error_message; | return anchor_year, embargo_date, error_message; -- and done | ||
end | end | ||
Line 645: | Line 705: | ||
end | end | ||
elseif mw.ustring.match(date_string, "%d%d%d%d[%-–]%d%d") then | elseif mw.ustring.match(date_string, "%d%d%d%d[%-–]%d%d") then -- YYYY-YY date ranges | ||
local century; | local century; | ||
date1, century, date2 = mw.ustring.match(date_string, "((%d%d)%d%d)[%-–]+(%d%d)"); | date1, century, date2 = mw.ustring.match(date_string, "((%d%d)%d%d)[%-–]+(%d%d)"); | ||
Line 665: | Line 725: | ||
--[[-------------------------< R E F O R M A T | --[[--------------------------< R E F O R M A T T E R >-------------------------------------------------------- | ||
reformat 'date' into new format specified by format_param if pattern_idx (the current format of 'date') can be | |||
reformatted. Does the grunt work for reformat_dates(). | |||
The table re_formats maps pattern_idx (current format) and format_param (desired format) to a table that holds: | |||
format string used by string.format() | |||
identifier letters ('d', 'm', 'y', 'd2', 'm2', 'y2') that serve as indexes into a table t{} that holds captures | |||
from mw.ustring.match() for the various date parts specified by patterns[pattern_idx][1] | |||
Items in patterns{} have the general form: | |||
['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, where: | |||
['ymd'] is pattern_idx | |||
patterns['ymd'][1] is the match pattern with captures for mw.ustring.match() | |||
patterns['ymd'][2] is an indicator letter identifying the content of the first capture | |||
patterns['ymd'][3] ... the second capture etc | |||
When a pattern matches a date, the captures are loaded into table t{} in capture order using the identifier | |||
characters as indexes into t{}. For the above, a ymd date is in t{} as: | |||
t.y = first capture (year), t.m = second capture (month), t.d = third capture (day) | |||
To reformat, this function is called with the pattern_idx that matches the current format of the date and with | |||
format_param set to the desired format. This function loads table t{} as described and then calls string.format() | |||
with the format string specified by re_formats[pattern_idx][format_param][1] using values taken from t{} according | |||
to the capture identifier letters specified by re_formats[pattern_idx][format_param][n] where n is 2.. | |||
]] | ]] | ||
--[[
Reformatting specifications read by reformatter().  The table is indexed first by the current format of a
date (a pattern_idx) and then by the desired format.  Each specification is a sequence: item 1 is the format
string handed to string.format(); the remaining items are the identifier letters ('d', 'm', 'y', 'd2', 'm2',
'y2') that select, in order, the date parts substituted into that format string.

Formats that have no day ('My-My', 'M-My', 'My') cannot be dmy or mdy so they carry a single 'any' entry.
There is no 'yMd' source or target entry: yMd is not supported at en.wiki.
]]
local re_formats = {};
do
	-- single dates
	local to_dmy = {'%s %s %s', 'd', 'm', 'y'};
	local to_mdy = {'%s %s, %s', 'm', 'd', 'y'};
	local to_ymd = {'%s-%s-%s', 'y', 'm', 'd'};

	-- day ranges within one month
	local range_d_dmy = {'%s–%s %s %s', 'd', 'd2', 'm', 'y'};				-- d-dMy
	local range_d_mdy = {'%s %s–%s, %s', 'm', 'd', 'd2', 'y'};				-- Md-dy

	-- day-and-month ranges within one year
	local range_dm_dmy = {'%s %s – %s %s %s', 'd', 'm', 'd2', 'm2', 'y'};		-- dM-dMy
	local range_dm_mdy = {'%s %s – %s %s, %s', 'm', 'd', 'm2', 'd2', 'y'};	-- Md-Mdy

	-- full date ranges
	local range_dmy_dmy = {'%s %s %s – %s %s %s', 'd', 'm', 'y', 'd2', 'm2', 'y2'};		-- dMy-dMy
	local range_mdy_mdy = {'%s %s, %s – %s %s, %s', 'm', 'd', 'y', 'm2', 'd2', 'y2'};	-- Mdy-Mdy

	re_formats['ymd'] = {									-- date format is ymd; reformat to:
		['mdy'] = to_mdy,									-- |df=mdy
		['dmy'] = to_dmy,									-- |df=dmy
		};
	re_formats['Mdy'] = {									-- date format is Mdy; reformat to:
		['mdy'] = to_mdy,									-- for long/short reformatting
		['dmy'] = to_dmy,									-- |df=dmy
		['ymd'] = to_ymd,									-- |df=ymd
		};
	re_formats['dMy'] = {									-- date format is dMy; reformat to:
		['dmy'] = to_dmy,									-- for long/short reformatting
		['mdy'] = to_mdy,									-- |df=mdy
		['ymd'] = to_ymd,									-- |df=ymd
		};
	re_formats['Md-dy'] = {									-- date format is Md-dy; reformat to:
		['mdy'] = range_d_mdy,								-- for long/short reformatting
		['dmy'] = range_d_dmy,								-- |df=dmy -> d-dMy
		};
	re_formats['d-dMy'] = {									-- date format is d-dMy; reformat to:
		['dmy'] = range_d_dmy,								-- for long/short reformatting
		['mdy'] = range_d_mdy,								-- |df=mdy -> Md-dy
		};
	re_formats['dM-dMy'] = {								-- date format is dM-dMy; reformat to:
		['dmy'] = range_dm_dmy,								-- for long/short reformatting
		['mdy'] = range_dm_mdy,								-- |df=mdy -> Md-Mdy
		};
	re_formats['Md-Mdy'] = {								-- date format is Md-Mdy; reformat to:
		['mdy'] = range_dm_mdy,								-- for long/short reformatting
		['dmy'] = range_dm_dmy,								-- |df=dmy -> dM-dMy
		};
	re_formats['dMy-dMy'] = {								-- date format is dMy-dMy; reformat to:
		['dmy'] = range_dmy_dmy,							-- for long/short reformatting
		['mdy'] = range_mdy_mdy,							-- |df=mdy -> Mdy-Mdy
		};
	re_formats['Mdy-Mdy'] = {								-- date format is Mdy-Mdy; reformat to:
		['mdy'] = range_mdy_mdy,							-- for long/short reformatting
		['dmy'] = range_dmy_dmy,							-- |df=dmy -> dMy-dMy
		};

	-- the remaining formats have no day so are dmy/mdy agnostic; these exist for long/short reformatting
	re_formats['My-My'] = {
		['any'] = {'%s %s – %s %s', 'm', 'y', 'm2', 'y2'},
		};
	re_formats['M-My'] = {
		['any'] = {'%s–%s %s', 'm', 'm2', 'y'},
		};
	re_formats['My'] = {
		['any'] = {'%s %s', 'm', 'y'},
		};
end
--[[
Reformat a single date string; does the grunt work for reformat_dates().

	date			the date string to reformat
	pattern_idx		key into patterns{} and re_formats{} naming the current format of date
	format_param	the desired format ('dmy', 'mdy', 'ymd'); forced to 'any' for My, M-My, and My-My dates
	mon_len			's' selects abbreviated month names; any other non-nil value selects long month names

Returns the reformatted date string, or nil when the date cannot be (or need not be) reformatted.  A missing
re_formats{} entry for the pattern_idx / format_param pair (for example 'yMd', which is not supported at en.wiki)
is treated as 'not reformattable' rather than allowed to raise a script error.
]]
local function reformatter (date, pattern_idx, format_param, mon_len)
	if not in_array (pattern_idx, {'ymd', 'Mdy', 'Md-dy', 'dMy', 'yMd', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy', 'My-My', 'M-My', 'My'}) then
		return;																	-- not in this set of date format patterns then not a reformattable date
	end

	if 'ymd' == format_param and in_array (pattern_idx, {'ymd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy', 'My-My', 'M-My', 'My'}) then
		return;																	-- ymd date ranges not supported at en.wiki; no point in reformatting ymd to ymd
	end

	if in_array (pattern_idx, {'My', 'M-My', 'My-My'}) then						-- these are not dmy/mdy so can't be 'reformatted' into either
		format_param = 'any';													-- so format-agnostic
	end

	if 'yMd' == format_param then												-- not supported at en.wiki
		return;																	-- not a reformattable date
	end

	local pattern = patterns[pattern_idx];										-- match pattern and capture identifier letters for this date format
	local spec = re_formats[pattern_idx] and re_formats[pattern_idx][format_param];	-- format string and identifier letters for the new format
	if not pattern or not spec then
		return;																	-- no pattern or no reformatting specification for this combination; not a reformattable date
	end

	local c1, c2, c3, c4, c5, c6, c7;											-- these hold the captures specified in pattern[1]
	c1, c2, c3, c4, c5, c6, c7 = mw.ustring.match (date, pattern[1]);			-- get the captures
	if not c1 then
		return;																	-- date does not match the pattern; nothing to reformat
	end

	local t = {																	-- table that holds k/v pairs of date parts from the captures and pattern[2..]
		[pattern[2]] = c1;														-- at minimum there is always one capture with a matching indicator letter
		[pattern[3] or 'x'] = c2;												-- patterns can have a variable number of captures; each capture requires an indicator letter;
		[pattern[4] or 'x'] = c3;												-- where there is no capture, there is no indicator letter so n in pattern[n] will be nil;
		[pattern[5] or 'x'] = c4;												-- the 'x' here spoofs an indicator letter to prevent 'table index is nil' error
		[pattern[6] or 'x'] = c5;
		[pattern[7] or 'x'] = c6;
		[pattern[8] or 'x'] = c7;
		};

	if tonumber(t.m) then														-- if raw month is a number (converting from ymd)
		if 's' == mon_len then													-- if we are to use abbreviated month names
			t.m = cfg.date_names['inv_local_s'][tonumber(t.m)];					-- convert it to a month name
		else
			t.m = cfg.date_names['inv_local_l'][tonumber(t.m)];					-- convert it to a month name
		end
		t.d = t.d:gsub ('0(%d)', '%1');											-- strip leading '0' from day if present
	elseif 'ymd' == format_param then											-- when converting to ymd
		local year = tonumber(t.y);
		if not year or 1582 > year then											-- ymd format dates not allowed before 1582; a non-numeric year can't be tested so don't reformat
			return;
		end
		t.m = string.format ('%02d', get_month_number (t.m));					-- make sure that month and day are two digits
		t.d = string.format ('%02d', t.d);
	elseif mon_len then															-- if mon_len is set to either 'short' or 'long'
		for _, mon in ipairs ({'m', 'm2'}) do									-- because there can be two month names, check both 
			if t[mon] then
				t[mon] = get_month_number (t[mon]);								-- get the month number for this month (is length agnostic)
				if 0 == t[mon] then return; end									-- seasons and named dates can't be converted
				t[mon] = (('s' == mon_len) and cfg.date_names['inv_local_s'][t[mon]]) or cfg.date_names['inv_local_l'][t[mon]];	-- fetch month name according to length
			end
		end
	end

	local new_date = string.format (spec[1],									-- format string
		t[spec[2]],																-- named captures from t{}
		t[spec[3]],
		t[spec[4]],
		t[spec[5]],																-- where spec[n] is nil, t[nil] reads as nil; string.format() ignores surplus arguments
		t[spec[6]],
		t[spec[7]],
		t[spec[8]]
		);

	return new_date;
end
Line 727: | Line 882: | ||
--[[-------------------------< R E F O R M A T _ D A T E S >-------------------------------------------------- | --[[-------------------------< R E F O R M A T _ D A T E S >-------------------------------------------------- | ||
Reformats existing dates into the format specified by format | Reformats existing dates into the format specified by format. | ||
format is one of several keywords: dmy, dmy-all, mdy, mdy-all, ymd, ymd-all. The all version includes access- and | format is one of several manual keywords: dmy, dmy-all, mdy, mdy-all, ymd, ymd-all. The -all version includes | ||
archive-dates; otherwise these dates are not reformatted | access- and archive-dates; otherwise these dates are not reformatted. | ||
This function allows automatic date formatting. In ~/Configuration, the article source is searched for one of | |||
the {{use xxx dates}} templates. If found, xxx becomes the global date format as xxx-all. If |cs1-dates= in | |||
{{use xxx dates}} has legitimate value then that value determines how cs1|2 dates will be rendered. Legitimate | |||
values for |cs1-dates= are: | |||
l - all dates are rendered with long month names | |||
ls - publication dates use long month names; access-/archive-dates use abbreviated month names | |||
ly - publication dates use long month names; access-/archive-dates rendered in ymd format | |||
s - all dates are rendered with abbreviated (short) month names | |||
sy - publication dates use abbreviated month names; access-/archive-dates rendered in ymd format | |||
y - all dates are rendered in ymd format | |||
The format argument for automatic date formatting will be the format specified by {{use xxx dates}} with the | |||
value supplied by |cs1-dates= so one of: xxx-l, xxx-ls, xxx-ly, xxx-s, xxx-sy, xxx-y, or simply xxx (|cs1-dates= | |||
empty, omitted, or invalid) where xxx shall be either of dmy or mdy. | |||
dates are extracted from date_parameters_list, reformatted (if appropriate), and then written back into the | |||
list in the new format. Dates in date_parameters_list are presumed here to be valid (no errors). This function | |||
returns true when a date has been reformatted, false else. Actual reformatting is done by reformatter(). | |||
]] | ]] | ||
local function reformat_dates (date_parameters_list, format | local function reformat_dates (date_parameters_list, format) | ||
local all = false; -- set to false to skip access- and archive-dates | local all = false; -- set to false to skip access- and archive-dates | ||
local len_p = 'l'; -- default publication date length shall be long | |||
local len_a = 'l'; -- default access-/archive-date length shall be long | |||
local result = false; | local result = false; | ||
local | local new_date; | ||
if format:match('%a+%-all') then | if format:match('%a+%-all') then -- manual df keyword; auto df keyword when length not specified in {{use xxx dates}}; | ||
format = format:match('(%a+)%-all'); -- extract the format | format = format:match('(%a+)%-all'); -- extract the format | ||
all = true; -- | all = true; -- all dates are long format dates because this keyword doesn't specify length | ||
end | elseif format:match('%a+%-[lsy][sy]?') then -- auto df keywords; internal only | ||
all = true; -- auto df applies to all dates; use length specified by capture len_p for all dates | |||
format, len_p, len_a = format:match('(%a+)%-([lsy])([sy]?)'); -- extract the format and length keywords | |||
if 'y' == len_p then -- because allowed by MOS:DATEUNIFY (sort of) range dates and My dates not reformatted | |||
format = 'ymd'; -- override {{use xxx dates}} | |||
elseif (not is_set(len_a)) or (len_p == len_a) then -- no access-/archive-date length specified or same length as publication dates then | |||
len_a = len_p; -- in case len_a not set | |||
end | |||
end -- else only publication dates and they are long | |||
for param_name, param_val in pairs (date_parameters_list) do -- for each date-holding parameter in the list | for param_name, param_val in pairs (date_parameters_list) do -- for each date-holding parameter in the list | ||
if is_set (param_val.val) then -- if the parameter has a value | if is_set (param_val.val) then -- if the parameter has a value | ||
if not (not all and in_array (param_name, {'access-date', 'archive-date'})) then -- skip access- or archive-date unless format is xxx-all; yeah, ugly; TODO: find a better way | if not (not all and in_array (param_name, {'access-date', 'archive-date'})) then -- skip access- or archive-date unless format is xxx-all; yeah, ugly; TODO: find a better way | ||
for | for pattern_idx, pattern in pairs (patterns) do | ||
if param_val.val | if mw.ustring.match (param_val.val, pattern[1]) then | ||
if ' | if all and in_array (param_name, {'access-date', 'archive-date'}) then -- if this date is an access- or archive-date | ||
new_date = reformatter (param_val.val, pattern_idx, (('y' == len_a) and 'ymd') or format, len_a); -- choose ymd or dmy/mdy according to len_a setting | |||
else -- all other dates | |||
new_date = reformatter (param_val.val, pattern_idx, format, len_p); | |||
end | end | ||
if | if new_date then -- set when date was reformatted | ||
date_parameters_list[param_name].val = new_date; -- update date in date list | |||
result = true; -- and announce that changes have been made | |||
end | end | ||
end -- if | end -- if | ||
end -- for | end -- for | ||
Line 783: | Line 948: | ||
end -- if | end -- if | ||
end -- for | end -- for | ||
return result; -- declare result and done | return result; -- declare boolean result and done | ||
end | end | ||
Line 801: | Line 966: | ||
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | ||
if is_set (param_val.val) then | if is_set (param_val.val) then | ||
if not mw.ustring.match (param_val.val, '%d%d%d%d%-%d%d%-%d%d') then | if not mw.ustring.match (param_val.val, '%d%d%d%d%-%d%d%-%d%d') then -- for those that are not ymd dates (ustring because here digits may not be western) | ||
param_val.val, n = param_val.val:gsub ('%-', '–'); | param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash | ||
if 0 ~= n then | if 0 ~= n then | ||
date_parameters_list[param_name].val = param_val.val; | date_parameters_list[param_name].val = param_val.val; -- update the list | ||
result = true; | result = true; | ||
end | end | ||
Line 842: | Line 1,007: | ||
end | end | ||
if mode then | if mode then -- might be a season | ||
xlate = mw.getContentLanguage():formatDate(mode, '1' .. month); | xlate = mw.getContentLanguage():formatDate(mode, '1' .. month); -- translate the month name to this local language | ||
date = mw.ustring.gsub (date, month, xlate); | date = mw.ustring.gsub (date, month, xlate); -- replace the English with the translation | ||
date_parameters_list[param_name].val = date; | date_parameters_list[param_name].val = date; -- save the translated date | ||
modified = true; | modified = true; | ||
end | end | ||
Line 875: | Line 1,040: | ||
--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------ | |||
]] | |||
return { -- return exported functions | return { -- return exported functions |