Module:Citation/CS1/Date validation: Difference between revisions
sync from sandbox;
(sync from sandbox;) |
(sync from sandbox;) |
||
Line 3: | Line 3: | ||
]] | ]] | ||
local add_prop_cat, is_set, in_array, wrap_style; | local add_prop_cat, is_set, in_array, set_message, substitute, wrap_style; -- imported functions from selected Module:Citation/CS1/Utilities | ||
local cfg; -- table of tables imported from selected Module:Citation/CS1/Configuration | local cfg; -- table of tables imported from selected Module:Citation/CS1/Configuration | ||
Line 44: | Line 44: | ||
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which which tonumber() may not understand | if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which which tonumber() may not understand | ||
access_ts = tonumber (access_ts) or lang_object:parseFormattedNumber (access_ts); | access_ts = tonumber (access_ts) or lang_object:parseFormattedNumber (access_ts); -- convert to numbers for the comparison; | ||
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); | tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); | ||
else | else | ||
Line 85: | Line 85: | ||
local function get_month_number (month) | local function get_month_number (month) | ||
return cfg.date_names['local'].long[month] or cfg.date_names['local'].short[month] or | return cfg.date_names['local'].long[month] or cfg.date_names['local'].short[month] or -- look for local names first | ||
cfg.date_names['en'].long[month] or cfg.date_names['en'].short[month] or | cfg.date_names['en'].long[month] or cfg.date_names['en'].short[month] or -- failing that, look for English names | ||
0; | 0; -- not a recognized month name | ||
end | end | ||
Line 102: | Line 102: | ||
which became part of ISO 8601 in 2019. See '§Sub-year groupings'. The standard defines various divisions using | which became part of ISO 8601 in 2019. See '§Sub-year groupings'. The standard defines various divisions using | ||
numbers 21-41. cs1|2 only supports generic seasons. EDTF does support the distinction between north and south | numbers 21-41. cs1|2 only supports generic seasons. EDTF does support the distinction between north and south | ||
hemisphere seasons but cs1|2 has no way to make that distinction. | |||
These additional divisions not currently supported: | These additional divisions not currently supported: | ||
Line 228: | Line 228: | ||
month = tonumber(month); -- required for YYYY-MM-DD dates | month = tonumber(month); -- required for YYYY-MM-DD dates | ||
if (2 == month) then | if (2 == month) then -- if February | ||
month_length = 28; -- then 28 days unless | month_length = 28; -- then 28 days unless | ||
if 1582 > tonumber(year) then -- Julian calendar | if 1582 > tonumber(year) then -- Julian calendar | ||
if 0 == (year%4) then | if 0 == (year%4) then -- is a leap year? | ||
month_length = 29; -- if leap year then 29 days in February | month_length = 29; -- if leap year then 29 days in February | ||
end | end | ||
else -- Gregorian calendar | else -- Gregorian calendar | ||
if (0 == (year%4) and (0 ~= (year%100) or 0 == (year%400))) then | if (0 == (year%4) and (0 ~= (year%100) or 0 == (year%400))) then -- is a leap year? | ||
month_length = 29; -- if leap year then 29 days in February | month_length = 29; -- if leap year then 29 days in February | ||
end | end | ||
Line 299: | Line 299: | ||
-- here when range_start is a month | -- here when range_start is a month | ||
range_end_number = get_month_number (range_end); -- get end month number | range_end_number = get_month_number (range_end); -- get end month number | ||
if range_start_number < range_end_number | if range_start_number < range_end_number and -- range_start is a month; does range_start precede range_end? | ||
is_valid_month_range_style (range_start, range_end) then -- do months have the same style? | |||
return true; -- proper order and same style | return true; -- proper order and same style | ||
end | end | ||
return false; -- range_start month number is greater than or equal to range end number; or range end isn't a month | return false; -- range_start month number is greater than or equal to range end number; or range end isn't a month | ||
Line 429: | Line 428: | ||
['dMy'] = {'^([1-9]%d?) +(%D-) +((%d%d%d%d?)%a?)$', 'd', 'm', 'a', 'y'}, | ['dMy'] = {'^([1-9]%d?) +(%D-) +((%d%d%d%d?)%a?)$', 'd', 'm', 'a', 'y'}, | ||
-- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed; not supported at en.wiki | -- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed; not supported at en.wiki | ||
-- ['yMd'] = {'^((%d%d%d%d?)%a?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'}, | -- ['yMd'] = {'^((%d%d%d%d?)%a?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'}, | ||
-- day-range-initial: day–day month year; days are separated by endash | -- day-range-initial: day–day month year; days are separated by endash | ||
['d-dMy'] = {'^([1-9]%d?)[%-–]([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'd2', 'm', 'a', 'y'}, | ['d-dMy'] = {'^([1-9]%d?)[%-–]([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'd2', 'm', 'a', 'y'}, | ||
Line 454: | Line 453: | ||
['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ||
['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash | ['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash | ||
['ymx'] = {'^(%d%d%d%d)%-(%d%d)%-XX$', 'y', 'm'}, -- edtf year-initial numerical year-month-XX | |||
['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY | ['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY | ||
} | } | ||
Line 482: | Line 482: | ||
local function check_date (date_string, param, tCOinS_date) | local function check_date (date_string, param, tCOinS_date) | ||
local year; -- assume that year2, months, and days are not used; | local year; -- assume that year2, months, and days are not used; | ||
local year2 = 0; | local year2 = 0; -- second year in a year range | ||
local month = 0; | local month = 0; | ||
local month2 = 0; | local month2 = 0; -- second month in a month range | ||
local day = 0; | local day = 0; | ||
local day2 = 0; -- second day in a day range | local day2 = 0; -- second day in a day range | ||
Line 495: | Line 495: | ||
anchor_year = year; | anchor_year = year; | ||
elseif date_string:match (patterns['ymx'][1]) then -- year-initial numerical year month edtf format | |||
year, month = date_string:match (patterns['ymx'][1]); | |||
if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or not is_valid_year(year) then return false; end -- month number not valid or not Gregorian calendar or future year | |||
anchor_year = year; | |||
elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year | elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year | ||
month, day, anchor_year, year = mw.ustring.match(date_string, patterns['Mdy'][1]); | month, day, anchor_year, year = mw.ustring.match(date_string, patterns['Mdy'][1]); | ||
Line 562: | Line 567: | ||
month, year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy4-y2'][1]); | month, year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy4-y2'][1]); | ||
if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer | if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer | ||
anchor_year = year .. '–' .. anchor_year; | anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years | ||
year2 = century..year2; -- add the century to year2 for comparisons | year2 = century..year2; -- add the century to year2 for comparisons | ||
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
Line 578: | Line 583: | ||
elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash | elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash | ||
month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns['My-My'][1]); | month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns['My-My'][1]); | ||
anchor_year = year .. '–' .. anchor_year; | anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years | ||
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
Line 612: | Line 617: | ||
elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ||
year, anchor_year, year2 = mw.ustring.match(date_string, patterns['y-y'][1]); | year, anchor_year, year2 = mw.ustring.match(date_string, patterns['y-y'][1]); | ||
anchor_year = year .. '–' .. anchor_year; | anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years | ||
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
Line 619: | Line 624: | ||
local century; | local century; | ||
year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['y4-y2'][1]); | year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['y4-y2'][1]); | ||
anchor_year = year .. '–' .. anchor_year; | anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years | ||
if in_array (param, {'date', 'publication-date', 'year'}) then | if in_array (param, {'date', 'publication-date', 'year'}) then | ||
Line 630: | Line 635: | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
elseif mw.ustring.match(date_string, patterns['y'][1]) then | elseif mw.ustring.match(date_string, patterns['y'][1]) then -- year; here accept either YYY or YYYY | ||
anchor_year, year = mw.ustring.match(date_string, patterns['y'][1]); | anchor_year, year = mw.ustring.match(date_string, patterns['y'][1]); | ||
if false == is_valid_year(year) then | if false == is_valid_year(year) then | ||
Line 737: | Line 742: | ||
--[[--------------------------< Y E A R _ D A T E _ C H E C K >------------------------------------------------ | --[[--------------------------< Y E A R _ D A T E _ C H E C K >------------------------------------------------ | ||
Compare the value provided in |year= with the year value(s) provided in |date=. This function | Compare the value provided in |year= with the year value(s) provided in |date=. This function sets a local numeric value: | ||
0 - year value does not match the year value in date | 0 - year value does not match the year value in date | ||
1 - (default) year value matches the year value in date or one of the year values when date contains two years | 1 - (default) year value matches the year value in date or one of the year values when date contains two years | ||
2 - year value matches the year value in date when date is in the form YYYY-MM-DD and year is disambiguated (|year=YYYYx) | 2 - year value matches the year value in date when date is in the form YYYY-MM-DD and year is disambiguated (|year=YYYYx) | ||
the numeric value in <result> determines the 'output' if any from this function:
0 – adds error message to error_list sequence table | |||
1 – adds maint cat | |||
2 – does nothing | |||
]] | ]] | ||
local function year_date_check (year_string, date_string) | local function year_date_check (year_string, year_origin, date_string, date_origin, error_list) | ||
local year; | local year; | ||
local date1; | local date1; | ||
local date2; | local date2; | ||
local result = 1; -- result of the test; assume that the test passes | local result = 1; -- result of the test; assume that the test passes | ||
year = year_string:match ('(%d%d%d%d?)'); | year = year_string:match ('(%d%d%d%d?)'); | ||
Line 780: | Line 790: | ||
result = 0; | result = 0; | ||
end | end | ||
else | else -- should never get here; this function called only when no other date errors | ||
result = 0; -- no recognizable year in date | result = 0; -- no recognizable year in date | ||
end | end | ||
if 0 == result then -- year / date mismatch | |||
table.insert (error_list, substitute (cfg.messages['mismatch'], {year_origin, date_origin})); -- add error message to error_list sequence table | |||
elseif 1 == result then -- redundant year / date | |||
set_message ('maint_date_year'); -- add a maint cat | |||
end | |||
end | end | ||
Line 819: | Line 834: | ||
['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy | ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy | ||
['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy | ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy | ||
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, | -- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki | ||
}, | }, | ||
['Mdy'] = { -- date format is Mdy; reformat to: | ['Mdy'] = { -- date format is Mdy; reformat to: | ||
Line 825: | Line 840: | ||
['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy | ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy | ||
['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd | ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd | ||
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, | -- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki | ||
}, | }, | ||
['dMy'] = { -- date format is dMy; reformat to: | ['dMy'] = { -- date format is dMy; reformat to: | ||
Line 831: | Line 846: | ||
['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy | ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy | ||
['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd | ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd | ||
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, | -- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki | ||
}, | }, | ||
['Md-dy'] = { -- date format is Md-dy; reformat to: | ['Md-dy'] = { -- date format is Md-dy; reformat to: | ||
Line 866: | Line 881: | ||
['any'] = {'%s %s', 'm', 'y'}, -- dmy/mdy agnostic | ['any'] = {'%s %s', 'm', 'y'}, -- dmy/mdy agnostic | ||
}, | }, | ||
-- ['yMd'] = { | -- ['yMd'] = { -- not supported at en.wiki | ||
-- ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, | -- ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy | ||
-- ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, | -- ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy | ||
-- ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, | -- ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd | ||
-- }, | -- }, | ||
} | } | ||
Line 887: | Line 902: | ||
end | end | ||
-- yMd is not supported at en.wiki; if yMd is supported at your wiki, uncomment the next line | |||
-- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki | |||
-- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki | |||
-- if yMd is supported at your wiki, remove or comment-out the next line | -- if yMd is supported at your wiki, remove or comment-out the next line | ||
if 'yMd' == format_param then -- yMd not supported at en.wiki | if 'yMd' == format_param then -- yMd not supported at en.wiki | ||
Line 1,034: | Line 1,048: | ||
local n; | local n; | ||
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | ||
if is_set (param_val.val) | if is_set (param_val.val) and | ||
not mw.ustring.match (param_val.val, patterns.ymd[1]) then -- for those that are not ymd dates (ustring because here digits may not be Western) | |||
param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash | param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash | ||
if 0 ~= n then | if 0 ~= n then | ||
Line 1,041: | Line 1,055: | ||
result = true; | result = true; | ||
end | end | ||
end | end | ||
end | end | ||
return result; -- so we know if any hyphens were replaced | return result; -- so we know if any hyphens were replaced | ||
end | |||
--[[--------------------------< E D T F _ T R A N S F O R M >-------------------------------------------------- | |||
Loops through the list of date-holding parameters and converts any EDTF formatted dates to MOS compliant dates. | |||
Only YYYY-MM-XX supported at this time. Not called if the cs1|2 template has any date errors.
must be done before reformat_dates() and before date_hyphen_to_dash() | |||
Modifies the date_parameters_list and returns true if transformation is performed, else returns false. | |||
]] | |||
local function edtf_transform (date_parameters_list) | |||
local result = false; | |||
local source_date = {}; | |||
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | |||
if is_set(param_val.val) and param_val.val:match (patterns.ymx[1]) then -- if parameter is set and is an EDTF dates | |||
source_date.year, source_date.month = param_val.val:match (patterns.ymx[1]); -- get year and month number | |||
source_date.day = 1; -- required by os.time() | |||
date_parameters_list[param_name].val = mw.text.trim (os.date ('%B %Y', os.time (source_date))); | |||
result = true; | |||
end | |||
end | |||
return result; -- so we know if a transform was done | |||
end | end | ||
Line 1,093: | Line 1,133: | ||
return modified; | return modified; | ||
end | |||
Line 1,106: | Line 1,146: | ||
is_set = utilities_page_ptr.is_set; | is_set = utilities_page_ptr.is_set; | ||
in_array = utilities_page_ptr.in_array; | in_array = utilities_page_ptr.in_array; | ||
set_message = utilities_page_ptr.set_message; | |||
substitute = utilities_page_ptr.substitute; | |||
wrap_style = utilities_page_ptr.wrap_style; | wrap_style = utilities_page_ptr.wrap_style; | ||
Line 1,122: | Line 1,163: | ||
date_hyphen_to_dash = date_hyphen_to_dash, | date_hyphen_to_dash = date_hyphen_to_dash, | ||
date_name_xlate = date_name_xlate, | date_name_xlate = date_name_xlate, | ||
edtf_transform = edtf_transform, | |||
set_selected_modules = set_selected_modules | set_selected_modules = set_selected_modules | ||
} | } |