Module:Citation/CS1/Date validation: Difference between revisions
sync from sandbox;
(sync from sandbox;) |
(sync from sandbox;) |
||
Line 55: | Line 55: | ||
return false; -- accessdate out of range | return false; -- accessdate out of range | ||
end | end | ||
end | end | ||
Line 87: | Line 74: | ||
end | end | ||
return false, '9999'; -- if here not good date so return false and set embargo date to long time in future | return false, '9999'; -- if here not good date so return false and set embargo date to long time in future | ||
end | |||
--[[--------------------------< G E T _ M O N T H _ N U M B E R >---------------------------------------------- | |||
returns a number according to the month in a date: 1 for January, etc. Capitalization and spelling must be correct. | |||
If not a valid month, returns 0 | |||
]] | |||
local function get_month_number (month) | |||
return cfg.date_names['local'].long[month] or cfg.date_names['local'].short[month] or -- look for local names first | |||
cfg.date_names['en'].long[month] or cfg.date_names['en'].short[month] or -- failing that, look for English names | |||
0; -- not a recognized month name | |||
end | end | ||
Line 92: | Line 93: | ||
--[[--------------------------< G E T _ S E A S O N _ N U M B E R >-------------------------------------------- | --[[--------------------------< G E T _ S E A S O N _ N U M B E R >-------------------------------------------- | ||
returns a number according to the sequence of seasons in a year: | returns a number according to the sequence of seasons in a year: 21 for Spring, etc. Capitalization and spelling | ||
must be correct. If not a valid season, returns 0. | |||
21-24 = Spring, Summer, Autumn, Winter, independent of “Hemisphere” | |||
returns 0 when <param> is not |date= | |||
Season numbering is defined by Extended Date/Time Format (EDTF) Specification (https://www.loc.gov/standards/datetime/) | |||
which became part of ISO 8601 in 2019. See '§Sub-year groupings'. The standard defines various divisions using | |||
numbers 21-41. cs1|2 only supports generic seasons. EDTF does support the distinction between northern and southern | |||
hemisphere seasons but cs1|2 has no way to make that distinction. | |||
These additional divisions not currently supported: | These additional divisions not currently supported: | ||
Line 106: | Line 113: | ||
]] | ]] | ||
local function get_season_number (season) | local function get_season_number (season, param) | ||
if 'date' ~= param then | |||
return 0; -- season dates only supported by |date= | |||
end | |||
return cfg.date_names['local'].season[season] or -- look for local names first | return cfg.date_names['local'].season[season] or -- look for local names first | ||
cfg.date_names['en'].season[season] or -- failing that, look for English names | cfg.date_names['en'].season[season] or -- failing that, look for English names | ||
Line 113: | Line 123: | ||
--[[--------------------------< | --[[--------------------------< G E T _ Q U A R T E R _ N U M B E R >------------------------------------------ | ||
returns a number according to the sequence of quarters in a year: 33 for first quarter, etc. Capitalization and spelling | |||
must be correct. If not a valid quarter, returns 0. | |||
33-36 = Quarter 1, Quarter 2, Quarter 3, Quarter 4 (3 months each) | |||
returns 0 when <param> is not |date= | |||
Quarter numbering is defined by Extended Date/Time Format (EDTF) Specification (https://www.loc.gov/standards/datetime/) | |||
which became part of ISO 8601 in 2019. See '§Sub-year groupings'. The standard defines various divisions using | |||
numbers 21-41. cs1|2 only supports generic seasons and quarters. | |||
These additional divisions not currently supported: | |||
37-39 = Quadrimester 1, Quadrimester 2, Quadrimester 3 (4 months each) | |||
40-41 = Semestral 1, Semestral 2 (6 months each) | |||
]] | |||
local function get_quarter_number (quarter, param) | |||
if 'date' ~= param then | |||
return 0; -- quarter dates only supported by |date= | |||
end | |||
quarter = mw.ustring.gsub (quarter, ' +', ' '); -- special case replace multiple space chars with a single space char | |||
return cfg.date_names['local'].quarter[quarter] or -- look for local names first | |||
cfg.date_names['en'].quarter[quarter] or -- failing that, look for English names | |||
0; -- not a recognized quarter name | |||
end | |||
--[[--------------------------< G E T _ P R O P E R _ N A M E _ N U M B E R >---------------------------------- | |||
returns a non-zero number if date contains a recognized proper name. Capitalization and spelling must be correct. | returns a non-zero number if date contains a recognized proper-name. Capitalization and spelling must be correct. | ||
returns 0 when <param> is not |date= | |||
]] | ]] | ||
local function | local function get_proper_name_number (name, param) | ||
if 'date' ~= param then | |||
return 0; -- proper-name dates only supported by |date= | |||
end | |||
return cfg.date_names['local'].named[name] or -- look for local names dates first | return cfg.date_names['local'].named[name] or -- look for local names dates first | ||
cfg.date_names['en'].named[name] or -- failing that, look for English names | cfg.date_names['en'].named[name] or -- failing that, look for English names | ||
Line 126: | Line 170: | ||
--[[--------------------------< | --[[--------------------------< G E T _ E L E M E N T _ N U M B E R >------------------------------------------ | ||
returns the month, season, quarter, or proper-name number when the element is valid (properly spelled, capitalized, abbreviated); returns nil when it is not | |||
]] | ]] | ||
local function | local function get_element_number (element, param) | ||
local num; | |||
if 0 = | |||
return | local funcs = {get_month_number, get_season_number, get_quarter_number, get_proper_name_number}; -- list of functions to execute in order | ||
for _, func in ipairs (funcs) do -- spin through the function list | |||
num = func (element, param); -- call the function and get the returned number | |||
if 0 ~= num then -- non-zero when valid month season quarter | |||
return num; -- return that number | |||
end | end | ||
end | end | ||
return | return nil; -- not valid | ||
end | end | ||
Line 144: | Line 193: | ||
--[[--------------------------< I S _ V A L I D _ Y E A R >---------------------------------------------------- | --[[--------------------------< I S _ V A L I D _ Y E A R >---------------------------------------------------- | ||
Function gets current year from the server and compares it to year from a citation parameter. Years more than one year in the future are not acceptable. | Function gets current year from the server and compares it to year from a citation parameter. Years more than one | ||
year in the future are not acceptable. | |||
]] | ]] | ||
local function is_valid_year(year) | local function is_valid_year (year) | ||
if not is_set(year_limit) then | if not is_set(year_limit) then | ||
year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once | year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once | ||
Line 159: | Line 209: | ||
--[[--------------------------< I S _ V A L I D _ D A T E >---------------------------------------------------- | --[[--------------------------< I S _ V A L I D _ D A T E >---------------------------------------------------- | ||
Returns true if day is less than or equal to the number of days in month and year is no farther into the future | Returns true if day is less than or equal to the number of days in month and year is no farther into the future | ||
than next year; else returns false. | than next year; else returns false. | ||
Line 281: | Line 332: | ||
the output table receives: | the output table receives: | ||
rftdate: an ISO 8601 formatted date | rftdate: an ISO 8601 formatted date | ||
rftchron: a free-form version of the date, usually without year which is in rftdate (season ranges and | rftchron: a free-form version of the date, usually without year which is in rftdate (season ranges and proper-name dates) | ||
rftssn: one of four season keywords: winter, spring, summer, fall (lowercase) | rftssn: one of four season keywords: winter, spring, summer, fall (lowercase) | ||
rftquarter: one of four values: 1, 2, 3, 4 | |||
]] | ]] | ||
Line 306: | Line 358: | ||
date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2)) -- assemble the date range | date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2)) -- assemble the date range | ||
end | end | ||
if 20 < tonumber(input.month) then -- if season or | if 20 < tonumber(input.month) then -- if season or proper-name date | ||
local season = {[24]='winter', [21]='spring', [22]='summer', [23]='fall', [99]='Christmas'}; -- seasons lowercase, no autumn; proper names use title case | local season = {[24]='winter', [21]='spring', [22]='summer', [23]='fall', [33]='1', [34]='2', [35]='3', [36]='4', [98]='Easter', [99]='Christmas'}; -- seasons lowercase, no autumn; proper-names use title case | ||
if 0 == input.month2 then -- single season date | if 0 == input.month2 then -- single season date | ||
if | if 40 <tonumber(input.month) then | ||
tCOinS_date.rftchron = season[input.month]; -- proper name dates | tCOinS_date.rftchron = season[input.month]; -- proper-name dates | ||
elseif 30 <tonumber(input.month) then | |||
tCOinS_date.rftquarter = season[input.month]; -- quarters | |||
else | else | ||
tCOinS_date.rftssn = season[input.month]; -- seasons | tCOinS_date.rftssn = season[input.month]; -- seasons | ||
Line 392: | Line 446: | ||
-- month/season range year; months separated by endash | -- month/season range year; months separated by endash | ||
['M-My'] = {'^(%D-)[%-–](%D-) +((%d%d%d%d)%a?)$', 'm', 'm2', 'a', 'y'}, | ['M-My'] = {'^(%D-)[%-–](%D-) +((%d%d%d%d)%a?)$', 'm', 'm2', 'a', 'y'}, | ||
-- month/season year or proper-name year | -- month/season year or proper-name year; quarter year when First Quarter YYYY etc | ||
['My'] = {'^([^%d–]-) +((%d%d%d%d)%a?)$', 'm', 'a', 'y'}, -- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't | ['My'] = {'^([^%d–]-) +((%d%d%d%d)%a?)$', 'm', 'a', 'y'}, -- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't | ||
-- these date formats cannot be converted | -- these date formats cannot be converted | ||
-- ['Q,y'] = {'^(Q%a* +[1-4]), +((%d%d%d%d)%a?)$'}, -- Quarter n, yyyy | |||
['Sy4-y2'] = {'^(%D-) +((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash | ['Sy4-y2'] = {'^(%D-) +((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash | ||
['Sy-y'] = {'^(%D-) +(%d%d%d%d)[%-–]((%d%d%d%d)%a?)$'}, -- special case Winter/Summer year-year; year separated with unspaced endash | ['Sy-y'] = {'^(%D-) +(%d%d%d%d)[%-–]((%d%d%d%d)%a?)$'}, -- special case Winter/Summer year-year; year separated with unspaced endash | ||
Line 425: | Line 482: | ||
]] | ]] | ||
local function check_date (date_string, tCOinS_date, test_accessdate) | --local function check_date (date_string, tCOinS_date, test_accessdate) | ||
local function check_date (date_string, param, tCOinS_date) | |||
local year; -- assume that year2, months, and days are not used; | local year; -- assume that year2, months, and days are not used; | ||
local year2=0; -- second year in a year range | local year2=0; -- second year in a year range | ||
Line 440: | Line 498: | ||
anchor_year = year; | anchor_year = year; | ||
-- elseif mw.ustring.match(date_string, patterns['Q,y'][1]) then -- quarter n, year; here because much the same as Mdy | |||
-- month, anchor_year, year=mw.ustring.match(date_string, patterns['Q,y'][1]); | |||
-- if not is_valid_year(year) then return false; end | |||
-- month = get_quarter_number (month, param); -- get quarter number or nil | |||
-- if not month then return false; end -- not valid whatever it is | |||
elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year | elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year | ||
month, day, anchor_year, year=mw.ustring.match(date_string, patterns['Mdy'][1]); | month, day, anchor_year, year=mw.ustring.match(date_string, patterns['Mdy'][1]); | ||
Line 546: | Line 610: | ||
year2=year; | year2=year; | ||
elseif mw.ustring.match(date_string, patterns['My'][1]) then -- month/season | elseif mw.ustring.match(date_string, patterns['My'][1]) then -- month/season/quarter/proper-name year | ||
month, anchor_year, year=mw.ustring.match(date_string, patterns['My'][1]); | month, anchor_year, year=mw.ustring.match(date_string, patterns['My'][1]); | ||
if not is_valid_year(year) then return false; end | if not is_valid_year(year) then return false; end | ||
month = get_element_number (month, param); -- get month season quarter proper-name number or nil | |||
if not month then return false; end -- not valid whatever it is | |||
elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ||
Line 583: | Line 641: | ||
end | end | ||
if | if 'access-date' == param then -- test accessdate here because we have numerical date parts | ||
if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required | if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required | ||
0 == year2 and 0 == month2 and 0 == day2 then -- none of these; accessdate must not be a range | 0 == year2 and 0 == month2 and 0 == day2 then -- none of these; accessdate must not be a range | ||
Line 656: | Line 714: | ||
good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date | good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date | ||
else | else | ||
good_date, anchor_year, COinS_date = check_date (v.val, tCOinS_date); -- go test the date | good_date, anchor_year, COinS_date = check_date (v.val, k, tCOinS_date); -- go test the date | ||
end | end | ||
elseif 'year'==k then -- if the parameter is |year= it should hold only a year value | elseif 'year'==k then -- if the parameter is |year= it should hold only a year value | ||
Line 662: | Line 720: | ||
good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)"); | good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)"); | ||
end | end | ||
elseif 'embargo'==k then -- if the parameter is |embargo= | elseif 'embargo'==k then -- if the parameter is |embargo= | ||
good_date = check_date (v.val); -- go test the date | good_date = check_date (v.val); -- go test the date | ||
Line 670: | Line 726: | ||
end | end | ||
else -- any other date-holding parameter | else -- any other date-holding parameter | ||
good_date = check_date (v.val); | good_date = check_date (v.val, k); -- go test the date | ||
end | end | ||
if false==good_date then -- assemble one error message so we don't add the tracking category multiple times | if false==good_date then -- assemble one error message so we don't add the tracking category multiple times | ||
Line 853: | Line 909: | ||
[patterns[pattern_idx][8] or 'x'] = c7; | [patterns[pattern_idx][8] or 'x'] = c7; | ||
}; | }; | ||
if t.a then -- if this date has an anchor year capture | |||
t.y = t.a; -- use the anchor year capture when reassembling the date | |||
end | |||
if tonumber(t.m) then -- if raw month is a number (converting from ymd) | if tonumber(t.m) then -- if raw month is a number (converting from ymd) |