Jump to content

Module:Citation/CS1/Date validation: Difference between revisions

sync from sandbox;
(sync from sandbox;)
(sync from sandbox;)
Line 55: Line 55:
return false; -- accessdate out of range
return false; -- accessdate out of range
end
end
end
--[[--------------------------< G E T _ M O N T H _ N U M B E R >----------------------------------------------
returns a number according to the month in a date: 1 for January, etc.  Capitalization and spelling must be correct. If not a valid month, returns 0
]]
local function get_month_number (month)
	-- Map a month name (long or abbreviated form) to its month number.
	-- Local-language names are consulted before the English names; the
	-- English tables are only touched when the local lookup misses.
	-- Returns 0 when <month> is not a recognized month name.
	local names = cfg.date_names;
	local number = names['local'].long[month] or names['local'].short[month];	-- local names take precedence
	if not number then
		number = names['en'].long[month] or names['en'].short[month];		-- fall back to English names
	end
	return number or 0;														-- 0: not a recognized month name
end
end


Line 87: Line 74:
end
end
return false, '9999'; -- if here not good date so return false and set embargo date to long time in future
return false, '9999'; -- if here not good date so return false and set embargo date to long time in future
end
--[[--------------------------< G E T _ M O N T H _ N U M B E R >----------------------------------------------
returns a number according to the month in a date: 1 for January, etc.  Capitalization and spelling must be correct.
If not a valid month, returns 0
]]
local function get_month_number (month)
	-- Translate a month name into its number; the name must match a key in
	-- the configured long or short month-name tables exactly.
	local date_names = cfg.date_names;

	local local_number = date_names['local'].long[month] or date_names['local'].short[month];
	if local_number then
		return local_number;												-- matched a local-language month name
	end

	local english_number = date_names['en'].long[month] or date_names['en'].short[month];
	if english_number then
		return english_number;												-- matched an English month name
	end

	return 0;																-- not a recognized month name
end
end


Line 92: Line 93:
--[[--------------------------< G E T _ S E A S O N _ N U M B E R >--------------------------------------------
--[[--------------------------< G E T _ S E A S O N _ N U M B E R >--------------------------------------------


returns a number according to the sequence of seasons in a year: 1 for Winter, etc.  Capitalization and spelling must be correct. If not a valid season, returns 0
returns a number according to the sequence of seasons in a year: 21 for Spring, etc.  Capitalization and spelling
must be correct. If not a valid season, returns 0.
21-24 = Spring, Summer, Autumn, Winter, independent of “Hemisphere”
 
returns 0 when <param> is not |date=


Uses ISO DIS 8601 2016 part 2 §4.7 Divisions of a year for hemisphere-independent seasons:
Season numbering is defined by Extended Date/Time Format (EDTF) Specification (https://www.loc.gov/standards/datetime/)
21-24 = Spring, Summer, Autumn, Winter, independent of “Hemisphere”
which became part of ISO 8601 in 2019.  See '§Sub-year groupings'.  The standard defines various divisions using
numbers 21-41.  cs1|2 only supports generic seasons.  EDTF does support the distinction between north and south
hemisphere seasons but cs1|2 has no way to make that distinction.


These additional divisions not currently supported:
These additional divisions not currently supported:
Line 106: Line 113:
]]
]]


local function get_season_number (season)
local function get_season_number (season, param)
if 'date' ~= param then
return 0; -- season dates only supported by |date=
end
return cfg.date_names['local'].season[season] or -- look for local names first
return cfg.date_names['local'].season[season] or -- look for local names first
cfg.date_names['en'].season[season] or -- failing that, look for English names
cfg.date_names['en'].season[season] or -- failing that, look for English names
Line 113: Line 123:




--[[--------------------------< I S _ P R O P E R _ N A M E >--------------------------------------------------
--[[--------------------------< G E T _ Q U A R T E R _ N U M B E R >------------------------------------------
 
returns a number according to the sequence of quarters in a year: 33 for first quarter, etc.  Capitalization and spelling
must be correct. If not a valid quarter, returns 0.
33-36 = Quarter 1, Quarter 2, Quarter 3, Quarter 4 (3 months each)
 
returns 0 when <param> is not |date=
 
Quarter numbering is defined by Extended Date/Time Format (EDTF) Specification (https://www.loc.gov/standards/datetime/)
which became part of ISO 8601 in 2019.  See '§Sub-year groupings'.  The standard defines various divisions using
numbers 21-41.  cs1|2 only supports generic seasons and quarters.
 
These additional divisions not currently supported:
37-39 = Quadrimester 1, Quadrimester 2, Quadrimester 3 (4 months each)
40-41 = Semestral 1, Semestral 2 (6 months each)
 
]]
 
local function get_quarter_number (quarter, param)
	-- Map a quarter name to its number using the configured quarter-name
	-- tables.  Only the |date= parameter may hold a quarter date; for any
	-- other <param>, or for an unrecognized name, the result is 0.
	if 'date' == param then
		-- collapse runs of space characters so that the table lookup sees single spaces;
		-- only the first return value of gsub (the new string) is kept
		local normalized = mw.ustring.gsub (quarter, ' +', ' ');
		local names = cfg.date_names;
		return names['local'].quarter[normalized] or						-- local names are tried first
			names['en'].quarter[normalized] or								-- then English names
			0;																-- not a recognized quarter name
	end
	return 0;																-- quarter dates only supported by |date=
end
 
 
--[[--------------------------< G E T _ P R O P E R _ N A M E _ N U M B E R >----------------------------------


returns a non-zero number if date contains a recognized proper name.  Capitalization and spelling must be correct.
returns a non-zero number if date contains a recognized proper-name.  Capitalization and spelling must be correct.
 
returns 0 when <param> is not |date=


]]
]]


local function is_proper_name (name)
local function get_proper_name_number (name, param)
if 'date' ~= param then
return 0; -- proper-name dates only supported by |date=
end
return cfg.date_names['local'].named[name] or -- look for local names dates first
return cfg.date_names['local'].named[name] or -- look for local names dates first
cfg.date_names['en'].named[name] or -- failing that, look for English names
cfg.date_names['en'].named[name] or -- failing that, look for English names
Line 126: Line 170:




--[[--------------------------< I S _ V A L I D _ M O N T H _ O R _ S E A S O N >------------------------------
--[[--------------------------< G E T _ E L E M E N T _ N U M B E R >------------------------------------------


--returns true if month or season is valid (properly spelled, capitalized, abbreviated)
returns the number of the month or season or quarter or proper name when <element> is valid (properly spelled,
capitalized, abbreviated); returns nil when <element> is not any of these


]]
]]


local function is_valid_month_or_season (month_season)
local function get_element_number (element, param)
if 0 == get_month_number (month_season) then -- if month text isn't one of the twelve months, might be a season
local num;
if 0 == get_season_number (month_season) then -- not a month, is it a season?
return false; -- return false not a month or one of the five seasons
local funcs = {get_month_number, get_season_number, get_quarter_number, get_proper_name_number}; -- list of functions to execute in order
for _, func in ipairs (funcs) do -- spin through the function list
num = func (element, param); -- call the function and get the returned number
if 0 ~= num then -- non-zero when valid month season quarter
return num; -- return that number
end
end
end
end
return true;
return nil; -- not valid
end
end


Line 144: Line 193:
--[[--------------------------< I S _ V A L I D _ Y E A R >----------------------------------------------------
--[[--------------------------< I S _ V A L I D _ Y E A R >----------------------------------------------------


Function gets current year from the server and compares it to year from a citation parameter.  Years more than one year in the future are not acceptable.
Function gets current year from the server and compares it to year from a citation parameter.  Years more than one
year in the future are not acceptable.


]]
]]


local function is_valid_year(year)
local function is_valid_year (year)
if not is_set(year_limit) then
if not is_set(year_limit) then
year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once
year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once
Line 159: Line 209:


--[[--------------------------< I S _ V A L I D _ D A T E >----------------------------------------------------
--[[--------------------------< I S _ V A L I D _ D A T E >----------------------------------------------------
Returns true if day is less than or equal to the number of days in month and year is no farther into the future
Returns true if day is less than or equal to the number of days in month and year is no farther into the future
than next year; else returns false.
than next year; else returns false.
Line 281: Line 332:
the output table receives:
the output table receives:
rftdate: an ISO 8601 formatted date
rftdate: an ISO 8601 formatted date
rftchron: a free-form version of the date, usually without year which is in rftdate (season ranges and propername dates)
rftchron: a free-form version of the date, usually without year which is in rftdate (season ranges and proper-name dates)
rftssn: one of four season keywords: winter, spring, summer, fall (lowercase)
rftssn: one of four season keywords: winter, spring, summer, fall (lowercase)
rftquarter: one of four values: 1, 2, 3, 4


]]
]]
Line 306: Line 358:
date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2)) -- assemble the date range
date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2)) -- assemble the date range
end
end
if 20 < tonumber(input.month) then -- if season or propername date
if 20 < tonumber(input.month) then -- if season or proper-name date
local season = {[24]='winter', [21]='spring', [22]='summer', [23]='fall', [99]='Christmas'}; -- seasons lowercase, no autumn; proper names use title case
local season = {[24]='winter', [21]='spring', [22]='summer', [23]='fall', [33]='1', [34]='2', [35]='3', [36]='4', [98]='Easter', [99]='Christmas'}; -- seasons lowercase, no autumn; proper-names use title case
if 0 == input.month2 then -- single season date
if 0 == input.month2 then -- single season date
if 30 <tonumber(input.month) then
if 40 <tonumber(input.month) then
tCOinS_date.rftchron = season[input.month]; -- proper name dates
tCOinS_date.rftchron = season[input.month]; -- proper-name dates
elseif 30 <tonumber(input.month) then
tCOinS_date.rftquarter = season[input.month]; -- quarters
else
else
tCOinS_date.rftssn = season[input.month]; -- seasons
tCOinS_date.rftssn = season[input.month]; -- seasons
Line 392: Line 446:
-- month/season range year; months separated by endash
-- month/season range year; months separated by endash
['M-My'] = {'^(%D-)[%-–](%D-) +((%d%d%d%d)%a?)$', 'm', 'm2', 'a', 'y'},
['M-My'] = {'^(%D-)[%-–](%D-) +((%d%d%d%d)%a?)$', 'm', 'm2', 'a', 'y'},
-- month/season year or proper-name year
-- month/season year or proper-name year; quarter year when First Quarter YYYY etc
['My'] = {'^([^%d–]-) +((%d%d%d%d)%a?)$', 'm', 'a', 'y'}, -- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't
['My'] = {'^([^%d–]-) +((%d%d%d%d)%a?)$', 'm', 'a', 'y'}, -- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't
-- these date formats cannot be converted
-- these date formats cannot be converted
-- ['Q,y'] = {'^(Q%a* +[1-4]), +((%d%d%d%d)%a?)$'}, -- Quarter n, yyyy
['Sy4-y2'] = {'^(%D-) +((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash
['Sy4-y2'] = {'^(%D-) +((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash
['Sy-y'] = {'^(%D-) +(%d%d%d%d)[%-–]((%d%d%d%d)%a?)$'}, -- special case Winter/Summer year-year; year separated with unspaced endash
['Sy-y'] = {'^(%D-) +(%d%d%d%d)[%-–]((%d%d%d%d)%a?)$'}, -- special case Winter/Summer year-year; year separated with unspaced endash
Line 425: Line 482:
]]
]]


local function check_date (date_string, tCOinS_date, test_accessdate)
--local function check_date (date_string, tCOinS_date, test_accessdate)
local function check_date (date_string, param, tCOinS_date)
local year; -- assume that year2, months, and days are not used;
local year; -- assume that year2, months, and days are not used;
local year2=0; -- second year in a year range
local year2=0; -- second year in a year range
Line 440: Line 498:
anchor_year = year;
anchor_year = year;
-- elseif mw.ustring.match(date_string, patterns['Q,y'][1]) then -- quarter n, year; here because much the same as Mdy
-- month, anchor_year, year=mw.ustring.match(date_string, patterns['Q,y'][1]);
-- if not is_valid_year(year) then return false; end
-- month = get_quarter_number (month, param); -- get quarter number or nil
-- if not month then return false; end -- not valid whatever it is
elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year
elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year
month, day, anchor_year, year=mw.ustring.match(date_string, patterns['Mdy'][1]);
month, day, anchor_year, year=mw.ustring.match(date_string, patterns['Mdy'][1]);
Line 546: Line 610:
year2=year;
year2=year;
elseif mw.ustring.match(date_string, patterns['My'][1]) then -- month/season year or proper-name year
elseif mw.ustring.match(date_string, patterns['My'][1]) then -- month/season/quarter/proper-name year
month, anchor_year, year=mw.ustring.match(date_string, patterns['My'][1]);
month, anchor_year, year=mw.ustring.match(date_string, patterns['My'][1]);
if not is_valid_year(year) then return false; end
if not is_valid_year(year) then return false; end
if not is_valid_month_or_season (month) and 0 == is_proper_name (month) then return false; end
month = get_element_number (month, param); -- get month season quarter proper-name number or nil
if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season
if not month then return false; end -- not valid whatever it is
month = get_month_number(month);
elseif 0 ~= get_season_number(month) then
month = get_season_number(month);
else
month = is_proper_name (month); -- must be proper name; not supported in COinS
end


elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
Line 583: Line 641:
end
end


if test_accessdate then -- test accessdate here because we have numerical date parts
if 'access-date' == param then -- test accessdate here because we have numerical date parts
if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required
if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required
0 == year2 and 0 == month2 and 0 == day2 then -- none of these; accessdate must not be a range
0 == year2 and 0 == month2 and 0 == day2 then -- none of these; accessdate must not be a range
Line 656: Line 714:
good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date
good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date
else
else
good_date, anchor_year, COinS_date = check_date (v.val, tCOinS_date); -- go test the date
good_date, anchor_year, COinS_date = check_date (v.val, k, tCOinS_date); -- go test the date
end
end
elseif 'year'==k then -- if the parameter is |year= it should hold only a year value
elseif 'year'==k then -- if the parameter is |year= it should hold only a year value
Line 662: Line 720:
good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)");
good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)");
end
end
elseif 'access-date'==k then -- if the parameter is |date=
good_date = check_date (v.val, nil, true); -- go test the date; nil is a placeholder; true is the test_accessdate flag
elseif 'embargo'==k then -- if the parameter is |embargo=
elseif 'embargo'==k then -- if the parameter is |embargo=
good_date = check_date (v.val); -- go test the date
good_date = check_date (v.val); -- go test the date
Line 670: Line 726:
end
end
else -- any other date-holding parameter
else -- any other date-holding parameter
good_date = check_date (v.val); -- go test the date
good_date = check_date (v.val, k); -- go test the date
end
end
if false==good_date then -- assemble one error message so we don't add the tracking category multiple times
if false==good_date then -- assemble one error message so we don't add the tracking category multiple times
Line 853: Line 909:
[patterns[pattern_idx][8] or 'x'] = c7;
[patterns[pattern_idx][8] or 'x'] = c7;
};
};
if t.a then -- if this date has an anchor year capture
t.y = t.a; -- use the anchor year capture when reassembling the date
end


if tonumber(t.m) then -- if raw month is a number (converting from ymd)
if tonumber(t.m) then -- if raw month is a number (converting from ymd)
Cookies help us deliver our services. By using our services, you agree to our use of cookies.