Module:Citation/CS1/Date validation: Difference between revisions

    (sync from sandbox;)
    (sync from sandbox;)
    Line 55: Line 55:
    return false; -- accessdate out of range
    return false; -- accessdate out of range
    end
    end
    end
    --[[--------------------------< G E T _ M O N T H _ N U M B E R >----------------------------------------------
    returns a number according to the month in a date: 1 for January, etc.  Capitalization and spelling must be correct. If not a valid month, returns 0
    ]]
    local function get_month_number (month)
        -- names in the wiki's local language take precedence
        local local_names = cfg.date_names['local'];
        local number = local_names.long[month] or local_names.short[month];

        if not number then -- no local match, so try the English long and short names
            local english_names = cfg.date_names['en'];
            number = english_names.long[month] or english_names.short[month];
        end

        return number or 0; -- 0 when <month> is not a recognized month name
    end
    end


    Line 87: Line 74:
    end
    end
    return false, '9999'; -- if here not good date so return false and set embargo date to long time in future
    return false, '9999'; -- if here not good date so return false and set embargo date to long time in future
    end
    --[[--------------------------< G E T _ M O N T H _ N U M B E R >----------------------------------------------
    returns a number according to the month in a date: 1 for January, etc.  Capitalization and spelling must be correct.
    If not a valid month, returns 0
    ]]
    local function get_month_number (month)
        -- names in the wiki's local language take precedence
        local local_names = cfg.date_names['local'];
        local number = local_names.long[month] or local_names.short[month];

        if not number then -- no local match, so try the English long and short names
            local english_names = cfg.date_names['en'];
            number = english_names.long[month] or english_names.short[month];
        end

        return number or 0; -- 0 when <month> is not a recognized month name
    end
    end


    Line 92: Line 93:
    --[[--------------------------< G E T _ S E A S O N _ N U M B E R >--------------------------------------------
    --[[--------------------------< G E T _ S E A S O N _ N U M B E R >--------------------------------------------


    returns a number according to the sequence of seasons in a year: 1 for Winter, etc.  Capitalization and spelling must be correct. If not a valid season, returns 0
    returns a number according to the sequence of seasons in a year: 21 for Spring, etc.  Capitalization and spelling
    must be correct. If not a valid season, returns 0.
    21-24 = Spring, Summer, Autumn, Winter, independent of “Hemisphere”
     
    returns 0 when <param> is not |date=


    Uses ISO DIS 8601 2016 part 2 §4.7 Divisions of a year for hemishpere-independent seasons:
    Season numbering is defined by Extended Date/Time Format (EDTF) Specification (https://www.loc.gov/standards/datetime/)
    21-24 = Spring, Summer, Autumn, Winter, independent of “Hemisphere”
    which became part of ISO 8601 in 2019.  See '§Sub-year groupings'.  The standard defines various divisions using
    numbers 21-41.  cs1|2 only supports generic seasons.  EDTF does support the distinction between north and south
    hemisphere seasons but cs1|2 has no way to make that distinction.


    These additional divisions not currently supported:
    These additional divisions not currently supported:
    Line 106: Line 113:
    ]]
    ]]


    local function get_season_number (season)
    local function get_season_number (season, param)
    if 'date' ~= param then
    return 0; -- season dates only supported by |date=
    end
    return cfg.date_names['local'].season[season] or -- look for local names first
    return cfg.date_names['local'].season[season] or -- look for local names first
    cfg.date_names['en'].season[season] or -- failing that, look for English names
    cfg.date_names['en'].season[season] or -- failing that, look for English names
    Line 113: Line 123:




    --[[--------------------------< I S _ P R O P E R _ N A M E >--------------------------------------------------
    --[[--------------------------< G E T _ Q U A R T E R _ N U M B E R >------------------------------------------
     
    returns a number according to the sequence of quarters in a year: 33 for first quarter, etc.  Capitalization and spelling
    must be correct. If not a valid quarter, returns 0.
    33-36 = Quarter 1, Quarter 2, Quarter 3, Quarter 4 (3 months each)
     
    returns 0 when <param> is not |date=
     
    Quarter numbering is defined by Extended Date/Time Format (EDTF) Specification (https://www.loc.gov/standards/datetime/)
    which became part of ISO 8601 in 2019.  See '§Sub-year groupings'.  The standard defines various divisions using
    numbers 21-41.  cs1|2 only supports generic seasons and quarters.
     
    These additional divisions not currently supported:
    37-39 = Quadrimester 1, Quadrimester 2, Quadrimester 3 (4 months each)
    40-41 = Semestral 1, Semestral 2 (6 months each)
     
    ]]
     
    local function get_quarter_number (quarter, param)
        if 'date' == param then -- quarter dates only supported by |date=
            -- collapse runs of space characters to a single space before the lookup;
            -- assigning to one variable keeps only the string result of gsub
            local normalized = mw.ustring.gsub (quarter, ' +', ' ');
            local names = cfg.date_names;

            return names['local'].quarter[normalized] or -- local names are tried first
                names['en'].quarter[normalized] or -- then English names
                0; -- not a recognized quarter name
        end

        return 0; -- <param> is not |date=
    end
     
     
    --[[--------------------------< G E T _ P R O P E R _ N A M E _ N U M B E R >----------------------------------


    returns a non-zero number if date contains a recognized proper name.  Capitalization and spelling must be correct.
    returns a non-zero number if date contains a recognized proper-name.  Capitalization and spelling must be correct.
     
    returns 0 when <param> is not |date=


    ]]
    ]]


    local function is_proper_name (name)
    local function get_proper_name_number (name, param)
    if 'date' ~= param then
    return 0; -- proper-name dates only supported by |date=
    end
    return cfg.date_names['local'].named[name] or -- look for local names dates first
    return cfg.date_names['local'].named[name] or -- look for local names dates first
    cfg.date_names['en'].named[name] or -- failing that, look for English names
    cfg.date_names['en'].named[name] or -- failing that, look for English names
    Line 126: Line 170:




    --[[--------------------------< I S _ V A L I D _ M O N T H _ O R _ S E A S O N >------------------------------
    --[[--------------------------< G E T _ E L E M E N T _ N U M B E R >------------------------------------------


    --returns true if month or season is valid (properly spelled, capitalized, abbreviated)
    returns the number of the month or season or quarter or proper name when it is valid (properly spelled, capitalized, abbreviated); returns nil when it is not valid


    ]]
    ]]


    local function is_valid_month_or_season (month_season)
    local function get_element_number (element, param)
    if 0 == get_month_number (month_season) then -- if month text isn't one of the twelve months, might be a season
    local num;
    if 0 == get_season_number (month_season) then -- not a month, is it a season?
    return false; -- return false not a month or one of the five seasons
    local funcs = {get_month_number, get_season_number, get_quarter_number, get_proper_name_number}; -- list of functions to execute in order
    for _, func in ipairs (funcs) do -- spin through the function list
    num = func (element, param); -- call the function and get the returned number
    if 0 ~= num then -- non-zero when valid month season quarter
    return num; -- return that number
    end
    end
    end
    end
    return true;
    return nil; -- not valid
    end
    end


    Line 144: Line 193:
    --[[--------------------------< I S _ V A L I D _ Y E A R >----------------------------------------------------
    --[[--------------------------< I S _ V A L I D _ Y E A R >----------------------------------------------------


    Function gets current year from the server and compares it to year from a citation parameter.  Years more than one year in the future are not acceptable.
    Function gets current year from the server and compares it to year from a citation parameter.  Years more than one
    year in the future are not acceptable.


    ]]
    ]]


    local function is_valid_year(year)
    local function is_valid_year (year)
    if not is_set(year_limit) then
    if not is_set(year_limit) then
    year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once
    year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once
    Line 159: Line 209:


    --[[--------------------------< I S _ V A L I D _ D A T E >----------------------------------------------------
    --[[--------------------------< I S _ V A L I D _ D A T E >----------------------------------------------------
    Returns true if day is less than or equal to the number of days in month and year is no farther into the future
    Returns true if day is less than or equal to the number of days in month and year is no farther into the future
    than next year; else returns false.
    than next year; else returns false.
    Line 281: Line 332:
    the output table receives:
    the output table receives:
    rftdate: an ISO 8601 formatted date
    rftdate: an ISO 8601 formatted date
    rftchron: a free-form version of the date, usually without year which is in rftdate (season ranges and propername dates)
    rftchron: a free-form version of the date, usually without year which is in rftdate (season ranges and proper-name dates)
    rftssn: one of four season keywords: winter, spring, summer, fall (lowercase)
    rftssn: one of four season keywords: winter, spring, summer, fall (lowercase)
    rftquarter: one of four values: 1, 2, 3, 4


    ]]
    ]]
    Line 306: Line 358:
    date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2)) -- assemble the date range
    date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2)) -- assemble the date range
    end
    end
    if 20 < tonumber(input.month) then -- if season or propername date
    if 20 < tonumber(input.month) then -- if season or proper-name date
    local season = {[24]='winter', [21]='spring', [22]='summer', [23]='fall', [99]='Christmas'}; -- seasons lowercase, no autumn; proper names use title case
    local season = {[24]='winter', [21]='spring', [22]='summer', [23]='fall', [33]='1', [34]='2', [35]='3', [36]='4', [98]='Easter', [99]='Christmas'}; -- seasons lowercase, no autumn; proper-names use title case
    if 0 == input.month2 then -- single season date
    if 0 == input.month2 then -- single season date
    if 30 <tonumber(input.month) then
    if 40 <tonumber(input.month) then
    tCOinS_date.rftchron = season[input.month]; -- proper name dates
    tCOinS_date.rftchron = season[input.month]; -- proper-name dates
    elseif 30 <tonumber(input.month) then
    tCOinS_date.rftquarter = season[input.month]; -- quarters
    else
    else
    tCOinS_date.rftssn = season[input.month]; -- seasons
    tCOinS_date.rftssn = season[input.month]; -- seasons
    Line 392: Line 446:
    -- month/season range year; months separated by endash
    -- month/season range year; months separated by endash
    ['M-My'] = {'^(%D-)[%-–](%D-) +((%d%d%d%d)%a?)$', 'm', 'm2', 'a', 'y'},
    ['M-My'] = {'^(%D-)[%-–](%D-) +((%d%d%d%d)%a?)$', 'm', 'm2', 'a', 'y'},
    -- month/season year or proper-name year
    -- month/season year or proper-name year; quarter year when First Quarter YYYY etc
    ['My'] = {'^([^%d–]-) +((%d%d%d%d)%a?)$', 'm', 'a', 'y'}, -- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't
    ['My'] = {'^([^%d–]-) +((%d%d%d%d)%a?)$', 'm', 'a', 'y'}, -- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't
    -- these date formats cannot be converted
    -- these date formats cannot be converted
    -- ['Q,y'] = {'^(Q%a* +[1-4]), +((%d%d%d%d)%a?)$'}, -- Quarter n, yyyy
    ['Sy4-y2'] = {'^(%D-) +((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash
    ['Sy4-y2'] = {'^(%D-) +((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash
    ['Sy-y'] = {'^(%D-) +(%d%d%d%d)[%-–]((%d%d%d%d)%a?)$'}, -- special case Winter/Summer year-year; year separated with unspaced endash
    ['Sy-y'] = {'^(%D-) +(%d%d%d%d)[%-–]((%d%d%d%d)%a?)$'}, -- special case Winter/Summer year-year; year separated with unspaced endash
    Line 425: Line 482:
    ]]
    ]]


    local function check_date (date_string, tCOinS_date, test_accessdate)
    --local function check_date (date_string, tCOinS_date, test_accessdate)
    local function check_date (date_string, param, tCOinS_date)
    local year; -- assume that year2, months, and days are not used;
    local year; -- assume that year2, months, and days are not used;
    local year2=0; -- second year in a year range
    local year2=0; -- second year in a year range
    Line 440: Line 498:
    anchor_year = year;
    anchor_year = year;
    -- elseif mw.ustring.match(date_string, patterns['Q,y'][1]) then -- quarter n, year; here because much the same as Mdy
    -- month, anchor_year, year=mw.ustring.match(date_string, patterns['Q,y'][1]);
    -- if not is_valid_year(year) then return false; end
    -- month = get_quarter_number (month, param); -- get quarter number or nil
    -- if not month then return false; end -- not valid whatever it is
    elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year
    elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year
    month, day, anchor_year, year=mw.ustring.match(date_string, patterns['Mdy'][1]);
    month, day, anchor_year, year=mw.ustring.match(date_string, patterns['Mdy'][1]);
    Line 546: Line 610:
    year2=year;
    year2=year;
    elseif mw.ustring.match(date_string, patterns['My'][1]) then -- month/season year or proper-name year
    elseif mw.ustring.match(date_string, patterns['My'][1]) then -- month/season/quarter/proper-name year
    month, anchor_year, year=mw.ustring.match(date_string, patterns['My'][1]);
    month, anchor_year, year=mw.ustring.match(date_string, patterns['My'][1]);
    if not is_valid_year(year) then return false; end
    if not is_valid_year(year) then return false; end
    if not is_valid_month_or_season (month) and 0 == is_proper_name (month) then return false; end
    month = get_element_number (month, param); -- get month season quarter proper-name number or nil
    if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season
    if not month then return false; end -- not valid whatever it is
    month = get_month_number(month);
    elseif 0 ~= get_season_number(month) then
    month = get_season_number(month);
    else
    month = is_proper_name (month); -- must be proper name; not supported in COinS
    end


    elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
    elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
    Line 583: Line 641:
    end
    end


    if test_accessdate then -- test accessdate here because we have numerical date parts
    if 'access-date' == param then -- test accessdate here because we have numerical date parts
    if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required
    if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required
    0 == year2 and 0 == month2 and 0 == day2 then -- none of these; accessdate must not be a range
    0 == year2 and 0 == month2 and 0 == day2 then -- none of these; accessdate must not be a range
    Line 656: Line 714:
    good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date
    good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date
    else
    else
    good_date, anchor_year, COinS_date = check_date (v.val, tCOinS_date); -- go test the date
    good_date, anchor_year, COinS_date = check_date (v.val, k, tCOinS_date); -- go test the date
    end
    end
    elseif 'year'==k then -- if the parameter is |year= it should hold only a year value
    elseif 'year'==k then -- if the parameter is |year= it should hold only a year value
    Line 662: Line 720:
    good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)");
    good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)");
    end
    end
    elseif 'access-date'==k then -- if the parameter is |date=
    good_date = check_date (v.val, nil, true); -- go test the date; nil is a placeholder; true is the test_accessdate flag
    elseif 'embargo'==k then -- if the parameter is |embargo=
    elseif 'embargo'==k then -- if the parameter is |embargo=
    good_date = check_date (v.val); -- go test the date
    good_date = check_date (v.val); -- go test the date
    Line 670: Line 726:
    end
    end
    else -- any other date-holding parameter
    else -- any other date-holding parameter
    good_date = check_date (v.val); -- go test the date
    good_date = check_date (v.val, k); -- go test the date
    end
    end
    if false==good_date then -- assemble one error message so we don't add the tracking category multiple times
    if false==good_date then -- assemble one error message so we don't add the tracking category multiple times
    Line 853: Line 909:
    [patterns[pattern_idx][8] or 'x'] = c7;
    [patterns[pattern_idx][8] or 'x'] = c7;
    };
    };
    if t.a then -- if this date has an anchor year capture
    t.y = t.a; -- use the anchor year capture when reassembling the date
    end


    if tonumber(t.m) then -- if raw month is a number (converting from ymd)
    if tonumber(t.m) then -- if raw month is a number (converting from ymd)