Module:Citation/CS1/Date validation: Difference between revisions

    (sync from sandbox;)
    (sync from sandbox;)
    Line 3: Line 3:
    ]]
    ]]


    local add_prop_cat, is_set, in_array, wrap_style; -- imported functions from selected Module:Citation/CS1/Utilities
    local add_prop_cat, is_set, in_array, set_message, substitute, wrap_style; -- imported functions from selected Module:Citation/CS1/Utilities
    local cfg; -- table of tables imported from selected Module:Citation/CS1/Configuration
    local cfg; -- table of tables imported from selected Module:Citation/CS1/Configuration


    Line 44: Line 44:
    if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
    if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
    access_ts = tonumber (access_ts) or lang_object:parseFormattedNumber (access_ts); -- convert to numbers for the comparison;
    access_ts = tonumber (access_ts) or lang_object:parseFormattedNumber (access_ts); -- convert to numbers for the comparison;
    tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
    tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
    else
    else
    Line 85: Line 85:


    local function get_month_number (month)
    local function get_month_number (month)
    return cfg.date_names['local'].long[month] or cfg.date_names['local'].short[month] or -- look for local names first
    return cfg.date_names['local'].long[month] or cfg.date_names['local'].short[month] or -- look for local names first
    cfg.date_names['en'].long[month] or cfg.date_names['en'].short[month] or -- failing that, look for English names
    cfg.date_names['en'].long[month] or cfg.date_names['en'].short[month] or -- failing that, look for English names
    0; -- not a recognized month name
    0; -- not a recognized month name
    end
    end


    Line 102: Line 102:
    which became part of ISO 8601 in 2019.  See '§Sub-year groupings'.  The standard defines various divisions using
    which became part of ISO 8601 in 2019.  See '§Sub-year groupings'.  The standard defines various divisions using
    numbers 21-41.  cs1|2 only supports generic seasons.  EDTF does support the distinction between north and south
    numbers 21-41.  cs1|2 only supports generic seasons.  EDTF does support the distinction between north and south
    hemispere seasons but cs1|2 has no way to make that distinction.
    hemisphere seasons but cs1|2 has no way to make that distinction.


    These additional divisions not currently supported:
    These additional divisions not currently supported:
    Line 228: Line 228:
    month = tonumber(month); -- required for YYYY-MM-DD dates
    month = tonumber(month); -- required for YYYY-MM-DD dates
    if (2 == month) then -- if February
    if (2 == month) then -- if February
    month_length = 28; -- then 28 days unless
    month_length = 28; -- then 28 days unless
    if 1582 > tonumber(year) then -- Julian calendar
    if 1582 > tonumber(year) then -- Julian calendar
    if 0 == (year%4) then -- is a leap year?
    if 0 == (year%4) then -- is a leap year?
    month_length = 29; -- if leap year then 29 days in February
    month_length = 29; -- if leap year then 29 days in February
    end
    end
    else -- Gregorian calendar
    else -- Gregorian calendar
    if (0 == (year%4) and (0 ~= (year%100) or 0 == (year%400))) then -- is a leap year?
    if (0 == (year%4) and (0 ~= (year%100) or 0 == (year%400))) then -- is a leap year?
    month_length = 29; -- if leap year then 29 days in February
    month_length = 29; -- if leap year then 29 days in February
    end
    end
    Line 299: Line 299:
    -- here when range_start is a month
    -- here when range_start is a month
    range_end_number = get_month_number (range_end); -- get end month number
    range_end_number = get_month_number (range_end); -- get end month number
    if range_start_number < range_end_number then -- range_start is a month; does range_start precede range_end?
    if range_start_number < range_end_number and -- range_start is a month; does range_start precede range_end?
    if is_valid_month_range_style (range_start, range_end) then -- do months have the same style?
    is_valid_month_range_style (range_start, range_end) then -- do months have the same style?
    return true; -- proper order and same style
    return true; -- proper order and same style
    end
    end
    end
    return false; -- range_start month number is greater than or equal to range end number; or range end isn't a month
    return false; -- range_start month number is greater than or equal to range end number; or range end isn't a month
    Line 429: Line 428:
    ['dMy'] = {'^([1-9]%d?) +(%D-) +((%d%d%d%d?)%a?)$', 'd', 'm', 'a', 'y'},
    ['dMy'] = {'^([1-9]%d?) +(%D-) +((%d%d%d%d?)%a?)$', 'd', 'm', 'a', 'y'},
    -- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed; not supported at en.wiki
    -- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed; not supported at en.wiki
    -- ['yMd'] = {'^((%d%d%d%d?)%a?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'},
    -- ['yMd'] = {'^((%d%d%d%d?)%a?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'},
    -- day-range-initial: day–day month year; days are separated by endash
    -- day-range-initial: day–day month year; days are separated by endash
    ['d-dMy'] = {'^([1-9]%d?)[%-–]([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'd2', 'm', 'a', 'y'},
    ['d-dMy'] = {'^([1-9]%d?)[%-–]([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'd2', 'm', 'a', 'y'},
    Line 454: Line 453:
    ['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
    ['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
    ['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash
    ['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash
    ['ymx'] = {'^(%d%d%d%d)%-(%d%d)%-XX$', 'y', 'm'}, -- edtf year-initial numerical year-month-XX
    ['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY
    ['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY
    }
    }
    Line 482: Line 482:
    local function check_date (date_string, param, tCOinS_date)
    local function check_date (date_string, param, tCOinS_date)
    local year; -- assume that year2, months, and days are not used;
    local year; -- assume that year2, months, and days are not used;
    local year2 = 0; -- second year in a year range
    local year2 = 0; -- second year in a year range
    local month = 0;
    local month = 0;
    local month2 = 0; -- second month in a month range
    local month2 = 0; -- second month in a month range
    local day = 0;
    local day = 0;
    local day2 = 0; -- second day in a day range
    local day2 = 0; -- second day in a day range
    Line 495: Line 495:
    anchor_year = year;
    anchor_year = year;
    elseif date_string:match (patterns['ymx'][1]) then -- year-initial numerical year month edtf format
    year, month = date_string:match (patterns['ymx'][1]);
    if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or not is_valid_year(year) then return false; end -- month number not valid or not Gregorian calendar or future year
    anchor_year = year;
    elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year
    elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year
    month, day, anchor_year, year = mw.ustring.match(date_string, patterns['Mdy'][1]);
    month, day, anchor_year, year = mw.ustring.match(date_string, patterns['Mdy'][1]);
    Line 562: Line 567:
    month, year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy4-y2'][1]);
    month, year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy4-y2'][1]);
    if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer
    if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer
    anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
    anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
    year2 = century..year2; -- add the century to year2 for comparisons
    year2 = century..year2; -- add the century to year2 for comparisons
    if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
    if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
    Line 578: Line 583:
    elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash
    elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash
    month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns['My-My'][1]);
    month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns['My-My'][1]);
    anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
    anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
    if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
    if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
    Line 612: Line 617:
    elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
    elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
    year, anchor_year, year2 = mw.ustring.match(date_string, patterns['y-y'][1]);
    year, anchor_year, year2 = mw.ustring.match(date_string, patterns['y-y'][1]);
    anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years
    anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years
    if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
    if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
    Line 619: Line 624:
    local century;
    local century;
    year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['y4-y2'][1]);
    year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['y4-y2'][1]);
    anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years
    anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years


    if in_array (param, {'date', 'publication-date', 'year'}) then
    if in_array (param, {'date', 'publication-date', 'year'}) then
    Line 630: Line 635:
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year


    elseif mw.ustring.match(date_string, patterns['y'][1]) then -- year; here accept either YYY or YYYY
    elseif mw.ustring.match(date_string, patterns['y'][1]) then -- year; here accept either YYY or YYYY
    anchor_year, year = mw.ustring.match(date_string, patterns['y'][1]);
    anchor_year, year = mw.ustring.match(date_string, patterns['y'][1]);
    if false == is_valid_year(year) then
    if false == is_valid_year(year) then
    Line 737: Line 742:
    --[[--------------------------< Y E A R _ D A T E _ C H E C K >------------------------------------------------
    --[[--------------------------< Y E A R _ D A T E _ C H E C K >------------------------------------------------


    Compare the value provided in |year= with the year value(s) provided in |date=.  This function returns a numeric value:
    Compare the value provided in |year= with the year value(s) provided in |date=.  This function sets a local numeric value:
    0 - year value does not match the year value in date
    0 - year value does not match the year value in date
    1 - (default) year value matches the year value in date or one of the year values when date contains two years
    1 - (default) year value matches the year value in date or one of the year values when date contains two years
    2 - year value matches the year value in date when date is in the form YYYY-MM-DD and year is disambiguated (|year=YYYYx)
    2 - year value matches the year value in date when date is in the form YYYY-MM-DD and year is disambiguated (|year=YYYYx)
    the numeric value in <result> determines the 'output' if any from this function:
    0 – adds error message to error_list sequence table
    1 – adds maint cat
    2 – does nothing


    ]]
    ]]


    local function year_date_check (year_string, date_string)
    local function year_date_check (year_string, year_origin, date_string, date_origin, error_list)
    local year;
    local year;
    local date1;
    local date1;
    local date2;
    local date2;
    local result = 1; -- result of the test; assume that the test passes
    local result = 1; -- result of the test; assume that the test passes
     
    year = year_string:match ('(%d%d%d%d?)');
    year = year_string:match ('(%d%d%d%d?)');


    Line 780: Line 790:
    result = 0;
    result = 0;
    end
    end
    else
    else -- should never get here; this function called only when no other date errors
    result = 0; -- no recognizable year in date
    result = 0; -- no recognizable year in date
    end
    end
    return result;
     
    if 0 == result then -- year / date mismatch
    table.insert (error_list, substitute (cfg.messages['mismatch'], {year_origin, date_origin})); -- add error message to error_list sequence table
    elseif 1 == result then -- redundant year / date
    set_message ('maint_date_year'); -- add a maint cat
    end
    end
    end


    Line 819: Line 834:
    ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
    ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
    ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
    ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
    -- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
    -- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
    },
    },
    ['Mdy'] = { -- date format is Mdy; reformat to:
    ['Mdy'] = { -- date format is Mdy; reformat to:
    Line 825: Line 840:
    ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
    ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
    ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
    ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
    -- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
    -- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
    },
    },
    ['dMy'] = { -- date format is dMy; reformat to:
    ['dMy'] = { -- date format is dMy; reformat to:
    Line 831: Line 846:
    ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
    ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
    ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
    ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
    -- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
    -- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
    },
    },
    ['Md-dy'] = { -- date format is Md-dy; reformat to:
    ['Md-dy'] = { -- date format is Md-dy; reformat to:
    Line 866: Line 881:
    ['any'] = {'%s %s', 'm', 'y'}, -- dmy/mdy agnostic
    ['any'] = {'%s %s', 'm', 'y'}, -- dmy/mdy agnostic
    },
    },
    -- ['yMd'] = { -- not supported at en.wiki
    -- ['yMd'] = { -- not supported at en.wiki
    -- ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
    -- ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
    -- ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
    -- ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
    -- ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
    -- ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
    -- },
    -- },
    }
    }


    Line 887: Line 902:
    end
    end


    -- yMd is not supported at en.wiki
    -- yMd is not supported at en.wiki; if yMd is supported at your wiki, uncomment the next line
    -- if yMd is supported at your wiki, uncomment the next line
    -- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki
    -- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki
    -- if yMd is supported at your wiki, remove or comment-out the next line
    -- if yMd is supported at your wiki, remove or comment-out the next line
    if 'yMd' == format_param then -- yMd not supported at en.wiki
    if 'yMd' == format_param then -- yMd not supported at en.wiki
    Line 1,034: Line 1,048:
    local n;
    local n;
    for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list
    for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list
    if is_set (param_val.val) then
    if is_set (param_val.val) and
    if not mw.ustring.match (param_val.val, '%d%d%d%d%-%d%d%-%d%d') then -- for those that are not ymd dates (ustring because here digits may not be Western)
    not mw.ustring.match (param_val.val, patterns.ymd[1]) then -- for those that are not ymd dates (ustring because here digits may not be Western)
    param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash
    param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash
    if 0 ~= n then
    if 0 ~= n then
    Line 1,041: Line 1,055:
    result = true;
    result = true;
    end
    end
    end
    end
    end
    end
    end
    return result; -- so we know if any hyphens were replaced
    return result; -- so we know if any hyphens were replaced
    end
    --[[--------------------------< E D T F _ T R A N S F O R M >--------------------------------------------------

    Walks the list of date-holding parameters and rewrites any EDTF formatted date as a MOS compliant date.
    Only YYYY-MM-XX is supported at this time.  Not called if the cs1|2 template has any date errors.

    Must be done before reformat_dates() and before date_hyphen_to_dash().

    Modifies date_parameters_list in place; returns true if any transformation was performed, else returns false.

    ]]

    local function edtf_transform (date_parameters_list)
        local transformed = false; -- becomes true once any parameter value has been rewritten

        for param_name, param_val in pairs (date_parameters_list) do -- visit every date-holding parameter in the list
            local value = param_val.val;
            if is_set (value) then
                local edtf_year, edtf_month = value:match (patterns.ymx[1]); -- both nil when value is not an EDTF YYYY-MM-XX date
                if edtf_year then
                    local timestamp = os.time ({year = edtf_year, month = edtf_month, day = 1}); -- os.time() requires a day; use the first of the month
                    date_parameters_list[param_name].val = mw.text.trim (os.date ('%B %Y', timestamp)); -- replace with month-name and year
                    transformed = true;
                end
            end
        end

        return transformed; -- so the caller knows if a transform was done
    end
    end


    Line 1,093: Line 1,133:
    return modified;
    return modified;
    end
    end




    Line 1,106: Line 1,146:
    is_set = utilities_page_ptr.is_set;
    is_set = utilities_page_ptr.is_set;
    in_array = utilities_page_ptr.in_array;
    in_array = utilities_page_ptr.in_array;
    -- set_message = utilities_page_ptr.set_message;
    set_message = utilities_page_ptr.set_message;
    substitute = utilities_page_ptr.substitute;
    wrap_style = utilities_page_ptr.wrap_style;
    wrap_style = utilities_page_ptr.wrap_style;


    Line 1,122: Line 1,163:
    date_hyphen_to_dash = date_hyphen_to_dash,
    date_hyphen_to_dash = date_hyphen_to_dash,
    date_name_xlate = date_name_xlate,
    date_name_xlate = date_name_xlate,
    edtf_transform = edtf_transform,
    set_selected_modules = set_selected_modules
    set_selected_modules = set_selected_modules
    }
    }