Module:Citation/CS1/Date validation: Difference between revisions

    m>Trappist the monk
    (Synch from sandbox;)
    m>Trappist the monk
    (Synch from sandbox;)
    Line 62: Line 62:


    local function get_season_number (season)
    local function get_season_number (season)
    local season_list = {['Winter']=1, ['Spring']=2, ['Summer']=3, ['Fall']=4, ['Autumn']=4}
    local season_list = {['Winter']=21, ['Spring']=22, ['Summer']=23, ['Fall']=24, ['Autumn']=24}; -- make sure these numbers do not overlap month numbers
    local temp;
    local temp;
    temp=season_list[season];
    temp=season_list[season];
    if temp then return temp; end -- if season is a valid name return its number
    if temp then return temp; end -- if season is a valid name return its number
    return 0; -- misspelled, improper case, or not a season name
    return 0; -- misspelled, improper case, or not a season name
    end
    end


    Line 76: Line 76:


    local function is_proper_name (name)
    local function is_proper_name (name)
    local name_list = {['Christmas']=1}
    local name_list = {['Christmas']=31}
    local temp;
    local temp;
    temp=name_list[name];
    temp=name_list[name];
    Line 137: Line 137:
    end
    end
    else
    else
    month_length=days_in_month[month];
    month_length=days_in_month[tonumber(month)];
    end
    end


    Line 184: Line 184:
    local range_start_number = get_month_number (range_start);
    local range_start_number = get_month_number (range_start);
    if 0 == range_start_number then -- is this a month range?
    if 0 == range_start_number then -- is this a month range?
    local range_start_number = get_season_number (range_start); -- not a month; is it a season? get start season number
    local range_start_number = get_season_number (range_start); -- not a month; is it a season? get start season number
    local range_end_number = get_season_number (range_end); -- get end season number
    local range_end_number = get_season_number (range_end); -- get end season number


    if 0 ~= range_start_number then -- is start of range a season?
    if 0 ~= range_start_number then -- is start of range a season?
    if range_start_number < range_end_number then -- range_start is a season
    if range_start_number < range_end_number then -- range_start is a season
    return true; -- return true when range_end is also a season and follows start season; else false
    return true; -- return true when range_end is also a season and follows start season; else false
    end
    end
    if 4 == range_start_number and 1 == range_end_number then -- special case when range is Fall-Winter or Autumn-Winter
    if 24 == range_start_number and 21 == range_end_number then -- special case when season range is Fall-Winter or Autumn-Winter
    return true;
    return true;
    end
    end
    Line 199: Line 199:
    end
    end


    local range_end_number = get_month_number (range_end); -- get end month number
    local range_end_number = get_month_number (range_end); -- get end month number
    if range_start_number < range_end_number then -- range_start is a month; does range_start precede range_end?
    if range_start_number < range_end_number then -- range_start is a month; does range_start precede range_end?
    if is_valid_month_range_style (range_start, range_end) then -- do months have the same style?
    if is_valid_month_range_style (range_start, range_end) then -- do months have the same style?
    return true; -- proper order and same style
    return true; -- proper order and same style
    end
    end
    end
    end
    return false; -- range_start month number is greater than or equal to range end number; or range end isn't a month
    return false; -- range_start month number is greater than or equal to range end number; or range end isn't a month
    end
    end
    --[[--------------------------< M A K E _ C O I N S _ D A T E >------------------------------------------------

    This function receives a table of date parts for one or two dates and an empty table reference declared in
    Module:Citation/CS1.  The function is called only for |date= parameters and only if the |date=<value> is
    determined to be a valid date format.  The question of what to do with invalid date formats is not answered here.

    The date parts in the input table are converted to an ISO 8601 conforming date string:
        single whole dates:     yyyy-mm-dd
        month and year dates:   yyyy-mm
        year dates:             yyyy
        ranges:                 yyyy-mm-dd/yyyy-mm-dd
                                yyyy-mm/yyyy-mm
                                yyyy/yyyy

    Dates in the Julian calendar are reduced to year or year/year so that we don't have to do calendar conversion from
    Julian to Proleptic Gregorian.

    The input table has:
        year, year2 – always present; if before 1582, ignore months and days if present
        month, month2 – 0 if not provided, 1-12 for months, 21-24 for seasons; 31– proper name dates
        day, day2 – 0 if not provided, 1-31 for days

    the output table receives:
        rftdate:  an ISO 8601 formatted date
        rftchron: a free-form version of the date, usually without year which is in rftdate (season ranges and propername dates)
        rftssn:   one of four season keywords: winter, spring, summer, fall (lowercase)

    Returns nothing; all results are written into tCOinS_date.

    ]]

    local function make_COinS_date (input, tCOinS_date)
        local date;                                             -- one date or first date in a range
        local date2 = '';                                       -- end of range date

        if 1582 > tonumber(input.year) or 20 < tonumber(input.month) then      -- Julian calendar or season so &rft.date gets year only
            date = input.year;
            if 0 ~= input.year2 and input.year ~= input.year2 then             -- if a range, only the second year portion when not the same as range start year
                date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2))    -- assemble the date range
            end

            if 20 < tonumber(input.month) then                  -- if season or propername date
                local season = {[21]='winter', [22]='spring', [23]='summer', [24]='fall', [31]='Christmas'};   -- seasons lowercase, no autumn; proper names use title case
                -- the table is keyed by number; normalise the key so that a month supplied as a
                -- numeric string still resolves (the comparisons here already allow for strings)
                local name = season[tonumber(input.month)];

                if 0 == input.month2 then                       -- single season date
                    if 30 < tonumber(input.month) then
                        tCOinS_date.rftchron = name;            -- proper name dates
                    else
                        tCOinS_date.rftssn = name;              -- seasons
                    end
                else                                            -- season range with a second season specified
                    -- input.month2 is known to be non-zero in this branch, so no further test of it
                    -- is needed (the former test read an undeclared global named month2)
                    local name2 = season[tonumber(input.month2)];
                    tCOinS_date.rftssn = name;                  -- start of range season; keep this?
                    if input.year ~= input.year2 then           -- season year – season year range
                        tCOinS_date.rftchron = string.format ('%s %s – %s %s', name, input.year, name2, input.year2);
                    else                                        -- season–season year range
                        tCOinS_date.rftchron = name .. '–' .. name2;
                    end
                end
            end

            tCOinS_date.rftdate = date;
            return;                                             -- done
        end

        if 0 ~= input.day then
            date = string.format ('%s-%.2d-%.2d', input.year, tonumber(input.month), tonumber(input.day));     -- whole date
        elseif 0 ~= input.month then
            date = string.format ('%s-%.2d', input.year, tonumber(input.month));   -- year and month
        else
            date = string.format ('%s', input.year);            -- just year
        end

        if 0 ~= input.year2 then
            if 0 ~= input.day2 then
                date2 = string.format ('/%s-%.2d-%.2d', input.year2, tonumber(input.month2), tonumber(input.day2));    -- whole date
            elseif 0 ~= input.month2 then
                date2 = string.format ('/%s-%.2d', input.year2, tonumber(input.month2));   -- year and month
            else
                date2 = string.format ('/%s', input.year2);     -- just year
            end
        end

        tCOinS_date.rftdate = date .. date2;                    -- date2 has the '/' separator
        return;
    end


    --[[--------------------------< C H E C K _ D A T E >----------------------------------------------------------
    --[[--------------------------< C H E C K _ D A T E >----------------------------------------------------------
    Line 224: Line 310:
    true, anchor_year, COinS_date
    true, anchor_year, COinS_date
    anchor_year can be used in CITEREF anchors
    anchor_year can be used in CITEREF anchors
    COinS_date is date_string without anchor_year disambiguator if any
    COinS_date is date_string without anchor_year disambiguator if any -- this is being obsoleted.  In future:
    COinS_date is ISO 8601 format date; see make_COinS_date()
    ]]
    ]]
    local function check_date (date_string)
    local function check_date (date_string, tCOinS_date)
    local year; -- assume that year2, months, and days are not used;
    local year; -- assume that year2, months, and days are not used;
    local year2=0; -- second year in a year range
    local year2=0; -- second year in a year range
    Line 238: Line 325:
    if date_string:match("^%d%d%d%d%-%d%d%-%d%d$") then -- year-initial numerical year month day format
    if date_string:match("^%d%d%d%d%-%d%d%-%d%d$") then -- year-initial numerical year month day format
    year, month, day=string.match(date_string, "(%d%d%d%d)%-(%d%d)%-(%d%d)");
    year, month, day=string.match(date_string, "(%d%d%d%d)%-(%d%d)%-(%d%d)");
    month=tonumber(month);
    if 12 < tonumber(month) or 1 > tonumber(month) or 1583 > tonumber(year) then return false; end -- month number not valid or not Gregorian calendar
    if 12 < month or 1 > month or 1583 > tonumber(year) then return false; end -- month number not valid or not Gregorian calendar
    anchor_year = year;
    anchor_year = year;


    Line 252: Line 338:
    month = get_month_number (month);
    month = get_month_number (month);
    if 0 == month then return false; end -- return false if month text isn't one of the twelve months
    if 0 == month then return false; end -- return false if month text isn't one of the twelve months
    month2=month; -- for metadata
    year2=year;


    elseif date_string:match("^[1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day-initial: day month year
    elseif date_string:match("^[1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day-initial: day month year
    Line 263: Line 351:
    month = get_month_number (month);
    month = get_month_number (month);
    if 0 == month then return false; end -- return false if month text isn't one of the twelve months
    if 0 == month then return false; end -- return false if month text isn't one of the twelve months
    month2=month; -- for metadata
    year2=year;


    elseif date_string:match("^[1-9]%d? +%a+ – [1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day initial month-day-range: day month - day month year; uses spaced endash
    elseif date_string:match("^[1-9]%d? +%a+ – [1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day initial month-day-range: day month - day month year; uses spaced endash
    day, month, day2, month2, anchor_year, year=date_string:match("(%d%d?) +(%a+) – (%d%d?) +(%a+) +((%d%d%d%d)%a?)");
    day, month, day2, month2, anchor_year, year=date_string:match("(%d%d?) +(%a+) – (%d%d?) +(%a+) +((%d%d%d%d)%a?)");
    if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later;
    if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later;
    month = get_month_number (month);
    month = get_month_number (month); -- for metadata
    month2 = get_month_number (month2);
    month2 = get_month_number (month2);
    year2=year;


    elseif date_string:match("^%a+ +[1-9]%d? – %a+ +[1-9]%d?, +[1-9]%d%d%d?%a?$") then -- month initial month-day-range: month day – month day, year;  uses spaced endash
    elseif date_string:match("^%a+ +[1-9]%d? – %a+ +[1-9]%d?, +[1-9]%d%d%d?%a?$") then -- month initial month-day-range: month day – month day, year;  uses spaced endash
    month, day, month2, day2, anchor_year, year=date_string:match("(%a+) +(%d%d?) – (%a+) +(%d%d?), +((%d%d%d%d)%a?)");
    month, day, month2, day2, anchor_year, year=date_string:match("(%a+) +(%d%d?) – (%a+) +(%d%d?), +((%d%d%d%d)%a?)");
    if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end
    if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end
    month = get_month_number (month);
    month = get_month_number (month); -- for metadata
    month2 = get_month_number (month2);
    month2 = get_month_number (month2);
    year2=year;


    elseif date_string:match("^[1-9]%d? +%a+ +[1-9]%d%d%d – [1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day initial month-day-year-range: day month year - day month year; uses spaced endash
    elseif date_string:match("^[1-9]%d? +%a+ +[1-9]%d%d%d – [1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day initial month-day-year-range: day month year - day month year; uses spaced endash
    day, month, year, day2, month2, anchor_year, year2=date_string:match("(%d%d?) +(%a+) +(%d%d%d%d?) – (%d%d?) +(%a+) +((%d%d%d%d?)%a?)");
    day, month, year, day2, month2, anchor_year, year2=date_string:match("(%d%d?) +(%a+) +(%d%d%d%d?) – (%d%d?) +(%a+) +((%d%d%d%d?)%a?)");
    if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
    if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
    if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style
    if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style
    month = get_month_number (month);
    month = get_month_number (month); -- for metadata
    month2 = get_month_number (month2);
    month2 = get_month_number (month2);


    elseif date_string:match("^%a+ +[1-9]%d?, +[1-9]%d%d%d – %a+ +[1-9]%d?, +[1-9]%d%d%d%a?$") then -- month initial month-day-year-range: month day, year – month day, year;  uses spaced endash
    elseif date_string:match("^%a+ +[1-9]%d?, +[1-9]%d%d%d – %a+ +[1-9]%d?, +[1-9]%d%d%d%a?$") then -- month initial month-day-year-range: month day, year – month day, year;  uses spaced endash
    month, day, year, month2, day2, anchor_year, year2=date_string:match("(%a+) +(%d%d?), +(%d%d%d%d) – (%a+) +(%d%d?), +((%d%d%d%d)%a?)");
    month, day, year, month2, day2, anchor_year, year2=date_string:match("(%a+) +(%d%d?), +(%d%d%d%d) – (%a+) +(%d%d?), +((%d%d%d%d)%a?)");
    if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
    if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
    if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style
    if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style
    month = get_month_number (month);
    month = get_month_number (month); -- for metadata
    month2 = get_month_number (month2);
    month2 = get_month_number (month2);


    elseif date_string:match("^%a+ +[1-9]%d%d%d–%d%d%a?$") then -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash
    elseif date_string:match("^%a+ +[1-9]%d%d%d–%d%d%a?$") then -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash
    if nil == date_string:match("^Winter") and nil == date_string:match("^Summer") then return false end; -- 'month' can only be Winter or Summer
    local century;
    local century;
    year, century, anchor_year, year2=date_string:match("%a+ +((%d%d)%d%d)–((%d%d)%a?)");
    month, year, century, anchor_year, year2=date_string:match("(%a+) +((%d%d)%d%d)–((%d%d)%a?)");
    if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer
    anchor_year=year..'–'..anchor_year; -- assemble anchor_year from both years
    anchor_year=year..'–'..anchor_year; -- assemble anchor_year from both years
    year2 = century..year2; -- add the century to year2 for comparisons
    year2 = century..year2; -- add the century to year2 for comparisons
    if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
    if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
    month = get_season_number (month);


    elseif date_string:match("^%a+ +[1-9]%d%d%d–[1-9]%d%d%d%a?$") then -- special case Winter/Summer year-year; year separated with unspaced endash
    elseif date_string:match("^%a+ +[1-9]%d%d%d–[1-9]%d%d%d%a?$") then -- special case Winter/Summer year-year; year separated with unspaced endash
    if nil == date_string:match("^Winter") and nil == date_string:match("^Summer") then return false end; -- 'month' can only be Winter or Summer
    month, year, anchor_year, year2=date_string:match("(%a+) +(%d%d%d%d)–((%d%d%d%d)%a?)");
    year, anchor_year, year2=date_string:match("%a+ +(%d%d%d%d)–((%d%d%d%d)%a?)");
    if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer
    anchor_year=year..'–'..anchor_year; -- assemble anchor_year from both years
    anchor_year=year..'–'..anchor_year; -- assemble anchor_year from both years
    if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
    if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
    month = get_season_number (month); -- for metadata


    elseif date_string:match("^%a+ +[1-9]%d%d%d% – %a+ +[1-9]%d%d%d%a?$") then -- month/season year - month/season year; separated by spaced endash
    elseif date_string:match("^%a+ +[1-9]%d%d%d% – %a+ +[1-9]%d%d%d%a?$") then -- month/season year - month/season year; separated by spaced endash
    Line 311: Line 405:
    if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
    if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
    if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
    if not((0 ~= get_month_number(month) and 0 ~= get_month_number(month2) and is_valid_month_range_style(month, month2)) or -- both must be month year, same month style
    if 0 ~= get_month_number(month) and 0 ~= get_month_number(month2) and is_valid_month_range_style(month, month2) then -- both must be month year, same month style
    (0 ~= get_season_number(month) and 0 ~= get_season_number(month2))) then return false; end -- or season year, not mixed
    month = get_month_number(month);
    month2 = get_month_number(month2);
    elseif 0 ~= get_season_number(month) and 0 ~= get_season_number(month2) then -- both must be or season year, not mixed
    month = get_season_number(month);
    month2 = get_season_number(month2);
    else
    return false;
    end


    elseif date_string:match ("^%a+–%a+ +[1-9]%d%d%d%a?$") then -- month/season range year; months separated by endash  
    elseif date_string:match ("^%a+–%a+ +[1-9]%d%d%d%a?$") then -- month/season range year; months separated by endash  
    month, month2, anchor_year, year=date_string:match ("(%a+)–(%a+)%s*((%d%d%d%d)%a?)");
    month, month2, anchor_year, year=date_string:match ("(%a+)–(%a+)%s*((%d%d%d%d)%a?)");
    if (not is_valid_month_season_range(month, month2)) or (not is_valid_year(year)) then
    if (not is_valid_month_season_range(month, month2)) or (not is_valid_year(year)) then return false; end
    return false;
    if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season
    month = get_month_number(month);
    month2 = get_month_number(month2);
    else
    month = get_season_number(month);
    month2 = get_season_number(month2);
    end
    end
    year2=year;
    elseif date_string:match("^%a+ +%d%d%d%d%a?$") then -- month/season year or proper-name year
    elseif date_string:match("^%a+ +%d%d%d%d%a?$") then -- month/season year or proper-name year
    Line 324: Line 431:
    if not is_valid_year(year) then return false; end
    if not is_valid_year(year) then return false; end
    if not is_valid_month_or_season (month) and 0 == is_proper_name (month) then return false; end
    if not is_valid_month_or_season (month) and 0 == is_proper_name (month) then return false; end
    if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season
    month = get_month_number(month);
    elseif 0 ~= get_season_number(month) then
    month = get_season_number(month);
    else
    month = is_proper_name (month); -- must be proper name; not supported in COinS
    end


    elseif date_string:match("^[1-9]%d%d%d?–[1-9]%d%d%d?%a?$") then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
    elseif date_string:match("^[1-9]%d%d%d?–[1-9]%d%d%d?%a?$") then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
    Line 369: Line 483:
    if false == result then return false; end
    if false == result then return false; end
    -- if here, then date_string is valid; get coins_date from date_string (leave CITEREF disambiguator) ...
    -- if here, then date_string is valid; get coins_date from date_string (leave CITEREF disambiguator) ...
    coins_date=date_string:match("^(.+%d)%a?$"); -- last character of valid disambiguatable date is always a digit
    -- coins_date=date_string:match("^(.+%d)%a?$"); -- last character of valid disambiguatable date is always a digit
    coins_date= mw.ustring.gsub(coins_date, "–", "-" ); -- ... and replace any ndash with a hyphen
    -- coins_date= mw.ustring.gsub(coins_date, "–", "-" ); -- ... and replace any ndash with a hyphen
     
    if nil ~= tCOinS_date then -- this table only passed into this function when testing |date= parameter values
    make_COinS_date ({year=year, month=month, day=day, year2=year2, month2=month2, day2=day2}, tCOinS_date); -- make an ISO 8601 date string for COinS
    end
    return true, anchor_year, coins_date; -- format is good and date string represents a real date
    return true, anchor_year; -- format is good and date string represents a real date
    -- return true, anchor_year, coins_date; -- format is good and date string represents a real date
    end
    end


    Line 386: Line 505:
    ]]
    ]]


    --function p.dates(date_parameters_list)
    local function dates(date_parameters_list, tCOinS_date)
    local function dates(date_parameters_list)
    local anchor_year; -- will return as nil if the date being tested is not |date=
    local anchor_year; -- will return as nil if the date being tested is not |date=
    local COinS_date; -- will return as nil if the date being tested is not |date=
    local COinS_date; -- will return as nil if the date being tested is not |date=
    local error_message = "";
    local error_message = "";
    local mismatch = 0;
    -- local mismatch = 0;
    local good_date = false;
    local good_date = false;
    Line 410: Line 528:
    good_date, anchor_year, COinS_date = true, v:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date
    good_date, anchor_year, COinS_date = true, v:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date
    else
    else
    good_date, anchor_year, COinS_date = check_date (v); -- go test the date
    good_date, anchor_year, COinS_date = check_date (v, tCOinS_date); -- go test the date
    end
    end
    elseif 'access-date'==k then -- if the parameter is |date=
    elseif 'access-date'==k then -- if the parameter is |date=
    Line 428: Line 546:
    end
    end
    end
    end
    return anchor_year, COinS_date, error_message, mismatch; -- and done
    -- return anchor_year, COinS_date, error_message, mismatch; -- and done
    return anchor_year, error_message; -- and done
    end
    end