Module:Citation/CS1: Difference between revisions

Synch from sandbox;
m>Trappist the monk
(Fix for broken |ref=harv;)
m>Trappist the monk
(Synch from sandbox;)
Line 53: Line 53:
-- Populates numbered arguments in a message string using an argument table.
-- Populates numbered arguments in a message string using an argument table.
function substitute( msg, args )
function substitute( msg, args )
    return args and tostring( mw.message.newRawMessage( msg, args ) ) or msg;
-- return args and tostring( mw.message.newRawMessage( msg, args ) ) or msg;
return args and mw.message.newRawMessage( msg, args ):plain() or msg;
end
 
--[[
Kern a title so that a quote mark at its very start or very end does not collide with the quote mark that the Module itself places around
|title= or |chapter= values.

A leading single or double quote mark is wrapped in a <span> carrying 0.2em of left padding; a trailing single or double quote mark is wrapped
in a <span> carrying 0.2em of right padding.  Quote marks elsewhere in the string, and strings without a leading or trailing quote mark, are left unchanged.
	"'Unkerned title with leading and trailing single quote marks'"
	" 'Kerned title with leading and trailing single quote marks' " (in real life the kerning isn't as wide as this example)

Returns the (possibly modified) string.
]]
function kern_quotes (str)
	-- each rule is { pattern, replacement }; %1 in the replacement is the captured quote mark
	local kern_rules = {
		{ "^([\"\'])", '<span style="padding-left:0.2em;">%1</span>' },		-- leading quote mark gets left-side padding
		{ "([\"\'])$", '<span style="padding-right:0.2em;">%1</span>' },	-- trailing quote mark gets right-side padding
	};
	local kerned = str;
	for _, rule in ipairs( kern_rules ) do
		kerned = string.gsub( kerned, rule[1], rule[2], 1 );				-- at most one replacement per rule; the match count is discarded
	end
	return kerned;
end
end


Line 120: Line 136:
Looks for a parameter's name in the whitelist.
Looks for a parameter's name in the whitelist.


Parameters in the whitelist can have three valuse:
Parameters in the whitelist can have three values:
true - active, supported parameters
true - active, supported parameters
false - deprecated, supported parameters
false - deprecated, supported parameters
Line 240: Line 256:
         label=handler.label , prefix="//www.amazon."..domain.."/dp/",id=id,
         label=handler.label , prefix="//www.amazon."..domain.."/dp/",id=id,
         encode=handler.encode, separator = handler.separator})
         encode=handler.encode, separator = handler.separator})
end
--[[
Format PMID and do simple error checking.  PMIDs are sequential numbers beginning at 1 and counting up.  This code checks the PMID to see that it
contains only digits and is not greater than test_limit; the value in local variable test_limit will need to be updated periodically as more PMIDs are issued.

Parameter:
	id - the PMID as a string

Returns the result of externallinkid() for the PMID handler; when the PMID fails validation, a space and the result of seterror( 'bad_pmid' )
are appended.  An empty id is treated as invalid rather than being passed to the numeric range test (where tonumber('') would yield nil and the
comparison would raise an error).
]]
function pmid(id)
	local test_limit = 30000000;											-- update this value as PMIDs approach
	local handler = cfg.id_handlers['PMID'];
	local err_cat = '';														-- presume that PMID is valid

	local id_num = id:match("^%d+$") and tonumber(id);						-- number only when id is one or more digits and nothing else
	if not id_num or 1 > id_num or test_limit < id_num then				-- not all digits, empty, or outside test limit boundaries
		err_cat = ' ' .. seterror( 'bad_pmid' );							-- set an error message
	end

	return externallinkid({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
end


Line 299: Line 337:
inactive = ""  
inactive = ""  
end
end
if ( string.sub(id,1,3) ~= "10." ) then    
 
if nil == id:match("^10%.[^%s–]-[^%p]$") then -- doi must begin with '10.', must not contain spaces or endashes, and must not end with punctuation
cat = seterror( 'bad_doi' );
cat = seterror( 'bad_doi' );
end
end
return text .. inactive .. cat  
return text .. inactive .. ' ' .. cat  
end
end


Line 381: Line 420:
end
end


if "pressrelease" == cite_class then -- if this citation is cite press release
if "podcast" == cite_class then -- if this citation is cite podcast
return "Podcast"; -- display podcast annotation
 
elseif "pressrelease" == cite_class then -- if this citation is cite press release
return "Press release"; -- display press release annotation
return "Press release"; -- display press release annotation


elseif "speech" == cite_class then -- if this citation is cite speech
return "Speech"; -- display speech annotation
elseif "techreport" == cite_class then -- if this citation is cite techreport
elseif "techreport" == cite_class then -- if this citation is cite techreport
return "Technical report"; -- display techreport annotation
return "Technical report"; -- display techreport annotation
Line 395: Line 434:
end
end


-- returns a number according to the month in a date: 1 for January, etc.  If not a valid month, returns 0
-- returns a number according to the month in a date: 1 for January, etc.  Capitalization and spelling must be correct. If not a valid month, returns 0
function get_month_number (month)
function get_month_number (month)
local long_months = {['january']=1, ['february']=2, ['march']=3, ['april']=4, ['may']=5, ['june']=6, ['july']=7, ['august']=8, ['september']=9, ['october']=10, ['november']=11, ['december']=12};
local long_months = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12};
local short_months = {['jan']=1, ['feb']=2, ['mar']=3, ['apr']=4, ['may']=5, ['jun']=6, ['jul']=7, ['aug']=8, ['sep']=9, ['oct']=10, ['nov']=11, ['dec']=12};
local short_months = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12};
local temp;
local temp;
temp=long_months[month:lower()];
temp=long_months[month];
if temp then return temp; end -- if month is the long-form name
if temp then return temp; end -- if month is the long-form name
temp=short_months[month:lower()];
temp=short_months[month];
if temp then return temp; end -- if month is the short-form name
if temp then return temp; end -- if month is the short-form name
return 0; -- misspelled or not a month name
return 0; -- misspelled, improper case, or not a month name
end
end


-- returns true if date has one of the five seasons.  Else false.
-- returns a number according to the sequence of seasons in a year: 1 for Winter, etc.  Capitalization and spelling must be correct. If not a valid season, returns 0
function is_valid_season (season)
function get_season_number (season)
if inArray( season, {'winter', 'spring', 'summer', 'fall', 'autumn'} ) then
local season_list = {['Winter']=1, ['Spring']=2, ['Summer']=3, ['Fall']=4, ['Autumn']=4}
return true;
local temp;
end
temp=season_list[season];
return false;
if temp then return temp; end -- if season is a valid name return its number
return 0; -- misspelled, improper case, or not a season name
end
end


Line 445: Line 485:
end
end


--Check a pair of months or seasons to see if both are valid members of a month or season pair.
--[[
--TODO: Check order to make sure that the left month/season properly precedes the right month/season
Check a pair of months or seasons to see if both are valid members of a month or season pair.
 
Month pairs are expected to be left to right, earliest to latest in time.  Similarly, seasons are also left to right, earliest to latest in time.  There is
an oddity with seasons.  Winter is assigned a value of 1, spring 2, ..., fall and autumn 4.  Because winter can follow fall/autumn at the end of a calendar year, a special test
is made to see if |date=Fall-Winter yyyy (4-1) is the date.
]]


function is_valid_month_season_range(range_start, range_end)
function is_valid_month_season_range(range_start, range_end)
if 0 == get_month_number (range_start:lower()) then -- is this a month range?
local range_start_number = get_month_number (range_start);
if true == is_valid_season (range_start:lower()) then -- not a month range, is this a season range?
return is_valid_season (range_end:lower()); -- range_start is season; return true if range_end also a season; else false
if 0 == range_start_number then -- is this a month range?
local range_start_number = get_season_number (range_start); -- not a month; is it a season? get start season number
local range_end_number = get_season_number (range_end); -- get end season number
 
if 0 ~= range_start_number then -- is start of range a season?
if range_start_number < range_end_number then -- range_start is a season
return true; -- return true when range_end is also a season and follows start season; else false
end
if 4 == range_start_number and 1 == range_end_number then -- special case when range is Fall-Winter or Autumn-Winter
return true;
end
end
end
return false; -- range_start is not a month or a season
return false; -- range_start is not a month or a season; or range_start is a season and range_end is not; or improper season sequence
end
end
if 0 == get_month_number (range_end:lower()) then -- range_start is a month; is range_end also a  month?
 
return false; -- not a month range
local range_end_number = get_month_number (range_end); -- get end month number
if range_start_number < range_end_number then -- range_start is a month; does range_start precede range_end?
return true; -- if yes, return true
end
end
return true;
return false; -- range_start month number is greater than or equal to range end number; or range end isn't a month
end
end


Line 480: Line 537:
function check_date (date_string)
function check_date (date_string)
local year;
local year;
local month;
local month=0; -- assume that month and day are not used; if either is zero then final year/month/day validation is not necessary
local day;
local day=0;
local day2=0; -- second day in a day range
local anchor_year;
local anchor_year;
local coins_date;
local coins_date;


if date_string:match("^%d%d%d%d%-%d%d%-%d%d$") then -- Year-initial numerical year month day format
if date_string:match("^%d%d%d%d%-%d%d%-%d%d$") then -- Year-initial numerical year month day format
coins_date = date_string:match("%d%d%d%d%-%d%d%-%d%d");
year, month, day=string.match(date_string, "(%d%d%d%d)%-(%d%d)%-(%d%d)");
year, month, day=string.match(date_string, "(%d%d%d%d)%-(%d%d)%-(%d%d)");
month=tonumber(month);
if 12 < month or 1 > month or 1583 > tonumber(year) then return false; end -- month number not valid or not Gregorian calendar
anchor_year = year;
anchor_year = year;
month=tonumber(month);
if 12 < month or 1 > month then return false; end


elseif date_string:match("^%a+%s*%d%d*%s*,%s*%d%d%d%d%a?$") then -- month-initial: month day, year
elseif date_string:match("^%a+ +[1-9]%d?, +[1-9]%d%d%d%a?$") then -- month-initial: month day, year
coins_date = date_string:match("%a+%s*%d%d*%s*,%s*%d%d%d%d");
month, day, anchor_year, year=string.match(date_string, "(%a+)%s*(%d%d?),%s*((%d%d%d%d)%a?)");
month, day, anchor_year, year=string.match(date_string, "(%a+)%s*(%d%d*)%s*,%s*((%d%d%d%d)%a?)");
month = get_month_number (month);
month = get_month_number (month:lower());
if 0 == month then return false; end -- return false if month text isn't one of the twelve months
if 0 == month then return false; end -- return false if month text isn't one of the twelve months
elseif date_string:match("^%d%d*%s*%a+%s*%d%d%d%d%a?$") then -- date-initial: day month year
elseif date_string:match("^%a+ +[1-9]%d?–[1-9]%d?, +[1-9]%d%d%d%a?$") then -- month-initial day range: month day–day, year; days are separated by endash
coins_date = date_string:match("%d%d*%s*%a+%s*%d%d%d%d");
month, day, day2, anchor_year, year=string.match(date_string, "(%a+) +(%d%d?)–(%d%d?), +((%d%d%d%d)%a?)");
if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same;
month = get_month_number (month);
if 0 == month then return false; end -- return false if month text isn't one of the twelve months
 
elseif date_string:match("^[1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day-initial: day month year
day, month, anchor_year, year=string.match(date_string, "(%d%d*)%s*(%a+)%s*((%d%d%d%d)%a?)");
day, month, anchor_year, year=string.match(date_string, "(%d%d*)%s*(%a+)%s*((%d%d%d%d)%a?)");
month = get_month_number (month:lower());
month = get_month_number (month);
if 0 == month then return false; end -- return false if month text isn't one of the twelve months
 
elseif date_string:match("^[1-9]%d?–[1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day-range-initial: day–day month year; days are separated by endash
day, day2, month, anchor_year, year=string.match(date_string, "(%d%d?)–(%d%d?) +(%a+) +((%d%d%d%d)%a?)");
if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same;
month = get_month_number (month);
if 0 == month then return false; end -- return false if month text isn't one of the twelve months
if 0 == month then return false; end -- return false if month text isn't one of the twelve months


elseif mw.ustring.match (date_string, "^%a+%s*[%s%-/–]%s*%a+%s*%d%d%d%d%a?$") then -- month/season range year
elseif mw.ustring.match (date_string, "^%a+%a+ +[1-9]%d%d%d%a?$") then -- month/season range year; months separated by endash
local month2
local month2
coins_date = mw.ustring.match (date_string, "%a+%s*[%s%-/–]%s*%a+%s*%d%d%d%d");
month, month2, anchor_year, year=mw.ustring.match (date_string, "(%a+)[%-/–](%a+)%s*((%d%d%d%d)%a?)");
coins_date= mw.ustring.gsub( coins_date, "–", "-" ); -- replace ndash with hyphen
month, month2, anchor_year, year=mw.ustring.match (date_string, "(%a+)%s*[%s%-/–]%s*(%a+)%s*((%d%d%d%d)%a?)");
day=0; -- mark day as not used
if false == is_valid_month_season_range(month, month2) then
if false == is_valid_month_season_range(month, month2) then
return false;
return false;
end
end
elseif date_string:match("^%a+%s*%d%d%d%d%a?$") then -- month/season year
elseif date_string:match("^%a+ +%d%d%d%d%a?$") then -- month/season year
coins_date = date_string:match("%a+%s*%d%d%d%d");
month, anchor_year, year=string.match(date_string, "(%a+)%s*((%d%d%d%d)%a?)");
month, anchor_year, year=string.match(date_string, "(%a+)%s*((%d%d%d%d)%a?)");
day=0; -- mark day as not used
if 0 == get_month_number (month) then -- if month text isn't one of the twelve months, might be a season
local season=month; -- copy
if 0 == get_season_number (month) then -- not a month, is it a season?
month = get_month_number (month:lower());
if month == 0 then -- if month text isn't one of the twelve months, might be a season
if false == is_valid_season (season:lower()) then
return false; -- return false not a month or one of the five seasons
return false; -- return false not a month or one of the five seasons
end
end
end
end


elseif date_string:match("^%d%d%d%d?%a?$") then -- year; here accept either YYY or YYYY
elseif date_string:match("^[1-9]%d%d%d?%a?$") then -- year; here accept either YYY or YYYY
coins_date = date_string:match("^%d%d%d%d?");
anchor_year, year=string.match(date_string, "((%d%d%d%d?)%a?)");
anchor_year, year=string.match(date_string, "((%d%d%d%d?)%a?)");
month, day = 0, 0; -- mark day and month as not used
 
else
else
return false; -- date format not one of the MOS:DATE approved formats
return false; -- date format not one of the MOS:DATE approved formats
Line 535: Line 594:


if 0~=month and 0~=day then -- check year month day dates for validity
if 0~=month and 0~=day then -- check year month day dates for validity
if 0~=day2 then -- If there is a second day (d–d Mmm YYYY or Mmm d–d, YYYY) test the second date
if false==is_valid_date(year,month,day2) then
return false; -- second date in date range string is not a real date return false; unset anchor_year and coins_date
end -- if second date range string is valid, fall through to test the first date range
end
if false==is_valid_date(year,month,day) then
if false==is_valid_date(year,month,day) then
return false; -- date string is not a real date return false; unset anchor_year and coins_date
return false; -- date string is not a real date return false; unset anchor_year and coins_date
end
end
end
end
coins_date= mw.ustring.gsub( date_string, "–", "-" ); -- if here, then date_string is valid; set coins_date and replace any ndash with a hyphen
return true, anchor_year, coins_date; -- format is good and date string represents a real date
return true, anchor_year, coins_date; -- format is good and date string represents a real date
Line 561: Line 627:
for k, v in pairs(date_parameters_list) do -- for each date-holding parameter in the list
for k, v in pairs(date_parameters_list) do -- for each date-holding parameter in the list
if is_set(v) then -- if the parameter has a value
if is_set(v) then -- if the parameter has a value
if v:match("^c%.%s%d%d%d%d?%a?$") then -- special case for c. year or with or without CITEREF disambiguator - only |date= and |year=
if v:match("^c%. [1-9]%d%d%d?%a?$") then -- special case for c. year or with or without CITEREF disambiguator - only |date= and |year=
if 'date'==k then
if 'date'==k then
good_date, anchor_year, COinS_date = true, v:match("((c%.%s%d%d%d%d?)%a?)"); -- anchor year and COinS_date only from |date= parameter
good_date, anchor_year, COinS_date = true, v:match("((c%. [1-9]d%d%d?)%a?)"); -- anchor year and COinS_date only from |date= parameter
elseif 'year'==k then
elseif 'year'==k then
good_date =  true;
good_date =  true;
end
end
elseif 'year'==k then -- if the parameter is |year= (but not c. year)
elseif 'year'==k then -- if the parameter is |year= (but not c. year)
if v:match("^%d%d%d%d?%a?$") then -- year with or without CITEREF disambiguator
if v:match("^[1-9]%d%d%d?%a?$") then -- year with or without CITEREF disambiguator
good_date =  true;
good_date =  true;
end
end
Line 912: Line 978:
         elseif k == 'PMC' then
         elseif k == 'PMC' then
             table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } );
             table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } );
        elseif k == 'PMID' then
            table.insert( new_list, {handler.label, pmid( v ) } );
         elseif k == 'ISSN' then
         elseif k == 'ISSN' then
         table.insert( new_list, {handler.label, issn( v ) } );
         table.insert( new_list, {handler.label, issn( v ) } );
Line 1,278: Line 1,346:
if not is_embargoed(Embargo) then
if not is_embargoed(Embargo) then
URL=cfg.id_handlers['PMC'].prefix .. ID_list['PMC']; -- set url to be the same as the PMC external link if not embargoed
URL=cfg.id_handlers['PMC'].prefix .. ID_list['PMC']; -- set url to be the same as the PMC external link if not embargoed
URLorigin = cfg.id_handlers['PMC'].parameters[1]; -- set URLorigin to parameter name for use in error message if citation is missing a |title=
end
end
end
end
Line 1,430: Line 1,499:
         not is_set(TranscriptURL) then
         not is_set(TranscriptURL) then
          
          
         -- Test if cite web is called without giving a URL
         -- Test if cite web or cite podcast |url= is missing or empty
        if ( config.CitationClass == "web" ) then
if inArray(config.CitationClass, {"web","podcast"}) then
            table.insert( z.message_tail, { seterror( 'cite_web_url', {}, true ) } );
table.insert( z.message_tail, { seterror( 'cite_web_url', {}, true ) } );
        end
end
          
          
         -- Test if accessdate is given without giving a URL
         -- Test if accessdate is given without giving a URL
Line 1,476: Line 1,545:
         TransChapter = wrap( 'trans-italic-title', TransChapter );
         TransChapter = wrap( 'trans-italic-title', TransChapter );
     else
     else
Chapter = kern_quotes (Chapter); -- if necessary, separate chapter title's leading and trailing quote marks from Module provided quote marks
         Chapter = wrap( 'quoted-title', Chapter );
         Chapter = wrap( 'quoted-title', Chapter );
         TransChapter = wrap( 'trans-quoted-title', TransChapter );
         TransChapter = wrap( 'trans-quoted-title', TransChapter );
Line 1,523: Line 1,593:
      
      
     if is_set(Periodical) then
     if is_set(Periodical) then
Title = kern_quotes (Title); -- if necessary, separate title's leading and trailing quote marks from Module provided quote marks
         Title = wrap( 'quoted-title', Title );
         Title = wrap( 'quoted-title', Title );
         TransTitle = wrap( 'trans-quoted-title', TransTitle );
         TransTitle = wrap( 'trans-quoted-title', TransTitle );
     elseif inArray(config.CitationClass, {"web","news","pressrelease","conference"}) and
     elseif inArray(config.CitationClass, {"web","news","pressrelease","conference","podcast"}) and
             not is_set(Chapter) then
             not is_set(Chapter) then
Title = kern_quotes (Title); -- if necessary, separate title's leading and trailing quote marks from Module provided quote marks
         Title = wrap( 'quoted-title', Title );
         Title = wrap( 'quoted-title', Title );
         TransTitle = wrap( 'trans-quoted-title', TransTitle );
         TransTitle = wrap( 'trans-quoted-title', TransTitle );
Line 1,593: Line 1,665:
         if is_set(Pages) then
         if is_set(Pages) then
             if is_set(Periodical) and
             if is_set(Periodical) and
                 not inArray(config.CitationClass, {"encyclopaedia","web","book","news"}) then
                 not inArray(config.CitationClass, {"encyclopaedia","web","book","news","podcast"}) then
                 Pages = ": " .. Pages;
                 Pages = ": " .. Pages;
             elseif tonumber(Pages) ~= nil then
             elseif tonumber(Pages) ~= nil then
Line 1,603: Line 1,675:
     else
     else
         if is_set(Periodical) and
         if is_set(Periodical) and
             not inArray(config.CitationClass, {"encyclopaedia","web","book","news"}) then
             not inArray(config.CitationClass, {"encyclopaedia","web","book","news","podcast"}) then
             Page = ": " .. Page;
             Page = ": " .. Page;
         else
         else
Line 1,641: Line 1,713:
]]
]]
if is_set (Language) then
if is_set (Language) then
-- local name = mw.language.fetchLanguageName( Language:lower(), "en" ); -- experiment: this seems to return correct ISO 639-1 language names
local name = cfg.iso639_1[Language:lower()]; -- get the language name if Language parameter has a valid iso 639-1 code
local name = cfg.iso639_1[Language:lower()]; -- get the language name if Language parameter has a valid iso 639-1 code
if nil == name then
if nil == name then
Line 1,659: Line 1,732:
-- handle type parameter for those CS1 citations that have default values
-- handle type parameter for those CS1 citations that have default values


if inArray(config.CitationClass, {"pressrelease","techreport","thesis", "speech"}) then
if inArray(config.CitationClass, {"podcast","pressrelease","techreport","thesis"}) then
TitleType = set_titletype (config.CitationClass, TitleType);
TitleType = set_titletype (config.CitationClass, TitleType);
if is_set(Degree) and "Thesis" == TitleType then -- special case for cite thesis
if is_set(Degree) and "Thesis" == TitleType then -- special case for cite thesis
Line 1,782: Line 1,855:
     local Publisher;
     local Publisher;
     if is_set(Periodical) and
     if is_set(Periodical) and
         not inArray(config.CitationClass, {"encyclopaedia","web","pressrelease"}) then
         not inArray(config.CitationClass, {"encyclopaedia","web","pressrelease","podcast"}) then
         if is_set(PublisherName) then
         if is_set(PublisherName) then
             if is_set(PublicationPlace) then
             if is_set(PublicationPlace) then
Line 1,829: Line 1,902:
         end
         end
     end
     end
--[[
Handle the oddity that is cite speech.  This code overrides whatever may be the value assigned to TitleNote (through |department=) and forces it to be " (Speech)" so that
the annotation directly follows the |title= parameter value in the citation rather than the |event= parameter value (if provided).
]]
if "speech" == config.CitationClass then -- cite speech only
TitleNote = " (Speech)"; -- annotate the citation
if is_set (Periodical) then -- if Periodical, perhaps because of an included |website= or |journal= parameter
if is_set (Conference) then -- and if |event= is set
Conference = Conference .. sepc .. " "; -- then add appropriate punctuation to the end of the Conference variable before rendering
end
end
end


     -- Piece all bits together at last.  Here, all should be non-nil.
     -- Piece all bits together at last.  Here, all should be non-nil.
Line 1,951: Line 2,037:
             end
             end
names[ #names + 1 ] = first_set(Year, anchor_year); -- Year first for legacy citations
names[ #names + 1 ] = first_set(Year, anchor_year); -- Year first for legacy citations
-- names[ #names + 1 ] = Year or anchor_year; -- Year first for legacy citations
             id = anchorid(names)
             id = anchorid(names)
         end
         end
Anonymous user