Module:Citation/CS1: Difference between revisions

synch from sandbox;
m>Trappist the monk
No edit summary
(synch from sandbox;)
Line 549: Line 549:
end
end
-- if we get this far we have prefix and script
-- if we get this far we have prefix and script
name = mw.language.fetchLanguageName( lang, "en" ); -- get language name so that we can use it to categorize
name = cfg.lang_code_remap[lang] or mw.language.fetchLanguageName( lang, "en" ); -- get language name so that we can use it to categorize
if is_set (name) then -- is prefix a proper ISO 639-1 language code?
if is_set (name) then -- is prefix a proper ISO 639-1 language code?
script_value = script_value:gsub ('^%l%l%s*:%s*', ''); -- strip prefix from script
script_value = script_value:gsub ('^%l%l%s*:%s*', ''); -- strip prefix from script
Line 628: Line 628:


chapter = script_concatenate (chapter, scriptchapter) -- <bdi> tags, lang attribute, categorization, etc; must be done after title is wrapped
chapter = script_concatenate (chapter, scriptchapter) -- <bdi> tags, lang attribute, categorization, etc; must be done after title is wrapped
if is_set (chapterurl) then
chapter = external_link (chapterurl, chapter, chapter_url_source, access); -- adds bare_url_missing_title error if appropriate
end


if is_set (transchapter) then
if is_set (transchapter) then
Line 639: Line 643:
end
end


if is_set (chapterurl) then
-- if is_set (chapterurl) then
chapter = external_link (chapterurl, chapter, chapter_url_source, access); -- adds bare_url_missing_title error if appropriate
-- chapter = external_link (chapterurl, chapter, chapter_url_source, access); -- adds bare_url_missing_title error if appropriate
end
-- end


return chapter .. chapter_error;
return chapter .. chapter_error;
Line 799: Line 803:
end
end
-- limited enumerated parameters list
-- limited enumerated parameters list
name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last#)
name = name:gsub("%d+", "#" ); -- replace digit(s) with # (last25 becomes last#); note: name:gsub is string.gsub (not mw.ustring.gsub) so only ASCII digits are matched, not non-Western 'local' digits
state = whitelist.limited_numbered_arguments[ name ];
state = whitelist.limited_numbered_arguments[ name ];
if true == state then return true; end -- valid actively supported parameter
if true == state then return true; end -- valid actively supported parameter
Line 818: Line 822:
end
end
-- all enumerated parameters allowed
-- all enumerated parameters allowed
name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last#
name = name:gsub("%d+", "#" ); -- replace digit(s) with # (last25 becomes last#); note: name:gsub is string.gsub (not mw.ustring.gsub) so only ASCII digits are matched, not non-Western 'local' digits
state = whitelist.numbered_arguments[ name ];
state = whitelist.numbered_arguments[ name ];


Line 896: Line 900:


local function safe_join( tbl, duplicate_char )
local function safe_join( tbl, duplicate_char )
--[[
local f = {}; -- create a function table appropriate to type of 'duplicate character'
Note: we use string functions here, rather than ustring functions.
if 1 == #duplicate_char then -- for single byte ascii characters use the string library functions
f.gsub=string.gsub
This has considerably faster performance and should work correctly as
f.match=string.match
long as the duplicate_char is strict ASCII. The strings
f.sub=string.sub
in tbl may be ASCII or UTF8.
else -- for multi-byte characters use the ustring library functions
]]
f.gsub=mw.ustring.gsub
f.match=mw.ustring.match
f.sub=mw.ustring.sub
end
 
local str = ''; -- the output string
local str = ''; -- the output string
local comp = ''; -- what does 'comp' mean?
local comp = ''; -- what does 'comp' mean?
Line 920: Line 927:
end
end
-- typically duplicate_char is sepc
-- typically duplicate_char is sepc
if comp:sub(1,1) == duplicate_char then -- is first charactier same as duplicate_char? why test first character?
if f.sub(comp, 1,1) == duplicate_char then -- is first character same as duplicate_char? why test first character?
--  Because individual string segments often (always?) begin with terminal punct for th
--  Because individual string segments often (always?) begin with terminal punct for the
--  preceding segment: 'First element' .. 'sepc next element' .. etc?
--  preceding segment: 'First element' .. 'sepc next element' .. etc?
trim = false;
trim = false;
end_chr = str:sub(-1,-1); -- get the last character of the output string
end_chr = f.sub(str, -1,-1); -- get the last character of the output string
-- str = str .. "<HERE(enchr=" .. end_chr.. ")" -- debug stuff?
-- str = str .. "<HERE(enchr=" .. end_chr.. ")" -- debug stuff?
if end_chr == duplicate_char then -- if same as separator
if end_chr == duplicate_char then -- if same as separator
str = str:sub(1,-2); -- remove it
str = f.sub(str, 1,-2); -- remove it
elseif end_chr == "'" then -- if it might be wikimarkup
elseif end_chr == "'" then -- if it might be wikimarkup
if str:sub(-3,-1) == duplicate_char .. "''" then -- if last three chars of str are sepc''  
if f.sub(str, -3,-1) == duplicate_char .. "''" then -- if last three chars of str are sepc''  
str = str:sub(1, -4) .. "''"; -- remove them and add back ''
str = f.sub(str, 1, -4) .. "''"; -- remove them and add back ''
elseif str:sub(-5,-1) == duplicate_char .. "]]''" then -- if last five chars of str are sepc]]''  
elseif f.sub(str, -5,-1) == duplicate_char .. "]]''" then -- if last five chars of str are sepc]]''  
trim = true; -- why? why do this and next differently from previous?
trim = true; -- why? why do this and next differently from previous?
elseif str:sub(-4,-1) == duplicate_char .. "]''" then -- if last four chars of str are sepc]''  
elseif f.sub(str, -4,-1) == duplicate_char .. "]''" then -- if last four chars of str are sepc]''  
trim = true; -- same question
trim = true; -- same question
end
end
elseif end_chr == "]" then -- if it might be wikimarkup
elseif end_chr == "]" then -- if it might be wikimarkup
if str:sub(-3,-1) == duplicate_char .. "]]" then -- if last three chars of str are sepc]] wikilink  
if f.sub(str, -3,-1) == duplicate_char .. "]]" then -- if last three chars of str are sepc]] wikilink  
trim = true;
trim = true;
elseif str:sub(-3,-1) == duplicate_char .. '"]' then -- if last three chars of str are sepc"] quoted external link  
elseif f.sub(str, -3,-1) == duplicate_char .. '"]' then -- if last three chars of str are sepc"] quoted external link  
trim = true;
trim = true;
elseif str:sub(-2,-1) == duplicate_char .. "]" then -- if last two chars of str are sepc] external link
elseif f.sub(str, -2,-1) == duplicate_char .. "]" then -- if last two chars of str are sepc] external link
trim = true;
trim = true;
elseif str:sub(-4,-1) == duplicate_char .. "'']" then -- normal case when |url=something & |title=Title.
elseif f.sub(str, -4,-1) == duplicate_char .. "'']" then -- normal case when |url=something & |title=Title.
trim = true;
trim = true;
end
end
elseif end_chr == " " then -- if last char of output string is a space
elseif end_chr == " " then -- if last char of output string is a space
if str:sub(-2,-1) == duplicate_char .. " " then -- if last two chars of str are <sepc><space>
if f.sub(str, -2,-1) == duplicate_char .. " " then -- if last two chars of str are <sepc><space>
str = str:sub(1,-3); -- remove them both
str = f.sub(str, 1,-3); -- remove them both
end
end
end
end
Line 955: Line 962:
if value ~= comp then -- value does not equal comp when value contains html markup
if value ~= comp then -- value does not equal comp when value contains html markup
local dup2 = duplicate_char;
local dup2 = duplicate_char;
if dup2:match( "%A" ) then dup2 = "%" .. dup2; end -- if duplicate_char not a letter then escape it
if f.match(dup2, "%A" ) then dup2 = "%" .. dup2; end -- if duplicate_char not a letter then escape it
value = value:gsub( "(%b<>)" .. dup2, "%1", 1 ) -- remove duplicate_char if it follows html markup
value = f.gsub(value, "(%b<>)" .. dup2, "%1", 1 ) -- remove duplicate_char if it follows html markup
else
else
value = value:sub( 2, -1 ); -- remove duplicate_char when it is first character
value = f.sub(value, 2, -1 ); -- remove duplicate_char when it is first character
end
end
end
end
Line 967: Line 974:
end
end
return str;
return str;
end
end




Line 1,105: Line 1,112:


if 'vanc' == format then -- Vancouver-like author/editor name styling?
if 'vanc' == format then -- Vancouver-like author/editor name styling?
sep = ','; -- name-list separator between authors is a comma
sep = cfg.presentation['sep_nl_vanc']; -- name-list separator between authors is a comma
namesep = ' '; -- last/first separator is a space
namesep = cfg.presentation['sep_name_vanc']; -- last/first separator is a space
else
else
sep = ';' -- name-list separator between authors is a semicolon
sep = cfg.presentation['sep_nl']; -- name-list separator between authors is a semicolon
namesep = ', ' -- last/first separator is <comma><space>
namesep = cfg.presentation['sep_name']; -- last/first separator is <comma><space>
end
end
Line 1,394: Line 1,401:


local function get_iso639_code (lang, this_wiki_code)
local function get_iso639_code (lang, this_wiki_code)
local remap = {
if cfg.lang_name_remap[lang:lower()] then -- if there is a remapped name (because MediaWiki uses something that we don't think is correct)
['bangla'] = {'Bengali', 'bn'}, -- MediaWiki returns Bangla (the endonym) but we want Bengali (the exonym); here we remap
return cfg.lang_name_remap[lang:lower()][1], cfg.lang_name_remap[lang:lower()][2]; -- for this language 'name', return a possibly new name and appropriate code
['bengali'] = {'Bengali', 'bn'}, -- MediaWiki doesn't use exonym so here we provide correct language name and 639-1 code
['bihari'] = {'Bihari', 'bh'}, -- MediaWiki replace 'Bihari' with 'Bhojpuri' so 'Bihari' cannot be found
['bhojpuri'] = {'Bhojpuri', 'bho'}, -- MediaWiki uses 'bh' as a subdomain name for Bhojpuri wWikipedia: bh.wikipedia.org
}
if remap[lang:lower()] then
return remap[lang:lower()][1], remap[lang:lower()][2]; -- for this language 'name', return a possibly new name and appropriate code
end
end


Line 1,447: Line 1,447:
local this_wiki_code = this_wiki:getCode() -- get this wiki's language code
local this_wiki_code = this_wiki:getCode() -- get this wiki's language code
local this_wiki_name = mw.language.fetchLanguageName(this_wiki_code, this_wiki_code); -- get this wiki's language name
local this_wiki_name = mw.language.fetchLanguageName(this_wiki_code, this_wiki_code); -- get this wiki's language name
local remap = {
['bh'] = 'Bihari', -- MediaWiki uses 'bh' as a subdomain name for Bhojpuri wWikipedia: bh.wikipedia.org
['bn'] = 'Bengali', -- MediaWiki returns Bangla
}


names_table = mw.text.split (lang, '%s*,%s*'); -- names should be a comma separated list
names_table = mw.text.split (lang, '%s*,%s*'); -- names should be a comma separated list
Line 1,462: Line 1,457:
if 2 == lang:len() or 3 == lang:len() then -- if two-or three-character code
if 2 == lang:len() or 3 == lang:len() then -- if two-or three-character code
name = mw.language.fetchLanguageName( lang:lower(), this_wiki_code); -- get language name if |language= is a proper code
name = mw.language.fetchLanguageName( lang:lower(), this_wiki_code); -- get language name if |language= is a proper code
if not is_set (name) then
name = cfg.lang_code_remap[lang]; -- not supported by MediaWiki; is it in remap?
end
end
end
Line 1,471: Line 1,469:
if is_set (code) then -- only 2- or 3-character codes
if is_set (code) then -- only 2- or 3-character codes
name = remap[code] or name; -- override wikimedia when they misuse language codes/names
name = cfg.lang_code_remap[code] or name; -- override wikimedia when they misuse language codes/names


if this_wiki_code ~= code then -- when the language is not the same as this wiki's language
if this_wiki_code ~= code then -- when the language is not the same as this wiki's language
if 2 == code:len() then -- and is a two-character code
if 2 == code:len() then -- and is a two-character code
add_prop_cat ('foreign_lang_source' .. code, {name, code}) -- categorize it
add_prop_cat ('foreign_lang_source' .. code, {name, code}) -- categorize it
else -- or is a recognized language (but has a three-character code)
else -- or is a recognized language (but has a three-character code)
add_prop_cat ('foreign_lang_source_2' .. code, {code}) -- categorize it differently TODO: support multiple three-character code categories per cs1|2 template
add_prop_cat ('foreign_lang_source_2' .. code, {code}) -- categorize it differently TODO: support multiple three-character code categories per cs1|2 template
end
end
end
end
Line 1,508: Line 1,506:


Set style settings for CS1 citation templates. Returns separator and postscript settings
Set style settings for CS1 citation templates. Returns separator and postscript settings
At en.wiki, for cs1:
ps gets: '.'
sep gets: '.'


]]
]]
Line 1,513: Line 1,514:
local function set_cs1_style (ps)
local function set_cs1_style (ps)
if not is_set (ps) then -- unless explicitly set to something
if not is_set (ps) then -- unless explicitly set to something
ps = '.'; -- terminate the rendered citation with a period
ps = cfg.presentation['ps_cs1']; -- terminate the rendered citation
end
end
return '.', ps; -- separator is a full stop
return cfg.presentation['sep_cs1'], ps; -- element separator
end
end


Line 1,522: Line 1,523:


Set style settings for CS2 citation templates. Returns separator, postscript, ref settings
Set style settings for CS2 citation templates. Returns separator, postscript, ref settings
At en.wiki, for cs2:
ps gets: '' (empty string - no terminal punctuation)
sep gets: ','


]]
]]
Line 1,527: Line 1,531:
local function set_cs2_style (ps, ref)
local function set_cs2_style (ps, ref)
if not is_set (ps) then -- if |postscript= has not been set, set cs2 default
if not is_set (ps) then -- if |postscript= has not been set, set cs2 default
ps = ''; -- make sure it isn't nil
ps = cfg.presentation['ps_cs2']; -- terminate the rendered citation
end
end
if not is_set (ref) then -- if |ref= is not set
if not is_set (ref) then -- if |ref= is not set
ref = "harv"; -- set default |ref=harv
ref = "harv"; -- set default |ref=harv
end
end
return ',', ps, ref; -- separator is a comma
return cfg.presentation['sep_cs2'], ps, ref; -- element separator
end
end


Line 1,955: Line 1,959:
end
end


local is_journal = 'journal' == cite_class or (in_array (cite_class, {'citation', 'map'}) and 'journal' == origin);
local is_journal = 'journal' == cite_class or (in_array (cite_class, {'citation', 'map', 'interview'}) and 'journal' == origin);
if is_set (page) then
if is_set (page) then
Line 2,600: Line 2,604:


-- legacy: promote PublicationDate to Date if neither Date nor Year are set.
-- legacy: promote PublicationDate to Date if neither Date nor Year are set.
local Date_origin; -- to hold the name of parameter promoted to Date; required for date error messaging
if not is_set (Date) then
if not is_set (Date) then
Date = Year; -- promote Year to Date
Date = Year; -- promote Year to Date
Line 2,606: Line 2,612:
Date = PublicationDate; -- promote PublicationDate to Date
Date = PublicationDate; -- promote PublicationDate to Date
PublicationDate = ''; -- unset, no longer needed
PublicationDate = ''; -- unset, no longer needed
Date_origin = A:ORIGIN('PublicationDate'); -- save the name of the promoted parameter
else
Date_origin = A:ORIGIN('Year'); -- save the name of the promoted parameter
end
end
else
Date_origin = A:ORIGIN('Date'); -- not a promotion; name required for error messaging
end
end


Line 2,620: Line 2,631:
local error_message = '';
local error_message = '';
-- AirDate has been promoted to Date so not necessary to check it
-- AirDate has been promoted to Date so not necessary to check it
local date_parameters_list = {['access-date']=AccessDate, ['archive-date']=ArchiveDate, ['date']=Date, ['doi-broken-date']=DoiBroken,
-- local date_parameters_list = {['access-date']=AccessDate, ['archive-date']=ArchiveDate, ['date']=Date, ['doi-broken-date']=DoiBroken,
['embargo']=Embargo, ['lay-date']=LayDate, ['publication-date']=PublicationDate, ['year']=Year};
-- ['embargo']=Embargo, ['lay-date']=LayDate, ['publication-date']=PublicationDate, ['year']=Year};


local date_parameters_list = {
['access-date'] = {val=AccessDate, name=A:ORIGIN ('AccessDate')},
['archive-date'] = {val=ArchiveDate, name=A:ORIGIN ('ArchiveDate')},
['date'] = {val=Date, name=Date_origin},
['doi-broken-date'] = {val=DoiBroken, name=A:ORIGIN ('DoiBroken')},
['embargo'] = {val=Embargo, name=A:ORIGIN ('Embargo')},
['lay-date'] = {val=LayDate, name=A:ORIGIN ('LayDate')},
['publication-date'] ={val=PublicationDate, name=A:ORIGIN ('PublicationDate')},
['year'] = {val=Year, name=A:ORIGIN ('Year')},
};
anchor_year, Embargo, error_message = dates(date_parameters_list, COinS_date);
anchor_year, Embargo, error_message = dates(date_parameters_list, COinS_date);
-- start temporary Julian / Gregorian calendar uncertainty categorization
-- start temporary Julian / Gregorian calendar uncertainty categorization
if COinS_date.inter_cal_cat then
if COinS_date.inter_cal_cat then
Line 2,656: Line 2,678:
-- for those wikis that can and want to have English date names translated to the local language,
-- for those wikis that can and want to have English date names translated to the local language,
-- uncomment these three lines.  Not supported by en.wiki (for obvious reasons)
-- uncomment these three lines.  Not supported by en.wiki (for obvious reasons)
-- if date_name_xlate (date_parameters_list) then
-- set date_name_xlate() second argument to true to translate English digits to local digits (will translate ymd dates)
-- if date_name_xlate (date_parameters_list, false) then
-- modified = true;
-- modified = true;
-- end
-- end


if modified then -- if the date_parameters_list values were modified
if modified then -- if the date_parameters_list values were modified
AccessDate = date_parameters_list['access-date']; -- overwrite date holding parameters with modified values
AccessDate = date_parameters_list['access-date'].val; -- overwrite date holding parameters with modified values
ArchiveDate = date_parameters_list['archive-date'];
ArchiveDate = date_parameters_list['archive-date'].val;
Date = date_parameters_list['date'];
Date = date_parameters_list['date'].val;
DoiBroken = date_parameters_list['doi-broken-date'];
DoiBroken = date_parameters_list['doi-broken-date'].val;
LayDate = date_parameters_list['lay-date'];
LayDate = date_parameters_list['lay-date'].val;
PublicationDate = date_parameters_list['publication-date'];
PublicationDate = date_parameters_list['publication-date'].val;
end
end
else
else
Line 2,863: Line 2,886:
DeadURL = DeadURL:lower(); -- used later when assembling archived text
DeadURL = DeadURL:lower(); -- used later when assembling archived text
if is_set( ArchiveURL ) then
if is_set( ArchiveURL ) then
if is_set (ChapterURL) then -- URL not set so if chapter-url is set apply archive url to it
if is_set (ChapterURL) then -- if chapter-url is set apply archive url to it
OriginalURL = ChapterURL; -- save copy of source chapter's url for archive text
OriginalURL = ChapterURL; -- save copy of source chapter's url for archive text
OriginalURLorigin = ChapterURLorigin; -- name of chapter-url parameter for error messages
OriginalURLorigin = ChapterURLorigin; -- name of chapter-url parameter for error messages
OriginalFormat = ChapterFormat; -- and original |format=
OriginalFormat = ChapterFormat; -- and original |chapter-format=
if 'no' ~= DeadURL then
if 'no' ~= DeadURL then
ChapterURL = ArchiveURL -- swap-in the archive's url
ChapterURL = ArchiveURL -- swap-in the archive's url
ChapterURLorigin = A:ORIGIN('ArchiveURL') -- name of archive-url parameter for error messages
ChapterURLorigin = A:ORIGIN('ArchiveURL') -- name of archive-url parameter for error messages
ChapterFormat = ArchiveFormat or ''; -- swap in archive's format
ChapterFormat = ArchiveFormat or ''; -- swap in archive's format
ChapterUrlAccess = nil; -- restricted access levels do not make sense for archived urls
end
end
elseif is_set (URL) then
elseif is_set (URL) then
Line 3,049: Line 3,073:
if is_set (Translators) then
if is_set (Translators) then
Others = sepc .. ' ' .. wrap_msg ('translated', Translators, use_lowercase) .. Others;
Others = safe_join ({sepc .. ' ', wrap_msg ('translated', Translators, use_lowercase), Others}, sepc);
end
end
if is_set (Interviewers) then
if is_set (Interviewers) then
Others = sepc .. ' ' .. wrap_msg ('interview', Interviewers, use_lowercase) .. Others;
Others = safe_join ({sepc .. ' ', wrap_msg ('interview', Interviewers, use_lowercase), Others}, sepc);
end
end
Line 3,233: Line 3,257:
if in_array(config.CitationClass, {"journal","citation"}) and is_set(Periodical) then
if in_array(config.CitationClass, {"journal","citation"}) and is_set(Periodical) then
if is_set(Others) then Others = Others .. sepc .. " " end
if is_set(Others) then Others = safe_join ({Others, sepc .. " "}, sepc) end -- add terminal punctuation & space; check for dup sepc; TODO why do we need to do this here?
tcommon = safe_join( {Others, Title, TitleNote, Conference, Periodical, Format, TitleType, Series, Language, Edition, Publisher, Agency, Volume}, sepc );
tcommon = safe_join( {Others, Title, TitleNote, Conference, Periodical, Format, TitleType, Series, Language, Edition, Publisher, Agency, Volume}, sepc );
elseif in_array(config.CitationClass, {"book","citation"}) and not is_set(Periodical) then -- special cases for book cites
elseif in_array(config.CitationClass, {"book","citation"}) and not is_set(Periodical) then -- special cases for book cites
Line 3,510: Line 3,534:
for k, v in pairs( pframe.args ) do
for k, v in pairs( pframe.args ) do
if v ~= '' then
if v ~= '' then
k = mw.ustring.gsub (k, '%d', cfg.date_names.local_digits); -- for enumerated parameters, translate 'local' digits to Western 0-9
if not validate( k, config.CitationClass ) then
if not validate( k, config.CitationClass ) then
error_text = "";
error_text = "";
Line 3,548: Line 3,573:
end
end
missing_pipe_check (v); -- do we think that there is a parameter that is missing a pipe?
missing_pipe_check (v); -- do we think that there is a parameter that is missing a pipe?
-- TODO: is this the best place for this translation?
args[k] = v;
elseif args[k] ~= nil or (k == 'postscript') then
args[k] = v;
args[k] = v;
elseif args[k] ~= nil or (k == 'postscript') then -- here when v is empty string
args[k] = v; -- why do we do this?  we don't support 'empty' parameters
end
end
end
end