Jump to content

Module:Citation/CS1: Difference between revisions

synch from sandbox;
m>Trappist the monk
No edit summary
(synch from sandbox;)
Line 549: Line 549:
end
end
-- if we get this far we have prefix and script
-- if we get this far we have prefix and script
name = mw.language.fetchLanguageName( lang, "en" ); -- get language name so that we can use it to categorize
name = cfg.lang_code_remap[lang] or mw.language.fetchLanguageName( lang, "en" ); -- get language name so that we can use it to categorize
if is_set (name) then -- is prefix a proper ISO 639-1 language code?
if is_set (name) then -- is prefix a proper ISO 639-1 language code?
script_value = script_value:gsub ('^%l%l%s*:%s*', ''); -- strip prefix from script
script_value = script_value:gsub ('^%l%l%s*:%s*', ''); -- strip prefix from script
Line 628: Line 628:


chapter = script_concatenate (chapter, scriptchapter) -- <bdi> tags, lang attribute, categorization, etc; must be done after title is wrapped
chapter = script_concatenate (chapter, scriptchapter) -- <bdi> tags, lang attribute, categorization, etc; must be done after title is wrapped
if is_set (chapterurl) then
chapter = external_link (chapterurl, chapter, chapter_url_source, access); -- adds bare_url_missing_title error if appropriate
end


if is_set (transchapter) then
if is_set (transchapter) then
Line 639: Line 643:
end
end


if is_set (chapterurl) then
-- if is_set (chapterurl) then
chapter = external_link (chapterurl, chapter, chapter_url_source, access); -- adds bare_url_missing_title error if appropriate
-- chapter = external_link (chapterurl, chapter, chapter_url_source, access); -- adds bare_url_missing_title error if appropriate
end
-- end


return chapter .. chapter_error;
return chapter .. chapter_error;
Line 799: Line 803:
end
end
-- limited enumerated parameters list
-- limited enumerated parameters list
name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last#)
name = name:gsub("%d+", "#" ); -- replace digit(s) with # (last25 becomes last#); note: this is string.gsub, not mw.ustring.gsub, so %d matches only Western digits 0-9
state = whitelist.limited_numbered_arguments[ name ];
state = whitelist.limited_numbered_arguments[ name ];
if true == state then return true; end -- valid actively supported parameter
if true == state then return true; end -- valid actively supported parameter
Line 818: Line 822:
end
end
-- all enumerated parameters allowed
-- all enumerated parameters allowed
name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last#)
name = name:gsub("%d+", "#" ); -- replace digit(s) with # (last25 becomes last#); note: this is string.gsub, not mw.ustring.gsub, so %d matches only Western digits 0-9
state = whitelist.numbered_arguments[ name ];
state = whitelist.numbered_arguments[ name ];


Line 896: Line 900:


local function safe_join( tbl, duplicate_char )
local function safe_join( tbl, duplicate_char )
--[[
local f = {}; -- create a function table appropriate to type of 'duplicate character'
Note: we use string functions here, rather than ustring functions.
if 1 == #duplicate_char then -- for single byte ascii characters use the string library functions
f.gsub=string.gsub
This has considerably faster performance and should work correctly as
f.match=string.match
long as the duplicate_char is strict ASCII. The strings
f.sub=string.sub
in tbl may be ASCII or UTF8.
else -- for multi-byte characters use the ustring library functions
]]
f.gsub=mw.ustring.gsub
f.match=mw.ustring.match
f.sub=mw.ustring.sub
end
 
local str = ''; -- the output string
local str = ''; -- the output string
local comp = ''; -- what does 'comp' mean?
local comp = ''; -- what does 'comp' mean?
Line 920: Line 927:
end
end
-- typically duplicate_char is sepc
-- typically duplicate_char is sepc
if comp:sub(1,1) == duplicate_char then -- is first character same as duplicate_char? why test first character?
if f.sub(comp, 1,1) == duplicate_char then -- is first character same as duplicate_char? why test first character?
--  Because individual string segments often (always?) begin with terminal punct for the
--  Because individual string segments often (always?) begin with terminal punct for the
--  preceding segment: 'First element' .. 'sepc next element' .. etc?
--  preceding segment: 'First element' .. 'sepc next element' .. etc?
trim = false;
trim = false;
end_chr = str:sub(-1,-1); -- get the last character of the output string
end_chr = f.sub(str, -1,-1); -- get the last character of the output string
-- str = str .. "<HERE(enchr=" .. end_chr.. ")" -- debug stuff?
-- str = str .. "<HERE(enchr=" .. end_chr.. ")" -- debug stuff?
if end_chr == duplicate_char then -- if same as separator
if end_chr == duplicate_char then -- if same as separator
str = str:sub(1,-2); -- remove it
str = f.sub(str, 1,-2); -- remove it
elseif end_chr == "'" then -- if it might be wikimarkup
elseif end_chr == "'" then -- if it might be wikimarkup
if str:sub(-3,-1) == duplicate_char .. "''" then -- if last three chars of str are sepc''  
if f.sub(str, -3,-1) == duplicate_char .. "''" then -- if last three chars of str are sepc''  
str = str:sub(1, -4) .. "''"; -- remove them and add back ''
str = f.sub(str, 1, -4) .. "''"; -- remove them and add back ''
elseif str:sub(-5,-1) == duplicate_char .. "]]''" then -- if last five chars of str are sepc]]''  
elseif f.sub(str, -5,-1) == duplicate_char .. "]]''" then -- if last five chars of str are sepc]]''  
trim = true; -- why? why do this and next differently from previous?
trim = true; -- why? why do this and next differently from previous?
elseif str:sub(-4,-1) == duplicate_char .. "]''" then -- if last four chars of str are sepc]''  
elseif f.sub(str, -4,-1) == duplicate_char .. "]''" then -- if last four chars of str are sepc]''  
trim = true; -- same question
trim = true; -- same question
end
end
elseif end_chr == "]" then -- if it might be wikimarkup
elseif end_chr == "]" then -- if it might be wikimarkup
if str:sub(-3,-1) == duplicate_char .. "]]" then -- if last three chars of str are sepc]] wikilink  
if f.sub(str, -3,-1) == duplicate_char .. "]]" then -- if last three chars of str are sepc]] wikilink  
trim = true;
trim = true;
elseif str:sub(-3,-1) == duplicate_char .. '"]' then -- if last three chars of str are sepc"] quoted external link  
elseif f.sub(str, -3,-1) == duplicate_char .. '"]' then -- if last three chars of str are sepc"] quoted external link  
trim = true;
trim = true;
elseif str:sub(-2,-1) == duplicate_char .. "]" then -- if last two chars of str are sepc] external link
elseif f.sub(str, -2,-1) == duplicate_char .. "]" then -- if last two chars of str are sepc] external link
trim = true;
trim = true;
elseif str:sub(-4,-1) == duplicate_char .. "'']" then -- normal case when |url=something & |title=Title.
elseif f.sub(str, -4,-1) == duplicate_char .. "'']" then -- normal case when |url=something & |title=Title.
trim = true;
trim = true;
end
end
elseif end_chr == " " then -- if last char of output string is a space
elseif end_chr == " " then -- if last char of output string is a space
if str:sub(-2,-1) == duplicate_char .. " " then -- if last two chars of str are <sepc><space>
if f.sub(str, -2,-1) == duplicate_char .. " " then -- if last two chars of str are <sepc><space>
str = str:sub(1,-3); -- remove them both
str = f.sub(str, 1,-3); -- remove them both
end
end
end
end
Line 955: Line 962:
if value ~= comp then -- value does not equal comp when value contains html markup
if value ~= comp then -- value does not equal comp when value contains html markup
local dup2 = duplicate_char;
local dup2 = duplicate_char;
if dup2:match( "%A" ) then dup2 = "%" .. dup2; end -- if duplicate_char not a letter then escape it
if f.match(dup2, "%A" ) then dup2 = "%" .. dup2; end -- if duplicate_char not a letter then escape it
value = value:gsub( "(%b<>)" .. dup2, "%1", 1 ) -- remove duplicate_char if it follows html markup
value = f.gsub(value, "(%b<>)" .. dup2, "%1", 1 ) -- remove duplicate_char if it follows html markup
else
else
value = value:sub( 2, -1 ); -- remove duplicate_char when it is first character
value = f.sub(value, 2, -1 ); -- remove duplicate_char when it is first character
end
end
end
end
Line 967: Line 974:
end
end
return str;
return str;
end
end




Line 1,105: Line 1,112:


if 'vanc' == format then -- Vancouver-like author/editor name styling?
if 'vanc' == format then -- Vancouver-like author/editor name styling?
sep = ','; -- name-list separator between authors is a comma
sep = cfg.presentation['sep_nl_vanc']; -- name-list separator between authors is a comma
namesep = ' '; -- last/first separator is a space
namesep = cfg.presentation['sep_name_vanc']; -- last/first separator is a space
else
else
sep = ';' -- name-list separator between authors is a semicolon
sep = cfg.presentation['sep_nl']; -- name-list separator between authors is a semicolon
namesep = ', ' -- last/first separator is <comma><space>
namesep = cfg.presentation['sep_name']; -- last/first separator is <comma><space>
end
end
Line 1,394: Line 1,401:


local function get_iso639_code (lang, this_wiki_code)
local function get_iso639_code (lang, this_wiki_code)
local remap = {
if cfg.lang_name_remap[lang:lower()] then -- if there is a remapped name (because MediaWiki uses something that we don't think is correct)
['bangla'] = {'Bengali', 'bn'}, -- MediaWiki returns Bangla (the endonym) but we want Bengali (the exonym); here we remap
return cfg.lang_name_remap[lang:lower()][1], cfg.lang_name_remap[lang:lower()][2]; -- for this language 'name', return a possibly new name and appropriate code
['bengali'] = {'Bengali', 'bn'}, -- MediaWiki doesn't use exonym so here we provide correct language name and 639-1 code
['bihari'] = {'Bihari', 'bh'}, -- MediaWiki replace 'Bihari' with 'Bhojpuri' so 'Bihari' cannot be found
['bhojpuri'] = {'Bhojpuri', 'bho'}, -- MediaWiki uses 'bh' as a subdomain name for Bhojpuri Wikipedia: bh.wikipedia.org
}
if remap[lang:lower()] then
return remap[lang:lower()][1], remap[lang:lower()][2]; -- for this language 'name', return a possibly new name and appropriate code
end
end


Line 1,447: Line 1,447:
local this_wiki_code = this_wiki:getCode() -- get this wiki's language code
local this_wiki_code = this_wiki:getCode() -- get this wiki's language code
local this_wiki_name = mw.language.fetchLanguageName(this_wiki_code, this_wiki_code); -- get this wiki's language name
local this_wiki_name = mw.language.fetchLanguageName(this_wiki_code, this_wiki_code); -- get this wiki's language name
local remap = {
['bh'] = 'Bihari', -- MediaWiki uses 'bh' as a subdomain name for Bhojpuri Wikipedia: bh.wikipedia.org
['bn'] = 'Bengali', -- MediaWiki returns Bangla
}


names_table = mw.text.split (lang, '%s*,%s*'); -- names should be a comma separated list
names_table = mw.text.split (lang, '%s*,%s*'); -- names should be a comma separated list
Line 1,462: Line 1,457:
if 2 == lang:len() or 3 == lang:len() then -- if two- or three-character code
if 2 == lang:len() or 3 == lang:len() then -- if two- or three-character code
name = mw.language.fetchLanguageName( lang:lower(), this_wiki_code); -- get language name if |language= is a proper code
name = mw.language.fetchLanguageName( lang:lower(), this_wiki_code); -- get language name if |language= is a proper code
if not is_set (name) then
name = cfg.lang_code_remap[lang]; -- not supported by MediaWiki; is it in remap?
end
end
end
Line 1,471: Line 1,469:
if is_set (code) then -- only 2- or 3-character codes
if is_set (code) then -- only 2- or 3-character codes
name = remap[code] or name; -- override wikimedia when they misuse language codes/names
name = cfg.lang_code_remap[code] or name; -- override wikimedia when they misuse language codes/names


if this_wiki_code ~= code then -- when the language is not the same as this wiki's language
if this_wiki_code ~= code then -- when the language is not the same as this wiki's language
if 2 == code:len() then -- and is a two-character code
if 2 == code:len() then -- and is a two-character code
add_prop_cat ('foreign_lang_source' .. code, {name, code}) -- categorize it
add_prop_cat ('foreign_lang_source' .. code, {name, code}) -- categorize it
else -- or is a recognized language (but has a three-character code)
else -- or is a recognized language (but has a three-character code)
add_prop_cat ('foreign_lang_source_2' .. code, {code}) -- categorize it differently TODO: support multiple three-character code categories per cs1|2 template
add_prop_cat ('foreign_lang_source_2' .. code, {code}) -- categorize it differently TODO: support multiple three-character code categories per cs1|2 template
end
end
end
end
Line 1,508: Line 1,506:


Set style settings for CS1 citation templates. Returns separator and postscript settings
Set style settings for CS1 citation templates. Returns separator and postscript settings
At en.wiki, for cs1:
ps gets: '.'
sep gets: '.'


]]
]]
Line 1,513: Line 1,514:
local function set_cs1_style (ps)
local function set_cs1_style (ps)
if not is_set (ps) then -- unless explicitly set to something
if not is_set (ps) then -- unless explicitly set to something
ps = '.'; -- terminate the rendered citation with a period
ps = cfg.presentation['ps_cs1']; -- terminate the rendered citation
end
end
return '.', ps; -- separator is a full stop
return cfg.presentation['sep_cs1'], ps; -- element separator
end
end


Line 1,522: Line 1,523:


Set style settings for CS2 citation templates. Returns separator, postscript, ref settings
Set style settings for CS2 citation templates. Returns separator, postscript, ref settings
At en.wiki, for cs2:
ps gets: '' (empty string - no terminal punctuation)
sep gets: ','


]]
]]
Line 1,527: Line 1,531:
local function set_cs2_style (ps, ref)
local function set_cs2_style (ps, ref)
if not is_set (ps) then -- if |postscript= has not been set, set cs2 default
if not is_set (ps) then -- if |postscript= has not been set, set cs2 default
ps = ''; -- make sure it isn't nil
ps = cfg.presentation['ps_cs2']; -- terminate the rendered citation
end
end
if not is_set (ref) then -- if |ref= is not set
if not is_set (ref) then -- if |ref= is not set
ref = "harv"; -- set default |ref=harv
ref = "harv"; -- set default |ref=harv
end
end
return ',', ps, ref; -- separator is a comma
return cfg.presentation['sep_cs2'], ps, ref; -- element separator
end
end


Line 1,955: Line 1,959:
end
end


local is_journal = 'journal' == cite_class or (in_array (cite_class, {'citation', 'map'}) and 'journal' == origin);
local is_journal = 'journal' == cite_class or (in_array (cite_class, {'citation', 'map', 'interview'}) and 'journal' == origin);
if is_set (page) then
if is_set (page) then
Line 2,600: Line 2,604:


-- legacy: promote PublicationDate to Date if neither Date nor Year are set.
-- legacy: promote PublicationDate to Date if neither Date nor Year are set.
local Date_origin; -- to hold the name of parameter promoted to Date; required for date error messaging
if not is_set (Date) then
if not is_set (Date) then
Date = Year; -- promote Year to Date
Date = Year; -- promote Year to Date
Line 2,606: Line 2,612:
Date = PublicationDate; -- promote PublicationDate to Date
Date = PublicationDate; -- promote PublicationDate to Date
PublicationDate = ''; -- unset, no longer needed
PublicationDate = ''; -- unset, no longer needed
Date_origin = A:ORIGIN('PublicationDate'); -- save the name of the promoted parameter
else
Date_origin = A:ORIGIN('Year'); -- save the name of the promoted parameter
end
end
else
Date_origin = A:ORIGIN('Date'); -- not a promotion; name required for error messaging
end
end


Line 2,620: Line 2,631:
local error_message = '';
local error_message = '';
-- AirDate has been promoted to Date so not necessary to check it
-- AirDate has been promoted to Date so not necessary to check it
local date_parameters_list = {['access-date']=AccessDate, ['archive-date']=ArchiveDate, ['date']=Date, ['doi-broken-date']=DoiBroken,
-- local date_parameters_list = {['access-date']=AccessDate, ['archive-date']=ArchiveDate, ['date']=Date, ['doi-broken-date']=DoiBroken,
['embargo']=Embargo, ['lay-date']=LayDate, ['publication-date']=PublicationDate, ['year']=Year};
-- ['embargo']=Embargo, ['lay-date']=LayDate, ['publication-date']=PublicationDate, ['year']=Year};


local date_parameters_list = {
['access-date'] = {val=AccessDate, name=A:ORIGIN ('AccessDate')},
['archive-date'] = {val=ArchiveDate, name=A:ORIGIN ('ArchiveDate')},
['date'] = {val=Date, name=Date_origin},
['doi-broken-date'] = {val=DoiBroken, name=A:ORIGIN ('DoiBroken')},
['embargo'] = {val=Embargo, name=A:ORIGIN ('Embargo')},
['lay-date'] = {val=LayDate, name=A:ORIGIN ('LayDate')},
['publication-date'] ={val=PublicationDate, name=A:ORIGIN ('PublicationDate')},
['year'] = {val=Year, name=A:ORIGIN ('Year')},
};
anchor_year, Embargo, error_message = dates(date_parameters_list, COinS_date);
anchor_year, Embargo, error_message = dates(date_parameters_list, COinS_date);
-- start temporary Julian / Gregorian calendar uncertainty categorization
-- start temporary Julian / Gregorian calendar uncertainty categorization
if COinS_date.inter_cal_cat then
if COinS_date.inter_cal_cat then
Line 2,656: Line 2,678:
-- for those wikis that can and want to have English date names translated to the local language,
-- for those wikis that can and want to have English date names translated to the local language,
-- uncomment these three lines.  Not supported by en.wiki (for obvious reasons)
-- uncomment these three lines.  Not supported by en.wiki (for obvious reasons)
-- if date_name_xlate (date_parameters_list) then
-- set date_name_xlate() second argument to true to translate English digits to local digits (will translate ymd dates)
-- if date_name_xlate (date_parameters_list, false) then
-- modified = true;
-- modified = true;
-- end
-- end


if modified then -- if the date_parameters_list values were modified
if modified then -- if the date_parameters_list values were modified
AccessDate = date_parameters_list['access-date']; -- overwrite date holding parameters with modified values
AccessDate = date_parameters_list['access-date'].val; -- overwrite date holding parameters with modified values
ArchiveDate = date_parameters_list['archive-date'];
ArchiveDate = date_parameters_list['archive-date'].val;
Date = date_parameters_list['date'];
Date = date_parameters_list['date'].val;
DoiBroken = date_parameters_list['doi-broken-date'];
DoiBroken = date_parameters_list['doi-broken-date'].val;
LayDate = date_parameters_list['lay-date'];
LayDate = date_parameters_list['lay-date'].val;
PublicationDate = date_parameters_list['publication-date'];
PublicationDate = date_parameters_list['publication-date'].val;
end
end
else
else
Line 2,863: Line 2,886:
DeadURL = DeadURL:lower(); -- used later when assembling archived text
DeadURL = DeadURL:lower(); -- used later when assembling archived text
if is_set( ArchiveURL ) then
if is_set( ArchiveURL ) then
if is_set (ChapterURL) then -- URL not set so if chapter-url is set apply archive url to it
if is_set (ChapterURL) then -- if chapter-url is set apply archive url to it
OriginalURL = ChapterURL; -- save copy of source chapter's url for archive text
OriginalURL = ChapterURL; -- save copy of source chapter's url for archive text
OriginalURLorigin = ChapterURLorigin; -- name of chapter-url parameter for error messages
OriginalURLorigin = ChapterURLorigin; -- name of chapter-url parameter for error messages
OriginalFormat = ChapterFormat; -- and original |format=
OriginalFormat = ChapterFormat; -- and original |chapter-format=
if 'no' ~= DeadURL then
if 'no' ~= DeadURL then
ChapterURL = ArchiveURL -- swap-in the archive's url
ChapterURL = ArchiveURL -- swap-in the archive's url
ChapterURLorigin = A:ORIGIN('ArchiveURL') -- name of archive-url parameter for error messages
ChapterURLorigin = A:ORIGIN('ArchiveURL') -- name of archive-url parameter for error messages
ChapterFormat = ArchiveFormat or ''; -- swap in archive's format
ChapterFormat = ArchiveFormat or ''; -- swap in archive's format
ChapterUrlAccess = nil; -- restricted access levels do not make sense for archived urls
end
end
elseif is_set (URL) then
elseif is_set (URL) then
Line 3,049: Line 3,073:
if is_set (Translators) then
if is_set (Translators) then
Others = sepc .. ' ' .. wrap_msg ('translated', Translators, use_lowercase) .. Others;
Others = safe_join ({sepc .. ' ', wrap_msg ('translated', Translators, use_lowercase), Others}, sepc);
end
end
if is_set (Interviewers) then
if is_set (Interviewers) then
Others = sepc .. ' ' .. wrap_msg ('interview', Interviewers, use_lowercase) .. Others;
Others = safe_join ({sepc .. ' ', wrap_msg ('interview', Interviewers, use_lowercase), Others}, sepc);
end
end
Line 3,233: Line 3,257:
if in_array(config.CitationClass, {"journal","citation"}) and is_set(Periodical) then
if in_array(config.CitationClass, {"journal","citation"}) and is_set(Periodical) then
if is_set(Others) then Others = Others .. sepc .. " " end
if is_set(Others) then Others = safe_join ({Others, sepc .. " "}, sepc) end -- add terminal punctuation & space; check for dup sepc; TODO why do we need to do this here?
tcommon = safe_join( {Others, Title, TitleNote, Conference, Periodical, Format, TitleType, Series, Language, Edition, Publisher, Agency, Volume}, sepc );
tcommon = safe_join( {Others, Title, TitleNote, Conference, Periodical, Format, TitleType, Series, Language, Edition, Publisher, Agency, Volume}, sepc );
elseif in_array(config.CitationClass, {"book","citation"}) and not is_set(Periodical) then -- special cases for book cites
elseif in_array(config.CitationClass, {"book","citation"}) and not is_set(Periodical) then -- special cases for book cites
Line 3,510: Line 3,534:
for k, v in pairs( pframe.args ) do
for k, v in pairs( pframe.args ) do
if v ~= '' then
if v ~= '' then
k = mw.ustring.gsub (k, '%d', cfg.date_names.local_digits); -- for enumerated parameters, translate 'local' digits to Western 0-9
if not validate( k, config.CitationClass ) then
if not validate( k, config.CitationClass ) then
error_text = "";
error_text = "";
Line 3,548: Line 3,573:
end
end
missing_pipe_check (v); -- do we think that there is a parameter that is missing a pipe?
missing_pipe_check (v); -- do we think that there is a parameter that is missing a pipe?
-- TODO: is this the best place for this translation?
args[k] = v;
elseif args[k] ~= nil or (k == 'postscript') then
args[k] = v;
args[k] = v;
elseif args[k] ~= nil or (k == 'postscript') then -- here when v is empty string
args[k] = v; -- why do we do this?  we don't support 'empty' parameters
end
end
end
end
Cookies help us deliver our services. By using our services, you agree to our use of cookies.