Module:Citation/CS1: Difference between revisions
synch from sandbox;
m>Trappist the monk No edit summary |
(synch from sandbox;) |
||
Line 549: | Line 549: | ||
end | end | ||
-- if we get this far we have prefix and script | -- if we get this far we have prefix and script | ||
name = mw.language.fetchLanguageName( lang, "en" ); | name = cfg.lang_code_remap[lang] or mw.language.fetchLanguageName( lang, "en" ); -- get language name so that we can use it to categorize | ||
if is_set (name) then -- is prefix a proper ISO 639-1 language code? | if is_set (name) then -- is prefix a proper ISO 639-1 language code? | ||
script_value = script_value:gsub ('^%l%l%s*:%s*', ''); -- strip prefix from script | script_value = script_value:gsub ('^%l%l%s*:%s*', ''); -- strip prefix from script | ||
Line 628: | Line 628: | ||
chapter = script_concatenate (chapter, scriptchapter) -- <bdi> tags, lang attribute, categorization, etc; must be done after title is wrapped | chapter = script_concatenate (chapter, scriptchapter) -- <bdi> tags, lang attribute, categorization, etc; must be done after title is wrapped | ||
if is_set (chapterurl) then | |||
chapter = external_link (chapterurl, chapter, chapter_url_source, access); -- adds bare_url_missing_title error if appropriate | |||
end | |||
if is_set (transchapter) then | if is_set (transchapter) then | ||
Line 639: | Line 643: | ||
end | end | ||
-- if is_set (chapterurl) then | |||
-- chapter = external_link (chapterurl, chapter, chapter_url_source, access); -- adds bare_url_missing_title error if appropriate | |||
-- end | |||
return chapter .. chapter_error; | return chapter .. chapter_error; | ||
Line 799: | Line 803: | ||
end | end | ||
-- limited enumerated parameters list | -- limited enumerated parameters list | ||
name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last#) | name = name:gsub("%d+", "#" ); -- replace digit(s) with # (last25 becomes last#) (mw.ustring because non-Western 'local' digits) | ||
state = whitelist.limited_numbered_arguments[ name ]; | state = whitelist.limited_numbered_arguments[ name ]; | ||
if true == state then return true; end -- valid actively supported parameter | if true == state then return true; end -- valid actively supported parameter | ||
Line 818: | Line 822: | ||
end | end | ||
-- all enumerated parameters allowed | -- all enumerated parameters allowed | ||
name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last# | name = name:gsub("%d+", "#" ); -- replace digit(s) with # (last25 becomes last#) (mw.ustring because non-Western 'local' digits) | ||
state = whitelist.numbered_arguments[ name ]; | state = whitelist.numbered_arguments[ name ]; | ||
Line 896: | Line 900: | ||
local function safe_join( tbl, duplicate_char ) | local function safe_join( tbl, duplicate_char ) | ||
-- | local f = {}; -- create a function table appropriate to type of 'duplicate character' | ||
if 1 == #duplicate_char then -- for single byte ascii characters use the string library functions | |||
f.gsub=string.gsub | |||
f.match=string.match | |||
f.sub=string.sub | |||
else -- for multi-byte characters use the ustring library functions | |||
f.gsub=mw.ustring.gsub | |||
f.match=mw.ustring.match | |||
f.sub=mw.ustring.sub | |||
end | |||
local str = ''; -- the output string | local str = ''; -- the output string | ||
local comp = ''; -- what does 'comp' mean? | local comp = ''; -- what does 'comp' mean? | ||
Line 920: | Line 927: | ||
end | end | ||
-- typically duplicate_char is sepc | -- typically duplicate_char is sepc | ||
if | if f.sub(comp, 1,1) == duplicate_char then -- is first character same as duplicate_char? why test first character? | ||
-- Because individual string segments often (always?) begin with terminal punct for | -- Because individual string segments often (always?) begin with terminal punct for the | ||
-- preceding segment: 'First element' .. 'sepc next element' .. etc? | -- preceding segment: 'First element' .. 'sepc next element' .. etc? | ||
trim = false; | trim = false; | ||
end_chr = | end_chr = f.sub(str, -1,-1); -- get the last character of the output string | ||
-- str = str .. "<HERE(enchr=" .. end_chr.. ")" -- debug stuff? | -- str = str .. "<HERE(enchr=" .. end_chr.. ")" -- debug stuff? | ||
if end_chr == duplicate_char then -- if same as separator | if end_chr == duplicate_char then -- if same as separator | ||
str = | str = f.sub(str, 1,-2); -- remove it | ||
elseif end_chr == "'" then -- if it might be wikimarkup | elseif end_chr == "'" then -- if it might be wikimarkup | ||
if | if f.sub(str, -3,-1) == duplicate_char .. "''" then -- if last three chars of str are sepc'' | ||
str = | str = f.sub(str, 1, -4) .. "''"; -- remove them and add back '' | ||
elseif | elseif f.sub(str, -5,-1) == duplicate_char .. "]]''" then -- if last five chars of str are sepc]]'' | ||
trim = true; -- why? why do this and next differently from previous? | trim = true; -- why? why do this and next differently from previous? | ||
elseif | elseif f.sub(str, -4,-1) == duplicate_char .. "]''" then -- if last four chars of str are sepc]'' | ||
trim = true; -- same question | trim = true; -- same question | ||
end | end | ||
elseif end_chr == "]" then -- if it might be wikimarkup | elseif end_chr == "]" then -- if it might be wikimarkup | ||
if | if f.sub(str, -3,-1) == duplicate_char .. "]]" then -- if last three chars of str are sepc]] wikilink | ||
trim = true; | trim = true; | ||
elseif | elseif f.sub(str, -3,-1) == duplicate_char .. '"]' then -- if last three chars of str are sepc"] quoted external link | ||
trim = true; | trim = true; | ||
elseif | elseif f.sub(str, -2,-1) == duplicate_char .. "]" then -- if last two chars of str are sepc] external link | ||
trim = true; | trim = true; | ||
elseif | elseif f.sub(str, -4,-1) == duplicate_char .. "'']" then -- normal case when |url=something & |title=Title. | ||
trim = true; | trim = true; | ||
end | end | ||
elseif end_chr == " " then -- if last char of output string is a space | elseif end_chr == " " then -- if last char of output string is a space | ||
if | if f.sub(str, -2,-1) == duplicate_char .. " " then -- if last two chars of str are <sepc><space> | ||
str = | str = f.sub(str, 1,-3); -- remove them both | ||
end | end | ||
end | end | ||
Line 955: | Line 962: | ||
if value ~= comp then -- value does not equal comp when value contains html markup | if value ~= comp then -- value does not equal comp when value contains html markup | ||
local dup2 = duplicate_char; | local dup2 = duplicate_char; | ||
if | if f.match(dup2, "%A" ) then dup2 = "%" .. dup2; end -- if duplicate_char not a letter then escape it | ||
value = | value = f.gsub(value, "(%b<>)" .. dup2, "%1", 1 ) -- remove duplicate_char if it follows html markup | ||
else | else | ||
value = | value = f.sub(value, 2, -1 ); -- remove duplicate_char when it is first character | ||
end | end | ||
end | end | ||
Line 967: | Line 974: | ||
end | end | ||
return str; | return str; | ||
end | end | ||
Line 1,105: | Line 1,112: | ||
if 'vanc' == format then -- Vancouver-like author/editor name styling? | if 'vanc' == format then -- Vancouver-like author/editor name styling? | ||
sep = ' | sep = cfg.presentation['sep_nl_vanc']; -- name-list separator between authors is a comma | ||
namesep = ' '; | namesep = cfg.presentation['sep_name_vanc']; -- last/first separator is a space | ||
else | else | ||
sep = '; | sep = cfg.presentation['sep_nl']; -- name-list separator between authors is a semicolon | ||
namesep = ' | namesep = cfg.presentation['sep_name']; -- last/first separator is <comma><space> | ||
end | end | ||
Line 1,394: | Line 1,401: | ||
local function get_iso639_code (lang, this_wiki_code) | local function get_iso639_code (lang, this_wiki_code) | ||
if cfg.lang_name_remap[lang:lower()] then -- if there is a remapped name (because MediaWiki uses something that we don't think is correct) | |||
return cfg.lang_name_remap[lang:lower()][1], cfg.lang_name_remap[lang:lower()][2]; -- for this language 'name', return a possibly new name and appropriate code | |||
return | |||
end | end | ||
Line 1,447: | Line 1,447: | ||
local this_wiki_code = this_wiki:getCode() -- get this wiki's language code | local this_wiki_code = this_wiki:getCode() -- get this wiki's language code | ||
local this_wiki_name = mw.language.fetchLanguageName(this_wiki_code, this_wiki_code); -- get this wiki's language name | local this_wiki_name = mw.language.fetchLanguageName(this_wiki_code, this_wiki_code); -- get this wiki's language name | ||
names_table = mw.text.split (lang, '%s*,%s*'); -- names should be a comma separated list | names_table = mw.text.split (lang, '%s*,%s*'); -- names should be a comma separated list | ||
Line 1,462: | Line 1,457: | ||
if 2 == lang:len() or 3 == lang:len() then -- if two-or three-character code | if 2 == lang:len() or 3 == lang:len() then -- if two-or three-character code | ||
name = mw.language.fetchLanguageName( lang:lower(), this_wiki_code); -- get language name if |language= is a proper code | name = mw.language.fetchLanguageName( lang:lower(), this_wiki_code); -- get language name if |language= is a proper code | ||
if not is_set (name) then | |||
name = cfg.lang_code_remap[lang]; -- not supported by MediaWiki; is it in remap? | |||
end | |||
end | end | ||
Line 1,471: | Line 1,469: | ||
if is_set (code) then -- only 2- or 3-character codes | if is_set (code) then -- only 2- or 3-character codes | ||
name = | name = cfg.lang_code_remap[code] or name; -- override wikimedia when they misuse language codes/names | ||
if this_wiki_code ~= code then -- when the language is not the same as this wiki's language | if this_wiki_code ~= code then -- when the language is not the same as this wiki's language | ||
if 2 == code:len() then -- and is a two-character code | if 2 == code:len() then -- and is a two-character code | ||
add_prop_cat ('foreign_lang_source' .. code, {name, code}) | add_prop_cat ('foreign_lang_source' .. code, {name, code}) -- categorize it | ||
else -- or is a recognized language (but has a three-character code) | else -- or is a recognized language (but has a three-character code) | ||
add_prop_cat ('foreign_lang_source_2' .. code, {code}) | add_prop_cat ('foreign_lang_source_2' .. code, {code}) -- categorize it differently TODO: support multiple three-character code categories per cs1|2 template | ||
end | end | ||
end | end | ||
Line 1,508: | Line 1,506: | ||
Set style settings for CS1 citation templates. Returns separator and postscript settings | Set style settings for CS1 citation templates. Returns separator and postscript settings | ||
At en.wiki, for cs1: | |||
ps gets: '.' | |||
sep gets: '.' | |||
]] | ]] | ||
Line 1,513: | Line 1,514: | ||
local function set_cs1_style (ps) | local function set_cs1_style (ps) | ||
if not is_set (ps) then -- unless explicitly set to something | if not is_set (ps) then -- unless explicitly set to something | ||
ps = ' | ps = cfg.presentation['ps_cs1']; -- terminate the rendered citation | ||
end | end | ||
return ' | return cfg.presentation['sep_cs1'], ps; -- element separator | ||
end | end | ||
Line 1,522: | Line 1,523: | ||
Set style settings for CS2 citation templates. Returns separator, postscript, ref settings | Set style settings for CS2 citation templates. Returns separator, postscript, ref settings | ||
At en.wiki, for cs2: | |||
ps gets: '' (empty string - no terminal punctuation) | |||
sep gets: ',' | |||
]] | ]] | ||
Line 1,527: | Line 1,531: | ||
local function set_cs2_style (ps, ref) | local function set_cs2_style (ps, ref) | ||
if not is_set (ps) then -- if |postscript= has not been set, set cs2 default | if not is_set (ps) then -- if |postscript= has not been set, set cs2 default | ||
ps = ''; | ps = cfg.presentation['ps_cs2']; -- terminate the rendered citation | ||
end | end | ||
if not is_set (ref) then -- if |ref= is not set | if not is_set (ref) then -- if |ref= is not set | ||
ref = "harv"; -- set default |ref=harv | ref = "harv"; -- set default |ref=harv | ||
end | end | ||
return ' | return cfg.presentation['sep_cs2'], ps, ref; -- element separator | ||
end | end | ||
Line 1,955: | Line 1,959: | ||
end | end | ||
local is_journal = 'journal' == cite_class or (in_array (cite_class, {'citation', 'map'}) and 'journal' == origin); | local is_journal = 'journal' == cite_class or (in_array (cite_class, {'citation', 'map', 'interview'}) and 'journal' == origin); | ||
if is_set (page) then | if is_set (page) then | ||
Line 2,600: | Line 2,604: | ||
-- legacy: promote PublicationDate to Date if neither Date nor Year are set. | -- legacy: promote PublicationDate to Date if neither Date nor Year are set. | ||
local Date_origin; -- to hold the name of parameter promoted to Date; required for date error messaging | |||
if not is_set (Date) then | if not is_set (Date) then | ||
Date = Year; -- promote Year to Date | Date = Year; -- promote Year to Date | ||
Line 2,606: | Line 2,612: | ||
Date = PublicationDate; -- promote PublicationDate to Date | Date = PublicationDate; -- promote PublicationDate to Date | ||
PublicationDate = ''; -- unset, no longer needed | PublicationDate = ''; -- unset, no longer needed | ||
Date_origin = A:ORIGIN('PublicationDate'); -- save the name of the promoted parameter | |||
else | |||
Date_origin = A:ORIGIN('Year'); -- save the name of the promoted parameter | |||
end | end | ||
else | |||
Date_origin = A:ORIGIN('Date'); -- not a promotion; name required for error messaging | |||
end | end | ||
Line 2,620: | Line 2,631: | ||
local error_message = ''; | local error_message = ''; | ||
-- AirDate has been promoted to Date so not necessary to check it | -- AirDate has been promoted to Date so not necessary to check it | ||
-- local date_parameters_list = {['access-date']=AccessDate, ['archive-date']=ArchiveDate, ['date']=Date, ['doi-broken-date']=DoiBroken, | |||
-- ['embargo']=Embargo, ['lay-date']=LayDate, ['publication-date']=PublicationDate, ['year']=Year}; | |||
local date_parameters_list = { | |||
['access-date'] = {val=AccessDate, name=A:ORIGIN ('AccessDate')}, | |||
['archive-date'] = {val=ArchiveDate, name=A:ORIGIN ('ArchiveDate')}, | |||
['date'] = {val=Date, name=Date_origin}, | |||
['doi-broken-date'] = {val=DoiBroken, name=A:ORIGIN ('DoiBroken')}, | |||
['embargo'] = {val=Embargo, name=A:ORIGIN ('Embargo')}, | |||
['lay-date'] = {val=LayDate, name=A:ORIGIN ('LayDate')}, | |||
['publication-date'] ={val=PublicationDate, name=A:ORIGIN ('PublicationDate')}, | |||
['year'] = {val=Year, name=A:ORIGIN ('Year')}, | |||
}; | |||
anchor_year, Embargo, error_message = dates(date_parameters_list, COinS_date); | anchor_year, Embargo, error_message = dates(date_parameters_list, COinS_date); | ||
-- start temporary Julian / Gregorian calendar uncertainty categorization | -- start temporary Julian / Gregorian calendar uncertainty categorization | ||
if COinS_date.inter_cal_cat then | if COinS_date.inter_cal_cat then | ||
Line 2,656: | Line 2,678: | ||
-- for those wikis that can and want to have English date names translated to the local language, | -- for those wikis that can and want to have English date names translated to the local language, | ||
-- uncomment these three lines. Not supported by en.wiki (for obvious reasons) | -- uncomment these three lines. Not supported by en.wiki (for obvious reasons) | ||
-- if date_name_xlate (date_parameters_list) then | -- set date_name_xlate() second argument to true to translate English digits to local digits (will translate ymd dates) | ||
-- if date_name_xlate (date_parameters_list, false) then | |||
-- modified = true; | -- modified = true; | ||
-- end | -- end | ||
if modified then -- if the date_parameters_list values were modified | if modified then -- if the date_parameters_list values were modified | ||
AccessDate = date_parameters_list['access-date']; | AccessDate = date_parameters_list['access-date'].val; -- overwrite date holding parameters with modified values | ||
ArchiveDate = date_parameters_list['archive-date']; | ArchiveDate = date_parameters_list['archive-date'].val; | ||
Date = date_parameters_list['date']; | Date = date_parameters_list['date'].val; | ||
DoiBroken = date_parameters_list['doi-broken-date']; | DoiBroken = date_parameters_list['doi-broken-date'].val; | ||
LayDate = date_parameters_list['lay-date']; | LayDate = date_parameters_list['lay-date'].val; | ||
PublicationDate = date_parameters_list['publication-date']; | PublicationDate = date_parameters_list['publication-date'].val; | ||
end | end | ||
else | else | ||
Line 2,863: | Line 2,886: | ||
DeadURL = DeadURL:lower(); -- used later when assembling archived text | DeadURL = DeadURL:lower(); -- used later when assembling archived text | ||
if is_set( ArchiveURL ) then | if is_set( ArchiveURL ) then | ||
if is_set (ChapterURL) then -- | if is_set (ChapterURL) then -- if chapter-url is set apply archive url to it | ||
OriginalURL = ChapterURL; -- save copy of source chapter's url for archive text | OriginalURL = ChapterURL; -- save copy of source chapter's url for archive text | ||
OriginalURLorigin = ChapterURLorigin; -- name of chapter-url parameter for error messages | OriginalURLorigin = ChapterURLorigin; -- name of chapter-url parameter for error messages | ||
OriginalFormat = ChapterFormat; -- and original |format= | OriginalFormat = ChapterFormat; -- and original |chapter-format= | ||
if 'no' ~= DeadURL then | if 'no' ~= DeadURL then | ||
ChapterURL = ArchiveURL -- swap-in the archive's url | ChapterURL = ArchiveURL -- swap-in the archive's url | ||
ChapterURLorigin = A:ORIGIN('ArchiveURL') -- name of archive-url parameter for error messages | ChapterURLorigin = A:ORIGIN('ArchiveURL') -- name of archive-url parameter for error messages | ||
ChapterFormat = ArchiveFormat or ''; -- swap in archive's format | ChapterFormat = ArchiveFormat or ''; -- swap in archive's format | ||
ChapterUrlAccess = nil; -- restricted access levels do not make sense for archived urls | |||
end | end | ||
elseif is_set (URL) then | elseif is_set (URL) then | ||
Line 3,049: | Line 3,073: | ||
if is_set (Translators) then | if is_set (Translators) then | ||
Others = sepc .. ' ' | Others = safe_join ({sepc .. ' ', wrap_msg ('translated', Translators, use_lowercase), Others}, sepc); | ||
end | end | ||
if is_set (Interviewers) then | if is_set (Interviewers) then | ||
Others = sepc .. ' ' | Others = safe_join ({sepc .. ' ', wrap_msg ('interview', Interviewers, use_lowercase), Others}, sepc); | ||
end | end | ||
Line 3,233: | Line 3,257: | ||
if in_array(config.CitationClass, {"journal","citation"}) and is_set(Periodical) then | if in_array(config.CitationClass, {"journal","citation"}) and is_set(Periodical) then | ||
if is_set(Others) then Others = Others | if is_set(Others) then Others = safe_join ({Others, sepc .. " "}, sepc) end -- add terminal punctuation & space; check for dup sepc; TODO why do we need to do this here? | ||
tcommon = safe_join( {Others, Title, TitleNote, Conference, Periodical, Format, TitleType, Series, Language, Edition, Publisher, Agency, Volume}, sepc ); | tcommon = safe_join( {Others, Title, TitleNote, Conference, Periodical, Format, TitleType, Series, Language, Edition, Publisher, Agency, Volume}, sepc ); | ||
elseif in_array(config.CitationClass, {"book","citation"}) and not is_set(Periodical) then -- special cases for book cites | elseif in_array(config.CitationClass, {"book","citation"}) and not is_set(Periodical) then -- special cases for book cites | ||
Line 3,510: | Line 3,534: | ||
for k, v in pairs( pframe.args ) do | for k, v in pairs( pframe.args ) do | ||
if v ~= '' then | if v ~= '' then | ||
k = mw.ustring.gsub (k, '%d', cfg.date_names.local_digits); -- for enumerated parameters, translate 'local' digits to Western 0-9 | |||
if not validate( k, config.CitationClass ) then | if not validate( k, config.CitationClass ) then | ||
error_text = ""; | error_text = ""; | ||
Line 3,548: | Line 3,573: | ||
end | end | ||
missing_pipe_check (v); -- do we think that there is a parameter that is missing a pipe? | missing_pipe_check (v); -- do we think that there is a parameter that is missing a pipe? | ||
-- TODO: is this the best place for this translation? | |||
args[k] = v; | args[k] = v; | ||
elseif args[k] ~= nil or (k == 'postscript') then -- here when v is empty string | |||
args[k] = v; -- why do we do this? we don't support 'empty' parameters | |||
end | end | ||
end | end |