Module:Citation/CS1: Difference between revisions

Synch from sandbox; script error fix in strip_apostrophe_markup();
m>Trappist the monk
(Disable strip_apostrophe_markup() and make_coins_title(); causing script errors;)
m>Trappist the monk
(Synch from sandbox; script error fix in strip_apostrophe_markup();)
Line 696: Line 696:
end
end


--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

Returns a copy of argument in which every Lua pattern magic character ( ^ $ ( ) % . [ ] * + - ? ) is prefixed
with a percent sign.  Use this before handing text to string.gsub() and friends, which interpret their pattern
(and the percent signs in their replacement) specially rather than as literal text.

argument: the string to escape; it must be a string because its gsub method is called directly.
returns: the escaped string only (the substitution count from gsub is discarded).

]]
function escape_lua_magic_chars (argument)
	-- a single pass suffices: the percent sign is part of the set, and %0 in the replacement is the matched character
	local escaped = argument:gsub ("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0");
	return escaped;
end
--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------


--[[
Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata
Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata
This function strips common patterns of apostrophe markup.  We presume that editors who have taken the time to
This function strips common patterns of apostrophe markup.  We presume that editors who have taken the time to
Line 703: Line 714:
]]
]]
function strip_apostrophe_markup (argument)
function strip_apostrophe_markup (argument)
local pattern, c1, c2;
local pattern, cap, cap2;
if not is_set (argument) then return argument; end
if not is_set (argument) then return argument; end
while true do -- look for and remove all 5-apostrophe (bold and italic) markup
while true do -- look for and remove all 5-apostrophe (bold and italic) markup
if argument:match ("%'%'%'%'%'") then -- is there an instance of bold-italic?
if argument:match ("%'%'%'%'%'") then -- is there an instance of bold-italic?
if argument:match ("%'%'%'%'%'.*%'%'%'%'%'") then -- 5, 5
if argument:match ("%'%'%'%'%'.*%'%'%'%'%'") then -- 5, 5
pattern, c1 = argument:match ("(%'%'%'%'%'(.*)%'%'%'%'%')");
pattern, cap = argument:match ("(%'%'%'%'%'(.*)%'%'%'%'%')");
c2 = ""; -- set to empty string so we do only one replacement at end
cap2 = ""; -- set to empty string so we do only one replacement at end
elseif argument:match ("%'%'%'%'%'.*%'%'%'.*%'%'") then -- bold italic followed by italic (5, 3, 2)
elseif argument:match ("%'%'%'%'%'.*%'%'%'.*%'%'") then -- bold italic followed by italic (5, 3, 2)
pattern, c1, c2 = argument:match ("(%'%'%'%'%'(.*)%'%'%'(.*)%'%')");
pattern, cap, cap2 = argument:match ("(%'%'%'%'%'(.*)%'%'%'(.*)%'%')");
elseif argument:match ("%'%'%'%'%'.*%'%'.*%'%'%'") then -- bold italic followed by bold (5, 2, 3)
elseif argument:match ("%'%'%'%'%'.*%'%'.*%'%'%'") then -- bold italic followed by bold (5, 2, 3)
pattern, c1, c2 = argument:match ("(%'%'%'%'%'(.*)%'%'(.*)%'%'%')");
pattern, cap, cap2 = argument:match ("(%'%'%'%'%'(.*)%'%'(.*)%'%'%')");
elseif argument:match ("%'%'%'.*%'%'.*%'%'%'%'%'") then -- bold italic followed by italic (3, 2, 5)
elseif argument:match ("%'%'%'.*%'%'.*%'%'%'%'%'") then -- bold italic followed by italic (3, 2, 5)
pattern, c1, c2 = argument:match ("(%'%'%'(.*)%'%'(.*)%'%'%'%'%')");
pattern, cap, cap2 = argument:match ("(%'%'%'(.*)%'%'(.*)%'%'%'%'%')");
elseif argument:match ("%'%'.*%'%'%'.*%'%'%'%'%'") then -- italic followed by bold (2, 3, 5)
elseif argument:match ("%'%'.*%'%'%'.*%'%'%'%'%'") then -- italic followed by bold (2, 3, 5)
pattern, c1, c2 = argument:match ("(%'%'(.*)%'%'%'(.*)%'%'%'%'%')");
pattern, cap, cap2 = argument:match ("(%'%'(.*)%'%'%'(.*)%'%'%'%'%')");
end
end
pattern = pattern:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1"); -- pattern is not a literal string; escape lua's magic pattern characters
cap = escape_lua_magic_chars (cap); -- replace lua magic characters
argument=argument:gsub(pattern, c1..c2); -- remove the markup
cap2 = escape_lua_magic_chars (cap2); -- replace lua magic characters
pattern = escape_lua_magic_chars (pattern); -- replace lua magic characters
argument=argument:gsub(pattern, cap..cap2); -- remove the markup
else
else
break; -- none or no more 5-apostrophe matches
break; -- none or no more 5-apostrophe matches
Line 728: Line 741:
while true do -- look for and remove all 3-apostrophe (bold) markup
while true do -- look for and remove all 3-apostrophe (bold) markup
if argument:match ("%'%'%'.*%'%'%'") then -- is there an instance of bold?
if argument:match ("%'%'%'.*%'%'%'") then -- is there an instance of bold?
pattern, c1 = argument:match ("(%'%'%'(.*)%'%'%')")
pattern, cap = argument:match ("(%'%'%'(.*)%'%'%')")
pattern = pattern:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1"); -- pattern is not a literal string; escape lua's magic pattern characters
cap = escape_lua_magic_chars (cap); -- replace lua magic characters
argument=argument:gsub(pattern, c1); -- remove the markup
pattern = escape_lua_magic_chars (pattern); -- replace lua magic characters
argument=argument:gsub(pattern, cap); -- remove the markup
else
else
break; -- none or no more 3 matches
break; -- none or no more 3 matches
Line 737: Line 751:
while true do -- look for and remove all 2-apostrophe (italic) markup
while true do -- look for and remove all 2-apostrophe (italic) markup
if argument:match ("%'%'.*%'%'") then -- is there an instance of italic?
if argument:match ("%'%'.*%'%'") then -- is there an instance of italic?
pattern, c1 = argument:match ("(%'%'(.*)%'%')")
pattern, cap = argument:match ("(%'%'(.*)%'%')")
pattern = pattern:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1"); -- pattern is not a literal string; escape lua's magic pattern characters
cap = escape_lua_magic_chars (cap); -- replace lua magic characters
argument=argument:gsub(pattern, c1); -- remove the markup
pattern = escape_lua_magic_chars (pattern); -- replace lua magic characters
argument=argument:gsub(pattern, cap); -- remove the markup
else
else
break; -- none or no more 2 matches
break; -- none or no more 2 matches
Line 773: Line 788:
end
end


-- Extract page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS.
--[[--------------------------< G E T _ C O I N S _ P A G E S >------------------------------------------------
 
Extract page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS.
 
]]
function get_coins_pages (pages)
function get_coins_pages (pages)
local pattern;
local pattern;
if not is_set (pages) then return pages; end -- if no page numbers then we're done
if not is_set (pages) then return pages; end -- if no page numbers then we're done
while true do
while true do
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url "
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url "
if nil == pattern then break; end -- no more urls
if nil == pattern then break; end -- no more urls
pattern = pattern:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1"); -- pattern is not a literal string; escape lua's magic pattern characters
pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
end
end
pages = pages:gsub("[%[%]]", ""); -- remove the brackets
pages = pages:gsub("[%[%]]", ""); -- remove the brackets
pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?
pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?
Line 1,720: Line 1,739:
local OCinSoutput = COinS{
local OCinSoutput = COinS{
['Periodical'] = Periodical,
['Periodical'] = Periodical,
['Chapter'] = Chapter, --strip_apostrophe_markup (Chapter), -- Chapter stripped of bold / italic wikimarkup
['Chapter'] = strip_apostrophe_markup (Chapter), -- Chapter stripped of bold / italic wikimarkup
['Title'] = Title, --make_coins_title (Title, ScriptTitle), -- strip_apostrophe_markup (Title), -- Title stripped of bold / italic wikimarkup
['Title'] = make_coins_title (Title, ScriptTitle), -- Title and ScriptTitle stripped of bold / italic wikimarkup
['PublicationPlace'] = PublicationPlace,
['PublicationPlace'] = PublicationPlace,
['Date'] = first_set(COinS_date, Date), -- COinS_date has correctly formatted date if Date is valid; any reason to keep Date here?  Should we be including invalid dates in metadata?
['Date'] = first_set(COinS_date, Date), -- COinS_date has correctly formatted date if Date is valid; any reason to keep Date here?  Should we be including invalid dates in metadata?
Line 1,727: Line 1,746:
['Volume'] = Volume,
['Volume'] = Volume,
['Issue'] = Issue,
['Issue'] = Issue,
['Pages'] = get_coins_pages (first_set(Page, Pages, At)), -- pages stripped of external links
['Pages'] = get_coins_pages (first_set(Page, Pages, At)), -- pages stripped of external links
['Edition'] = Edition,
['Edition'] = Edition,
['PublisherName'] = PublisherName,
['PublisherName'] = PublisherName,
Line 1,932: Line 1,951:
Title = script_concatenate (Title, ScriptTitle); -- <bdi> tags, lang atribute, categorization, etc; must be done after title is wrapped
Title = script_concatenate (Title, ScriptTitle); -- <bdi> tags, lang atribute, categorization, etc; must be done after title is wrapped
TransTitle = wrap( 'trans-quoted-title', TransTitle );
TransTitle = wrap( 'trans-quoted-title', TransTitle );
-- Chapter = ''; -- chapter not allowed
-- Chapter = ''; -- chapter not allowed - disabled because doing this here doesn't display error msg and promoted url is lost
else
else
Title = wrap( 'italic-title', Title );
Title = wrap( 'italic-title', Title );
Line 1,959: Line 1,978:
end
end
end
end
 
if is_set(Place) then
if is_set(Place) then
Place = " " .. wrap( 'written', Place, use_lowercase ) .. sepc .. " ";
Place = " " .. wrap( 'written', Place, use_lowercase ) .. sepc .. " ";
Anonymous user