Module:Citation/CS1: Difference between revisions
Synch from sandbox; script error fix in strip_apostrophe_markup();
m>Trappist the monk (Disable strip_apostrophe_markup() and make_coins_title(); causing script errors;) |
m>Trappist the monk (Synch from sandbox; script error fix in strip_apostrophe_markup();) |
||
Line 696: | Line 696: | ||
end | end | ||
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >---------------------------------- | |||
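Returns a string in which all of Lua's magic pattern characters have been escaped with a leading %. This is important because functions like | |||
string.gsub() do not treat their pattern and replacement strings as literal strings: the pattern is interpreted as a Lua pattern and % is special in the replacement. | |||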
]] | |||
function escape_lua_magic_chars (argument)
	-- every character that has a special meaning in a Lua pattern, '%' included
	local magic_set = "[%^%$%(%)%%%.%[%]%*%+%-%?]";
	-- prefix each magic character with '%'; %0 is the whole match.
	-- Assigning to a single local discards gsub's second result (the replacement count).
	local escaped = argument:gsub (magic_set, "%%%0");
	return escaped;
end
--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >-------------------------------- | |||
Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata | Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata | ||
This function strips common patterns of apostrophe markup. We presume that editors who have taken the time to | This function strips common patterns of apostrophe markup. We presume that editors who have taken the time to | ||
Line 703: | Line 714: | ||
]] | ]] | ||
function strip_apostrophe_markup (argument) | function strip_apostrophe_markup (argument) | ||
local pattern, | local pattern, cap, cap2; | ||
if not is_set (argument) then return argument; end | if not is_set (argument) then return argument; end | ||
while true do -- look for and remove all 5-apostrophe (bold and italic) markup | while true do -- look for and remove all 5-apostrophe (bold and italic) markup | ||
if argument:match ("%'%'%'%'%'") then | if argument:match ("%'%'%'%'%'") then -- is there an instance of bold-italic? | ||
if argument:match ("%'%'%'%'%'.*%'%'%'%'%'") then | if argument:match ("%'%'%'%'%'.*%'%'%'%'%'") then -- 5, 5 | ||
pattern, | pattern, cap = argument:match ("(%'%'%'%'%'(.*)%'%'%'%'%')"); | ||
cap2 = ""; -- set to empty string so we do only one replacement at end | |||
elseif argument:match ("%'%'%'%'%'.*%'%'%'.*%'%'") then -- bold italic followed by italic (5, 3, 2) | elseif argument:match ("%'%'%'%'%'.*%'%'%'.*%'%'") then -- bold italic followed by italic (5, 3, 2) | ||
pattern, | pattern, cap, cap2 = argument:match ("(%'%'%'%'%'(.*)%'%'%'(.*)%'%')"); | ||
elseif argument:match ("%'%'%'%'%'.*%'%'.*%'%'%'") then -- bold italic followed by bold (5, 2, 3) | elseif argument:match ("%'%'%'%'%'.*%'%'.*%'%'%'") then -- bold italic followed by bold (5, 2, 3) | ||
pattern, | pattern, cap, cap2 = argument:match ("(%'%'%'%'%'(.*)%'%'(.*)%'%'%')"); | ||
elseif argument:match ("%'%'%'.*%'%'.*%'%'%'%'%'") then -- bold italic followed by italic (3, 2, 5) | elseif argument:match ("%'%'%'.*%'%'.*%'%'%'%'%'") then -- bold italic followed by italic (3, 2, 5) | ||
pattern, | pattern, cap, cap2 = argument:match ("(%'%'%'(.*)%'%'(.*)%'%'%'%'%')"); | ||
elseif argument:match ("%'%'.*%'%'%'.*%'%'%'%'%'") then -- italic followed by bold (2, 3, 5) | elseif argument:match ("%'%'.*%'%'%'.*%'%'%'%'%'") then -- italic followed by bold (2, 3, 5) | ||
pattern, | pattern, cap, cap2 = argument:match ("(%'%'(.*)%'%'%'(.*)%'%'%'%'%')"); | ||
end | end | ||
cap = escape_lua_magic_chars (cap); -- replace lua magic characters | |||
argument=argument:gsub(pattern, | cap2 = escape_lua_magic_chars (cap2); -- replace lua magic characters | ||
pattern = escape_lua_magic_chars (pattern); -- replace lua magic characters | |||
argument=argument:gsub(pattern, cap..cap2); -- remove the markup | |||
else | else | ||
break; -- none or no more 5-apostrophe matches | break; -- none or no more 5-apostrophe matches | ||
Line 728: | Line 741: | ||
while true do -- look for and remove all 3-apostrophe (bold) markup | while true do -- look for and remove all 3-apostrophe (bold) markup | ||
if argument:match ("%'%'%'.*%'%'%'") then -- is there an instance of bold? | if argument:match ("%'%'%'.*%'%'%'") then -- is there an instance of bold? | ||
pattern, | pattern, cap = argument:match ("(%'%'%'(.*)%'%'%')") | ||
cap = escape_lua_magic_chars (cap); -- replace lua magic characters | |||
argument=argument:gsub(pattern, | pattern = escape_lua_magic_chars (pattern); -- replace lua magic characters | ||
argument=argument:gsub(pattern, cap); -- remove the markup | |||
else | else | ||
break; -- none or no more 3 matches | break; -- none or no more 3 matches | ||
Line 737: | Line 751: | ||
while true do -- look for and remove all 2-apostrophe (italic) markup | while true do -- look for and remove all 2-apostrophe (italic) markup | ||
if argument:match ("%'%'.*%'%'") then -- is there an instance of italic? | if argument:match ("%'%'.*%'%'") then -- is there an instance of italic? | ||
pattern, | pattern, cap = argument:match ("(%'%'(.*)%'%')") | ||
cap = escape_lua_magic_chars (cap); -- replace lua magic characters | |||
argument=argument:gsub(pattern, | pattern = escape_lua_magic_chars (pattern); -- replace lua magic characters | ||
argument=argument:gsub(pattern, cap); -- remove the markup | |||
else | else | ||
break; -- none or no more 2 matches | break; -- none or no more 2 matches | ||
Line 773: | Line 788: | ||
end | end | ||
-- Extract page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS. | --[[--------------------------< G E T _ C O I N S _ P A G E S >------------------------------------------------ | ||
Extract page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS. | |||
]] | |||
function get_coins_pages (pages) | function get_coins_pages (pages) | ||
local pattern; | local pattern; | ||
if not is_set (pages) then return pages; end | if not is_set (pages) then return pages; end -- if no page numbers then we're done | ||
while true do | while true do | ||
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url " | pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url " | ||
if nil == pattern then break; end | if nil == pattern then break; end -- no more urls | ||
pattern = pattern | pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters | ||
pages = pages:gsub(pattern, ""); | pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible | ||
end | end | ||
pages = pages:gsub("[%[%]]", ""); | pages = pages:gsub("[%[%]]", ""); -- remove the brackets | ||
pages = pages:gsub("–", "-" ); -- replace endashes with hyphens | pages = pages:gsub("–", "-" ); -- replace endashes with hyphens | ||
pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (– etc.) with hyphens; do we need to replace numerical entities like   and the like? | pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (– etc.) with hyphens; do we need to replace numerical entities like   and the like? | ||
Line 1,720: | Line 1,739: | ||
local OCinSoutput = COinS{ | local OCinSoutput = COinS{ | ||
['Periodical'] = Periodical, | ['Periodical'] = Periodical, | ||
['Chapter'] = | ['Chapter'] = strip_apostrophe_markup (Chapter), -- Chapter stripped of bold / italic wikimarkup | ||
['Title'] = | ['Title'] = make_coins_title (Title, ScriptTitle), -- Title and ScriptTitle stripped of bold / italic wikimarkup | ||
['PublicationPlace'] = PublicationPlace, | ['PublicationPlace'] = PublicationPlace, | ||
['Date'] = first_set(COinS_date, Date), -- COinS_date has correctly formatted date if Date is valid; any reason to keep Date here? Should we be including invalid dates in metadata? | ['Date'] = first_set(COinS_date, Date), -- COinS_date has correctly formatted date if Date is valid; any reason to keep Date here? Should we be including invalid dates in metadata? | ||
Line 1,727: | Line 1,746: | ||
['Volume'] = Volume, | ['Volume'] = Volume, | ||
['Issue'] = Issue, | ['Issue'] = Issue, | ||
['Pages'] = get_coins_pages (first_set(Page, Pages, At)), -- pages stripped of external links | ['Pages'] = get_coins_pages (first_set(Page, Pages, At)), -- pages stripped of external links | ||
['Edition'] = Edition, | ['Edition'] = Edition, | ||
['PublisherName'] = PublisherName, | ['PublisherName'] = PublisherName, | ||
Line 1,932: | Line 1,951: | ||
Title = script_concatenate (Title, ScriptTitle); -- <bdi> tags, lang atribute, categorization, etc; must be done after title is wrapped | Title = script_concatenate (Title, ScriptTitle); -- <bdi> tags, lang atribute, categorization, etc; must be done after title is wrapped | ||
TransTitle = wrap( 'trans-quoted-title', TransTitle ); | TransTitle = wrap( 'trans-quoted-title', TransTitle ); | ||
-- Chapter = ''; -- chapter not allowed | -- Chapter = ''; -- chapter not allowed - disabled because doing this here doesn't display error msg and promoted url is lost | ||
else | else | ||
Title = wrap( 'italic-title', Title ); | Title = wrap( 'italic-title', Title ); | ||
Line 1,959: | Line 1,978: | ||
end | end | ||
end | end | ||
if is_set(Place) then | if is_set(Place) then | ||
Place = " " .. wrap( 'written', Place, use_lowercase ) .. sepc .. " "; | Place = " " .. wrap( 'written', Place, use_lowercase ) .. sepc .. " "; |