Editing Module:Citation/CS1/COinS
The edit can be undone. Please check the comparison below to verify that this is what you want to do, and then publish the changes below to finish undoing the edit.
Latest revision | Your text | ||
Line 1: | Line 1: | ||
--[[ | |||
History of changes since last sync: 2015-12-12 | |||
]] | |||
local coins = {}; | |||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
]] | ]] | ||
local is_set, in_array, remove_wiki_link | local is_set, in_array, remove_wiki_link; -- functions in Module:Citation/CS1/Utilities | ||
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | ||
--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >-------------------------------- | |||
Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata. | |||
This function strips common patterns of apostrophe markup. We presume that editors who have taken the time to | |||
markup a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind. | |||
]] | |||
local function strip_apostrophe_markup (argument)
	-- nothing to do for unset values or for values without any doubled apostrophe
	if not is_set (argument) then
		return argument;
	end
	if not argument:find ("''", 1, true) then
		return argument;
	end

	-- apostrophe runs that make up wiki markup, longest first:
	-- bold italic (5), empty italic (4), bold (3), italic (2)
	local markup_runs = {"'''''", "''''", "'''", "''"};

	local stripped;
	repeat
		stripped = false;
		for _, run in ipairs (markup_runs) do
			if argument:find (run, 1, true) then					-- plain-text search for this run length
				argument = argument:gsub (run, '');					-- apostrophe is not a magic character; remove every instance
				stripped = true;
				break;												-- restart from the longest run
			end
		end
	until not stripped;												-- stop when no run of two or more apostrophes remains

	return argument;
end
Line 12: | Line 51: | ||
Makes a title for COinS from Title and / or ScriptTitle (or any other name-script pairs) | Makes a title for COinS from Title and / or ScriptTitle (or any other name-script pairs) | ||
Apostrophe markup (bold, italics) is stripped from each value so that the COinS metadata isn't | Apostrophe markup (bold, italics) is stripped from each value so that the COinS metadata isn't corrupted with strings | ||
of %27%27... | of %27%27... | ||
Line 21: | Line 60: | ||
title = strip_apostrophe_markup (title); -- strip any apostrophe markup | title = strip_apostrophe_markup (title); -- strip any apostrophe markup | ||
else | else | ||
title = ''; -- if not set, make sure title is an empty string | title=''; -- if not set, make sure title is an empty string | ||
end | end | ||
if is_set (script) then | if is_set (script) then | ||
Line 27: | Line 66: | ||
script = strip_apostrophe_markup (script); -- strip any apostrophe markup | script = strip_apostrophe_markup (script); -- strip any apostrophe markup | ||
else | else | ||
script = ''; | script=''; -- if not set, make sure script is an empty string | ||
end | end | ||
if is_set (title) and is_set (script) then | if is_set (title) and is_set (script) then | ||
Line 33: | Line 72: | ||
end | end | ||
return title .. script; -- return the concatenation | return title .. script; -- return the concatenation | ||
end | end | ||
Line 60: | Line 86: | ||
while true do | while true do | ||
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the | pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url " | ||
if nil == pattern then break; end -- no more | if nil == pattern then break; end -- no more urls | ||
pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape | pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters | ||
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible | pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible | ||
end | end | ||
pages = pages:gsub("[%[%]]", ""); -- remove the brackets | pages = pages:gsub("[%[%]]", ""); -- remove the brackets | ||
pages = pages:gsub("–", "-" ); | pages = pages:gsub("–", "-" ); -- replace endashes with hyphens | ||
pages = pages:gsub("&%w+;", "-" ); | pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like? | ||
return pages; | return pages; | ||
end | end | ||
Line 74: | Line 100: | ||
--[=[-------------------------< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >------------------ | --[=[-------------------------< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >------------------ | ||
There are three options for math markup | There are three options for math markup that depend on the editor's math preference settings. These settings | ||
are at [[Special:Preferences#mw-prefsection-rendering]] and are | |||
PNG images | PNG images | ||
TeX source | TeX source | ||
MathML with SVG or PNG fallback | MathML with SVG or PNG fallback | ||
All three are heavy with | All three are heavy with html and css which don't belong in the metadata. | ||
This function gets the rendered form of an equation according to the editor's preference. It then searches the | |||
of the | rendering for the text equivalent of the rendered equation and replaces the rendering with that. | ||
When a replacement is made, this function returns true and the value with replacement | |||
When a replacement is made, this function returns true and the value with replacement | |||
]=] | ]=] | ||
local function coins_replace_math_stripmarker (value) | local function coins_replace_math_stripmarker (value) | ||
local stripmarker = | local stripmarker = '\127UNIQ%-%-math%-[%a%d]+%-QINU\127'; -- math stripmarker pattern | ||
local rendering = value:match (stripmarker); -- is there a math stripmarker | local rendering = value:match (stripmarker); -- is there a math stripmarker | ||
if not rendering then -- when value doesn't have a math stripmarker, abandon this test | if not rendering then -- when value doesn't have a math stripmarker, abandon this test | ||
-- return 'not rendering'; | |||
return false, value; | return false, value; | ||
end | end | ||
Line 116: | Line 138: | ||
return true, value:gsub (stripmarker, rendering, 1); | return true, value:gsub (stripmarker, rendering, 1); | ||
end | end | ||
--[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- | --[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- | ||
Cleanup parameter values for the metadata by removing or replacing invisible characters and certain | Cleanup parameter values for the metadata by removing or replacing invisible characters and certain html entities. | ||
2015-12-10: there is a bug in mw.text.unstripNoWiki (). It | 2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaced math stripmarkers with the appropriate content | ||
when it shouldn't. See https://phabricator.wikimedia.org/T121085 and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29 | when it shouldn't. See https://phabricator.wikimedia.org/T121085 and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29 | ||
TODO: move the replacement patterns and replacement values into a table in /Configuration similar to the invisible | TODO: move the replacement patterns and replacement values into a table in /Configuration similar to the invisible | ||
characters table? | characters table? | ||
]] | ]] | ||
local function coins_cleanup (value) | local function coins_cleanup (value) | ||
local replaced = true; -- default state to get the do loop running | local replaced = true; -- default state to get the do loop running | ||
while replaced do | while replaced do | ||
replaced, value = coins_replace_math_stripmarker (value); -- replace math stripmarker with text representation of the equation | replaced, value = coins_replace_math_stripmarker (value); -- replace math stripmarker with text representation of the equation | ||
end | end | ||
value = value:gsub ( | value = value:gsub ('\127UNIQ%-%-math%-[%a%d]+%-QINU\127', "MATH RENDER ERROR"); -- | ||
value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content | value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content | ||
value = value:gsub ('<span class="nowrap" style="padding%-left:0%.1em;">' | value = value:gsub ('<span class="nowrap" style="padding%-left:0%.1em;">'s</span>', "'s"); -- replace {{'s}} template with simple apostrophe-s | ||
value = value:gsub ('‍\226\128\138\039\226\128\139', "'"); -- replace {{'}} with simple apostrophe | |||
value = value:gsub ('\226\128\138\039\226\128\139', "'"); -- replace {{'}} with simple apostrophe (as of 2015-12-11) | |||
value = value:gsub (' ', ' '); -- replace entity with plain space | value = value:gsub (' ', ' '); -- replace entity with plain space | ||
value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space | value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space | ||
value = value:gsub ('‍', ''); -- remove ‍ entities | |||
value = value:gsub ('[\226\128\141\226\128\139]', '') -- remove zero-width joiner, zero-width space | |||
value = value:gsub ('[\194\173\009\010\013]', ' '); -- replace soft hyphen, horizontal tab, line feed, carriage return with plain space | |||
value = value:gsub ('[\009\010\013 ] | |||
return value; | return value; | ||
end | end | ||
Line 180: | Line 200: | ||
}); | }); | ||
if in_array (class, {'arxiv | if in_array (class, {'arxiv', 'journal', 'news'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or | ||
('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then | ('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then | ||
OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier | OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier | ||
if | if 'arxiv' == class then -- set genre according to the type of citation template we are rendering | ||
OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv | OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv | ||
elseif 'conference' == class then | elseif 'conference' == class then | ||
OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set) | OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set) | ||
Line 197: | Line 217: | ||
-- these used only for periodicals | -- these used only for periodicals | ||
OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall | OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall | ||
OCinSoutput["rft.chron"] = data.Chron; -- free-form date components | OCinSoutput["rft.chron"] = data.Chron; -- free-form date components | ||
OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books | OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books | ||
Line 209: | Line 228: | ||
elseif 'conference' == class then -- cite conference when Periodical not set | elseif 'conference' == class then -- cite conference when Periodical not set | ||
OCinSoutput["rft.genre"] = "conference"; | OCinSoutput["rft.genre"] = "conference"; | ||
elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then | elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then | ||
if is_set (data.Chapter) then | if is_set (data.Chapter) then | ||
Line 221: | Line 239: | ||
end | end | ||
end | end | ||
else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} | else --{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} | ||
OCinSoutput["rft.genre"] = "unknown"; | OCinSoutput["rft.genre"] = "unknown"; | ||
end | end | ||
Line 237: | Line 255: | ||
OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation | OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation | ||
end | end | ||
-- and now common parameters (as much as possible) | -- and now common parameters (as much as possible) | ||
OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation | OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation | ||
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? | for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? | ||
-- if k == 'ISBN' then v = clean_isbn( v ) end | |||
if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end | if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end | ||
local id = cfg.id_handlers[k].COinS; | local id = cfg.id_handlers[k].COinS; | ||
if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry | if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry | ||
OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | ||
elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc | elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords | ||
OCinSoutput[ id ] = v; | OCinSoutput[ id ] = v; | ||
elseif id then -- when cfg.id_handlers[k].COinS is not nil | |||
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v }; -- others; provide a url | |||
elseif id then -- when cfg.id_handlers[k].COinS is not nil | |||
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v | |||
end | end | ||
end | end | ||
--[[ | |||
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? | |||
local id, value = cfg.id_handlers[k].COinS; | |||
if k == 'ISBN' then value = clean_isbn( v ); else value = v; end | |||
if string.sub( id or "", 1, 4 ) == 'info' then | |||
OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | |||
else | |||
OCinSoutput[ id ] = value; | |||
end | |||
end | |||
]] | |||
local last, first; | local last, first; | ||
for k, v in ipairs( data.Authors ) do | for k, v in ipairs( data.Authors ) do | ||
last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki | last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki strip markers, non-printing or invisible characters | ||
if k == 1 then -- for the first author name only | if k == 1 then -- for the first author name only | ||
if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name | if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name | ||
OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation | OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation | ||
OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation | OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation | ||
Line 271: | Line 298: | ||
OCinSoutput["rft.au"] = last; -- book, journal, dissertation | OCinSoutput["rft.au"] = last; -- book, journal, dissertation | ||
end | end | ||
end | end | ||
end | end | ||
Line 277: | Line 303: | ||
OCinSoutput.rft_id = data.URL; | OCinSoutput.rft_id = data.URL; | ||
OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; | OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; | ||
OCinSoutput = setmetatable( OCinSoutput, nil ); | |||
-- sort with version string always first, and combine. | -- sort with version string always first, and combine. | ||
table.sort( OCinSoutput ); | |||
table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004" | table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004" | ||
return table.concat(OCinSoutput, "&"); | return table.concat(OCinSoutput, "&"); | ||
end | end | ||
Line 302: | Line 321: | ||
cfg = cfg_table_ptr; | cfg = cfg_table_ptr; | ||
is_set = utilities_page_ptr.is_set; -- import functions from | is_set = utilities_page_ptr.is_set; -- import functions from select Module:Citation/CS1/Utilities module | ||
in_array = utilities_page_ptr.in_array; | in_array = utilities_page_ptr.in_array; | ||
remove_wiki_link = utilities_page_ptr.remove_wiki_link; | remove_wiki_link = utilities_page_ptr.remove_wiki_link; | ||
end | end | ||
return { | return { |