Editing Module:Citation/CS1/COinS
The edit can be undone. Please check the comparison below to verify that this is what you want to do, and then publish the changes below to finish undoing the edit.
Latest revision | Your text | ||
Line 21: | Line 21: | ||
title = strip_apostrophe_markup (title); -- strip any apostrophe markup | title = strip_apostrophe_markup (title); -- strip any apostrophe markup | ||
else | else | ||
title = ''; -- if not set, make sure title is an empty string | title=''; -- if not set, make sure title is an empty string | ||
end | end | ||
if is_set (script) then | if is_set (script) then | ||
Line 27: | Line 27: | ||
script = strip_apostrophe_markup (script); -- strip any apostrophe markup | script = strip_apostrophe_markup (script); -- strip any apostrophe markup | ||
else | else | ||
script = ''; | script=''; -- if not set, make sure script is an empty string | ||
end | end | ||
if is_set (title) and is_set (script) then | if is_set (title) and is_set (script) then | ||
Line 38: | Line 38: | ||
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >---------------------------------- | --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >---------------------------------- | ||
Returns a string where all of | Returns a string where all of lua's magic characters have been escaped. This is important because functions like | ||
string.gsub() treat their pattern and replace strings as patterns, not literal strings. | string.gsub() treat their pattern and replace strings as patterns, not literal strings. | ||
]] | ]] | ||
Line 44: | Line 44: | ||
local function escape_lua_magic_chars (argument) | local function escape_lua_magic_chars (argument) | ||
argument = argument:gsub("%%", "%%%%"); -- replace % with %% | argument = argument:gsub("%%", "%%%%"); -- replace % with %% | ||
argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other | argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other lua magic pattern characters | ||
return argument; | return argument; | ||
end | end | ||
Line 60: | Line 60: | ||
while true do | while true do | ||
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the | pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url " | ||
if nil == pattern then break; end -- no more | if nil == pattern then break; end -- no more urls | ||
pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape | pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters | ||
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible | pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible | ||
end | end | ||
pages = pages:gsub("[%[%]]", ""); -- remove the brackets | pages = pages:gsub("[%[%]]", ""); -- remove the brackets | ||
pages = pages:gsub("–", "-" ); | pages = pages:gsub("–", "-" ); -- replace endashes with hyphens | ||
pages = pages:gsub("&%w+;", "-" ); | pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (– etc.) with hyphens; do we need to replace numerical entities like   and the like? | ||
return pages; | return pages; | ||
end | end | ||
Line 80: | Line 80: | ||
MathML with SVG or PNG fallback | MathML with SVG or PNG fallback | ||
All three are heavy with | All three are heavy with html and css which doesn't belong in the metadata. | ||
Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings | Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings | ||
Line 87: | Line 87: | ||
This function gets the rendered form of an equation according to the editor's preference before the page is saved. It | This function gets the rendered form of an equation according to the editor's preference before the page is saved. It | ||
then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so | then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so | ||
that the page is saved without extraneous | that the page is saved without extraneous html/css markup and with a reasonably readable text form of the equation. | ||
When a replacement is made, this function returns true and the value with replacement; otherwise false and the | When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial | ||
value. To replace multiple equations it is | value. To replace multiple equations it is necessary to call this function from within a loop. | ||
]=] | ]=] | ||
Line 120: | Line 120: | ||
--[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- | --[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- | ||
Cleanup parameter values for the metadata by removing or replacing invisible characters and certain | Cleanup parameter values for the metadata by removing or replacing invisible characters and certain html entities. | ||
2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content | 2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content | ||
Line 137: | Line 137: | ||
end | end | ||
value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); | value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message | ||
value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content | value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content | ||
Line 143: | Line 143: | ||
value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space | value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space | ||
value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space | value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space | ||
if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero | if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero width joiner characters from indic script | ||
value = value:gsub ('‍', ''); | value = value:gsub ('‍', ''); -- remove ‍ entities | ||
value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen | value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen | ||
end | end | ||
value = value:gsub ('[\009\010\013 ] | value = value:gsub ('[\009\010\013]', ' '); -- replace horizontal tab, line feed, carriage return with plain space | ||
return value; | return value; | ||
end | end | ||
Line 197: | Line 197: | ||
-- these used only for periodicals | -- these used only for periodicals | ||
OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall | OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall | ||
OCinSoutput["rft.chron"] = data.Chron; -- free-form date components | OCinSoutput["rft.chron"] = data.Chron; -- free-form date components | ||
OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books | OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books | ||
Line 221: | Line 220: | ||
end | end | ||
end | end | ||
else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} | else --{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} | ||
OCinSoutput["rft.genre"] = "unknown"; | OCinSoutput["rft.genre"] = "unknown"; | ||
end | end | ||
Line 237: | Line 236: | ||
OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation | OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation | ||
end | end | ||
-- and now common parameters (as much as possible) | -- and now common parameters (as much as possible) | ||
OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation | OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation | ||
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? | for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? | ||
if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end | if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end | ||
Line 246: | Line 244: | ||
if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry | if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry | ||
OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | ||
elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc | elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords | ||
OCinSoutput[ id ] = v; | OCinSoutput[ id ] = v; | ||
elseif id then -- when cfg.id_handlers[k].COinS is not nil | |||
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v }; -- others; provide a url | |||
elseif id then -- when cfg.id_handlers[k].COinS is not nil | |||
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v | |||
end | end | ||
end | end | ||
Line 257: | Line 253: | ||
local last, first; | local last, first; | ||
for k, v in ipairs( data.Authors ) do | for k, v in ipairs( data.Authors ) do | ||
last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki | last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki strip markers, non-printing or invisible characters | ||
if k == 1 then -- for the first author name only | if k == 1 then -- for the first author name only | ||
if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name | if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name | ||
OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation | OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation | ||
OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation | OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation | ||
Line 271: | Line 267: | ||
OCinSoutput["rft.au"] = last; -- book, journal, dissertation | OCinSoutput["rft.au"] = last; -- book, journal, dissertation | ||
end | end | ||
end | end | ||
end | end | ||
Line 277: | Line 272: | ||
OCinSoutput.rft_id = data.URL; | OCinSoutput.rft_id = data.URL; | ||
OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; | OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; | ||
OCinSoutput = setmetatable( OCinSoutput, nil ); | |||
-- sort with version string always first, and combine. | -- sort with version string always first, and combine. | ||
-- table.sort( OCinSoutput ); | --table.sort( OCinSoutput ); | ||
table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004" | table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004" | ||
return table.concat(OCinSoutput, "&"); | return table.concat(OCinSoutput, "&"); | ||
end | end |