Module:Citation/CS1/COinS: Difference between revisions

    From Nonbinary Wiki
    m>Trappist the monk
    No edit summary
     
    (24 intermediate revisions by 7 users not shown)
    Line 1: Line 1:
    --[[
    History of changes since last sync: 2015-12-12
    ]]
    local coins = {};


    --[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
    --[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
    ]]
    ]]


    local is_set, in_array, remove_wiki_link; -- functions in Module:Citation/CS1/Utilities
    local is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities


    local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
    local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
    --[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------
    Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata.
    This function strips common patterns of apostrophe markup.  We presume that editors who have taken the time to
    markup a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind.
    ]]
    local function strip_apostrophe_markup (argument)
    	-- Removes wiki bold / italic apostrophe runs from argument and returns the result.
    	-- Unset values, and values without any doubled apostrophe, are returned untouched.
    	if not is_set (argument) then
    		return argument;
    	end
    	if not argument:find ("''", 1, true) then								-- no doubled apostrophe anywhere; nothing to strip
    		return argument;
    	end

    	-- each entry: plain-text probe, matching gsub pattern; ordered longest run first
    	local runs = {
    		{"'''''", "%'%'%'%'%'"},											-- bold italic (5)
    		{"''''", "%'%'%'%'"},												-- italic start and end without content (4)
    		{"'''", "%'%'%'"},													-- bold (3)
    		{"''", "%'%'"},														-- italic (2)
    	};

    	local stripped;
    	repeat																	-- after every removal start again with the longest run
    		stripped = false;
    		for _, run in ipairs (runs) do
    			if argument:find (run[1], 1, true) then
    				argument = argument:gsub (run[2], "");						-- remove all instances of this run
    				stripped = true;
    				break;
    			end
    		end
    	until not stripped;

    	return argument;														-- single apostrophes, if any, are left behind
    end




    Line 51: Line 12:
    Makes a title for COinS from Title and / or ScriptTitle (or any other name-script pairs)
    Makes a title for COinS from Title and / or ScriptTitle (or any other name-script pairs)


    Apostrophe markup (bold, italics) is stripped from each value so that the COinS metadata isn't correupted with strings
    Apostrophe markup (bold, italics) is stripped from each value so that the COinS metadata isn't corrupted with strings
    of %27%27...
    of %27%27...


    Line 60: Line 21:
    title = strip_apostrophe_markup (title); -- strip any apostrophe markup
    title = strip_apostrophe_markup (title); -- strip any apostrophe markup
    else
    else
    title=''; -- if not set, make sure title is an empty string
    title = ''; -- if not set, make sure title is an empty string
    end
    end
    if is_set (script) then
    if is_set (script) then
    Line 66: Line 27:
    script = strip_apostrophe_markup (script); -- strip any apostrophe markup
    script = strip_apostrophe_markup (script); -- strip any apostrophe markup
    else
    else
    script=''; -- if not set, make sure script is an empty string
    script = ''; -- if not set, make sure script is an empty string
    end
    end
    if is_set (title) and is_set (script) then
    if is_set (title) and is_set (script) then
    Line 72: Line 33:
    end
    end
    return title .. script; -- return the concatenation
    return title .. script; -- return the concatenation
    end
    --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------
    Returns a string where all of Lua's magic characters have been escaped.  This is important because functions like
    string.gsub() treat their pattern and replace strings as patterns, not literal strings.
    ]]
    local function escape_lua_magic_chars (argument)
    	-- Prefix every Lua pattern magic character (including % itself) with %,
    	-- so that the returned string can be used as a literal pattern.
    	local escaped = argument:gsub ("[%%%^%$%(%)%.%[%]%*%+%-%?]", "%%%0");	-- %0 in the replacement is the matched character
    	return escaped;															-- only the string; gsub's count is dropped
    end
    end


    Line 86: Line 60:
    while true do
    while true do
    pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url "
    pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the URL and following space(s): "[url "
    if nil == pattern then break; end -- no more urls
    if nil == pattern then break; end -- no more URLs
    pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters
    pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape Lua's magic pattern characters
    pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
    pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
    end
    end
    pages = pages:gsub("[%[%]]", ""); -- remove the brackets
    pages = pages:gsub("[%[%]]", ""); -- remove the brackets
    pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
    pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
    pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?
    pages = pages:gsub("&%w+;", "-" ); -- and replace HTML entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?
    return pages;
    return pages;
    end
    end
    Line 100: Line 74:
    --[=[-------------------------< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >------------------
    --[=[-------------------------< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >------------------


    There are three options for math markup that depends on the editor's math preference settings.  These settings
    There are three options for math markup rendering that depend on the editor's math preference settings.  These
    are at [[Special:Preferences#mw-prefsection-rendering]] and are
    settings are at [[Special:Preferences#mw-prefsection-rendering]] and are
    PNG images
    PNG images
    TeX source
    TeX source
    MathML with SVG or PNG fallback
    MathML with SVG or PNG fallback


    All three are heavy with html and css which don't belong in the metadata.
    All three are heavy with HTML and CSS which doesn't belong in the metadata.
     
    Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings
    of the last editor to save the page.
     
    This function gets the rendered form of an equation according to the editor's preference before the page is saved.  It
    then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so
    that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation.


    This function gets the rendered form of an equation according to the editor's preference.  It then searches the
    When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial
    rendering for the text equivalent of the rendered equation and replaces the rendering with that.
    value.  To replace multiple equations it is necessary to call this function from within a loop.


    ]=]
    ]=]


    local function coins_replace_math_stripmarker (value)
    local function coins_replace_math_stripmarker (value)
    local stripmarker = '\127UNIQ%-%-math%-[%a%d]+%-QINU\127'; -- math stripmarker pattern
    local stripmarker = cfg.stripmarkers['math'];
    local rendering = value:match (stripmarker); -- is there a math stripmarker
    local rendering = value:match (stripmarker); -- is there a math stripmarker


    if not rendering then -- when value doesn't have a math stripmarker, abandon this test
    if not rendering then -- when value doesn't have a math stripmarker, abandon this test
    return 'not rendering';
    return false, value;
    -- return value;
    end
    end
    Line 131: Line 111:
    rendering = rendering:match ('<annotation[^>]+>(.+)</annotation>') -- extract just the math text
    rendering = rendering:match ('<annotation[^>]+>(.+)</annotation>') -- extract just the math text
    else
    else
    return value; -- had math stripmarker but not one of the three defined forms
    return false, value; -- had math stripmarker but not one of the three defined forms
    end
    end
    return value:gsub (stripmarker, rendering, 1);
    return true, value:gsub (stripmarker, rendering, 1);
    end
    end


    --[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------
    --[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------


    Cleanup parameter values for the metadata by removing or replacing invisible characters and certain html entities.
    Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities.


    2015-12-10: there is a bug in mw.text.unstripNoWiki ().  It replaced math stripmarkers with the appropriate content
    2015-12-10: there is a bug in mw.text.unstripNoWiki ().  It replaces math stripmarkers with the appropriate content
    when it shouldn't.  See https://phabricator.wikimedia.org/T121085 and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29
    when it shouldn't.  See https://phabricator.wikimedia.org/T121085 and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29


    TODO: move the replacement patterns and replacement values into a table in /Configuration similar to the invisible
    TODO: move the replacement patterns and replacement values into a table in /Configuration similar to the invisible
    characters table?
    characters table?
    ]]
    ]]


    local function coins_cleanup (value)
    local function coins_cleanup (value)
    value = coins_replace_math_stripmarker (value); -- replace math stripmarker with text representation of the equation
    local replaced = true; -- default state to get the do loop running
     
    while replaced do -- loop until all math stripmarkers replaced
    replaced, value = coins_replace_math_stripmarker (value); -- replace math stripmarker with text representation of the equation
    end
     
    value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message
    value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content
    value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content
    value = value:gsub ('<span class="nowrap" style="padding%-left:0%.1em;">&#39;s</span>', "'s"); -- replace {{'s}} template with simple apostrophe-s
    value = value:gsub ('<span class="nowrap" style="padding%-left:0%.1em;">&#39;(s?)</span>', "'%1"); -- replace {{'}} or {{'s}} with simple apostrophe or apostrophe-s
    value = value:gsub ('&zwj;\226\128\138\039\226\128\139', "'"); -- replace {{'}} with simple apostrophe
    value = value:gsub ('\226\128\138\039\226\128\139', "'"); -- replace {{'}} with simple apostrophe (as of 2015-12-11)
    value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space
    value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space
    value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space
    value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space
    value = value:gsub ('&zwj;', ''); -- remove &zwj; entities
    if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero-width joiner characters from indic script
    value = value:gsub ('[\226\128\141\226\128\139]', '') -- remove zero-width joiner, zero-width space
    value = value:gsub ('&zwj;', ''); -- remove &zwj; entities
    value = value:gsub ('[\194\173\009\010\013]', ' '); -- replace soft hyphen, horizontal tab, line feed, carriage return with plain space
    value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen
    end
    value = value:gsub ('[\009\010\013 ]+', ' '); -- replace horizontal tab, line feed, carriage return with plain space
    return value;
    return value;
    end
    end
    Line 191: Line 180:
    });
    });
    if in_array (class, {'arxiv', 'journal', 'news'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or  
    if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn', 'journal', 'news', 'magazine'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or  
    ('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then
    ('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then
    OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier
    OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier
    if 'arxiv' == class then -- set genre according to the type of citation template we are rendering
    if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn'}) then -- set genre according to the type of citation template we are rendering
    OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv
    OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx, cite ssrn
    elseif 'conference' == class then
    elseif 'conference' == class then
    OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)
    OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)
    Line 208: Line 197:
    -- these used only for periodicals
    -- these used only for periodicals
    OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall
    OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall
    OCinSoutput["rft.quarter"] = data.Quarter; -- single digits 1->first quarter, etc.
    OCinSoutput["rft.chron"] = data.Chron; -- free-form date components
    OCinSoutput["rft.chron"] = data.Chron; -- free-form date components
    OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books
    OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books
    Line 219: Line 209:
    elseif 'conference' == class then -- cite conference when Periodical not set
    elseif 'conference' == class then -- cite conference when Periodical not set
    OCinSoutput["rft.genre"] = "conference";
    OCinSoutput["rft.genre"] = "conference";
    OCinSoutput["rft.atitle"] = data.Chapter; -- conference paper as chapter in proceedings (book)
    elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then
    elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then
    if is_set (data.Chapter) then
    if is_set (data.Chapter) then
    Line 230: Line 221:
    end
    end
    end
    end
    else --{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
    else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
    OCinSoutput["rft.genre"] = "unknown";
    OCinSoutput["rft.genre"] = "unknown";
    end
    end
    Line 246: Line 237:
    OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation
    OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation
    end
    end
    -- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx"
    -- and now common parameters (as much as possible)
    -- and now common parameters (as much as possible)
    OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation
    OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation
     
    for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
    for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
    -- if k == 'ISBN' then v = clean_isbn( v ) end
    if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end
    if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end
    local id = cfg.id_handlers[k].COinS;
    local id = cfg.id_handlers[k].COinS;
    if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry
    if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry
    OCinSoutput["rft_id"] = table.concat{ id, "/", v };
    OCinSoutput["rft_id"] = table.concat{ id, "/", v };
    elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords
    elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc. that have defined COinS keywords
    OCinSoutput[ id ] = v;
    OCinSoutput[ id ] = v;
    elseif id then -- when cfg.id_handlers[k].COinS is not nil
    elseif 'url' == id then -- for urls that are assembled in ~/Identifiers; |asin= and |ol=
    OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v }; -- others; provide a url
    OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", cfg.id_handlers[k].label});
    elseif id then -- when cfg.id_handlers[k].COinS is not nil so urls created here
    OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v, cfg.id_handlers[k].suffix or '', "#id-name=", cfg.id_handlers[k].label }; -- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers)
    end
    end
    end
    end


    --[[
    for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
    local id, value = cfg.id_handlers[k].COinS;
    if k == 'ISBN' then value = clean_isbn( v ); else value = v; end
    if string.sub( id or "", 1, 4 ) == 'info' then
    OCinSoutput["rft_id"] = table.concat{ id, "/", v };
    else
    OCinSoutput[ id ] = value;
    end
    end
    ]]
    local last, first;
    local last, first;
    for k, v in ipairs( data.Authors ) do
    for k, v in ipairs( data.Authors ) do
    last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki strip markers, non-printing or invisible characers
    last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki stripmarkers, non-printing or invisible characters
    if k == 1 then -- for the first author name only
    if k == 1 then -- for the first author name only
    if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name
    if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name
    OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation
    OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation
    OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation
    OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation
    Line 289: Line 271:
    OCinSoutput["rft.au"] = last; -- book, journal, dissertation
    OCinSoutput["rft.au"] = last; -- book, journal, dissertation
    end
    end
    -- TODO: At present we do not report "et al.". Add anything special if this condition applies?
    end
    end
    end
    end
    Line 294: Line 277:
    OCinSoutput.rft_id = data.URL;
    OCinSoutput.rft_id = data.URL;
    OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };
    OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };
    -- TODO: Add optional extra info:
    -- rfr_dat=#REVISION<version> (referrer private data)
    -- ctx_id=<data.RawPage>#<ref> (identifier for the context object)
    -- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)
    -- ctx_enc=info:ofi/enc:UTF-8 (character encoding)
    OCinSoutput = setmetatable( OCinSoutput, nil );
    OCinSoutput = setmetatable( OCinSoutput, nil );
     
    -- sort with version string always first, and combine.
    -- sort with version string always first, and combine.
    table.sort( OCinSoutput );
    -- table.sort( OCinSoutput );
    table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"
    table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"
    return table.concat(OCinSoutput, "&");
    return table.concat(OCinSoutput, "&");
    end
    end
    Line 312: Line 302:
    cfg = cfg_table_ptr;
    cfg = cfg_table_ptr;


    is_set = utilities_page_ptr.is_set; -- import functions from select Module:Citation/CS1/Utilities module
    is_set = utilities_page_ptr.is_set; -- import functions from selected Module:Citation/CS1/Utilities module
    in_array = utilities_page_ptr.in_array;
    in_array = utilities_page_ptr.in_array;
    remove_wiki_link = utilities_page_ptr.remove_wiki_link;
    remove_wiki_link = utilities_page_ptr.remove_wiki_link;
    strip_apostrophe_markup = utilities_page_ptr.strip_apostrophe_markup;
    end
    end




    --[[--------------------------< E X P O R T E D  F U N C T I O N S >------------------------------------------
    ]]


    return {
    return {

    Latest revision as of 11:41, 21 May 2021

    Documentation for this module may be created at Module:Citation/CS1/COinS/doc

    
    --[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
    ]]
    
    local is_set, in_array, remove_wiki_link, strip_apostrophe_markup;				-- functions in Module:Citation/CS1/Utilities
    
    local cfg;																		-- table of configuration tables that are defined in Module:Citation/CS1/Configuration
    
    
    --[[--------------------------< M A K E _ C O I N S _ T I T L E >----------------------------------------------
    
    Makes a title for COinS from Title and / or ScriptTitle (or any other name-script pairs)
    
    Apostrophe markup (bold, italics) is stripped from each value so that the COinS metadata isn't corrupted with strings
    of %27%27...
    
    ]]
    
    local function make_coins_title (title, script)
    	-- Builds the COinS title from a title / script-title pair.  Each half is reduced to a
    	-- string without apostrophe markup; a half that is not set contributes an empty string.
    	local title_part = '';
    	if is_set (title) then
    		title_part = strip_apostrophe_markup (title);
    	end

    	local script_part = '';
    	if is_set (script) then
    		script_part = script:gsub ('^%l%l%s*:%s*', '');						-- drop a leading two-letter language prefix; may leave an empty string
    		script_part = strip_apostrophe_markup (script_part);
    	end

    	if is_set (title_part) and is_set (script_part) then
    		return title_part .. ' ' .. script_part;								-- both present: separate them with a single space
    	end
    	return title_part .. script_part;											-- at most one is non-empty
    end
    
    
    --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------
    
    Returns a string where all of Lua's magic characters have been escaped.  This is important because functions like
    string.gsub() treat their pattern and replace strings as patterns, not literal strings.
    ]]
    
    local function escape_lua_magic_chars (argument)
    	-- Prefix every Lua pattern magic character (including % itself) with %,
    	-- so that the returned string can be used as a literal pattern.
    	local escaped = argument:gsub ("[%%%^%$%(%)%.%[%]%*%+%-%?]", "%%%0");	-- %0 in the replacement is the matched character
    	return escaped;															-- only the string; gsub's count is dropped
    end
    
    
    --[[--------------------------< G E T _ C O I N S _ P A G E S >------------------------------------------------
    
    Extract page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS.
    
    ]]
    
    local function get_coins_pages (pages)
    	-- Reduces a page-number value to plain text for the metadata: external-link URLs and
    	-- brackets are removed, en dashes and named HTML entities become hyphens.
    	if not is_set (pages) then													-- no page numbers; hand the value straight back
    		return pages;
    	end

    	-- captures the URL and the whitespace after it that follow an opening bracket: "[url "
    	local link_pattern = "%[(%w*:?//[^ ]+%s+)[%w%d].*%]";
    	local url_text = pages:match (link_pattern);

    	while nil ~= url_text do													-- until no more URLs are found
    		url_text = escape_lua_magic_chars (url_text);							-- the captured text must be matched literally
    		pages = pages:gsub (url_text, "");										-- remove as many instances as possible
    		url_text = pages:match (link_pattern);
    	end

    	local cleaned = pages:gsub ("[%[%]]", "");									-- remove the brackets
    	cleaned = cleaned:gsub ("–", "-");											-- en dashes become hyphens
    	cleaned = cleaned:gsub ("&%w+;", "-");										-- named HTML entities (&ndash; etc.) become hyphens; numeric entities are not handled
    	return cleaned;
    end
    
    
    --[=[-------------------------< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >------------------
    
    There are three options for math markup rendering that depend on the editor's math preference settings.  These
    settings are at [[Special:Preferences#mw-prefsection-rendering]] and are
    	PNG images
    	TeX source
    	MathML with SVG or PNG fallback
    
    All three are heavy with HTML and CSS which doesn't belong in the metadata.
    
    Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings
    of the last editor to save the page.
    
    This function gets the rendered form of an equation according to the editor's preference before the page is saved.  It
    then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so
    that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation.
    
    When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial
    value.  To replace multiple equations it is necessary to call this function from within a loop.
    
    ]=]
    
    local function coins_replace_math_stripmarker (value)
    	-- Replaces the first math stripmarker in value with the text form of its equation.
    	-- Returns true and the modified value when a replacement was made (string.gsub's
    	-- substitution count follows as an extra result); otherwise returns false and the
    	-- unmodified value.
    	local stripmarker = cfg.stripmarkers['math'];
    	local rendering = value:match (stripmarker);								-- is there a math stripmarker

    	if not rendering then														-- when value doesn't have a math stripmarker, abandon this test
    		return false, value;
    	end

    	rendering = mw.text.unstripNoWiki (rendering);								-- convert stripmarker into rendered value
    	if 'string' ~= type (rendering) then										-- defensive: nothing usable came back (math render error?)
    		return false, value;
    	end

    	local math_text = rendering:match ('alt="([^"]+)"')						-- PNG math option: text is in the alt attribute
    		or rendering:match ('$%s+(.+)%s+%$')									-- TeX math option; $ is legit character that is escaped as \$
    		or rendering:match ('<annotation[^>]+>(.+)</annotation>');				-- MathML math option: text is in the annotation element

    	if not math_text then														-- had math stripmarker but not one of the three defined forms
    		return false, value;
    	end

    	-- Use a function as the replacement so that math_text is inserted literally.  A replacement
    	-- string would have its % characters interpreted by gsub (TeX percent signs such as \% would
    	-- be dropped, turned into capture references, or raise an 'invalid capture index' error).
    	return true, value:gsub (stripmarker, function () return math_text; end, 1);
    end
    
    
    --[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------
    
    Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities.
    
    2015-12-10: there is a bug in mw.text.unstripNoWiki ().  It replaces math stripmarkers with the appropriate content
    when it shouldn't.  See https://phabricator.wikimedia.org/T121085 and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29
    
    TODO: move the replacement patterns and replacement values into a table in /Configuration similar to the invisible
    characters table?
    
    ]]
    
    local function coins_cleanup (value)
    	-- Prepares one parameter value for the metadata: math stripmarkers become text, nowiki
    	-- stripmarkers are unstripped, and assorted invisible characters / entities are
    	-- replaced or removed.  Returns the cleaned value.
    	local math_marker = cfg.stripmarkers['math'];
    	local replaced;

    	repeat																		-- one math stripmarker per pass until none can be replaced
    		replaced, value = coins_replace_math_stripmarker (value);
    	until not replaced;

    	value = value:gsub (math_marker, "MATH RENDER ERROR");						-- any stripmarker still present could not be replaced; insert vague error message
    	value = mw.text.unstripNoWiki (value);										-- nowiki stripmarkers give way to their content

    	value = value:gsub ('<span class="nowrap" style="padding%-left:0%.1em;">&#39;(s?)</span>', "'%1");	-- {{'}} or {{'s}} output becomes a simple apostrophe or apostrophe-s
    	value = value:gsub ('&nbsp;', ' ');											-- &nbsp; entity becomes a plain space
    	value = value:gsub ('\226\128\138', ' ');									-- hair space becomes a plain space

    	local has_indic = mw.ustring.find (value, cfg.indic_script);
    	if not has_indic then														-- zero-width joiners are kept when indic script is present
    		value = value:gsub ('&zwj;', '');										-- remove &zwj; entities
    		value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', '');	-- remove zero-width joiner, zero-width space, soft hyphen
    	end

    	value = value:gsub ('[\009\010\013 ]+', ' ');								-- runs of tab, line feed, carriage return, space collapse to one plain space
    	return value;
    end
    
    
    --[[--------------------------< C O I N S >--------------------------------------------------------------------
    
    COinS metadata (see <http://ocoins.info/>) allows automated tools to parse the citation information.
    
    ]]
    
    local function COinS(data, class)
    	-- Builds the COinS key/value string for one citation.
    	--
    	-- data:	table of citation metadata.  Fields read here: Periodical, Encyclopedia, Title, Chapter,
    	--			Season, Quarter, Chron, Volume, Issue, Pages, PublicationPlace, Series, Edition,
    	--			PublisherName, Degree, Date, URL, RawPage, plus the sub-tables ID_list (identifier
    	--			name -> value) and Authors (sequence of tables with .last and optional .first).
    	--			NOTE: every value in data except ID_list and Authors is overwritten in place with
    	--			the result of coins_cleanup().
    	-- class:	citation class string (e.g. 'journal', 'book', 'web', 'thesis'); selects the metadata
    	--			format and the rft.genre value.
    	--
    	-- Returns '' when data is not a table or is empty; otherwise a string of 'key=value' pairs
    	-- joined with '&', always beginning with 'ctx_ver=Z39.88-2004'.
    	if 'table' ~= type(data) or nil == next(data) then
    		return '';
    	end
    
    	for k, v in pairs (data) do													-- spin through all of the metadata parameter values
    		if 'ID_list' ~= k and 'Authors' ~= k then								-- except the ID_list and Author tables (author nowiki stripmarker done when Author table processed)
    			data[k] = coins_cleanup (v);										-- replacing the value of an existing key during pairs() traversal is permitted
    		end
    	end
    
    	local ctx_ver = "Z39.88-2004";
    	
    	-- treat table strictly as an array with only set values.
    	-- Every assignment to a string key is intercepted: the pair is appended as 'key=<encoded value>' when
    	-- the value is set, and silently dropped otherwise.  String keys are never actually stored, so the
    	-- same key (e.g. rft_id, rft.au) may be assigned repeatedly to emit several pairs.
    	local OCinSoutput = setmetatable( {}, {
    		__newindex = function(self, key, value)
    			if is_set(value) then
    				rawset( self, #self+1, table.concat{ key, '=', mw.uri.encode( remove_wiki_link( value ) ) } );
    			end
    		end
    	});
    	
    	if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn', 'journal', 'news', 'magazine'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or 
    		('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then
    			OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal";			-- journal metadata identifier
    			if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn'}) then	-- set genre according to the type of citation template we are rendering
    				OCinSoutput["rft.genre"] = "preprint";							-- cite arxiv, cite biorxiv, cite citeseerx, cite ssrn
    			elseif 'conference' == class then
    				OCinSoutput["rft.genre"] = "conference";						-- cite conference (when Periodical set)
    			elseif 'web' == class then
    				OCinSoutput["rft.genre"] = "unknown";							-- cite web (when Periodical set)
    			else
    				OCinSoutput["rft.genre"] = "article";							-- journal and other 'periodical' articles
    			end
    			OCinSoutput["rft.jtitle"] = data.Periodical;						-- journal only
    			OCinSoutput["rft.atitle"] = data.Title;								-- 'periodical' article titles
    
    																				-- these used only for periodicals
    			OCinSoutput["rft.ssn"] = data.Season;								-- keywords: winter, spring, summer, fall
    			OCinSoutput["rft.quarter"] = data.Quarter;							-- single digits 1->first quarter, etc.
    			OCinSoutput["rft.chron"] = data.Chron;								-- free-form date components
    			OCinSoutput["rft.volume"] = data.Volume;							-- does not apply to books
    			OCinSoutput["rft.issue"] = data.Issue;
    			OCinSoutput["rft.pages"] = data.Pages;								-- also used in book metadata
    
    	elseif 'thesis' ~= class then												-- all others except cite thesis are treated as 'book' metadata; genre distinguishes
    		OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:book";					-- book metadata identifier
    		if 'report' == class or 'techreport' == class then						-- cite report and cite techreport
    			OCinSoutput["rft.genre"] = "report";
    		elseif 'conference' == class then										-- cite conference when Periodical not set
    			OCinSoutput["rft.genre"] = "conference";
    			OCinSoutput["rft.atitle"] = data.Chapter;							-- conference paper as chapter in proceedings (book)
    		elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then
    			if is_set (data.Chapter) then
    				OCinSoutput["rft.genre"] = "bookitem";
    				OCinSoutput["rft.atitle"] = data.Chapter;						-- book chapter, encyclopedia article, interview in a book, or map title
    			else
    				if 'map' == class or 'interview' == class then
    					OCinSoutput["rft.genre"] = 'unknown';						-- standalone map or interview
    				else
    					OCinSoutput["rft.genre"] = 'book';							-- book and encyclopedia
    				end
    			end
    		else	-- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
    			OCinSoutput["rft.genre"] = "unknown";
    		end
    		OCinSoutput["rft.btitle"] = data.Title;									-- book only
    		OCinSoutput["rft.place"] = data.PublicationPlace;						-- book only
    		OCinSoutput["rft.series"] = data.Series;								-- book only
    		OCinSoutput["rft.pages"] = data.Pages;									-- book, journal
    		OCinSoutput["rft.edition"] = data.Edition;								-- book only
    		OCinSoutput["rft.pub"] = data.PublisherName;							-- book and dissertation
    		
    	else																		-- cite thesis
    		OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:dissertation";			-- dissertation metadata identifier
    		OCinSoutput["rft.title"] = data.Title;									-- dissertation (also patent but that is not yet supported)
    		OCinSoutput["rft.degree"] = data.Degree;								-- dissertation only
    		OCinSoutput['rft.inst'] = data.PublisherName;							-- book and dissertation
    	end
    	-- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx"
    																				-- and now common parameters (as much as possible)
    	OCinSoutput["rft.date"] = data.Date;										-- book, journal, dissertation
    
    	for k, v in pairs( data.ID_list or {} ) do									-- what to do about these? For now assume that they are common to all?  (a missing ID_list is treated as empty)
    		local handler = cfg.id_handlers[k];										-- nil when the identifier has no configured handler; such identifiers are skipped
    		local id = handler and handler.COinS;
    		if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end					-- keep only digits, hyphens and the X check character
    		if string.sub( id or "", 1, 4 ) == 'info' then							-- for ids that are in the info:registry
    			OCinSoutput["rft_id"] = table.concat{ id, "/", v };
    		elseif string.sub (id or "", 1, 3 ) == 'rft' then						-- for isbn, issn, eissn, etc. that have defined COinS keywords
    			OCinSoutput[ id ] = v;
    		elseif 'url' == id then													-- for urls that are assembled in ~/Identifiers; |asin= and |ol=
    			OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", handler.label});
    		elseif id then															-- when the handler's COinS value is not nil so urls created here
    			OCinSoutput["rft_id"] = table.concat{ handler.prefix, v, handler.suffix or '', "#id-name=", handler.label };	-- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers)
    		end
    	end
    
    	for k, v in ipairs( data.Authors or {} ) do								-- a missing Authors table is treated as empty
    		-- replace any nowiki stripmarkers, non-printing or invisible characters; a missing name part is handled as an empty string
    		local last, first = coins_cleanup (v.last or ''), coins_cleanup (v.first or '');
    		if k == 1 then															-- for the first author name only
    			if is_set(last) and is_set(first) then								-- set these COinS values if |first= and |last= specify the first author name
    				OCinSoutput["rft.aulast"] = last;								-- book, journal, dissertation
    				OCinSoutput["rft.aufirst"] = first;								-- book, journal, dissertation
    			elseif is_set(last) then 
    				OCinSoutput["rft.au"] = last;									-- book, journal, dissertation -- otherwise use this form for the first name
    			end
    		else																	-- for all other authors
    			if is_set(last) and is_set(first) then
    				OCinSoutput["rft.au"] = table.concat{ last, ", ", first };		-- book, journal, dissertation
    			elseif is_set(last) then
    				OCinSoutput["rft.au"] = last;									-- book, journal, dissertation
    			end
    			-- TODO: At present we do not report "et al.". Add anything special if this condition applies?
    		end
    	end
    
    	OCinSoutput.rft_id = data.URL;
    	OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };	-- referrer: the part of the server string after its last '/', then the page name
    
    	-- TODO: Add optional extra info:
    	-- rfr_dat=#REVISION<version> (referrer private data)
    	-- ctx_id=<data.RawPage>#<ref> (identifier for the context object)
    	-- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)
    	-- ctx_enc=info:ofi/enc:UTF-8 (character encoding)
    	
    	OCinSoutput = setmetatable( OCinSoutput, nil );							-- drop the __newindex hook; from here on this is a plain array
    
    	-- sort with version string always first, and combine.
    	-- table.sort( OCinSoutput );
    	table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"
    	return table.concat(OCinSoutput, "&");
    end
    
    
    --[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------
    
    Sets the local cfg table and the imported utility functions to the same versions (live or sandbox) that the other modules use.
    
    ]]
    
    local function set_selected_modules (cfg_table_ptr, utilities_page_ptr)
    	-- cfg_table_ptr:		configuration table to use from now on
    	-- utilities_page_ptr:	table exposing is_set, in_array, remove_wiki_link and strip_apostrophe_markup
    	local utilities = utilities_page_ptr;
    
    	cfg = cfg_table_ptr;													-- remember the selected configuration first
    
    	is_set, in_array, remove_wiki_link, strip_apostrophe_markup =			-- then bind the four utility functions in one step
    		utilities.is_set,
    		utilities.in_array,
    		utilities.remove_wiki_link,
    		utilities.strip_apostrophe_markup;
    end
    
    
    --[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------
    ]]
    
    -- Assemble the module's public interface one entry at a time, then hand it to the caller.
    local exported = {};
    exported.make_coins_title = make_coins_title;
    exported.get_coins_pages = get_coins_pages;
    exported.COinS = COinS;
    exported.set_selected_modules = set_selected_modules;
    return exported;