Module:Citation/CS1/Configuration: Difference between revisions

sync from sandbox;
(synch from sandbox;)
(sync from sandbox;)
Line 1: Line 1:
local citation_config = {};


--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------
--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------
Line 12: Line 10:


local uncategorized_subpages = {'/[Ss]andbox', '/[Tt]estcases'}; -- list of Lua patterns found in page names of pages we should not categorize
local uncategorized_subpages = {'/[Ss]andbox', '/[Tt]estcases'}; -- list of Lua patterns found in page names of pages we should not categorize


--[[--------------------------< M E S S A G E S >--------------------------------------------------------------
--[[--------------------------< M E S S A G E S >--------------------------------------------------------------
Line 95: Line 94:
['unknown_argument_map'] = 'Argument map not defined for this variable',
['unknown_argument_map'] = 'Argument map not defined for this variable',
['bare_url_no_origin'] = 'Bare url found but origin indicator is nil or empty',
['bare_url_no_origin'] = 'Bare url found but origin indicator is nil or empty',
}
--[[--------------------------< E T _ A L _ P A T T E R N S >--------------------------------------------------

This table provides Lua patterns for the phrase "et al" and variants in name text
(author, editor, etc.). The main module uses these to identify and emit the 'etal' message.

The 'et al' and 'et alia' patterns are anchored to the end of the text.  Each allows an optional leading
';' or ',', optional spaces and quote marks before the phrase, an optional '.' after 'et', and any run of
'.' and quote marks after the phrase.  The frontier %f[%a] requires the phrase to start at a letter that is
not itself preceded by a letter, so a word that merely ends in these letters does not match.

The 'and others' pattern is not anchored to the end of the text and requires a lower case 'and'.

]]
local et_al_patterns = {
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][%.\"']*$", -- variations on the 'et al' theme; case insensitive
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][Ii][Aa][%.\"']*$", -- variations on the 'et alia' theme; case insensitive
"[;,]? *%f[%a]and [Oo]thers", -- an alternate to et al.; matches anywhere at or after a word start
}
--[[--------------------------< E D I T O R _ M A R K U P _ P A T T E R N S >----------------------------------

This table provides Lua patterns for the phrase "ed" and variants in name text
(author, editor, etc.). The main module uses these to identify and emit the
'extra_text_names' message. (It is not the only series of patterns for this message.)

The first four patterns are anchored to the end of the name ('$'); the last four are anchored to the
beginning of the name ('^').  Wherever a bracket is matched, either '(' / ')' or '[' / ']' is accepted.

]]
local editor_markup_patterns = { -- these patterns match annotations at end of name
'%f[%(%[][%(%[]%s*[Ee][Dd][Ss]?%.?%s*[%)%]]?$', -- (ed) or (eds): leading '(' or '[', case insensitive 'ed', optional 's', '.' and/or closing ')' or ']'
'[,%.%s]%f[e]eds?%.?$', -- ed or eds: without '(' or ')'; must follow ',', '.' or whitespace; case sensitive (ED could be initials, Ed could be name)
'%f[%(%[][%(%[]%s*[Ee][Dd][Ii][Tt][Oo][Rr][Ss]?%.?%s*[%)%]]?$', -- (editor) or (editors): leading '(' or '[', case insensitive, optional 's', '.' and/or closing ')' or ']'
'[,%.%s]%f[Ee][Ee][Dd][Ii][Tt][Oo][Rr][Ss]?%.?$', -- editor or editors: without '(' or ')'; must follow ',', '.' or whitespace; case insensitive
-- these patterns match annotations at beginning of name
'^eds?[%.,;]', -- ed or eds: lower case only, optional 's', must be followed by '.', ',' or ';'
'^[%(%[]%s*[Ee][Dd][Ss]?%.?%s*[%)%]]', -- (ed) or (eds): also square brackets, case insensitive, optional 's' and '.', closing bracket required
'^[%(%[]?%s*[Ee][Dd][Ii][Tt][Oo][Rr][Ss]?%A', -- (editor or (editors: also square brackets, case insensitive, optional opening bracket, optional 's'; must be followed by a non-letter
'^[%(%[]?%s*[Ee][Dd][Ii][Tt][Ee][Dd]%A', -- (edited: also square brackets, case insensitive, optional opening bracket; must be followed by a non-letter
}
}


Line 187: Line 221:
['ASINTLD'] = {'ASIN-TLD', 'asin-tld'},
['ASINTLD'] = {'ASIN-TLD', 'asin-tld'},
['At'] = 'at',
['At'] = 'at',
['Authors'] = {'authors', 'people', 'host', 'credits'},
['Authors'] = {'authors', 'people', 'credits'},
['BookTitle'] = {'book-title', 'booktitle'},
['BookTitle'] = {'book-title', 'booktitle'},
['Cartography'] = 'cartography',
['Cartography'] = 'cartography',
Line 205: Line 239:
['DF'] = 'df',
['DF'] = 'df',
['DisplayAuthors'] = {'display-authors', 'displayauthors'},
['DisplayAuthors'] = {'display-authors', 'displayauthors'},
['DisplayContributors'] = 'display-contributors',
['DisplayEditors'] = {'display-editors', 'displayeditors'},
['DisplayEditors'] = {'display-editors', 'displayeditors'},
['DisplayInterviewers'] = 'display-interviewers',
['DisplayTranslators'] = 'display-translators',
['Docket'] = 'docket',
['Docket'] = 'docket',
['DoiBroken'] = {'doi-broken', 'doi-broken-date', 'doi-inactive-date'},
['DoiBroken'] = {'doi-broken', 'doi-broken-date', 'doi-inactive-date'},
Line 286: Line 323:


['AuthorList-First'] = {"first#", "given#", "author-first#", "author#-first"},
['AuthorList-First'] = {"first#", "given#", "author-first#", "author#-first"},
['AuthorList-Last'] = {"last#", "author#", "surname#", "author-last#", "author#-last", "subject#"},
['AuthorList-Last'] = {"last#", "author#", "surname#", "author-last#", "author#-last", "subject#", 'host#'},
['AuthorList-Link'] = {"authorlink#", "author-link#", "author#-link", "subjectlink#", "author#link", "subject-link#", "subject#-link", "subject#link"},
['AuthorList-Link'] = {"authorlink#", "author-link#", "author#-link", "subjectlink#", "author#link", "subject-link#", "subject#-link", "subject#link"},
['AuthorList-Mask'] = {"author-mask#", "authormask#", "author#mask", "author#-mask"},
['AuthorList-Mask'] = {"author-mask#", "authormask#", "author#mask", "author#-mask"},
Line 321: Line 358:


local special_case_translation = {
local special_case_translation = {
['AuthorList'] = "authors list", -- these for multiple names maint categories
['AuthorList'] = 'authors list', -- these for multiple names maint categories
['ContributorList'] = "contributors list",
['ContributorList'] = 'contributors list',
['EditorList'] = "editors list",
['EditorList'] = 'editors list',
['InterviewerList'] = "interviewers list",
['InterviewerList'] = 'interviewers list',
['TranslatorList'] = "translators list",
['TranslatorList'] = 'translators list',
['authors'] = "authors", -- used in get_display_authors_editors()
['authors'] = 'authors', -- used in get_display_names()
['editors'] = "editors",
['contributors'] = 'contributors',
['editors'] = 'editors',
['interviewers'] = 'interviewers',
['translators'] = 'translators',


['archived_copy'] = '^archived?%s+copy$', -- lua pattern to match pseudo title used by Internet Archive bot and others as place holder for unknown |title= value
['archived_copy'] = '^archived?%s+copy$', -- lua pattern to match pseudo title used by Internet Archive bot and others as place holder for unknown |title= value
Line 367: Line 407:
local date_names = {
local date_names = {
['en'] = { -- English
['en'] = { -- English
['long'] = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12};
['long'] = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12},
['short'] = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12};
['short'] = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12},
['season'] = {['Winter']=24, ['Spring']=21, ['Summer']=22, ['Fall']=23, ['Autumn']=23};
['season'] = {['Winter']=24, ['Spring']=21, ['Summer']=22, ['Fall']=23, ['Autumn']=23},
['named'] = {['Christmas']=99};
['named'] = {['Christmas']=99},
},
},
['local'] = { -- replace these English date names with the local language equivalents
['local'] = { -- replace these English date names with the local language equivalents
['long'] = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12};
['long'] = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12},
['short'] = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12};
['short'] = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12},
['season'] = {['Winter']=24, ['Spring']=21, ['Summer']=22, ['Fall']=23, ['Autumn']=23};
['season'] = {['Winter']=24, ['Spring']=21, ['Summer']=22, ['Fall']=23, ['Autumn']=23},
['named'] = {['Christmas']=99};
['named'] = {['Christmas']=99},
},
},
['local_digits'] = {['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9'}; -- used to convert local language digits to Western 0-9
['inv_local_l'] = {}, -- used in date reformatting; copy of date_names['local'].long where k/v are inverted: [1]='<local name>' etc
['xlate_digits'] = {};
['inv_local_s'] = {}, -- used in date reformatting; copy of date_names['local'].short where k/v are inverted: [1]='<local name>' etc
['local_digits'] = {['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9'}, -- used to convert local language digits to Western 0-9
['xlate_digits'] = {},
}
}
-- Build the reverse month lookups ([month number] = '<local name>') used when reformatting dates from ymd.
-- The source tables are keyed by name, so each key/value pair is swapped into the matching inv_local_* table.
for month_name, month_number in pairs (date_names['local'].long) do
	date_names.inv_local_l[month_number] = month_name;				-- long names: [1] = '<local January>' etc
end

for month_name, month_number in pairs (date_names['local'].short) do
	date_names.inv_local_s[month_number] = month_name;				-- short names: [1] = '<local Jan>' etc
end


for ld, ed in pairs (date_names.local_digits) do -- make a digit translation table for simple date translation from en to local language using local_digits table
for ld, ed in pairs (date_names.local_digits) do -- make a digit translation table for simple date translation from en to local language using local_digits table
date_names.xlate_digits [ed] = ld; -- en digit becomes index with local digit as the value
date_names.xlate_digits [ed] = ld; -- en digit becomes index with local digit as the value
end
end
-- Lua patterns that locate a {{Use dmy dates}} or {{Use mdy dates}} template, or one of their redirects, in
-- page wikitext.  Each pattern has exactly one capture: the three letters naming the date format (any letter
-- case; get_date_format() lower-cases it).  Every pattern ends with [|}] so that the template name must be
-- followed by a parameter separator or by the end of the template.
-- get_date_format() tries the patterns with ipairs(), so the first pattern in this list that matches wins.
local df_template_patterns = { -- table of redirects to {{Use dmy dates}} and {{Use mdy dates}}
'{{ *[Uu]se (dmy) dates *[|}]', -- 915k -- sorted by approximate transclusion count
'{{ *[Uu]se *(mdy) *dates *[|}]', -- 161k
'{{ *[Uu]se (DMY) dates *[|}]', -- 2929
'{{ *[Uu]se *(dmy) *[|}]', -- 250 + 34
'{{ *([Dd]my) *[|}]', -- 272
'{{ *[Uu]se (MDY) dates *[|}]', -- 173
'{{ *[Uu]se *(mdy) *[|}]', -- 59 + 12
'{{ *([Mm]dy) *[|}]', -- 9
'{{ *[Uu]se (MDY) *[|}]', -- 3
'{{ *([Dd]MY) *[|}]', -- 2
'{{ *([Mm]DY) *[|}]', -- 0
-- disabled patterns: template names written without any space after 'Use'
-- '{{ *[Uu]se(mdy) *[|}]',
-- '{{ *[Uu]se(mdy)dates *[|}]',
-- '{{ *[Uu]se(dmy) *[|}]',
}
--[[--------------------------< G E T _ D A T E _ F O R M A T >------------------------------------------------

Searches the wikitext of the current page for a {{Use dmy dates}} or {{Use mdy dates}} template, or one of the
redirects listed in df_template_patterns, and derives a page-wide date format from the first pattern that matches.

Returns:
	'<fmt>-<style>' when the template has a |cs1-dates= parameter; <fmt> is the lower-cased capture from the
		pattern ('dmy' or 'mdy') and <style> is the one or two letters given to |cs1-dates=
	'<fmt>-all' when the template has no |cs1-dates= parameter, or when the template text cannot be isolated;
		this value is appropriate for use in |df=
	nil when the page has no content or none of the patterns match

]]

local function get_date_format ()
	local content = mw.title.getCurrentTitle():getContent() or '';		-- getContent() returns nil when the page does not exist; search an empty string instead of erroring

	for _, pattern in ipairs (df_template_patterns) do					-- loop through the patterns looking for {{Use dmy dates}} or {{Use mdy dates}} or any of their redirects
		local start, _, df = content:find (pattern);					-- df is the three letters indicating desired date format
		if df then
			local template = content:match ('%b{}', start) or '';		-- get the whole template; empty string when the braces are not balanced
			local style = template:match ('| *cs1%-dates *= *([lsy][sy]?)');	-- look for |cs1-dates=publication date length access-/archive-date length
			if style then
				return df:lower() .. '-' .. style;
			end
			return df:lower() .. '-all';								-- no |cs1-dates= k/v pair; return value appropriate for use in |df=
		end
	end
end

local global_df = get_date_format ();									-- evaluated once when this module loads; nil when the page has no date format template




Line 414: Line 497:
['contribution'] = {'afterword', 'foreword', 'introduction', 'preface'}, -- generic contribution titles that are rendered unquoted in the 'chapter' position
['contribution'] = {'afterword', 'foreword', 'introduction', 'preface'}, -- generic contribution titles that are rendered unquoted in the 'chapter' position
['date-format'] = {'dmy', 'dmy-all', 'mdy', 'mdy-all', 'ymd', 'ymd-all'},
['date-format'] = {'dmy', 'dmy-all', 'mdy', 'mdy-all', 'ymd', 'ymd-all'},
-- ['date-format'] = {'dmy', 'dmy-all', 'mdy', 'mdy-all', 'ymd', 'ymd-all', 'yMd', 'yMd-all'}, -- not supported at en.wiki
['url-access'] = {'subscription', 'limited', 'registration'},   -- access level of a URL (subscription required, limited access, free registration required), free to read by default
['url-access'] = {'subscription', 'limited', 'registration'},   -- access level of a URL (subscription required, limited access, free registration required), free to read by default
     ['id-access'] = {'free'},                                          -- access level of an identifier (free to read), subscription required (or no full text) by default
     ['id-access'] = {'free'},                                          -- access level of an identifier (free to read), subscription required (or no full text) by default
Line 438: Line 522:
table data in an arbitrary order.  Here, we want to process the table from top to bottom because the entries at
table data in an arbitrary order.  Here, we want to process the table from top to bottom because the entries at
the top of the table are also found in the ranges specified by the entries at the bottom of the table.
the top of the table are also found in the ranges specified by the entries at the bottom of the table.
This list contains patterns for templates like {{'}}, which are not errors but which transclude characters that
are invisible.  These kinds of patterns must be recognized by the functions that use this list.


Also here is a pattern that recognizes stripmarkers that begin and end with the delete characters.  The nowiki
Also here is a pattern that recognizes stripmarkers that begin and end with the delete characters.  The nowiki
Line 502: Line 583:
'he', 'hi', 'hy', 'ja', 'ka', 'kk', 'km', 'kn', 'ko', 'ku',
'he', 'hi', 'hy', 'ja', 'ka', 'kk', 'km', 'kn', 'ko', 'ku',
'mk', 'ml', 'mn', 'mr', 'my', 'ne', 'ps', 'ru', 'sd', 'si',
'mk', 'ml', 'mn', 'mr', 'my', 'ne', 'ps', 'ru', 'sd', 'si',
'sr', 'ta', 'tg', 'th', 'uk', 'ug', 'ur', 'yi', 'zh'
'sr', 'ta', 'tg', 'th', 'uk', 'ug', 'ur', 'uz', 'yi', 'zh'
};
};


Line 530: Line 611:
['valencian'] = {'Valencian', 'ca'}, -- variant of Catalan; categorizes as Catalan
['valencian'] = {'Valencian', 'ca'}, -- variant of Catalan; categorizes as Catalan
}
}


--[[--------------------------< M A I N T E N A N C E _ C A T E G O R I E S >----------------------------------
--[[--------------------------< M A I N T E N A N C E _ C A T E G O R I E S >----------------------------------
Line 544: Line 626:
['date_format'] = 'CS1 maint: Date format',
['date_format'] = 'CS1 maint: Date format',
['date_year'] = 'CS1 maint: Date and year',
['date_year'] = 'CS1 maint: Date and year',
['disp_auth_ed'] = 'CS1 maint: display-$1', -- $1 is authors or editors; gets value from special_case_translation table
['disp_name'] = 'CS1 maint: display-$1', -- $1 is authors, contributors, editors, interviewers, translators; gets value from special_case_translation table
['editors'] = 'CS1 maint: Uses editors parameter',
['editors'] = 'CS1 maint: Uses editors parameter',
['embargo'] = 'CS1 maint: PMC embargo expired',
['embargo'] = 'CS1 maint: PMC embargo expired',
['english'] = 'CS1 maint: English language specified',
['english'] = 'CS1 maint: English language specified',
['etal'] = 'CS1 maint: Explicit use of et al.',
['extra_text'] = 'CS1 maint: Extra text',
['extra_text'] = 'CS1 maint: Extra text',
['extra_text_names'] = 'CS1 maint: Extra text: $1', -- $1 is <name>s list; gets value from special_case_translation table
['extra_text_names'] = 'CS1 maint: Extra text: $1', -- $1 is <name>s list; gets value from special_case_translation table
['ignore_isbn_err'] = 'CS1 maint: Ignored ISBN errors',
['ignore_isbn_err'] = 'CS1 maint: Ignored ISBN errors',
['missing_pipe'] = 'CS1 maint: Missing pipe',
['mult_names'] = 'CS1 maint: Multiple names: $1', -- $1 is <name>s list; gets value from special_case_translation table
['mult_names'] = 'CS1 maint: Multiple names: $1', -- $1 is <name>s list; gets value from special_case_translation table
['others'] = 'CS1 maint: others',
['pmc_format'] = 'CS1 maint: PMC format',
['pmc_format'] = 'CS1 maint: PMC format',
['unfit'] = 'CS1 maint: Unfit url',
['unfit'] = 'CS1 maint: Unfit url',
Line 846: Line 927:
anchor = 'empty_citation',
anchor = 'empty_citation',
category = 'Pages with empty citations',
category = 'Pages with empty citations',
hidden = false
},
etal = {
message = 'Explicit use of et al. in: <code class="cs1-code">&#124;$1=</code>',
anchor = 'explicit_et_al',
category = 'CS1 errors: Explicit use of et al.',
hidden = false
hidden = false
},
},
first_missing_last = {
first_missing_last = {
message = '<code class="cs1-code">&#124;first$2=</code> missing <code class="cs1-code">&#124;last$2=</code> in $1',
message = '<code class="cs1-code">&#124;$1=</code> missing <code class="cs1-code">&#124;$2=</code>', -- $1 is first alias, $2 is matching last alias
anchor = 'first_missing_last',
anchor = 'first_missing_last',
category = 'CS1 errors: missing author or editor',
category = 'CS1 errors: missing name', -- author, contributor, editor, interviewer, translator
hidden = false
hidden = false
},
},
Line 873: Line 960:
},
},
missing_name = {
missing_name = {
message = 'Missing <code class="cs1-code">&#124;last$2=</code> in $1',
message = 'Missing <code class="cs1-code">&#124;$1$2=</code>', -- $1 is modified NameList; $2 is enumerator
anchor = 'missing_name',
anchor = 'missing_name',
category = 'CS1 errors: missing author or editor',
category = 'CS1 errors: missing name', -- author, contributor, editor, interviewer, translator
hidden = false
},
missing_pipe = {
message = 'Missing pipe in: <code class="cs1-code">&#124;$1=</code>',
anchor = 'missing_pipe',
category = 'CS1 errors: Missing pipe',
hidden = false
hidden = false
},
},
Line 1,239: Line 1,332:




--[[--------------------------< E X P O R T S >----------------------------------------------------------------
--[[--------------------------< E X P O R T E D  T A B L E S >------------------------------------------------
]]
]]


Line 1,248: Line 1,341:
date_names = date_names,
date_names = date_names,
error_conditions = error_conditions,
error_conditions = error_conditions,
editor_markup_patterns = editor_markup_patterns,
et_al_patterns = et_al_patterns,
global_df = global_df,
id_handlers = id_handlers,
id_handlers = id_handlers,
keywords = keywords,
keywords = keywords,