Module:Citation/CS1/Configuration: Difference between revisions
sync from sandbox;
(synch from sandbox;) |
(sync from sandbox;) |
||
Line 1: | Line 1: | ||
--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------ | --[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------ | ||
Line 12: | Line 10: | ||
local uncategorized_subpages = {'/[Ss]andbox', '/[Tt]estcases'}; -- list of Lua patterns found in page names of pages we should not categorize | local uncategorized_subpages = {'/[Ss]andbox', '/[Tt]estcases'}; -- list of Lua patterns found in page names of pages we should not categorize | ||
--[[--------------------------< M E S S A G E S >-------------------------------------------------------------- | --[[--------------------------< M E S S A G E S >-------------------------------------------------------------- | ||
Line 95: | Line 94: | ||
['unknown_argument_map'] = 'Argument map not defined for this variable', | ['unknown_argument_map'] = 'Argument map not defined for this variable', | ||
['bare_url_no_origin'] = 'Bare url found but origin indicator is nil or empty', | ['bare_url_no_origin'] = 'Bare url found but origin indicator is nil or empty', | ||
} | |||
--[[--------------------------< E T _ A L _ P A T T E R N S >-------------------------------------------------- | |||
This table provides Lua patterns for the phrase "et al" and variants in name text | |||
(author, editor, etc.). The main module uses these to identify and emit the 'etal' message. | |||
]] | |||
-- Each entry is a Lua pattern; the first two are anchored to the end of the name text with '$'.
-- All three accept an optional leading ';' or ',' followed by optional spaces.
local et_al_patterns = { | |||
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][%.\"']*$", -- variations on the 'et al' theme; case insensitive, optional quote marks and periods | |||
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][Ii][Aa][%.\"']*$", -- variations on the 'et alia' theme; case insensitive, optional quote marks and periods | |||
"[;,]? *%f[%a]and [Oo]thers", -- an alternate to et al.; 'and' is lower case only; not anchored to the end of the name | |||
} | |||
--[[--------------------------< E D I T O R _ M A R K U P _ P A T T E R N S >---------------------------------- | |||
This table provides Lua patterns for the phrase "ed" and variants in name text | |||
(author, editor, etc.). The main module uses these to identify and emit the | |||
'extra_text_names' message. (It is not the only series of patterns for this message.) | |||
]] | |||
-- Each entry is a Lua pattern.  The first group is anchored to the end of the name text with '$';
-- the second group is anchored to the beginning of the name text with '^'.
local editor_markup_patterns = { -- these patterns match annotations at end of name | |||
'%f[%(%[][%(%[]%s*[Ee][Dd][Ss]?%.?%s*[%)%]]?$', -- (ed) or (eds): leading '(' or '[', case insensitive 'ed', optional 's', '.' and/or closing ')' or ']' | |||
'[,%.%s]%f[e]eds?%.?$', -- ed or eds: without '(' or ')'; lower case only (ED could be initials; Ed could be a name); requires a preceding ',', '.' or white space | |||
'%f[%(%[][%(%[]%s*[Ee][Dd][Ii][Tt][Oo][Rr][Ss]?%.?%s*[%)%]]?$', -- (editor) or (editors): leading '(' or '[', case insensitive, optional 's', '.' and/or closing ')' or ']' | |||
'[,%.%s]%f[Ee][Ee][Dd][Ii][Tt][Oo][Rr][Ss]?%.?$', -- editor or editors: without '(' or ')'; case insensitive; requires a preceding ',', '.' or white space | |||
-- these patterns match annotations at beginning of name | |||
'^eds?[%.,;]', -- ed. or eds.: lower case only, optional 's', requires a trailing '.', ',' or ';' | |||
'^[%(%[]%s*[Ee][Dd][Ss]?%.?%s*[%)%]]', -- (ed) or (eds): also square brackets, case insensitive, optional 's', '.'; closing bracket required | |||
'^[%(%[]?%s*[Ee][Dd][Ii][Tt][Oo][Rr][Ss]?%A', -- (editor or (editors: also square brackets, case insensitive, optional opening bracket, optional 's'; must be followed by a non-letter | |||
'^[%(%[]?%s*[Ee][Dd][Ii][Tt][Ee][Dd]%A', -- (edited: also square brackets, case insensitive, optional opening bracket; must be followed by a non-letter | |||
} | } | ||
Line 187: | Line 221: | ||
['ASINTLD'] = {'ASIN-TLD', 'asin-tld'}, | ['ASINTLD'] = {'ASIN-TLD', 'asin-tld'}, | ||
['At'] = 'at', | ['At'] = 'at', | ||
['Authors'] = {'authors', 'people | ['Authors'] = {'authors', 'people', 'credits'}, | ||
['BookTitle'] = {'book-title', 'booktitle'}, | ['BookTitle'] = {'book-title', 'booktitle'}, | ||
['Cartography'] = 'cartography', | ['Cartography'] = 'cartography', | ||
Line 205: | Line 239: | ||
['DF'] = 'df', | ['DF'] = 'df', | ||
['DisplayAuthors'] = {'display-authors', 'displayauthors'}, | ['DisplayAuthors'] = {'display-authors', 'displayauthors'}, | ||
['DisplayContributors'] = 'display-contributors', | |||
['DisplayEditors'] = {'display-editors', 'displayeditors'}, | ['DisplayEditors'] = {'display-editors', 'displayeditors'}, | ||
['DisplayInterviewers'] = 'display-interviewers', | |||
['DisplayTranslators'] = 'display-translators', | |||
['Docket'] = 'docket', | ['Docket'] = 'docket', | ||
['DoiBroken'] = {'doi-broken', 'doi-broken-date', 'doi-inactive-date'}, | ['DoiBroken'] = {'doi-broken', 'doi-broken-date', 'doi-inactive-date'}, | ||
Line 286: | Line 323: | ||
['AuthorList-First'] = {"first#", "given#", "author-first#", "author#-first"}, | ['AuthorList-First'] = {"first#", "given#", "author-first#", "author#-first"}, | ||
['AuthorList-Last'] = {"last#", "author#", "surname#", "author-last#", "author#-last", "subject#"}, | ['AuthorList-Last'] = {"last#", "author#", "surname#", "author-last#", "author#-last", "subject#", 'host#'}, | ||
['AuthorList-Link'] = {"authorlink#", "author-link#", "author#-link", "subjectlink#", "author#link", "subject-link#", "subject#-link", "subject#link"}, | ['AuthorList-Link'] = {"authorlink#", "author-link#", "author#-link", "subjectlink#", "author#link", "subject-link#", "subject#-link", "subject#link"}, | ||
['AuthorList-Mask'] = {"author-mask#", "authormask#", "author#mask", "author#-mask"}, | ['AuthorList-Mask'] = {"author-mask#", "authormask#", "author#mask", "author#-mask"}, | ||
Line 321: | Line 358: | ||
local special_case_translation = { | local special_case_translation = { | ||
['AuthorList'] = | ['AuthorList'] = 'authors list', -- these for multiple names maint categories | ||
['ContributorList'] = | ['ContributorList'] = 'contributors list', | ||
['EditorList'] = | ['EditorList'] = 'editors list', | ||
['InterviewerList'] = | ['InterviewerList'] = 'interviewers list', | ||
['TranslatorList'] = | ['TranslatorList'] = 'translators list', | ||
['authors'] = | ['authors'] = 'authors', -- used in get_display_names() | ||
['editors'] = | ['contributors'] = 'contributors', | ||
['editors'] = 'editors', | |||
['interviewers'] = 'interviewers', | |||
['translators'] = 'translators', | |||
['archived_copy'] = '^archived?%s+copy$', -- lua pattern to match pseudo title used by Internet Archive bot and others as place holder for unknown |title= value | ['archived_copy'] = '^archived?%s+copy$', -- lua pattern to match pseudo title used by Internet Archive bot and others as place holder for unknown |title= value | ||
Line 367: | Line 407: | ||
local date_names = { | local date_names = { | ||
['en'] = { -- English | ['en'] = { -- English | ||
['long'] = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12} | ['long'] = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12}, | ||
['short'] = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12} | ['short'] = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12}, | ||
['season'] = {['Winter']=24, ['Spring']=21, ['Summer']=22, ['Fall']=23, ['Autumn']=23} | ['season'] = {['Winter']=24, ['Spring']=21, ['Summer']=22, ['Fall']=23, ['Autumn']=23}, | ||
['named'] = {['Christmas']=99} | ['named'] = {['Christmas']=99}, | ||
}, | }, | ||
['local'] = { -- replace these English date names with the local language equivalents | ['local'] = { -- replace these English date names with the local language equivalents | ||
['long'] = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12} | ['long'] = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12}, | ||
['short'] = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12} | ['short'] = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12}, | ||
['season'] = {['Winter']=24, ['Spring']=21, ['Summer']=22, ['Fall']=23, ['Autumn']=23} | ['season'] = {['Winter']=24, ['Spring']=21, ['Summer']=22, ['Fall']=23, ['Autumn']=23}, | ||
['named'] = {['Christmas']=99} | ['named'] = {['Christmas']=99}, | ||
}, | }, | ||
['local_digits'] = {['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9'} | ['inv_local_l'] = {}, -- used in date reformatting; copy of date_names['local'].long where k/v are inverted: [1]='<local name>' etc | ||
['xlate_digits'] = {} | ['inv_local_s'] = {}, -- used in date reformatting; copy of date_names['local'].short where k/v are inverted: [1]='<local name>' etc | ||
['local_digits'] = {['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9'}, -- used to convert local language digits to Western 0-9 | |||
['xlate_digits'] = {}, | |||
} | } | ||
-- Fill the inverse month-name tables used in date reformatting.  Each source table under
-- date_names['local'] is ['name'] = i; the matching inverse table gets [i] = 'name' so that
-- month numbers (conversions from ymd) can be turned back into local month names.
for inverse_key, source_key in pairs ({inv_local_l = 'long', inv_local_s = 'short'}) do
	local inverse = date_names[inverse_key];
	for month_name, month_number in pairs (date_names['local'][source_key]) do
		inverse[month_number] = month_name;
	end
end
for ld, ed in pairs (date_names.local_digits) do -- make a digit translation table for simple date translation from en to local language using local_digits table | for ld, ed in pairs (date_names.local_digits) do -- make a digit translation table for simple date translation from en to local language using local_digits table | ||
date_names.xlate_digits [ed] = ld; -- en digit becomes index with local digit as the value | date_names.xlate_digits [ed] = ld; -- en digit becomes index with local digit as the value | ||
end | end | ||
--[[
Lua patterns that recognise {{Use dmy dates}}, {{Use mdy dates}} and their redirects in page wikitext.

Each pattern has a single capture: the three letters that name the date format (dmy or mdy in whatever
letter case the redirect uses).  Each pattern ends with a class that requires a pipe or a closing brace
after the template name, so longer template names that merely begin the same way do not match.

get_date_format() walks this table with ipairs() and stops at the first pattern that matches, so the
order of the entries is significant.  The number in each trailing comment is the approximate transclusion
count that the ordering is based on.
]]
local df_template_patterns = { -- table of redirects to {{Use dmy dates}} and {{Use mdy dates}}
'{{ *[Uu]se (dmy) dates *[|}]', -- 915k -- sorted by approximate transclusion count
'{{ *[Uu]se *(mdy) *dates *[|}]', -- 161k
'{{ *[Uu]se (DMY) dates *[|}]', -- 2929
'{{ *[Uu]se *(dmy) *[|}]', -- 250 + 34
'{{ *([Dd]my) *[|}]', -- 272
'{{ *[Uu]se (MDY) dates *[|}]', -- 173
'{{ *[Uu]se *(mdy) *[|}]', -- 59 + 12
'{{ *([Mm]dy) *[|}]', -- 9
'{{ *[Uu]se (MDY) *[|}]', -- 3
'{{ *([Dd]MY) *[|}]', -- 2
'{{ *([Mm]DY) *[|}]', -- 0
-- the patterns below are disabled (commented out) and are never tried
-- '{{ *[Uu]se(mdy) *[|}]',
-- '{{ *[Uu]se(mdy)dates *[|}]',
-- '{{ *[Uu]se(dmy) *[|}]',
}
--[[--------------------------< G E T _ D A T E _ F O R M A T >------------------------------------------------

Searches the wikitext of the current page for {{Use dmy dates}}, {{Use mdy dates}}, or one of their redirects
as listed in df_template_patterns.  Patterns are tried in table order; the first one that matches wins.

Returns a string appropriate for use in |df=:
	'<fmt>-<len>'	<fmt> is the lowercased capture from the matching pattern (dmy or mdy); <len> is the value
					of the template's |cs1-dates= parameter as matched by [lsy][sy]?
	'<fmt>-all'		when the template has no recognisable |cs1-dates= parameter, or when the template's
					closing braces cannot be found
	nil				when no pattern matches, which includes the case where the page has no content

This function runs once, when the module is loaded, to initialise global_df, so it must never raise an error.

]]

local function get_date_format ()
	local content = mw.title.getCurrentTitle():getContent() or '';			-- getContent() returns nil when the page does not exist; treat that as empty wikitext
	for _, pattern in ipairs (df_template_patterns) do						-- loop through the patterns looking for {{Use dmy dates}} or {{Use mdy dates}} or any of their redirects
		local start, _, match = content:find (pattern);						-- match is the three letters indicating desired date format
		if match then
			local template = content:match ('%b{}', start);					-- get the whole template; nil when its braces are not balanced
			local cs1_dates = template and template:match ('| *cs1%-dates *= *([lsy][sy]?)');	-- look for |cs1-dates=publication date length access-/archive-date length
			return match:lower() .. '-' .. (cs1_dates or 'all');			-- no |cs1-dates= k/v pair: '-all' gives a value appropriate for use in |df=
		end
	end
end

local global_df = get_date_format ();										-- page-wide date format; nil when the page has no {{Use xxx dates}} template
Line 414: | Line 497: | ||
['contribution'] = {'afterword', 'foreword', 'introduction', 'preface'}, -- generic contribution titles that are rendered unquoted in the 'chapter' position | ['contribution'] = {'afterword', 'foreword', 'introduction', 'preface'}, -- generic contribution titles that are rendered unquoted in the 'chapter' position | ||
['date-format'] = {'dmy', 'dmy-all', 'mdy', 'mdy-all', 'ymd', 'ymd-all'}, | ['date-format'] = {'dmy', 'dmy-all', 'mdy', 'mdy-all', 'ymd', 'ymd-all'}, | ||
-- ['date-format'] = {'dmy', 'dmy-all', 'mdy', 'mdy-all', 'ymd', 'ymd-all', 'yMd', 'yMd-all'}, -- not supported at en.wiki | |||
['url-access'] = {'subscription', 'limited', 'registration'}, -- access level of a URL (subscription required, limited access, free registration required), free to read by default | ['url-access'] = {'subscription', 'limited', 'registration'}, -- access level of a URL (subscription required, limited access, free registration required), free to read by default | ||
['id-access'] = {'free'}, -- access level of an identifier (free to read), subscription required (or no full text) by default | ['id-access'] = {'free'}, -- access level of an identifier (free to read), subscription required (or no full text) by default | ||
Line 438: | Line 522: | ||
table data in an arbitrary order. Here, we want to process the table from top to bottom because the entries at | table data in an arbitrary order. Here, we want to process the table from top to bottom because the entries at | ||
the top of the table are also found in the ranges specified by the entries at the bottom of the table. | the top of the table are also found in the ranges specified by the entries at the bottom of the table. | ||
Also here is a pattern that recognizes stripmarkers that begin and end with the delete characters. The nowiki | Also here is a pattern that recognizes stripmarkers that begin and end with the delete characters. The nowiki | ||
Line 502: | Line 583: | ||
'he', 'hi', 'hy', 'ja', 'ka', 'kk', 'km', 'kn', 'ko', 'ku', | 'he', 'hi', 'hy', 'ja', 'ka', 'kk', 'km', 'kn', 'ko', 'ku', | ||
'mk', 'ml', 'mn', 'mr', 'my', 'ne', 'ps', 'ru', 'sd', 'si', | 'mk', 'ml', 'mn', 'mr', 'my', 'ne', 'ps', 'ru', 'sd', 'si', | ||
'sr', 'ta', 'tg', 'th', 'uk', 'ug', 'ur', 'yi', 'zh' | 'sr', 'ta', 'tg', 'th', 'uk', 'ug', 'ur', 'uz', 'yi', 'zh' | ||
}; | }; | ||
Line 530: | Line 611: | ||
['valencian'] = {'Valencian', 'ca'}, -- variant of Catalan; categorizes as Catalan | ['valencian'] = {'Valencian', 'ca'}, -- variant of Catalan; categorizes as Catalan | ||
} | } | ||
--[[--------------------------< M A I N T E N A N C E _ C A T E G O R I E S >---------------------------------- | --[[--------------------------< M A I N T E N A N C E _ C A T E G O R I E S >---------------------------------- | ||
Line 544: | Line 626: | ||
['date_format'] = 'CS1 maint: Date format', | ['date_format'] = 'CS1 maint: Date format', | ||
['date_year'] = 'CS1 maint: Date and year', | ['date_year'] = 'CS1 maint: Date and year', | ||
[' | ['disp_name'] = 'CS1 maint: display-$1', -- $1 is authors, contributors, editors, interviewers, translators; gets value from special_case_translation table | ||
['editors'] = 'CS1 maint: Uses editors parameter', | ['editors'] = 'CS1 maint: Uses editors parameter', | ||
['embargo'] = 'CS1 maint: PMC embargo expired', | ['embargo'] = 'CS1 maint: PMC embargo expired', | ||
['english'] = 'CS1 maint: English language specified', | ['english'] = 'CS1 maint: English language specified', | ||
['extra_text'] = 'CS1 maint: Extra text', | ['extra_text'] = 'CS1 maint: Extra text', | ||
['extra_text_names'] = 'CS1 maint: Extra text: $1', -- $1 is <name>s list; gets value from special_case_translation table | ['extra_text_names'] = 'CS1 maint: Extra text: $1', -- $1 is <name>s list; gets value from special_case_translation table | ||
['ignore_isbn_err'] = 'CS1 maint: Ignored ISBN errors', | ['ignore_isbn_err'] = 'CS1 maint: Ignored ISBN errors', | ||
['mult_names'] = 'CS1 maint: Multiple names: $1', -- $1 is <name>s list; gets value from special_case_translation table | ['mult_names'] = 'CS1 maint: Multiple names: $1', -- $1 is <name>s list; gets value from special_case_translation table | ||
['others'] = 'CS1 maint: others', | |||
['pmc_format'] = 'CS1 maint: PMC format', | ['pmc_format'] = 'CS1 maint: PMC format', | ||
['unfit'] = 'CS1 maint: Unfit url', | ['unfit'] = 'CS1 maint: Unfit url', | ||
Line 846: | Line 927: | ||
anchor = 'empty_citation', | anchor = 'empty_citation', | ||
category = 'Pages with empty citations', | category = 'Pages with empty citations', | ||
hidden = false | |||
}, | |||
etal = { | |||
message = 'Explicit use of et al. in: <code class="cs1-code">|$1=</code>', | |||
anchor = 'explicit_et_al', | |||
category = 'CS1 errors: Explicit use of et al.', | |||
hidden = false | hidden = false | ||
}, | }, | ||
first_missing_last = { | first_missing_last = { | ||
message = '<code class="cs1-code">| | message = '<code class="cs1-code">|$1=</code> missing <code class="cs1-code">|$2=</code>', -- $1 is first alias, $2 is matching last alias | ||
anchor = 'first_missing_last', | anchor = 'first_missing_last', | ||
category = 'CS1 errors: missing author | category = 'CS1 errors: missing name', -- author, contributor, editor, interviewer, translator | ||
hidden = false | hidden = false | ||
}, | }, | ||
Line 873: | Line 960: | ||
}, | }, | ||
missing_name = { | missing_name = { | ||
message = 'Missing <code class="cs1-code">| | message = 'Missing <code class="cs1-code">|$1$2=</code>', -- $1 is modified NameList; $2 is enumerator | ||
anchor = 'missing_name', | anchor = 'missing_name', | ||
category = 'CS1 errors: missing author | category = 'CS1 errors: missing name', -- author, contributor, editor, interviewer, translator | ||
hidden = false | |||
}, | |||
missing_pipe = { | |||
message = 'Missing pipe in: <code class="cs1-code">|$1=</code>', | |||
anchor = 'missing_pipe', | |||
category = 'CS1 errors: Missing pipe', | |||
hidden = false | hidden = false | ||
}, | }, | ||
Line 1,239: | Line 1,332: | ||
--[[--------------------------< E X P O R T S > | --[[--------------------------< E X P O R T E D T A B L E S >------------------------------------------------ | ||
]] | ]] | ||
Line 1,248: | Line 1,341: | ||
date_names = date_names, | date_names = date_names, | ||
error_conditions = error_conditions, | error_conditions = error_conditions, | ||
editor_markup_patterns = editor_markup_patterns, | |||
et_al_patterns = et_al_patterns, | |||
global_df = global_df, | |||
id_handlers = id_handlers, | id_handlers = id_handlers, | ||
keywords = keywords, | keywords = keywords, |