Module:Citation/CS1: Difference between revisions
revised archive url check
m>Trappist the monk (fix archive_url_check();) |
m>Trappist the monk (revised archive url check) |
||
Line 1,668: | Line 1,668: | ||
|archive-url= is an archive.org url that does not have a complete timestamp (YYYYMMDDhhmmss 14 digits) in the correct place | |archive-url= is an archive.org url that does not have a complete timestamp (YYYYMMDDhhmmss 14 digits) in the correct place | ||
otherwise returns |archive-url= and |archive-date= | otherwise returns |archive-url= and |archive-date= | ||
There are two mostly compatible archive.org urls: | |||
//web.archive.org/<timestamp>... -- the old form | |||
//web.archive.org/web/<timestamp>... -- the new form | |||
The old form does not support or map to the new form when it contains a display flag. There are four identified flags | |||
('id_', 'js_', 'cs_', 'im_') but since archive.org ignores others following the same form (two letters and an underscore) | |||
we don't check for these specific flags but we do check the form. | |||
]=] | ]=] | ||
local function archive_url_check (url, date) | local function archive_url_check (url, date) | ||
if url:match('//web | local err_msg = ''; -- start with the error message empty | ||
local path, timestamp, flag; -- portions of the archive.org url | |||
if not url:match('//web%.archive%.org/') then | |||
return url, date; -- not an archive.org archive, return archiveURL and ArchiveDate | |||
end | |||
if url:match('//web%.archive%.org/save/') then -- if a save command url, we don't want to allow saving of the target page | |||
table.insert( z.message_tail, { set_error( 'archive_url', {'save command'}, true ) } ); -- add error message | |||
return '', ''; -- return empty strings for archiveURL and ArchiveDate | |||
end | |||
if url:match('//web%.archive%.org/web/%*/') or url:match('//web%.archive%.org/%*/') then -- wildcard with or without 'web/' path element | |||
table.insert( z.message_tail, { set_error( 'archive_url', {'wildcard'}, true ) } ); -- add error message and | |||
return '', ''; -- return empty strings for archiveURL and ArchiveDate | |||
end | |||
path, timestamp, flag = url:match('//web%.archive%.org/([^%d]*)(%d+)([^/]*)/'); -- split out some of the url parts for evaluation | |||
if not is_set(timestamp) or 14 ~= timestamp:len() then -- path and flag optional, must have 14-digit timestamp here | |||
err_msg = 'timestamp'; | |||
elseif is_set(path) and 'web/' ~= path then -- older archive urls do not have the extra 'web/' path element | |||
err_msg = 'path'; | |||
elseif is_set (flag) and not is_set (path) then -- flag not allowed with the old form url (without the 'web/' path element) | |||
err_msg = 'flag'; | |||
elseif is_set (flag) and not flag:match ('%a%a_') then -- flag if present must contain two alpha characters followed by an underscore (pattern is not anchored; requires 'web/' path element) | |||
err_msg = 'flag'; | |||
else | |||
return url, date; -- return archiveURL and ArchiveDate | |||
end | end | ||
return | -- if here something not right so | ||
table.insert( z.message_tail, { set_error( 'archive_url', {err_msg}, true ) } ); -- add error message and | |||
return '', ''; -- return empty strings for archiveURL and ArchiveDate | |||
end | end | ||
Line 1,831: | Line 1,859: | ||
-- local ArchiveDate = A['ArchiveDate']; | -- local ArchiveDate = A['ArchiveDate']; | ||
-- local ArchiveURL = A['ArchiveURL']; | -- local ArchiveURL = A['ArchiveURL']; | ||
-- if ArchiveURL:match('//web | -- if ArchiveURL:match('//web%.archive%.org/save/') then -- if an archive.org save command url, we don't want to save target page ... | ||
-- ArchiveURL = ''; -- every time a reader clicks the link so | -- ArchiveURL = ''; -- every time a reader clicks the link so | ||
-- ArchiveDate = ''; -- unset these | -- ArchiveDate = ''; -- unset these |