Module:Citation/CS1: Difference between revisions

revised archive url check
m>Trappist the monk
(fix archive_url_check();)
m>Trappist the monk
(revised archive url check)
Line 1,668: Line 1,668:
|archive-url= is an archive.org url that does not have a complete timestamp (YYYYMMDDhhmmss 14 digits) in the correct place
|archive-url= is an archive.org url that does not have a complete timestamp (YYYYMMDDhhmmss 14 digits) in the correct place
otherwise returns |archive-url= and |archive-date=
otherwise returns |archive-url= and |archive-date=
There are two mostly compatible archive.org urls:
//web.archive.org/<timestamp>... -- the old form
//web.archive.org/web/<timestamp>... -- the new form
The old form does not support or map to the new form when it contains a display flag.  There are four identified flags
('id_', 'js_', 'cs_', 'im_') but since archive.org ignores others following the same form (two letters and an underscore)
we don't check for these specific flags but we do check the form.


]=]
]=]


-- NOTE(review): this span appears to be a two-column revision diff flattened into one column: unchanged lines
-- occur twice and statements of the older revision (dots written as '\.') are interleaved with statements of the
-- newer revision (dots written as '%.').  It is not runnable as it stands; read the '%.' lines as the newer code.
-- '%' is the escape character in Lua patterns; '\.' is not a pattern escape.
-- Newer revision, in order: return url, date unchanged when url is not a web.archive.org url; reject save-command
-- and wildcard urls; split the url into path, timestamp and flag; then set err_msg to 'timestamp', 'path' or 'flag'
-- when the 14-digit timestamp, the optional 'web/' path element or the optional flag fail their checks.
-- Every rejection appends an 'archive_url' error to z.message_tail and returns '', ''.
local function archive_url_check (url, date)
local function archive_url_check (url, date)
if url:match('//web\.archive\.org/') then -- for archive.org urls:
local err_msg = ''; -- start with the error message empty
if url:match('//web\.archive\.org/save/') then -- if a save command url, we don't want to save target page  
local path, timestamp, flag; -- portions of the archive.org url
table.insert( z.message_tail, { set_error( 'archive_url', {'save command'}, true ) } ); -- add error message
return '', ''; -- return empty strings for archiveURL and ArchiveDate
if not url:match('//web%.archive%.org/') then
elseif url:match('//web\.archive\.org/web/%d%d%d%d%d%d%d%d%d%d%d%d%d%d/') or -- if there is what looks like a correct timestamp
return url, date; -- not an archive.org archive, return archiveURL and ArchiveDate
url:match('//web\.archive\.org/%d%d%d%d%d%d%d%d%d%d%d%d%d%d/') then -- without /web/ gets remapped to have the /web/ by archive.org
end
return url, date; -- return archiveURL and ArchiveDate
 
else -- malformed url
if url:match('//web%.archive%.org/save/') then -- if a save command url, we don't want to allow saving of the target page  
table.insert( z.message_tail, { set_error( 'archive_url', {'timestamp'}, true ) } ); -- add error message
table.insert( z.message_tail, { set_error( 'archive_url', {'save command'}, true ) } ); -- add error message
return '', ''; -- return empty strings for archiveURL and ArchiveDate
return '', ''; -- return empty strings for archiveURL and ArchiveDate
end
end
 
if url:match('//web%.archive%.org/web/%*/') or url:match('//web%.archive%.org/%*/') then -- wildcard with or without 'web/' path element
table.insert( z.message_tail, { set_error( 'archive_url', {'wildcard'}, true ) } ); -- add error message and
return '', ''; -- return empty strings for archiveURL and ArchiveDate
end
path, timestamp, flag = url:match('//web%.archive%.org/([^%d]*)(%d+)([^/]*)/'); -- split out some of the url parts for evaluation
 
if not is_set(timestamp) or 14 ~= timestamp:len() then -- path and flag optional, must have 14-digit timestamp here
err_msg = 'timestamp';
elseif is_set(path) and 'web/' ~= path then -- older archive urls do not have the extra 'web/' path element
err_msg = 'path';
elseif is_set (flag) and not is_set (path) then -- flag not allowed with the old form url (without the 'web/' path element)
err_msg = 'flag';
elseif is_set (flag) and not flag:match ('%a%a_') then -- flag if present must be two alpha characters and underscore (requires 'web/' path element)
err_msg = 'flag';
else
return url, date; -- return archiveURL and ArchiveDate
end
end
return url, date; -- not an archive.org archive
-- if here something not right so
table.insert( z.message_tail, { set_error( 'archive_url', {err_msg}, true ) } ); -- add error message and
return '', ''; -- return empty strings for archiveURL and ArchiveDate
end
end


Line 1,831: Line 1,859:
-- local ArchiveDate = A['ArchiveDate'];
-- local ArchiveDate = A['ArchiveDate'];
-- local ArchiveURL = A['ArchiveURL'];
-- local ArchiveURL = A['ArchiveURL'];
-- if ArchiveURL:match('//web\.archive\.org/save/') then -- if an archive.org save command url, we don't want to save target page ...
-- if ArchiveURL:match('//web%.archive%.org/save/') then -- if an archive.org save command url, we don't want to save target page ...
-- ArchiveURL = ''; -- every time a reader clicks the link so
-- ArchiveURL = ''; -- every time a reader clicks the link so
-- ArchiveDate = ''; -- unset these
-- ArchiveDate = ''; -- unset these
Anonymous user