Module:Citation/CS1: Difference between revisions

    m>Trappist the monk
    (fix archive_url_check();)
    m>Trappist the monk
    (revised archive url check)
    Line 1,668: Line 1,668:
    |archive-url= is an archive.org url that does not have a complete timestamp (YYYYMMDDhhmmss 14 digits) in the correct place
    |archive-url= is an archive.org url that does not have a complete timestamp (YYYYMMDDhhmmss 14 digits) in the correct place
    otherwise returns |archive-url= and |archive-date=
    otherwise returns |archive-url= and |archive-date=
    There are two mostly compatible archive.org urls:
    //web.archive.org/<timestamp>... -- the old form
    //web.archive.org/web/<timestamp>... -- the new form
    The old form does not support or map to the new form when it contains a display flag.  There are four identified flags
    ('id_', 'js_', 'cs_', 'im_'), but since archive.org ignores others following the same form (two letters and an underscore),
    we don't check for these specific flags; we do check the form.


    ]=]
    ]=]


    local function archive_url_check (url, date)
    local function archive_url_check (url, date)
    if url:match('//web\.archive\.org/') then -- for archive.org urls:
    local err_msg = ''; -- start with the error message empty
    if url:match('//web\.archive\.org/save/') then -- if a save command url, we don't want to save target page  
    local path, timestamp, flag; -- portions of the archive.org url
    table.insert( z.message_tail, { set_error( 'archive_url', {'save command'}, true ) } ); -- add error message
    return '', ''; -- return empty strings for archiveURL and ArchiveDate
    if not url:match('//web%.archive%.org/') then
    elseif url:match('//web\.archive\.org/web/%d%d%d%d%d%d%d%d%d%d%d%d%d%d/') or -- if there is what looks like a correct timestamp
    return url, date; -- not an archive.org archive, return archiveURL and ArchiveDate
    url:match('//web\.archive\.org/%d%d%d%d%d%d%d%d%d%d%d%d%d%d/') then -- without /web/ gets remapped to have the /web/ by archive.org
    end
    return url, date; -- return archiveURL and ArchiveDate
     
    else -- malformed url
    if url:match('//web%.archive%.org/save/') then -- if a save command url, we don't want to allow saving of the target page  
    table.insert( z.message_tail, { set_error( 'archive_url', {'timestamp'}, true ) } ); -- add error message
    table.insert( z.message_tail, { set_error( 'archive_url', {'save command'}, true ) } ); -- add error message
    return '', ''; -- return empty strings for archiveURL and ArchiveDate
    return '', ''; -- return empty strings for archiveURL and ArchiveDate
    end
    end
     
    if url:match('//web%.archive%.org/web/%*/') or url:match('//web%.archive%.org/%*/') then -- wildcard with or without 'web/' path element
    table.insert( z.message_tail, { set_error( 'archive_url', {'wildcard'}, true ) } ); -- add error message and
    return '', ''; -- return empty strings for archiveURL and ArchiveDate
    end
    path, timestamp, flag = url:match('//web%.archive%.org/([^%d]*)(%d+)([^/]*)/'); -- split out some of the url parts for evaluation
     
    if not is_set(timestamp) or 14 ~= timestamp:len() then -- path and flag optional, must have 14-digit timestamp here
    err_msg = 'timestamp';
    elseif is_set(path) and 'web/' ~= path then -- older archive urls do not have the extra 'web/' path element
    err_msg = 'path';
    elseif is_set (flag) and not is_set (path) then -- flag not allowed with the old form url (without the 'web/' path element)
    err_msg = 'flag';
    elseif is_set (flag) and not flag:match ('%a%a_') then -- flag if present must be two alpha characters and underscore (requires 'web/' path element)
    err_msg = 'flag';
    else
    return url, date; -- return archiveURL and ArchiveDate
    end
    end
    return url, date; -- not an archive.org archive
    -- if here something not right so
    table.insert( z.message_tail, { set_error( 'archive_url', {err_msg}, true ) } ); -- add error message and
    return '', ''; -- return empty strings for archiveURL and ArchiveDate
    end
    end


    Line 1,831: Line 1,859:
    -- local ArchiveDate = A['ArchiveDate'];
    -- local ArchiveDate = A['ArchiveDate'];
    -- local ArchiveURL = A['ArchiveURL'];
    -- local ArchiveURL = A['ArchiveURL'];
    -- if ArchiveURL:match('//web\.archive\.org/save/') then -- if an archive.org save command url, we don't want to save target page ...
    -- if ArchiveURL:match('//web%.archive%.org/save/') then -- if an archive.org save command url, we don't want to save target page ...
    -- ArchiveURL = ''; -- every time a reader clicks the link so
    -- ArchiveURL = ''; -- every time a reader clicks the link so
    -- ArchiveDate = ''; -- unset these
    -- ArchiveDate = ''; -- unset these