#!/usr/bin/env perl
#
# Decodes Proofpoint urldefense v1 and v2 URLs.
# Give it a list of files (or pipe stdin through it) and it will print the text
# to stdout with each urldefense link replaced by the original URL it wraps.
#
# For v3, use decodeurldefense.py
#

use warnings;
use v5.16;  # RHEL 7
no strict "vars";
#use diagnostics;
use HTML::Entities qw/decode_entities/;
use URI::Escape qw/uri_unescape/;

# Patterns for Proofpoint URL Defense links.  They do not depend on the input
# line, so they are compiled once, outside the read loop.  In both patterns the
# named capture "u" holds the still-encoded original URL.
my $v1re = qr{https://urldefense[.]proofpoint[.]com/v1/url[?]u=(?<u>.+?)&k=.+?&s=[0-9a-f]{64}};
my $v2re = qr{https://urldefense[.]proofpoint[.]com/v2/url[?]u=(?<u>.+?)&[dc]=.+?&e=};

# Decode the "u" parameter of a v1 link: percent-unescape it, then decode any
# HTML entities in the result.  Returns the decoded URL as a string.
sub decode_v1_target {
    my ($encoded) = @_;
    my $html_encoded_url = uri_unescape($encoded);
    my $url              = decode_entities($html_encoded_url);
    return $url;
}

# Decode the "u" parameter of a v2 link.  The v2 form writes "-" where the
# percent-encoding has "%" and "_" where the URL has "/", so those are mapped
# back first; the rest is the same as v1.  Returns the decoded URL as a string.
sub decode_v2_target {
    my ($encoded) = @_;
    $encoded =~ tr{-_}{%/};
    my $html_encoded_url = uri_unescape($encoded);
    my $url              = decode_entities($html_encoded_url);
    return $url;
}

# Read every input line (files named in @ARGV, or stdin), rewrite the
# urldefense links found on it, and print the line.
LINE: while (<>) {
    # /e evaluates the replacement once per match, so each link on the line is
    # replaced by its own decoded target (a plain "s/$re/$url/g" would stamp
    # the first link's target over every match).  Both substitutions always
    # run, so a line mixing v1 and v2 links has all of them decoded; the v2
    # pass also sees the output of the v1 pass.
    s/$v1re/decode_v1_target($+{u})/ge;
    s/$v2re/decode_v2_target($+{u})/ge;
} continue {
    # Fail loudly if stdout cannot be written (e.g. closed pipe, full disk).
    print or die "$!";
}
