#!/usr/bin/perl -w

##########################################################################
# FILE       ISR-form.pl
# AUTHOR     Francisco Amato
# EMAIL      famato+at+infobyte+dot+com+dot+ar
# COMPANY    [ISR] - Infobyte Security Research
# VERSION    1.0
##########################################################################

use strict;
use HTML::PullParser ();
use HTML::Entities qw(decode_entities);
use Data::Dump qw(dump);
use File::Find;
use Getopt::Std;

# Tag names passed to HTML::PullParser as report_tags in get_form.
my @FORM_TAGS = ('form', 'input', 'textarea', 'button', 'select', 'option');
my $t = q{};
#my $m_vbose;
# Report file name; set from the -o option in main, empty means "no report file".
my $m_report = q{};

# Init Process: run the program.
main();

##########################################################################
# FUNCTION   main
# RECEIVES   nothing; the command line is read from @ARGV through getopts
# RETURNS    nothing meaningful
# EXPECTS    exactly one source option: -l <directory> or -f <file>
# DOES       main function: parses the options, initializes the report
#            file and analyzes either a single file or a directory tree
sub main {

    my %args;
    my $m_loc;
    my $m_file;
    my $m_sc=0;

    # Get parameter
    getopts("l:f:o:h", \%args);

    # Set/Check commands
    if(defined $args{l}) { $m_loc = $args{l}; $m_sc++; }
    if(defined $args{f}) { $m_file = $args{f}; $m_sc++;}
    if(defined $args{o}) { $m_report = $args{o};}
#    if(defined $args{v}) { $m_vbose=1;}
    if(defined $args{h}) { get_usage(); }
    # get_usage exits, so the code below only runs with exactly one source.
    if ($m_sc != 1) { print "Error: Use source <-l> or <-f>\n"; get_usage();}

    #Log report
    init_log();

    # Test definedness: $m_file is undef when -l was given, and comparing it
    # with "" would emit an "uninitialized value" warning under -w.
    if (defined $m_file) {
        #type: Alone
        my ($m_warn,@forms) = get_form($m_file);
        get_report($m_file,$m_warn,@forms);
    }else {
        #type: Dir
        # no_chdir keeps the working directory fixed during the traversal so
        # that a relative -l directory and a relative -o report file keep
        # resolving against the directory the script was started from.
        finddepth({ wanted => \&get_location, no_chdir => 1 }, $m_loc);
    }
}

##########################################################################
# FUNCTION   init_log
# RECEIVES   nothing; reads the file-level $m_report
# RETURNS    nothing meaningful
# EXPECTS    $m_report to be "" (no report) or the name of a writable file
# DOES       Initialize Log file: appends the report banner line to it
sub init_log
{
    # No report file requested.
    return if $m_report eq "";

    # Three-argument open with a lexical handle: the file name is taken
    # literally (no whitespace trimming or mode characters read from it)
    # and the handle is not a package-wide bareword.
    open (my $fz, '>>', $m_report) || die "Error: Can't open file $m_report to save the report\n";
    my $msg= "LOG REPORT -- ISR-form.pl (*) Francisco Amato (*)  www.infobyte.com.ar\n";
    print {$fz} $msg;
    # Buffered write errors (full disk, ...) only surface when closing.
    close($fz) || die "Error: Can't write the report to file $m_report: $!\n";
    return;
}

##########################################################################
# FUNCTION   get_location
# RECEIVES   nothing; File::Find supplies $_ and $File::Find::name
# RETURNS    nothing meaningful
# EXPECTS    to be called by File::Find as the "wanted" callback
# DOES       Event finddepth analyze file: parses and reports every regular
#            file whose name contains .htm, .asp or .php
sub get_location
{
    # $File::Find::name is the path from the start directory and is used for
    # the report.  $_ is the name that is valid from the current directory:
    # the bare file name when File::Find changes directory, or the same as
    # $File::Find::name under no_chdir.  Opening $_ works in both modes, and
    # copying it leaves File::Find's $_ untouched.
    my $m_path = $File::Find::name;
    my $m_open = $_;

    # Directories and other non-regular entries cannot be parsed.
    return unless -f $m_open;

    # Check the extension on the file name only, so that a directory such as
    # "www.php.net" does not select every file below it.  The match is left
    # unanchored on purpose: mirrored pages are often saved with a query
    # string appended (index.php?id=1).  /\.htm/ also covers ".html".
    my ($m_base) = $m_path =~ m{([^/]*)\z};
    return unless $m_base =~ /\.(?:htm|asp|php)/i; #List of files to analyze

    my ($m_warn,@forms) = get_form($m_open);
    get_report($m_path,$m_warn,@forms);
    return;
}

##########################################################################
# FUNCTION   get_form
# RECEIVES   $m_file  name of the html file to parse
# RETURNS    ($m_warn, @forms): the number of warnings raised, followed by
#            one array reference per <form> found.  Each form array starts
#            with the form's attribute hash and continues with the tokens
#            of the fields found inside it.
# EXPECTS    @FORM_TAGS to list the tags the parser must report
# DOES       parse html to search form tags
sub get_form {

    my ($m_file) = @_;

    my $parser = HTML::PullParser->new(
        file        => $m_file,
        start       => 'tag, attr',
        end         => 'tag',
        text        => '@{text}',
        report_tags => \@FORM_TAGS,
    ) || die "Error: $!";

    my @forms;
    my $m_warn = 0;

    TOKEN:
    while (defined(my $token = $parser->get_token)) {
        next TOKEN unless ref $token;    # skip text

        # Any reported tag met here is not enclosed in a form.
        if ($token->[0] ne "form") {
            $m_warn++;
            warn "form tag $token->[0] outside form";
            next TOKEN;
        }

        # Drop the tag name: the form entry starts with its attributes.
        shift @$token;
        my $form = $token;
        push(@forms, $form);

        FIELD:
        while (defined(my $field = $parser->get_token)) {
            next FIELD unless ref $field;    # skip text
            my $tag = $field->[0];
            last FIELD if $tag eq "/form";

            if ($tag eq "select") {
                push(@$form, $field);

                # The option values are appended to the select token itself.
                OPTION:
                while (defined(my $item = $parser->get_token)) {
                    next OPTION unless ref $item;    # skip text
                    last OPTION if $item->[0] eq "/select";

                    if ($item->[0] ne "option") {
                        $m_warn++;
                        warn "$item->[0] inside select";
                        next OPTION;
                    }

                    # Without a value attribute the option text is the value.
                    my $value = $item->[1]->{value};
                    my $text  = get_text($parser, "/option");
                    $value = decode_entities($text) unless defined $value;
                    push(@$field, $value);
                }
            }
            elsif ($tag =~ /^\/?option$/) {
                $m_warn++;
                warn "option tag outside select";
            }
            elsif ($tag eq "textarea") {
                push(@$form, $field);
                $field->[1]{value} = get_text($parser, "/textarea");
            }
            elsif ($tag =~ m,^/,) {
                $m_warn++;
                warn "stray $tag tag";
            }
            else {
                push(@$form, $field);
            }
        }
    }

    return ($m_warn, @forms);
}

##########################################################################
# FUNCTION   get_text
# RECEIVES   $p     parser object offering get_token / unget_token
#            $stop  name of the tag that closes the text (ex: "/option")
# RETURNS    the concatenated text tokens read before the first tag token,
#            or undef when no text token was read
# EXPECTS    text tokens to be plain scalars and tag tokens to be array
#            references whose first element is the tag name
# DOES       helper function: collects text up to the next tag; that tag is
#            consumed when it is $stop and pushed back otherwise
sub get_text {
    my($p, $stop) = @_;
    my $text;

    for (;;) {
        my $token = $p->get_token;
        last unless defined $token;    # end of input

        unless (ref $token) {
            $text .= $token;
            next;
        }

        # A tag ends the text; only the expected closing tag is swallowed.
        $p->unget_token($token) if $token->[0] ne $stop;
        last;
    }

    return $text;
}

##########################################################################
# FUNCTION   get_report
# RECEIVES   $m_file  name of the analyzed file
#            $m_warn  number of warnings counted while parsing it
#            @forms   one array reference per form found
# RETURNS    nothing meaningful
# EXPECTS    the file-level $m_report to be "" or the name of a writable file
# DOES       make the report: prints it to STDOUT and, when a report file
#            was requested, appends it to that file
sub get_report {
    my ($m_file,$m_warn,@forms) = @_;

    my $c = scalar @forms;
    my $debug;

    $debug = "#" x 100 ."\n";
    $debug .= "\nSummary:\n\n";
    $debug .= "File: $m_file\n";
    $debug .= "Form found : $c\n";
    $debug .= "Warn : $m_warn\n";

    $debug .= "\nInformation:\n\n";
    my $i = 0;
    foreach my $val (@forms) {
        $i++;
        $debug .= "Form $i:\n";
        $debug .= dump($val);
        $debug .= "\n"."-" x 100 ."\n";

    }

    #Debug: info
    print $debug;

    if ($m_report ne "") {
        # Three-argument open with a lexical handle: the file name is taken
        # literally and the handle is not a package-wide bareword.
        open (my $fz, '>>', $m_report) || die "Error: Can't open file $m_report to save the report\n";
        print {$fz} $debug;
        # Buffered write errors only surface when closing.
        close($fz) || die "Error: Can't write the report to file $m_report: $!\n";
    }
    return;
}

##########################################################################
# FUNCTION   get_usage
# RECEIVES   nothing
# RETURNS    never returns: the program exits
# EXPECTS    $0 to hold the name of the script
# DOES       help: prints the usage text on STDOUT and exits
sub get_usage
{
    # One entry per output line; empty entries are blank lines.
    my @lines = (
        "",
        "-- ISR-form.pl (*) Francisco Amato (*)  www.infobyte.com.ar",
        "-- Get form info of html file  ----------------------------",
        "",
        " Usage: $0 -l </audit/microsoft/> -o /audit/report.txt",
        "",
        " <-l> Location of .html, .htm, .asp, .php (Complete path ex: /audit/www.site.com/",
        " <-f> Location of html file to parser</> ",
        " <-o> Name of report (complete path)",
#        " <-v> Verbose",
        " <-h> Help",
        "",
        "Example: ",
        "# wget -r ./www.microsoft.com",
        "# $0 -l /audit/www.microsoft.com -o /audit/report.txt",
        "",
    );

    print join("\n", @lines) . "\n";
    exit;
}
