#!/usr/bin/perl

# WebScraperHelper : Helper GUI for Web::Scraper
#
# Description: a GUI tool for trying out XPath expressions (or CSS selectors) against a web page.
# Version: 0.0.1
# Author: Yasuhiro Matsumoto <mattn.jp@gmail.com>
# License: GPLv2

use strict;
use Gtk2 qw/-init/;
use LWP::UserAgent;
use HTML::TreeBuilder::XPath;
use HTML::Selector::XPath;
use URI;
use Encode;
use HTTP::Response::Encoding;
use List::Util qw(first);

# File-level state shared with the event handlers defined further down.
my $tree;           # tree of the page fetched last; stays undef until get_html builds one
my $user_agent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)';
my $vbox = Gtk2::VBox->new(0, 10);

#---------------------------------------------
# Row factory
# Builds one "label | entry | button" row, appends it to $vbox and hands
# back the four widgets in creation order (hbox, label, entry, button).
my $build_row = sub {
    my ($caption, $button_caption) = @_;

    my $hbox   = Gtk2::HBox->new;
    my $label  = Gtk2::Label->new($caption);
    my $entry  = Gtk2::Entry->new;
    my $button = Gtk2::Button->new($button_caption);

    # Fixed-width caption so the entries of both rows line up.
    $label->set_size_request (50, -1);
    $label->set_alignment (1, 0.5);

    $hbox->pack_start ($label, 0, 0, 0);
    $hbox->add ($_) for $entry, $button;
    $vbox->pack_start ($hbox, 0, 0, 0);

    return ($hbox, $label, $entry, $button);
};

#---------------------------------------------
# URL
my ($url_hbox, $url_label, $url_text, $url_update)
    = $build_row->('URL:', '_Get');

#---------------------------------------------
# XPath
my ($xpath_hbox, $xpath_label, $xpath_text, $xpath_update)
    = $build_row->('XPath:', '_Update');

#---------------------------------------------
# Source View
# Read-only text view, placed in a scrolled window that takes up all the
# remaining vertical space of $vbox.
my $scrolled_window = Gtk2::ScrolledWindow->new;
my $result_view     = Gtk2::TextView->new;
$result_view->set_editable (0);
$result_view->set_wrap_mode ('word-char');
$scrolled_window->set_policy ('automatic', 'automatic');
$scrolled_window->add_with_viewport ($result_view);
$vbox->pack_start ($scrolled_window, 1, 1, 0);

# The XPath controls start out disabled; get_html switches them on once a
# page has been fetched.
$_->sensitive (0) for $xpath_text, $xpath_update;

#---------------------------------------------
# Top Level Window
my $window = Gtk2::Window->new('toplevel');
$window->set_title ('WebScraperHelper');
$window->set_border_width (5);
$window->set_size_request (500, 500);
$window->add ($vbox);

# Closing the window ends the GTK main loop, and with it the program.
$window->signal_connect ( 'destroy' => sub { Gtk2->main_quit } );
#---------------------------------------------
# Event Handler
# Handler for the "Get" button; also called directly at startup when a URL
# is given on the command line.
#
# Fetches the address typed into the URL entry (sending $user_agent),
# decodes the body and rebuilds the file-level $tree from it.  The charset
# is the first usable candidate among: the encoding reported by
# $res->encoding, any charset=... value found in the Content-Type header,
# and finally latin-1.
#
# On success the decoded HTML is shown in the result view and the XPath
# entry/button are enabled.  On failure (HTTP error, decoding or parsing
# problem) they are disabled and the reason is shown in the result view
# instead of leaving it silently blank; the previously loaded tree, if any,
# is left untouched.
#
# Always returns 0.
sub get_html {
    my $url = $url_text->get_text ();
    my $ua  = LWP::UserAgent->new(agent => $user_agent);
    my $res = $ua->get( URI->new($url) );
    my $html;
    my $problem;
    if ($res->is_success) {
        my $new_tree;
        my $parsed = eval {
            my $content_type = $res->header('Content-Type');
            my @encoding = (
                $res->encoding,
                (defined($content_type) ? ($content_type =~ /charset=([\w\-]+)/g) : ()),
                'latin-1',
            );
            my $encoding = first { defined $_ && Encode::find_encoding($_) } @encoding;
            my $decoded = Encode::decode($encoding, $res->content);
            $new_tree = HTML::TreeBuilder::XPath->new;
            $new_tree->parse($decoded);
            $new_tree->eof;
            $html = $decoded;
            1;
        };
        if ($parsed) {
            # Replace the old tree only once the new one is complete.
            $tree->delete if $tree;
            $tree = $new_tree;
        }
        else {
            $problem = "Failed to parse $url: $@";
            $new_tree->delete if $new_tree;
            $html = undef;
        }
    }
    else {
        $problem = "Failed to get $url: " . $res->status_line;
    }

    my $loaded = $html ? 1 : 0;
    $xpath_text->sensitive ($loaded);
    $xpath_update->sensitive ($loaded);

    # Never hand undef to the text buffer: show the page, else the reason
    # for the failure, else nothing.
    my $shown = '';
    if (defined $html) {
        $shown = $html;
    }
    elsif (defined $problem) {
        $shown = $problem;
    }
    $result_view->get_buffer ()->set_text ( $shown );
    0;
}

# Handler for the "Update" button (also triggered by Ctrl+R in the XPath
# entry).
#
# Evaluates the expression typed into the XPath entry against $tree and
# shows the matching nodes in the result view.  An expression starting
# with "/" is used as XPath as-is; anything else is treated as a CSS
# selector and converted with HTML::Selector::XPath.  An empty expression
# shows the whole document.
#
# If the expression cannot be converted or evaluated (or no document is
# loaded), the error is shown in the result view rather than an
# unexplained empty result.  Keyboard focus is put back on the XPath entry
# afterwards.
#
# Always returns 0.
sub select_node {
    my $exp = $xpath_text->get_text();
    my $html_cut = '';
    my $done = eval {
        die "no document has been loaded yet\n" unless $tree;
        if ($exp) {
            my $xpath = $exp =~ m!^/! ? $exp : HTML::Selector::XPath::selector_to_xpath($exp);
            my @nodes = do {
                # Keep warnings raised while evaluating the query quiet.
                local $SIG{__WARN__} = sub { };
                $tree->findnodes($xpath);
            };
            for my $node (@nodes) {
                if (ref($node) eq 'HTML::TreeBuilder::XPath::Attribute') {
                    $html_cut .= $node->toString;
                }
                elsif ($node->isTextNode) {
                    $html_cut .= HTML::Entities::encode($node->as_XML, q("'<>&));
                }
                else {
                    $html_cut .= $node->as_XML;
                }
            }
        }
        else {
            $html_cut = $tree->as_XML;
        }
        1;
    };
    $html_cut = "Cannot evaluate '$exp': $@" unless $done;

    $result_view->get_buffer ()->set_text ( $html_cut );
    $xpath_text->grab_focus;
    0;
}

# 'key-press-event' handler of the XPath entry: Ctrl+R re-runs the query.
#
# Parameters: the widget that received the event and the key event.
# Returns 0, so the key press is also processed by the default handler.
sub xpath_key_pressed {
    my ($this, $key) = @_;
    # The modifier state is a flags value; ">=" tests that it contains
    # control-mask.  Indexing it with ->['control-mask'] would read element
    # 0 of its array form and be true for any active modifier (e.g. Num
    # Lock), firing on a plain "r".  114 is the key value of lowercase "r".
    if ($key->state >= ['control-mask'] and $key->keyval == 114) {
        select_node();
    }
    0;
}

# Wire the widgets to their handlers (as code references).
$url_update->signal_connect ( 'clicked' => \&get_html );
$xpath_update->signal_connect ( 'clicked' => \&select_node );
$xpath_text->signal_connect ( 'key-press-event' => \&xpath_key_pressed );

#---------------------------------------------
$window->show_all;

# Command line: [--ua=USER_AGENT] [URL]
# --ua overrides the default User-Agent string; a bad option only produces
# a usage hint and the program carries on.  "use" takes effect at compile
# time wherever it is written, so it is not wrapped in an eval.
use Getopt::Long;
GetOptions(
    'ua=s' => \$user_agent
) or warn "Usage: $0 [--ua=USER_AGENT] [URL]\n";

# A URL given on the command line is loaded right away.
if (@ARGV) {
    $url_text->set_text($ARGV[0]);
    get_html();
}

Gtk2->main;
