スクリプト
hatena-news.pl
use strict;
use warnings;
use utf8;
use Web::Scraper;
use URI;
# Front page to scrape; the same URI is also used as the base when
# resolving the href of each listed article.
my $uri = URI->new('http://news.hatelabo.jp/');

# Scraper applied to a single article page.
my $entry_scanner = scraper {
    # 'TEXT' value of the h1.article element is stored under "summary".
    process( 'h1.article', summary => 'TEXT' );

    # 'RAW' value of the div.section element is stored under "body".
    process( 'div.section', body => 'RAW' );
};
# Scraper for the front page.  Every <li> found under the table cell whose
# span text is "主なニュース" becomes one element of 'entries', and the
# scraper's result is that 'entries' value.
my $scanner = scraper {
    process(
        '//td[.//span[text()="主なニュース"]]//ul/li',
        'entries[]' => scraper {
            process(
                'a',
                title => 'TEXT',
                link  => '@href',
                info  => sub {
                    # The callback reads the matched element from $_.  Its
                    # href is resolved against the front-page URI, then the
                    # article page itself is scraped for summary and body.
                    my $article_uri = URI->new_abs( $_->attr('href'), $uri );
                    return $entry_scanner->scrape($article_uri);
                },
            );
        },
    );
    result('entries');
};
# Feed structure that is serialized later.  'entries' starts out as an empty
# list so the key is always present, even when nothing was scraped.
my $feed = {
    title   => 'はてなニュース',
    link    => $uri->as_string,
    entries => [],
};

# The scraper's result is the 'entries' value, which is undefined when the
# XPath matched nothing (e.g. the page layout changed).  Dereferencing undef
# under strict would die with an unhelpful message, so fall back to an empty
# list and say what happened instead.
my $scraped_entries = $scanner->scrape($uri) || [];
warn "no entries found at $uri\n" unless @{$scraped_entries};

for my $entry ( @{$scraped_entries} ) {
    # 'info' is missing when the list item had no link to follow.
    my $info = $entry->{info} || {};

    # Stringify the link just like the feed-level link above: the scraped
    # href may be a URI object rather than a plain string (depends on the
    # Web::Scraper version -- TODO confirm), and YAML would otherwise dump
    # it as a blessed Perl value.
    my $link = $entry->{link};

    push @{ $feed->{entries} }, {
        title   => $entry->{title},
        link    => ( defined $link ? "$link" : undef ),
        summary => $info->{summary},
        body    => $info->{body},
    };
}
use YAML;
# Apply the :utf8 layer to STDOUT, then print the feed serialized by Dump.
binmode( STDOUT, ':utf8' );
print Dump($feed);
hatena-news.yaml
# Plagger configuration: run hatena-news.pl as a custom feed source and
# publish the result as an RSS file.
global:
  log:
    level: error
plugins:
  # Subscribe to the script; adjust the path to where hatena-news.pl lives.
  - module: Subscription::Config
    config:
      feed:
        - script:///path/to/hatena-news.pl
  # Handles the script:// subscription above.
  - module: CustomFeed::Script
  # Output settings: directory, file name and feed format.
  - module: Publish::Feed
    config:
      dir: /path/to/hatena-news
      filename: hatena-news.rss
      format: RSS
※Windowsで動かす人は環境変数PATHEXTを以下の様にしておく必要あり
set PATHEXT=%PATHEXT%;.PL
あと、出力先はご自由にPublish::なんちゃらで...
ま、その内フィードが出来るだろうけど。