Copy from web site
Start a git repo independent of my web site.
This commit is contained in:
commit
ff784ba351
|
@ -0,0 +1,10 @@
|
||||||
|
AuthName hjp-work-in-progress
|
||||||
|
AuthType Basic
|
||||||
|
AuthUserFile /usr/local/www/offline/hjp/passwd
|
||||||
|
AuthGroupFile /usr/local/www/offline/hjp/group
|
||||||
|
<Limit GET>
|
||||||
|
require valid-user
|
||||||
|
</Limit>
|
||||||
|
<Limit POST>
|
||||||
|
require valid-user
|
||||||
|
</Limit>
|
|
@ -0,0 +1,223 @@
|
||||||
|
package Rss2Html::Feed;
|
||||||
|
|
||||||
|
=head1 Name
|
||||||
|
|
||||||
|
Rss2Html::Feed - a feed
|
||||||
|
|
||||||
|
=head1 Description
|
||||||
|
|
||||||
|
This class represents a single feed.
|
||||||
|
|
||||||
|
It currently has only one public method, update(), which updates the
|
||||||
|
feed.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use v5.14;
|
||||||
|
use Moose;
|
||||||
|
|
||||||
|
use Cache::Memcached;
|
||||||
|
use Encode qw(decode_utf8);
|
||||||
|
use HTTP::Date;
|
||||||
|
use XML::Atom::Client;
|
||||||
|
use XML::RAI;
|
||||||
|
|
||||||
|
with 'MooseX::Log::Log4perl';
|
||||||
|
|
||||||
|
has 'id' => (
|
||||||
|
is => 'rw',
|
||||||
|
);
|
||||||
|
has 'url' => (
|
||||||
|
is => 'rw',
|
||||||
|
);
|
||||||
|
has 'update_frequency' => (
|
||||||
|
is => 'rw',
|
||||||
|
);
|
||||||
|
has 'last_update' => (
|
||||||
|
is => 'rw',
|
||||||
|
);
|
||||||
|
has 'title' => (
|
||||||
|
is => 'rw',
|
||||||
|
);
|
||||||
|
has 'type' => (
|
||||||
|
is => 'rw',
|
||||||
|
);
|
||||||
|
has 'allow_img' => (
|
||||||
|
is => 'rw',
|
||||||
|
);
|
||||||
|
has 'expire' => (
|
||||||
|
is => 'rw',
|
||||||
|
);
|
||||||
|
|
||||||
|
has 'dbh' => (
|
||||||
|
is => 'rw',
|
||||||
|
);
|
||||||
|
|
||||||
|
my $mcd = Cache::Memcached->new(servers => ['127.0.0.1:11211']);
|
||||||
|
|
||||||
|
|
||||||
|
=head1 Methods
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
=head2 update
|
||||||
|
|
||||||
|
Update the feed. This is unconditional. The logic to decide which feeds
|
||||||
|
must be updated is in L<Rss2Html::FeedList::update_adaptive>.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub update {
|
||||||
|
my ($self) = @_;
|
||||||
|
my $dbh = $self->dbh;
|
||||||
|
|
||||||
|
my $now = int(time());
|
||||||
|
|
||||||
|
if ($self->{type} eq 'rss') {
|
||||||
|
local $@ = "unknown error";
|
||||||
|
|
||||||
|
# XML::RAI has a parse_uri method, but we explicitely fetch the URI to get better diagnostics.
|
||||||
|
# Probably should do this for atom, too, so we can autodetect the feed type.
|
||||||
|
my $ua = LWP::UserAgent->new;
|
||||||
|
$ua->agent("rss2html/1.0 (+http://www.hjp.at/projekte/rss2html/)");
|
||||||
|
my $response = $ua->get($self->{url});
|
||||||
|
if ($response->code != 200) {
|
||||||
|
$self->log->warn("GET $self->{url} failed: " . $response->status_line);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
$self->log->info("$self->{url} has content type " . $response->content_type);
|
||||||
|
|
||||||
|
my $rai = eval {XML::RAI->parse_string($response->content) };
|
||||||
|
if ($rai) {
|
||||||
|
$rai->time_format('EPOCH');
|
||||||
|
if ($rai->channel->title ne $self->{title}) {
|
||||||
|
$self->log->info("set title");
|
||||||
|
$dbh->do("update feeds set title=? where id=?", {},
|
||||||
|
$rai->channel->title, $self->{id});
|
||||||
|
}
|
||||||
|
$self->log->info("checking " . scalar(@{$rai->items}) . " items in db");
|
||||||
|
for my $item ( @{$rai->items} ) {
|
||||||
|
my $item_db = $dbh->selectrow_hashref("select * from items where link=?", {}, $item->link);
|
||||||
|
if ($item_db) {
|
||||||
|
$self->log->info("item $item_db->{id} seen");
|
||||||
|
$dbh->do("update items set seen=? where id=?", {}, $now, $item_db->{id});
|
||||||
|
} else {
|
||||||
|
$self->log->info("new");
|
||||||
|
|
||||||
|
my $issued = $item->issued // $now;
|
||||||
|
if ($issued < $self->{last_update} || $issued > $now) {
|
||||||
|
# $issued is clearly wrong. Could be any time between $self->{last_update} and $now.
|
||||||
|
# Choose a random time in the interval.
|
||||||
|
my $guess_issued = $self->{last_update} + int(rand($now - $self->{last_update}));
|
||||||
|
$self->log->info("fixing up issued time: $issued -> $guess_issued");
|
||||||
|
$issued = $guess_issued;
|
||||||
|
}
|
||||||
|
|
||||||
|
$self->log->info("item " . $item->link . " new");
|
||||||
|
$dbh->do("insert into items(title, link, content, issued, seen, feed_id) values(?, ?, ?, ?, ?, ?)",
|
||||||
|
{},
|
||||||
|
$item->title, $item->link, $item->content, $issued, $now, $self->{id}
|
||||||
|
);
|
||||||
|
$self->invalidate_item_info;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
print STDERR "error getting $self->{url}: $@\n";
|
||||||
|
}
|
||||||
|
} elsif ($self->{type} eq 'atom') {
|
||||||
|
my $api = XML::Atom::Client->new();
|
||||||
|
my $atomfeed = $api->getFeed($self->{url});
|
||||||
|
if ($atomfeed) {
|
||||||
|
my @items = $atomfeed->entries;
|
||||||
|
for my $item (@items) {
|
||||||
|
my $link = $self->atom_main_link($item);
|
||||||
|
my $item_db = $dbh->selectrow_hashref("select * from items where link=?", {}, $link);
|
||||||
|
if ($item_db) {
|
||||||
|
$self->log->info("item $item_db->{id} seen");
|
||||||
|
$dbh->do("update items set seen=? where id=?", {}, $now, $item_db->{id});
|
||||||
|
} else {
|
||||||
|
$self->log->info("new");
|
||||||
|
my $title = decode_utf8($item->title); # XXX - Workaround?
|
||||||
|
my $content = $item->content;
|
||||||
|
my $summary = $item->summary;
|
||||||
|
my $body = $content ? $content->body :
|
||||||
|
defined($summary) ? decode_utf8($summary) : undef;
|
||||||
|
|
||||||
|
$self->log->info("item " . $link . " new");
|
||||||
|
|
||||||
|
my $issued = str2time($item->updated) // $now;
|
||||||
|
if ($issued < $self->{last_update} || $issued > $now) {
|
||||||
|
# $issued is clearly wrong. Could be any time between $self->{last_update} and $now.
|
||||||
|
# Choose a random time in the interval.
|
||||||
|
my $guess_issued = $self->{last_update} + int(rand($now - $self->{last_update}));
|
||||||
|
$self->log->info("fixing up issued time: $issued -> $guess_issued");
|
||||||
|
$issued = $guess_issued;
|
||||||
|
}
|
||||||
|
|
||||||
|
$dbh->do("insert into items(title, link, content, issued, seen, feed_id) values(?, ?, ?, ?, ?, ?)",
|
||||||
|
{},
|
||||||
|
$title, $link, $body, $issued, $now, $self->{id}
|
||||||
|
);
|
||||||
|
$self->invalidate_item_info;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
print "<p>unknown type $self->{type}</p>\n";
|
||||||
|
$dbh->do("update feeds set title=?, last_update=? where id=?", {},
|
||||||
|
'unknown', $now, $self->{id});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
# mark all but the last 100 entries as old, so they aren't shown by default.
|
||||||
|
$dbh->do("update items set old=1
|
||||||
|
where id in (select id from items
|
||||||
|
where feed_id=? and (old is null or old != 1)
|
||||||
|
order by issued desc limit -1 offset 100)",
|
||||||
|
{},
|
||||||
|
$self->{id});
|
||||||
|
|
||||||
|
$dbh->do("update feeds set last_update=? where id=?", {}, $now, $self->{id});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub atom_main_link {
|
||||||
|
my ($self, $item) = @_; # XXX should probably be a method of $item
|
||||||
|
for my $link ($item->link) {
|
||||||
|
$self->log->info("checking href ", $link->href, "\n");
|
||||||
|
if (defined $link->rel && $link->rel ne 'alternate') {
|
||||||
|
$self->log->info("rel ", $link->rel, " unexpected, skip");
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
if (defined $link->type && $link->type ne 'text/html') {
|
||||||
|
$self->log->info("type ", $link->type, " unexpected, skip\n");
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
$self->log->info("match!\n");
|
||||||
|
return $link->href;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
sub invalidate_item_info {
|
||||||
|
my ($self) = @_;
|
||||||
|
|
||||||
|
my $dbh = $self->dbh;
|
||||||
|
my $users = $dbh->selectcol_arrayref("select distinct user from read");
|
||||||
|
for my $user (@$users) {
|
||||||
|
$mcd->delete($self->item_info_key($user));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub item_info_key {
|
||||||
|
my ($self, $user) = @_;
|
||||||
|
return "rss2html/item_info/" . $user . "/" . $self->id;
|
||||||
|
}
|
||||||
|
|
||||||
|
#---
|
||||||
|
__PACKAGE__->meta->make_immutable;
|
||||||
|
1;
|
||||||
|
|
||||||
|
# vim: tw=132
|
|
@ -0,0 +1,131 @@
|
||||||
|
package Rss2Html::FeedList;
|
||||||
|
use warnings;
|
||||||
|
use strict;
|
||||||
|
use Moose;
|
||||||
|
use namespace::autoclean;
|
||||||
|
use DBI;
|
||||||
|
use Rss2Html::Feed;
|
||||||
|
use POSIX qw(strftime);
|
||||||
|
|
||||||
|
with 'MooseX::Log::Log4perl';
|
||||||
|
|
||||||
|
|
||||||
|
has 'dbh' => (
|
||||||
|
is => 'ro',
|
||||||
|
writer => '_set_dbh',
|
||||||
|
);
|
||||||
|
|
||||||
|
sub BUILD {
|
||||||
|
my ($self, $args) = @_;
|
||||||
|
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "",
|
||||||
|
{
|
||||||
|
sqlite_use_immediate_transaction => 1,
|
||||||
|
});
|
||||||
|
$dbh->{sqlite_unicode} = 1;
|
||||||
|
$self->_set_dbh($dbh);
|
||||||
|
}
|
||||||
|
|
||||||
|
sub feeds {
|
||||||
|
my ($self) = @_;
|
||||||
|
my $dbh = $self->dbh;
|
||||||
|
|
||||||
|
no autovivification qw(fetch);
|
||||||
|
|
||||||
|
my $feeds_sql = $dbh->selectall_arrayref("select * from feeds where update_frequency is not null", { Slice => {} });
|
||||||
|
my @feeds;
|
||||||
|
for (@$feeds_sql) {
|
||||||
|
push @feeds, Rss2Html::Feed->new(%$_, dbh => $dbh);
|
||||||
|
}
|
||||||
|
return \@feeds;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub update_adaptive {
|
||||||
|
my ($self) = @_;
|
||||||
|
my $dbh = $self->dbh;
|
||||||
|
my $start = time();
|
||||||
|
|
||||||
|
no autovivification qw(fetch);
|
||||||
|
|
||||||
|
$dbh->begin_work;
|
||||||
|
|
||||||
|
my $feeds = $self->feeds;
|
||||||
|
my $seconds_per_week = 86400 * 7;
|
||||||
|
for my $feed (@$feeds) {
|
||||||
|
$feed->{title} //= "-";
|
||||||
|
#print "<p>feed: $feed->{id} $feed->{title}<br/>\n";
|
||||||
|
$self->log->info("feed: $feed->{id} $feed->{title}");
|
||||||
|
my $now = time();
|
||||||
|
my $run_time = $now - $start;
|
||||||
|
my $update_time = $now - $feed->{last_update};
|
||||||
|
$self->log->info(strftime("%Y-%m-%d %a %H:%M:%S", localtime($feed->{last_update})), " .. ",
|
||||||
|
strftime("%Y-%m-%d %a %H:%M:%S", localtime($now)), ": $update_time seconds");
|
||||||
|
#print "\t", "update_time: $update_time<br/>\n";
|
||||||
|
|
||||||
|
# We are guaranteed to have all items which were issued in the last
|
||||||
|
# int($feed->{expire} / 7) weeks since we expire only those not seen for
|
||||||
|
# more than $feed->{expire} days. Some feeds keep some items for a long
|
||||||
|
# time while removing others quite quickly, so statistics for older
|
||||||
|
# items may be severely skewed and are not safe to use.
|
||||||
|
my $interval = int($feed->{expire} / 7) * 7 * 86400;
|
||||||
|
|
||||||
|
# We measure those weeks backwards from the last update.
|
||||||
|
# If we measured from now we would almost always miss the update from
|
||||||
|
# exactly 4 weeks ago (one cycle we are too early, the next we are too
|
||||||
|
# late).
|
||||||
|
my $start = $feed->{last_update} - $interval;
|
||||||
|
my $items = $dbh->selectall_arrayref("select * from items where feed_id=? and issued >= ? order by issued",
|
||||||
|
{ Slice => {} },
|
||||||
|
$feed->{id}, $start,
|
||||||
|
);
|
||||||
|
my $first_issued = $items->[0]{issued} // $start;
|
||||||
|
my $total_time = $now - $first_issued;
|
||||||
|
my $updates_in_interval = 0;
|
||||||
|
for my $item (@$items) {
|
||||||
|
my $t = $item->{issued};
|
||||||
|
my $dt1 = $now - $t;
|
||||||
|
my $dt2 = $dt1 % $seconds_per_week;
|
||||||
|
if ($dt2 < $update_time) {
|
||||||
|
$updates_in_interval++;
|
||||||
|
$self->log->info("found $item->{id} ", strftime("%Y-%m-%d %a %H:%M:%S", localtime($t)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$self->log->info("updates_in_interval: $updates_in_interval");
|
||||||
|
$updates_in_interval *= $seconds_per_week / $total_time; # normalize to one week
|
||||||
|
$self->log->info("updates_in_interval: $updates_in_interval");
|
||||||
|
$updates_in_interval += $update_time / $seconds_per_week; # add one update per week
|
||||||
|
$self->log->info("updates_in_interval: $updates_in_interval");
|
||||||
|
#print "\t", "updates_in_interval: $updates_in_interval<br/>\n";
|
||||||
|
if ($updates_in_interval > 0.5) {
|
||||||
|
$self->log->info($feed->{url});
|
||||||
|
$self->log->info("need to update " . $feed->{url});
|
||||||
|
$feed->update;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$self->cleanup();
|
||||||
|
|
||||||
|
$dbh->commit;
|
||||||
|
|
||||||
|
$self->log->info("update done");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub cleanup {
|
||||||
|
my ($self) = @_;
|
||||||
|
my $dbh = $self->dbh;
|
||||||
|
|
||||||
|
my $now = time();
|
||||||
|
my $feeds = $dbh->selectall_arrayref("select * from feeds", { Slice => {} });
|
||||||
|
|
||||||
|
for my $feed (@$feeds) {
|
||||||
|
my $expire_days = $feed->{expire} // 30;
|
||||||
|
my $seen_limit = $now - $expire_days * 86400;
|
||||||
|
my $rc = $dbh->do("delete from items where feed_id=? and seen < ?", {}, $feed->{id}, $seen_limit);
|
||||||
|
$self->log->info("cleanup of feed $feed->{id} (limit $seen_limit) returned $rc");
|
||||||
|
}
|
||||||
|
$dbh->do("delete from read where not exists (select id from items where id=item_id)");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#---
|
||||||
|
__PACKAGE__->meta->make_immutable;
|
||||||
|
1;
|
|
@ -0,0 +1,31 @@
|
||||||
|
package Rss2Html::Scrubber;
|
||||||
|
use warnings;
|
||||||
|
use strict;
|
||||||
|
use parent 'HTML::Scrubber';
|
||||||
|
|
||||||
|
sub new {
|
||||||
|
my ($class, %options) = @_;
|
||||||
|
my $self = HTML::Scrubber->new();
|
||||||
|
$self->deny(qw(link));
|
||||||
|
$self->allow(qw(p a em strong b i ul ol li dl dt dd br));
|
||||||
|
my %rules = (
|
||||||
|
a => {
|
||||||
|
href => qr{^https?://}i,
|
||||||
|
'*' => 0,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
if ($options{allow_img}) {
|
||||||
|
$rules{img} = {
|
||||||
|
src => qr{^https?://}i,
|
||||||
|
alt => 1,
|
||||||
|
'*' => 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
$self->rules(
|
||||||
|
%rules
|
||||||
|
);
|
||||||
|
bless $self, $class;
|
||||||
|
return $self;
|
||||||
|
}
|
||||||
|
|
||||||
|
1;
|
|
@ -0,0 +1,66 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use v5.14;
|
||||||
|
|
||||||
|
use XML::RAI;
|
||||||
|
use XML::Atom::Client;
|
||||||
|
use DBI;
|
||||||
|
use CGI;
|
||||||
|
use Rss2Html::Scrubber;
|
||||||
|
use Time::HiRes qw(time);
|
||||||
|
use POSIX qw(strftime);
|
||||||
|
|
||||||
|
$| = 1;
|
||||||
|
|
||||||
|
my $q = CGI->new();
|
||||||
|
binmode STDOUT, ":encoding(UTF-8)";
|
||||||
|
|
||||||
|
my $feed_id = $q->param('id');
|
||||||
|
|
||||||
|
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||||
|
$dbh->{sqlite_unicode} = 1;
|
||||||
|
|
||||||
|
print "Content-Type: text/html; charset=utf-8\n";
|
||||||
|
print "Refresh: 600\n";
|
||||||
|
print "\n";
|
||||||
|
|
||||||
|
print "<link rel='stylesheet' type='text/css' href='rss2html.css'/>\n";
|
||||||
|
print "<h1>RSS 2 HTML</h1>\n";
|
||||||
|
print "<p>", strftime("%Y-%m-%d %H:%M:%S%z", localtime()), "</p>\n";
|
||||||
|
|
||||||
|
my $feed = $dbh->selectrow_hashref("select * from feeds where id=?", {}, $feed_id);
|
||||||
|
|
||||||
|
print "<span class='feed_info'>\n";
|
||||||
|
print "<span class='feed_id'>", $q->escapeHTML($feed->{id}), "</span>\n";
|
||||||
|
print "<span class='feed_title'>", $q->escapeHTML($feed->{title}), "</span>\n";
|
||||||
|
print "<span class='feed_type'>", $q->escapeHTML($feed->{type}), "</span>\n";
|
||||||
|
print "</span>\n";
|
||||||
|
|
||||||
|
my $now = time();
|
||||||
|
my $seconds_per_week = 86400 * 7;
|
||||||
|
my $start = $now - 4 * $seconds_per_week;
|
||||||
|
my $items = $dbh->selectall_arrayref("select * from items where feed_id=? and issued >= ? order by issued",
|
||||||
|
{ Slice => {} },
|
||||||
|
$feed->{id}, $start,
|
||||||
|
);
|
||||||
|
for my $item (@$items) {
|
||||||
|
my $t = $item->{issued};
|
||||||
|
my $dt1 = $now - $t;
|
||||||
|
my $dt2 = $dt1 % $seconds_per_week;
|
||||||
|
$item->{dt2} = $dt2;
|
||||||
|
}
|
||||||
|
$items = [ sort { $a->{dt2} <=> $b->{dt2} } @$items ];
|
||||||
|
print "<table>\n";
|
||||||
|
for my $item (@$items) {
|
||||||
|
my $t = $now - $item->{dt2};
|
||||||
|
print"<tr>\n";
|
||||||
|
print"<td>", $item->{dt2}, "</td>\n";
|
||||||
|
print"<td>", strftime("%a %H:%M:%S", localtime($t)), "</td>\n";
|
||||||
|
print"<td>", "<a href='", $q->escapeHTML($item->{link}), "'>", $q->escapeHTML($item->{title}), "</a>", "</td>\n";
|
||||||
|
print"</tr>\n";
|
||||||
|
|
||||||
|
}
|
||||||
|
print "</table>\n";
|
||||||
|
# vim: expandtab
|
|
@ -0,0 +1,426 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
# Parameters:
|
||||||
|
#
|
||||||
|
# mark=id
|
||||||
|
# mark item id as read
|
||||||
|
#
|
||||||
|
# redir=id
|
||||||
|
# redirect to url of item id and mark item as read
|
||||||
|
#
|
||||||
|
# fls=(all|unread)
|
||||||
|
# "feed list show": Show all feeds or only feeds with unread items in feed list
|
||||||
|
#
|
||||||
|
# fis= comma separated list of feed ids
|
||||||
|
# show only listed feeds
|
||||||
|
# BUG: fis is only effective if sr is non-zero!
|
||||||
|
#
|
||||||
|
# sr=n
|
||||||
|
# show last n read items. Note: Currently these are the n read items with the
|
||||||
|
# highest ids, which may not be the last items which have been read. Need to
|
||||||
|
# store the read date for that.
|
||||||
|
#
|
||||||
|
# so=1
|
||||||
|
# also show old items.
|
||||||
|
#
|
||||||
|
# sl=1
|
||||||
|
# show items marked as "later".
|
||||||
|
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use v5.14;
|
||||||
|
use utf8;
|
||||||
|
use experimental 'smartmatch'; # for given/when
|
||||||
|
use experimental 'autoderef';
|
||||||
|
|
||||||
|
use CGI;
|
||||||
|
use Cache::Memcached;
|
||||||
|
use DBI;
|
||||||
|
use Data::Dumper;
|
||||||
|
use Encode qw(encode_utf8 decode_utf8);
|
||||||
|
use POSIX qw(strftime);
|
||||||
|
use Rss2Html::Scrubber;
|
||||||
|
use Time::HiRes qw(time);
|
||||||
|
|
||||||
|
$| = 1;
|
||||||
|
|
||||||
|
my $start = time();
|
||||||
|
|
||||||
|
my $q = CGI->new();
|
||||||
|
binmode STDOUT, ":encoding(UTF-8)";
|
||||||
|
my $mcd = Cache::Memcached->new(servers => ['127.0.0.1:11211']);
|
||||||
|
|
||||||
|
if ($q->param('mark')) {
|
||||||
|
mark();
|
||||||
|
} elsif ($q->param('redir')) {
|
||||||
|
redirect();
|
||||||
|
} else {
|
||||||
|
list();
|
||||||
|
}
|
||||||
|
|
||||||
|
sub redirect {
|
||||||
|
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "",
|
||||||
|
{
|
||||||
|
sqlite_use_immediate_transaction => 1,
|
||||||
|
});
|
||||||
|
$dbh->{sqlite_unicode} = 1;
|
||||||
|
my $item_id = $q->param('redir');
|
||||||
|
my $item = $dbh->selectrow_hashref("select * from items where id=?", {}, $item_id);
|
||||||
|
if ($item) {
|
||||||
|
print "Status: 302\n";
|
||||||
|
print "Location: $item->{link}\n";
|
||||||
|
print "\n";
|
||||||
|
$dbh->do("insert into read(user, item_id) values(?, ?)", {}, $q->remote_user, $item_id);
|
||||||
|
$mcd->delete(item_info_key($q->remote_user, $item->{feed_id}));
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
print "Status: 404\n";
|
||||||
|
print "Content-Type: text/html; charset=utf-8\n";
|
||||||
|
print "\n";
|
||||||
|
print "not found\n";
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
sub mark {
|
||||||
|
print_log("mark start");
|
||||||
|
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||||
|
$dbh->{sqlite_unicode} = 1;
|
||||||
|
my $item_id = $q->param('mark');
|
||||||
|
my $item = $dbh->selectrow_hashref("select * from items where id=?", {}, $item_id);
|
||||||
|
if ($item) {
|
||||||
|
my $q1 = CGI->new($q);
|
||||||
|
$q1->delete('mark');
|
||||||
|
print "Status: 302\n";
|
||||||
|
print "Location: ", $q1->self_url, "\n";
|
||||||
|
print "\n";
|
||||||
|
$dbh->do("insert into read(user, item_id) values(?, ?)", {}, $q->remote_user, $item_id);
|
||||||
|
$mcd->delete(item_info_key($q->remote_user, $item->{feed_id}));
|
||||||
|
print_log("mark done");
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
print "Status: 404\n";
|
||||||
|
print "Content-Type: text/html; charset=utf-8\n";
|
||||||
|
print "\n";
|
||||||
|
print "not found\n";
|
||||||
|
print_log("mark failed");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
sub list {
|
||||||
|
|
||||||
|
my $feed_list_show = $q->param('fls') // "all";
|
||||||
|
my $feed_item_show = $q->param('fis') // '';
|
||||||
|
my $show_read = $q->param('sr');
|
||||||
|
my $show_old = $q->param('so');
|
||||||
|
my $show_later = $q->param('sl');
|
||||||
|
my %feed_item_show = map(($_+0 => 1), split(/,/, $feed_item_show));
|
||||||
|
|
||||||
|
|
||||||
|
print "Content-Type: text/html; charset=utf-8\n";
|
||||||
|
print "Refresh: 600\n";
|
||||||
|
print "\n";
|
||||||
|
|
||||||
|
print "<meta name='viewport' content='width=device-width, initial-scale=1' />\n";
|
||||||
|
print "<link rel='stylesheet' type='text/css' href='rss2html.css'/>\n";
|
||||||
|
print "<h1>RSS 2 HTML</h1>\n";
|
||||||
|
print "<div class='lastupdate'>", strftime("%Y-%m-%d %H:%M:%S%z", localtime()), "</div>\n";
|
||||||
|
|
||||||
|
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||||
|
$dbh->{sqlite_unicode} = 1;
|
||||||
|
|
||||||
|
print_log("list_feeds start");
|
||||||
|
list_feeds($dbh, $feed_list_show, $show_old);
|
||||||
|
print_log("list_feeds done");
|
||||||
|
|
||||||
|
my $items;
|
||||||
|
if ($show_read) {
|
||||||
|
my $fields = "feeds.id as feed_id, feeds.title as feed_title, allow_img, link, items.title as item_title, content, items.id as item_id, issued, user, lang";
|
||||||
|
my $tables = "items
|
||||||
|
join feeds on items.feed_id=feeds.id
|
||||||
|
left outer join read on (items.id=read.item_id and user=?)";
|
||||||
|
my @where;
|
||||||
|
if (%feed_item_show) {
|
||||||
|
push @where, "feeds.id in (" . join(',', keys %feed_item_show) . ")";
|
||||||
|
}
|
||||||
|
if (!$show_old) {
|
||||||
|
push @where, "(items.old is null or items.old == 0)"
|
||||||
|
}
|
||||||
|
|
||||||
|
my $where = "where " . join(" and ", @where);
|
||||||
|
|
||||||
|
my $cmd = "select $fields from $tables $where order by issued";
|
||||||
|
print_log("$cmd");
|
||||||
|
my $items1 = $dbh->selectall_arrayref($cmd, { Slice => {} }, $q->remote_user);
|
||||||
|
my $nrd = 0;
|
||||||
|
while (@$items1) {
|
||||||
|
my $item = pop @$items1;
|
||||||
|
{
|
||||||
|
no warnings 'uninitialized';
|
||||||
|
print_log("id=$item->{item_id}, user=$item->{user}");
|
||||||
|
}
|
||||||
|
if (!$item->{user}) {
|
||||||
|
print_log("id=$item->{item_id}: unread");
|
||||||
|
unshift @$items, $item;
|
||||||
|
} elsif ($nrd < $show_read) {
|
||||||
|
print_log("id=$item->{item_id}: $nrd < $show_read");
|
||||||
|
unshift @$items, $item;
|
||||||
|
$nrd++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
my @where = ("user is null");
|
||||||
|
|
||||||
|
if (!$show_old) {
|
||||||
|
push @where, "(items.old is null or items.old == 0)"
|
||||||
|
}
|
||||||
|
|
||||||
|
my $where = "where " . join(" and ", @where);
|
||||||
|
|
||||||
|
|
||||||
|
$items = $dbh->selectall_arrayref(
|
||||||
|
"select feeds.id as feed_id, feeds.title as feed_title, allow_img, link, items.title as item_title, content, items.id as item_id, issued, lang
|
||||||
|
from items
|
||||||
|
join feeds on items.feed_id=feeds.id
|
||||||
|
left outer join read on (items.id=read.item_id and user=?)
|
||||||
|
$where
|
||||||
|
order by issued",
|
||||||
|
{ Slice => {} }, $q->remote_user);
|
||||||
|
}
|
||||||
|
my $feeds = $dbh->selectall_arrayref(
|
||||||
|
"select id, title from feeds where update_frequency is not null order by id",
|
||||||
|
{ Slice => {} }
|
||||||
|
);
|
||||||
|
for my $f (@$feeds) {
|
||||||
|
$f->{selected} = 1 if $feed_item_show{$f->{id}};
|
||||||
|
}
|
||||||
|
print_itemlist($items, $feeds);
|
||||||
|
print_log("list done");
|
||||||
|
}
|
||||||
|
|
||||||
|
sub print_itemlist {
|
||||||
|
my ($items, $feeds) = @_;
|
||||||
|
print "<div class='itemlist'>\n";
|
||||||
|
print "<form action='./'>\n";
|
||||||
|
print "<select name='fis'>\n";
|
||||||
|
print "<option value=''>\n";
|
||||||
|
for my $f (@$feeds) {
|
||||||
|
print "<option value='" . $q->escapeHTML($f->{id}) . "' " . ($f->{selected} ? "selected='selected'" : "") .">" . $q->escapeHTML($f->{title}) . "</option>\n";
|
||||||
|
}
|
||||||
|
print "</select>\n";
|
||||||
|
my $fls = $q->param('fls');
|
||||||
|
print "<input type='hidden' name='fls' value='" . $q->escapeHTML($fls) . "'>\n" if defined $fls;
|
||||||
|
print "<input type='hidden' name='sr' value='10'>\n";
|
||||||
|
print "<input type='submit' value='»'>\n";
|
||||||
|
print "</form>\n";
|
||||||
|
my $n_items = 0;
|
||||||
|
my $n_html_mcd = 0;
|
||||||
|
my $n_scrub_mcd = 0;
|
||||||
|
my $n_scrub = 0;
|
||||||
|
print_log("print_itemlist: \$q=" . $q->self_url);
|
||||||
|
my $q1 = CGI->new($q);
|
||||||
|
print_log("print_itemlist: \$q1=" . $q1->self_url . " (before loop)");
|
||||||
|
print_log(scalar @$items . " before remix");
|
||||||
|
$items = remix($items);
|
||||||
|
print_log(scalar @$items . " after remix");
|
||||||
|
for my $item (@$items) {
|
||||||
|
$n_items++;
|
||||||
|
my $is_read = defined($item->{user});
|
||||||
|
my $scrubbed_content = $mcd->get(scrubbed_content_key($item->{item_id}));
|
||||||
|
if ($scrubbed_content) {
|
||||||
|
$scrubbed_content = decode_utf8($scrubbed_content);
|
||||||
|
$n_scrub_mcd++;
|
||||||
|
} else {
|
||||||
|
my $scrubber = Rss2Html::Scrubber->new(allow_img => $item->{allow_img});
|
||||||
|
$scrubbed_content = $scrubber->scrub($item->{content});
|
||||||
|
$mcd->set(scrubbed_content_key($item->{item_id}), encode_utf8($scrubbed_content), 3600);
|
||||||
|
$n_scrub++;
|
||||||
|
}
|
||||||
|
$q1->param('mark', $item->{item_id});
|
||||||
|
my $item_class = 'item' . ($is_read ? ' read' : '');
|
||||||
|
my $langattr = defined $item->{lang} ? "lang='$item->{lang}'" : "";
|
||||||
|
my $html = "";
|
||||||
|
$html .= "<div class='$item_class' $langattr>\n";
|
||||||
|
$html .= "<span class='itemno'>" . $item->{item_id} . "</span>\n";
|
||||||
|
$html .= "<span class='issued'>" . strftime('%Y-%m-%d %H:%M:%S', localtime($item->{issued})) . "</span>\n";
|
||||||
|
unless ($is_read) {
|
||||||
|
print_log("print_itemlist: \$q1=" . $q1->self_url . " (in loop)");
|
||||||
|
$html .= "<div class='op'><a href='" . $q->escapeHTML($q1->self_url) . "'>Mark read</a></div>\n";
|
||||||
|
}
|
||||||
|
$html .= "<div class='feed'>" . $q->escapeHTML($item->{feed_title}) . "</div>\n";
|
||||||
|
$html .= "<h2><a href='./?redir=" . $q->escapeHTML($item->{item_id}) . "'>" . $q->escapeHTML($item->{item_title}) . "</a></h2>\n";
|
||||||
|
$html .= "<div class='content'>" . $scrubbed_content . "</div>\n";
|
||||||
|
unless ($is_read) {
|
||||||
|
$html .= "<div class='op'><a href='" . $q->escapeHTML($q1->self_url) . "'>Mark read</a></div>\n";
|
||||||
|
}
|
||||||
|
$html .= "<div class='end'></div>\n";
|
||||||
|
$html .= "</div\n>";
|
||||||
|
print $html;
|
||||||
|
|
||||||
|
}
|
||||||
|
print "</div>\n";
|
||||||
|
print_log("itemlist: $n_items items ($n_html_mcd html cached, $n_scrub_mcd scrubbed content cached, $n_scrub scrubbed)");
|
||||||
|
}
|
||||||
|
|
||||||
|
sub remix {
|
||||||
|
my ($items) = @_;
|
||||||
|
|
||||||
|
my $queues = {};
|
||||||
|
for my $item (@$items) {
|
||||||
|
if (!defined $item->{feed_id}) {
|
||||||
|
local $Data::Dumper::Indent = 0;
|
||||||
|
print_log("item w/o feed_id: " . Dumper($item));
|
||||||
|
}
|
||||||
|
$queues->{$item->{feed_id}} //= [];
|
||||||
|
push $queues->{$item->{feed_id}}, $item;
|
||||||
|
}
|
||||||
|
my @ordered_feeds = sort { $queues->{$a}[0]{issued} <=> $queues->{$b}[0]{issued} } keys $queues;
|
||||||
|
my $new_items = [];
|
||||||
|
my $found;
|
||||||
|
do {
|
||||||
|
$found = 0;
|
||||||
|
for my $feed (@ordered_feeds) {
|
||||||
|
my $item = shift $queues->{$feed};
|
||||||
|
if ($item) {
|
||||||
|
push $new_items, $item;
|
||||||
|
$found++;
|
||||||
|
print_log("found an item in feed $feed. " . scalar(@{$queues->{$feed}}) . " items left")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while ($found);
|
||||||
|
|
||||||
|
return $new_items;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub list_feeds {
|
||||||
|
my ($dbh, $feed_list_show, $show_old) = @_;
|
||||||
|
|
||||||
|
no autovivification qw(fetch);
|
||||||
|
|
||||||
|
my $q1 = CGI->new($q);
|
||||||
|
my $other = $feed_list_show eq 'all' ? 'unread' : 'all';
|
||||||
|
$q1->param('fls', $other);
|
||||||
|
print "<div><a href='", $q->escapeHTML($q1->self_url), "'>$other</a></div>\n";
|
||||||
|
print "<div class='feedlist'>\n";
|
||||||
|
$dbh->begin_work;
|
||||||
|
|
||||||
|
my $feeds = $dbh->selectall_arrayref("select * from feeds where update_frequency is not null", { Slice => {} });
|
||||||
|
my $seconds_per_week = 86400 * 7;
|
||||||
|
for my $feed (@$feeds) {
|
||||||
|
$feed->{title} //= "-";
|
||||||
|
print_item_info($dbh, $feed, $feed_list_show, $show_old);
|
||||||
|
|
||||||
|
}
|
||||||
|
$dbh->commit;
|
||||||
|
|
||||||
|
print "</div>\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub print_item_info {
|
||||||
|
my ($dbh, $feed, $feed_list_show, $show_old) = @_;
|
||||||
|
|
||||||
|
my $item_info_key = item_info_key($q->remote_user, $feed->{id});
|
||||||
|
my $item_info = $mcd->get($item_info_key);
|
||||||
|
my $cmd
|
||||||
|
= "select count(i.id) as nr_items, max(i.issued) as last_issued, max(i.seen) as last_seen, count(r.item_id) as nr_read
|
||||||
|
from items i left outer join (select item_id from read where user=?) r on i.id=r.item_id
|
||||||
|
where i.feed_id=?";
|
||||||
|
unless ($show_old) {
|
||||||
|
$cmd .= " and (old is null or old = 0)";
|
||||||
|
}
|
||||||
|
unless ($item_info) {
|
||||||
|
$item_info
|
||||||
|
= $dbh->selectrow_hashref(
|
||||||
|
$cmd,
|
||||||
|
{},
|
||||||
|
$q->remote_user, $feed->{id}
|
||||||
|
);
|
||||||
|
$mcd->set($item_info_key, $item_info, 3600);
|
||||||
|
}
|
||||||
|
my $now = time();
|
||||||
|
my $unread = $item_info->{nr_items} - $item_info->{nr_read};
|
||||||
|
print_log("print_item_info: feed $feed->{id}, nr_items=$item_info->{nr_items}, read=$item_info->{nr_read}, unread=$unread");
|
||||||
|
return unless ($unread || $feed_list_show eq 'all');
|
||||||
|
|
||||||
|
my $q1 = CGI->new($q);
|
||||||
|
$q1->param('fis', $feed->{id});
|
||||||
|
$q1->param('sr', 10);
|
||||||
|
print "<span class='feed_info'>\n";
|
||||||
|
print "<span class='feed_id'>", $q->escapeHTML($feed->{id}), "</span>\n";
|
||||||
|
print "<span class='feed_title'>",
|
||||||
|
"<a href='", $q1->escapeHTML($q1->self_url), "'>",
|
||||||
|
$q->escapeHTML($feed->{title}),
|
||||||
|
"</a>",
|
||||||
|
"</span>\n";
|
||||||
|
print "<span class='feed_items'>", $item_info->{nr_items}, "</span>\n";
|
||||||
|
print "<span class='feed_unread'>", $unread, "</span>\n";
|
||||||
|
print "<span class='feed_lastissued'>", delta_t($item_info->{last_issued}, $now), "</span>\n";
|
||||||
|
print "<span class='feed_lastseen'>", delta_t($item_info->{last_seen}, $now), "</span>\n";
|
||||||
|
print "</span>\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub print_log {
|
||||||
|
my $msg = "@_";
|
||||||
|
my $now = time();
|
||||||
|
printf STDERR "%s: %s.%06d %f: %s\n", $0, strftime("%H:%M:%S", localtime($now)), ($now - int($now)) * 1E6, $now - $start, $msg;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub delta_t {
|
||||||
|
my ($t1, $t2) = @_;
|
||||||
|
return "N/A" unless defined $t1 && defined $t2;
|
||||||
|
my $dt = $t2 - $t1;
|
||||||
|
my $s = "";
|
||||||
|
if ($dt < 0) {
|
||||||
|
$s .= "-";
|
||||||
|
$dt = -$dt;
|
||||||
|
}
|
||||||
|
given ($dt) {
|
||||||
|
when ($_ >= 86400 * 10) {
|
||||||
|
$s .= sprintf("%.0fd", $_ / 86400);
|
||||||
|
}
|
||||||
|
when ($_ >= 86400) {
|
||||||
|
$s .= sprintf("%.1fd", $_ / 86400);
|
||||||
|
}
|
||||||
|
when ($_ >= 3600 * 10) {
|
||||||
|
$s .= sprintf("%.0fh", $_ / 3600);
|
||||||
|
}
|
||||||
|
when ($_ >= 3600) {
|
||||||
|
$s .= sprintf("%.1fh", $_ / 3600);
|
||||||
|
}
|
||||||
|
when ($_ >= 60 * 10) {
|
||||||
|
$s .= sprintf("%.0fm", $_ / 60);
|
||||||
|
}
|
||||||
|
when ($_ >= 60) {
|
||||||
|
$s .= sprintf("%.1fm", $_ / 60);
|
||||||
|
}
|
||||||
|
when ($_ >= 10) {
|
||||||
|
$s .= sprintf("%.0fs", $_);
|
||||||
|
}
|
||||||
|
default {
|
||||||
|
$s .= sprintf("%.1fs", $_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub item_info_key {
|
||||||
|
my ($user, $feed_id) = @_;
|
||||||
|
return "rss2html/item_info/$user/$feed_id";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub scrubbed_content_key {
|
||||||
|
my ($item_id) = @_;
|
||||||
|
return "rss2html/scrubbed_content/$item_id";
|
||||||
|
}
|
||||||
|
|
||||||
|
sub item_html_key {
|
||||||
|
my ($item_id, $is_read) = @_;
|
||||||
|
return "rss2html/item_html/$item_id/$is_read";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# vim: tw=132 expandtab
|
|
@ -0,0 +1,12 @@
|
||||||
|
log4perl.logger = DEBUG, RotFileApp
|
||||||
|
log4perl.appender.FileApp = Log::Log4perl::Appender::File
|
||||||
|
log4perl.appender.FileApp.filename = test.log
|
||||||
|
log4perl.appender.FileApp.layout = PatternLayout
|
||||||
|
log4perl.appender.FileApp.layout.ConversionPattern = %d [%p] %P %c %m%n
|
||||||
|
log4perl.appender.RotFileApp = Log::Dispatch::FileRotate
|
||||||
|
log4perl.appender.RotFileApp.filename = test.log
|
||||||
|
log4perl.appender.RotFileApp.layout = PatternLayout
|
||||||
|
log4perl.appender.RotFileApp.layout.ConversionPattern = %d [%p] %P %c %m%n
|
||||||
|
log4perl.appender.RotFileApp.max = 7
|
||||||
|
log4perl.appender.RotFileApp.mode = append
|
||||||
|
log4perl.appender.RotFileApp.binmode = :encoding(UTF-8)
|
|
@ -0,0 +1,55 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
use warnings;
|
||||||
|
use strict;
|
||||||
|
use TimeSeries;
|
||||||
|
|
||||||
|
my $wanted_feed_id = shift;
|
||||||
|
|
||||||
|
my $current_feed_id = -1;
|
||||||
|
my @updates_in_interval;
|
||||||
|
|
||||||
|
my $ts = TimeSeries->new(style => 'impulses');
|
||||||
|
$ts->legend('old', 'new');
|
||||||
|
$ts->finalresolution(100);
|
||||||
|
|
||||||
|
my @files = glob("test.log*");
|
||||||
|
@files = sort { -M $b <=> -M $a } @files;
|
||||||
|
|
||||||
|
my $current;
|
||||||
|
my @updates;
|
||||||
|
for my $file (@files) {
|
||||||
|
open my $fh, '<', $file;
|
||||||
|
while (<$fh>) {
|
||||||
|
|
||||||
|
if (/ Rss2Html.FeedList feed: (\d+) .*/) {
|
||||||
|
$current_feed_id = $1;
|
||||||
|
}
|
||||||
|
|
||||||
|
next unless $current_feed_id == $wanted_feed_id;
|
||||||
|
|
||||||
|
if (/Rss2Html.FeedList [ ]
|
||||||
|
\d\d\d\d-\d\d-\d\d [ ] \w\w\w [ ] \d\d:\d\d:\d\d
|
||||||
|
[ ] \.\. [ ]
|
||||||
|
(\d\d\d\d-\d\d-\d\d) [ ] \w\w\w [ ] (\d\d:\d\d:\d\d)
|
||||||
|
: [ ]
|
||||||
|
(\d+) [ ] seconds
|
||||||
|
/x
|
||||||
|
) {
|
||||||
|
$current = { time => "$1T$2", interval => $3 };
|
||||||
|
}
|
||||||
|
if (/Rss2Html.FeedList need to update/) {
|
||||||
|
push @updates, $current;
|
||||||
|
}
|
||||||
|
if (/Rss2Html.Feed item .* new/) {
|
||||||
|
$current->{new}++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (@updates) {
|
||||||
|
$ts->add_timestring($_->{time},
|
||||||
|
$_->{new} ? (0, $_->{interval})
|
||||||
|
: ($_->{interval}, 0 )
|
||||||
|
);
|
||||||
|
}
|
||||||
|
print $ts->plot;
|
|
@ -0,0 +1,47 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
use warnings;
|
||||||
|
use strict;
|
||||||
|
use TimeSeries;
|
||||||
|
|
||||||
|
my $wanted_feed_id = shift;
|
||||||
|
|
||||||
|
my $current_feed_id = -1;
|
||||||
|
my $current_time;
|
||||||
|
my @updates_in_interval;
|
||||||
|
my $update_count;
|
||||||
|
|
||||||
|
my $ts = TimeSeries->new(style => 'points');
|
||||||
|
$ts->legend('raw', 'adjusted');
|
||||||
|
$ts->finalresolution(100);
|
||||||
|
|
||||||
|
my @files = glob("/var/log/roxen/hjp/cgi.????-??-??");
|
||||||
|
@files = @files[-7..-1];
|
||||||
|
for my $file (@files) {
|
||||||
|
open my $fh, '<', $file;
|
||||||
|
while (<$fh>) {
|
||||||
|
|
||||||
|
if (/: \d+\.\d+: feed: (\d+) .*/) {
|
||||||
|
$current_feed_id = $1;
|
||||||
|
$update_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
next unless $current_feed_id == $wanted_feed_id;
|
||||||
|
|
||||||
|
if (/: [ ]
|
||||||
|
\d\d\d\d-\d\d-\d\d [ ] \w\w\w [ ] \d\d:\d\d:\d\d
|
||||||
|
[ ][ ] \.\. [ ][ ]
|
||||||
|
(\d\d\d\d-\d\d-\d\d) [ ] \w\w\w [ ] (\d\d:\d\d:\d\d)
|
||||||
|
/x
|
||||||
|
) {
|
||||||
|
$current_time = "$1T$2";
|
||||||
|
}
|
||||||
|
if (/updates_in_interval: ([-+\d.e]+)/) {
|
||||||
|
$updates_in_interval[$update_count++] = $1;
|
||||||
|
if ($update_count == 3) {
|
||||||
|
#print "$current_time\t$updates_in_interval[1]\t$updates_in_interval[2]\n";
|
||||||
|
$ts->add_timestring($current_time, @updates_in_interval[1, 2]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
print $ts->plot;
|
|
@ -0,0 +1,162 @@
|
||||||
|
.item {
|
||||||
|
border-style: none;
|
||||||
|
border-width: 1px;
|
||||||
|
background-color: #F6F6FF;
|
||||||
|
margin: 1em;
|
||||||
|
hyphens: auto;
|
||||||
|
-moz-hyphens: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.item:after {
|
||||||
|
display: block;
|
||||||
|
clear: both;
|
||||||
|
content: "";
|
||||||
|
background-color: #00F;
|
||||||
|
height: 6px;
|
||||||
|
background: linear-gradient(#F6F6FF, #EEEEFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
.op {
|
||||||
|
float: right;
|
||||||
|
padding: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.op a {
|
||||||
|
border-style: none;
|
||||||
|
border-radius: 3px;
|
||||||
|
padding: 3px;
|
||||||
|
background: linear-gradient(#88F, #008);
|
||||||
|
color: #FFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
h2 a {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
h2 {
|
||||||
|
font-family: "Quercus", sans-serif;
|
||||||
|
font-variant: small-caps;
|
||||||
|
}
|
||||||
|
|
||||||
|
.itemno {
|
||||||
|
font-size: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.itemno:before {
|
||||||
|
content: " (";
|
||||||
|
color: #F00;
|
||||||
|
}
|
||||||
|
|
||||||
|
.itemno:after {
|
||||||
|
content: ") ";
|
||||||
|
color: #F00;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feed_info {
|
||||||
|
display: table-row;
|
||||||
|
border-style: solid;
|
||||||
|
border-width: 1px;
|
||||||
|
margin: .1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feed_id {
|
||||||
|
font-size: 0.7em;
|
||||||
|
display: table-cell;
|
||||||
|
text-align: right;
|
||||||
|
padding-right: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feed_title {
|
||||||
|
padding-right: 0.2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feed_items {
|
||||||
|
background-color: #CCF;
|
||||||
|
color: #888;
|
||||||
|
display: table-cell;
|
||||||
|
text-align: right;
|
||||||
|
padding-left: 0.2em;
|
||||||
|
padding-right: 0.2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feed_unread {
|
||||||
|
background-color: #DDF;
|
||||||
|
color: #080;
|
||||||
|
display: table-cell;
|
||||||
|
text-align: right;
|
||||||
|
padding-left: 0.2em;
|
||||||
|
padding-right: 0.2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feed_lastissued {
|
||||||
|
background-color: #CCF;
|
||||||
|
color: #000;
|
||||||
|
display: table-cell;
|
||||||
|
padding-left: 0.2em;
|
||||||
|
padding-right: 0.2em;
|
||||||
|
text-align: right;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feed_lastseen {
|
||||||
|
background-color: #DDF;
|
||||||
|
color: #000;
|
||||||
|
display: table-cell;
|
||||||
|
padding-left: 0.2em;
|
||||||
|
padding-right: 0.2em;
|
||||||
|
text-align: right;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feedlist {
|
||||||
|
background-color: #BBF;
|
||||||
|
float: left;
|
||||||
|
display: table;
|
||||||
|
margin-right: 0.5em;
|
||||||
|
padding: 0.2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feedlist a {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.op a {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.itemlist {
|
||||||
|
background-color: #EEF;
|
||||||
|
max-width: 40em;
|
||||||
|
float: left;
|
||||||
|
font-family: "Quercus", sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
|
@media screen and (min-width: 60em) and (max-width: 80em) {
|
||||||
|
.itemlist {
|
||||||
|
max-width: 48%;
|
||||||
|
}
|
||||||
|
.feedlist {
|
||||||
|
max-width: 48%;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.item.read {
|
||||||
|
background-color: #EEE;
|
||||||
|
}
|
||||||
|
|
||||||
|
.lastupdate {
|
||||||
|
position: absolute;
|
||||||
|
top: 0em;
|
||||||
|
right: 0em;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 {
|
||||||
|
background: linear-gradient(to right, #008, #CCF);
|
||||||
|
margin: 0em;
|
||||||
|
color: #FFF;
|
||||||
|
}
|
||||||
|
body {
|
||||||
|
margin: 0em;
|
||||||
|
}
|
||||||
|
|
||||||
|
img {
|
||||||
|
max-width: 99%;
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
# missing: Sanitize html
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use Rss2Html::Scrubber;
|
||||||
|
|
||||||
|
use DBI;
|
||||||
|
|
||||||
|
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||||
|
$dbh->{sqlite_unicode} = 1;
|
||||||
|
my $scrubber = Rss2Html::Scrubber->new();
|
||||||
|
my $item
|
||||||
|
= $dbh->selectrow_hashref(
|
||||||
|
"select link, items.title as item_title, content, items.id as item_id
|
||||||
|
from items
|
||||||
|
where item_id=?
|
||||||
|
",
|
||||||
|
{ Slice => {} }, $ARGV[0]);
|
||||||
|
my $content = $scrubber->scrub($item->{content});
|
||||||
|
open my $fh1, '>:encoding(UTF-8)', "$ARGV[0].raw.html";
|
||||||
|
print $fh1 $item->{content};
|
||||||
|
open my $fh2, '>:encoding(UTF-8)', "$ARGV[0].scrubbed.html";
|
||||||
|
print $fh2 $content;
|
|
@ -0,0 +1,49 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use v5.14;
|
||||||
|
|
||||||
|
use DBI;
|
||||||
|
|
||||||
|
update2();
|
||||||
|
|
||||||
|
sub update2 {
|
||||||
|
no autovivification qw(fetch);
|
||||||
|
|
||||||
|
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||||
|
$dbh->{sqlite_unicode} = 1;
|
||||||
|
|
||||||
|
my $feeds = $dbh->selectall_arrayref("select * from feeds where update_frequency is not null", { Slice => {} });
|
||||||
|
my $seconds_per_week = 86400 * 7;
|
||||||
|
for my $feed (@$feeds) {
|
||||||
|
print "feed: $feed->{id} $feed->{title}\n";
|
||||||
|
my $now = time();
|
||||||
|
my $update_time = $now - $feed->{last_update};
|
||||||
|
print "\t", "update_time: $update_time\n";
|
||||||
|
my $start = $now - 4 * $seconds_per_week;
|
||||||
|
my $items = $dbh->selectall_arrayref("select * from items where feed_id=? and issued >= ? order by issued",
|
||||||
|
{ Slice => {} },
|
||||||
|
$feed->{id}, $start,
|
||||||
|
);
|
||||||
|
my $first_issued = $items->[0]{issued} // $start;
|
||||||
|
my $total_time = $now - $first_issued;
|
||||||
|
print "\t", "total_time: $total_time\n";
|
||||||
|
my $updates_in_interval = 0;
|
||||||
|
for my $item (@$items) {
|
||||||
|
my $t = $item->{issued};
|
||||||
|
my $dt1 = $now - $t;
|
||||||
|
my $dt2 = $dt1 % $seconds_per_week;
|
||||||
|
if ($dt2 < $update_time) {
|
||||||
|
$updates_in_interval++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# normalize to one week
|
||||||
|
$updates_in_interval *= $seconds_per_week / $total_time;
|
||||||
|
$updates_in_interval += $update_time / $seconds_per_week; # add one update per week
|
||||||
|
print "\t", "updates_in_interval: $updates_in_interval\n";
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# vim: tw=0
|
|
@ -0,0 +1,11 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
use warnings;
|
||||||
|
use strict;
|
||||||
|
use Rss2Html::FeedList;
|
||||||
|
use Log::Log4perl qw(:easy);
|
||||||
|
|
||||||
|
BEGIN { Log::Log4perl->init("log.conf") };
|
||||||
|
|
||||||
|
|
||||||
|
my $feedlist = Rss2Html::FeedList->new();
|
||||||
|
$feedlist->update_adaptive;
|
|
@ -0,0 +1,49 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use v5.14;
|
||||||
|
|
||||||
|
use DBI;
|
||||||
|
use POSIX qw(strftime);
|
||||||
|
|
||||||
|
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||||
|
$dbh->{sqlite_unicode} = 1;
|
||||||
|
my $feed_id = $ARGV[0];
|
||||||
|
my $issued
|
||||||
|
= $dbh->selectcol_arrayref(
|
||||||
|
"select issued from items where feed_id=? order by issued", {}, $feed_id
|
||||||
|
);
|
||||||
|
|
||||||
|
my @dt;
|
||||||
|
for (1 .. $#{$issued}) {
|
||||||
|
push @dt, $issued->[$_] - $issued->[$_-1];
|
||||||
|
}
|
||||||
|
@dt = sort { $a <=> $b } @dt;
|
||||||
|
my $c = 0;
|
||||||
|
for (@dt) {
|
||||||
|
$c++;
|
||||||
|
printf("%14s %8d %5.1f%%\n", hr($_), $_, $c * 100 / @dt);
|
||||||
|
}
|
||||||
|
|
||||||
|
sub hr {
|
||||||
|
my ($s) = $_;
|
||||||
|
my $hr = "";
|
||||||
|
if ($s > 86400) {
|
||||||
|
my $d = int ($s / 86400);
|
||||||
|
$hr .= $d . "d";
|
||||||
|
$s -= $d * 86400;
|
||||||
|
}
|
||||||
|
if ($hr || $s > 3600) {
|
||||||
|
my $h = int ($s / 3600);
|
||||||
|
$hr .= sprintf("%02dh", $h);
|
||||||
|
$s -= $h * 3600;
|
||||||
|
}
|
||||||
|
if ($hr || $s > 60) {
|
||||||
|
my $m = int ($s / 60);
|
||||||
|
$hr .= sprintf("%02dm", $m);
|
||||||
|
$s -= $m * 60;
|
||||||
|
}
|
||||||
|
$hr .= sprintf("%02ds", $s);
|
||||||
|
return $hr;
|
||||||
|
}
|
Loading…
Reference in New Issue