Copy from web site
Start a git repo independent of my web site.
This commit is contained in:
commit
ff784ba351
|
@ -0,0 +1,10 @@
|
|||
AuthName hjp-work-in-progress
|
||||
AuthType Basic
|
||||
AuthUserFile /usr/local/www/offline/hjp/passwd
|
||||
AuthGroupFile /usr/local/www/offline/hjp/group
|
||||
<Limit GET>
|
||||
require valid-user
|
||||
</Limit>
|
||||
<Limit POST>
|
||||
require valid-user
|
||||
</Limit>
|
|
@ -0,0 +1,223 @@
|
|||
package Rss2Html::Feed;
|
||||
|
||||
=head1 Name
|
||||
|
||||
Rss2Html::Feed - a feed
|
||||
|
||||
=head1 Description
|
||||
|
||||
This class represents a single feed.
|
||||
|
||||
It currently has only one public method, update(), which updates the
|
||||
feed.
|
||||
|
||||
=cut
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use v5.14;
|
||||
use Moose;
|
||||
|
||||
use Cache::Memcached;
|
||||
use Encode qw(decode_utf8);
|
||||
use HTTP::Date;
|
||||
use XML::Atom::Client;
|
||||
use XML::RAI;
|
||||
|
||||
with 'MooseX::Log::Log4perl';
|
||||
|
||||
has 'id' => (
|
||||
is => 'rw',
|
||||
);
|
||||
has 'url' => (
|
||||
is => 'rw',
|
||||
);
|
||||
has 'update_frequency' => (
|
||||
is => 'rw',
|
||||
);
|
||||
has 'last_update' => (
|
||||
is => 'rw',
|
||||
);
|
||||
has 'title' => (
|
||||
is => 'rw',
|
||||
);
|
||||
has 'type' => (
|
||||
is => 'rw',
|
||||
);
|
||||
has 'allow_img' => (
|
||||
is => 'rw',
|
||||
);
|
||||
has 'expire' => (
|
||||
is => 'rw',
|
||||
);
|
||||
|
||||
has 'dbh' => (
|
||||
is => 'rw',
|
||||
);
|
||||
|
||||
my $mcd = Cache::Memcached->new(servers => ['127.0.0.1:11211']);
|
||||
|
||||
|
||||
=head1 Methods
|
||||
|
||||
=cut
|
||||
|
||||
=head2 update
|
||||
|
||||
Update the feed. This is unconditional. The logic to decide which feeds
|
||||
must be updated is in L<Rss2Html::FeedList::update_adaptive>.
|
||||
|
||||
=cut
|
||||
|
||||
sub update {
|
||||
my ($self) = @_;
|
||||
my $dbh = $self->dbh;
|
||||
|
||||
my $now = int(time());
|
||||
|
||||
if ($self->{type} eq 'rss') {
|
||||
local $@ = "unknown error";
|
||||
|
||||
# XML::RAI has a parse_uri method, but we explicitely fetch the URI to get better diagnostics.
|
||||
# Probably should do this for atom, too, so we can autodetect the feed type.
|
||||
my $ua = LWP::UserAgent->new;
|
||||
$ua->agent("rss2html/1.0 (+http://www.hjp.at/projekte/rss2html/)");
|
||||
my $response = $ua->get($self->{url});
|
||||
if ($response->code != 200) {
|
||||
$self->log->warn("GET $self->{url} failed: " . $response->status_line);
|
||||
return;
|
||||
}
|
||||
$self->log->info("$self->{url} has content type " . $response->content_type);
|
||||
|
||||
my $rai = eval {XML::RAI->parse_string($response->content) };
|
||||
if ($rai) {
|
||||
$rai->time_format('EPOCH');
|
||||
if ($rai->channel->title ne $self->{title}) {
|
||||
$self->log->info("set title");
|
||||
$dbh->do("update feeds set title=? where id=?", {},
|
||||
$rai->channel->title, $self->{id});
|
||||
}
|
||||
$self->log->info("checking " . scalar(@{$rai->items}) . " items in db");
|
||||
for my $item ( @{$rai->items} ) {
|
||||
my $item_db = $dbh->selectrow_hashref("select * from items where link=?", {}, $item->link);
|
||||
if ($item_db) {
|
||||
$self->log->info("item $item_db->{id} seen");
|
||||
$dbh->do("update items set seen=? where id=?", {}, $now, $item_db->{id});
|
||||
} else {
|
||||
$self->log->info("new");
|
||||
|
||||
my $issued = $item->issued // $now;
|
||||
if ($issued < $self->{last_update} || $issued > $now) {
|
||||
# $issued is clearly wrong. Could be any time between $self->{last_update} and $now.
|
||||
# Choose a random time in the interval.
|
||||
my $guess_issued = $self->{last_update} + int(rand($now - $self->{last_update}));
|
||||
$self->log->info("fixing up issued time: $issued -> $guess_issued");
|
||||
$issued = $guess_issued;
|
||||
}
|
||||
|
||||
$self->log->info("item " . $item->link . " new");
|
||||
$dbh->do("insert into items(title, link, content, issued, seen, feed_id) values(?, ?, ?, ?, ?, ?)",
|
||||
{},
|
||||
$item->title, $item->link, $item->content, $issued, $now, $self->{id}
|
||||
);
|
||||
$self->invalidate_item_info;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print STDERR "error getting $self->{url}: $@\n";
|
||||
}
|
||||
} elsif ($self->{type} eq 'atom') {
|
||||
my $api = XML::Atom::Client->new();
|
||||
my $atomfeed = $api->getFeed($self->{url});
|
||||
if ($atomfeed) {
|
||||
my @items = $atomfeed->entries;
|
||||
for my $item (@items) {
|
||||
my $link = $self->atom_main_link($item);
|
||||
my $item_db = $dbh->selectrow_hashref("select * from items where link=?", {}, $link);
|
||||
if ($item_db) {
|
||||
$self->log->info("item $item_db->{id} seen");
|
||||
$dbh->do("update items set seen=? where id=?", {}, $now, $item_db->{id});
|
||||
} else {
|
||||
$self->log->info("new");
|
||||
my $title = decode_utf8($item->title); # XXX - Workaround?
|
||||
my $content = $item->content;
|
||||
my $summary = $item->summary;
|
||||
my $body = $content ? $content->body :
|
||||
defined($summary) ? decode_utf8($summary) : undef;
|
||||
|
||||
$self->log->info("item " . $link . " new");
|
||||
|
||||
my $issued = str2time($item->updated) // $now;
|
||||
if ($issued < $self->{last_update} || $issued > $now) {
|
||||
# $issued is clearly wrong. Could be any time between $self->{last_update} and $now.
|
||||
# Choose a random time in the interval.
|
||||
my $guess_issued = $self->{last_update} + int(rand($now - $self->{last_update}));
|
||||
$self->log->info("fixing up issued time: $issued -> $guess_issued");
|
||||
$issued = $guess_issued;
|
||||
}
|
||||
|
||||
$dbh->do("insert into items(title, link, content, issued, seen, feed_id) values(?, ?, ?, ?, ?, ?)",
|
||||
{},
|
||||
$title, $link, $body, $issued, $now, $self->{id}
|
||||
);
|
||||
$self->invalidate_item_info;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print "<p>unknown type $self->{type}</p>\n";
|
||||
$dbh->do("update feeds set title=?, last_update=? where id=?", {},
|
||||
'unknown', $now, $self->{id});
|
||||
return;
|
||||
}
|
||||
|
||||
# mark all but the last 100 entries as old, so they aren't shown by default.
|
||||
$dbh->do("update items set old=1
|
||||
where id in (select id from items
|
||||
where feed_id=? and (old is null or old != 1)
|
||||
order by issued desc limit -1 offset 100)",
|
||||
{},
|
||||
$self->{id});
|
||||
|
||||
$dbh->do("update feeds set last_update=? where id=?", {}, $now, $self->{id});
|
||||
}
|
||||
|
||||
|
||||
sub atom_main_link {
|
||||
my ($self, $item) = @_; # XXX should probably be a method of $item
|
||||
for my $link ($item->link) {
|
||||
$self->log->info("checking href ", $link->href, "\n");
|
||||
if (defined $link->rel && $link->rel ne 'alternate') {
|
||||
$self->log->info("rel ", $link->rel, " unexpected, skip");
|
||||
next;
|
||||
}
|
||||
if (defined $link->type && $link->type ne 'text/html') {
|
||||
$self->log->info("type ", $link->type, " unexpected, skip\n");
|
||||
next;
|
||||
}
|
||||
$self->log->info("match!\n");
|
||||
return $link->href;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
sub invalidate_item_info {
|
||||
my ($self) = @_;
|
||||
|
||||
my $dbh = $self->dbh;
|
||||
my $users = $dbh->selectcol_arrayref("select distinct user from read");
|
||||
for my $user (@$users) {
|
||||
$mcd->delete($self->item_info_key($user));
|
||||
}
|
||||
}
|
||||
|
||||
sub item_info_key {
|
||||
my ($self, $user) = @_;
|
||||
return "rss2html/item_info/" . $user . "/" . $self->id;
|
||||
}
|
||||
|
||||
#---
|
||||
__PACKAGE__->meta->make_immutable;
|
||||
1;
|
||||
|
||||
# vim: tw=132
|
|
@ -0,0 +1,131 @@
|
|||
package Rss2Html::FeedList;
|
||||
use warnings;
|
||||
use strict;
|
||||
use Moose;
|
||||
use namespace::autoclean;
|
||||
use DBI;
|
||||
use Rss2Html::Feed;
|
||||
use POSIX qw(strftime);
|
||||
|
||||
with 'MooseX::Log::Log4perl';
|
||||
|
||||
|
||||
has 'dbh' => (
|
||||
is => 'ro',
|
||||
writer => '_set_dbh',
|
||||
);
|
||||
|
||||
sub BUILD {
|
||||
my ($self, $args) = @_;
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "",
|
||||
{
|
||||
sqlite_use_immediate_transaction => 1,
|
||||
});
|
||||
$dbh->{sqlite_unicode} = 1;
|
||||
$self->_set_dbh($dbh);
|
||||
}
|
||||
|
||||
sub feeds {
|
||||
my ($self) = @_;
|
||||
my $dbh = $self->dbh;
|
||||
|
||||
no autovivification qw(fetch);
|
||||
|
||||
my $feeds_sql = $dbh->selectall_arrayref("select * from feeds where update_frequency is not null", { Slice => {} });
|
||||
my @feeds;
|
||||
for (@$feeds_sql) {
|
||||
push @feeds, Rss2Html::Feed->new(%$_, dbh => $dbh);
|
||||
}
|
||||
return \@feeds;
|
||||
}
|
||||
|
||||
sub update_adaptive {
|
||||
my ($self) = @_;
|
||||
my $dbh = $self->dbh;
|
||||
my $start = time();
|
||||
|
||||
no autovivification qw(fetch);
|
||||
|
||||
$dbh->begin_work;
|
||||
|
||||
my $feeds = $self->feeds;
|
||||
my $seconds_per_week = 86400 * 7;
|
||||
for my $feed (@$feeds) {
|
||||
$feed->{title} //= "-";
|
||||
#print "<p>feed: $feed->{id} $feed->{title}<br/>\n";
|
||||
$self->log->info("feed: $feed->{id} $feed->{title}");
|
||||
my $now = time();
|
||||
my $run_time = $now - $start;
|
||||
my $update_time = $now - $feed->{last_update};
|
||||
$self->log->info(strftime("%Y-%m-%d %a %H:%M:%S", localtime($feed->{last_update})), " .. ",
|
||||
strftime("%Y-%m-%d %a %H:%M:%S", localtime($now)), ": $update_time seconds");
|
||||
#print "\t", "update_time: $update_time<br/>\n";
|
||||
|
||||
# We are guaranteed to have all items which were issued in the last
|
||||
# int($feed->{expire} / 7) weeks since we expire only those not seen for
|
||||
# more than $feed->{expire} days. Some feeds keep some items for a long
|
||||
# time while removing others quite quickly, so statistics for older
|
||||
# items may be severely skewed and are not safe to use.
|
||||
my $interval = int($feed->{expire} / 7) * 7 * 86400;
|
||||
|
||||
# We measure those weeks backwards from the last update.
|
||||
# If we measured from now we would almost always miss the update from
|
||||
# exactly 4 weeks ago (one cycle we are too early, the next we are too
|
||||
# late).
|
||||
my $start = $feed->{last_update} - $interval;
|
||||
my $items = $dbh->selectall_arrayref("select * from items where feed_id=? and issued >= ? order by issued",
|
||||
{ Slice => {} },
|
||||
$feed->{id}, $start,
|
||||
);
|
||||
my $first_issued = $items->[0]{issued} // $start;
|
||||
my $total_time = $now - $first_issued;
|
||||
my $updates_in_interval = 0;
|
||||
for my $item (@$items) {
|
||||
my $t = $item->{issued};
|
||||
my $dt1 = $now - $t;
|
||||
my $dt2 = $dt1 % $seconds_per_week;
|
||||
if ($dt2 < $update_time) {
|
||||
$updates_in_interval++;
|
||||
$self->log->info("found $item->{id} ", strftime("%Y-%m-%d %a %H:%M:%S", localtime($t)));
|
||||
}
|
||||
}
|
||||
$self->log->info("updates_in_interval: $updates_in_interval");
|
||||
$updates_in_interval *= $seconds_per_week / $total_time; # normalize to one week
|
||||
$self->log->info("updates_in_interval: $updates_in_interval");
|
||||
$updates_in_interval += $update_time / $seconds_per_week; # add one update per week
|
||||
$self->log->info("updates_in_interval: $updates_in_interval");
|
||||
#print "\t", "updates_in_interval: $updates_in_interval<br/>\n";
|
||||
if ($updates_in_interval > 0.5) {
|
||||
$self->log->info($feed->{url});
|
||||
$self->log->info("need to update " . $feed->{url});
|
||||
$feed->update;
|
||||
}
|
||||
}
|
||||
$self->cleanup();
|
||||
|
||||
$dbh->commit;
|
||||
|
||||
$self->log->info("update done");
|
||||
}
|
||||
|
||||
|
||||
sub cleanup {
|
||||
my ($self) = @_;
|
||||
my $dbh = $self->dbh;
|
||||
|
||||
my $now = time();
|
||||
my $feeds = $dbh->selectall_arrayref("select * from feeds", { Slice => {} });
|
||||
|
||||
for my $feed (@$feeds) {
|
||||
my $expire_days = $feed->{expire} // 30;
|
||||
my $seen_limit = $now - $expire_days * 86400;
|
||||
my $rc = $dbh->do("delete from items where feed_id=? and seen < ?", {}, $feed->{id}, $seen_limit);
|
||||
$self->log->info("cleanup of feed $feed->{id} (limit $seen_limit) returned $rc");
|
||||
}
|
||||
$dbh->do("delete from read where not exists (select id from items where id=item_id)");
|
||||
}
|
||||
|
||||
|
||||
#---
|
||||
__PACKAGE__->meta->make_immutable;
|
||||
1;
|
|
@ -0,0 +1,31 @@
|
|||
package Rss2Html::Scrubber;
|
||||
use warnings;
|
||||
use strict;
|
||||
use parent 'HTML::Scrubber';
|
||||
|
||||
sub new {
|
||||
my ($class, %options) = @_;
|
||||
my $self = HTML::Scrubber->new();
|
||||
$self->deny(qw(link));
|
||||
$self->allow(qw(p a em strong b i ul ol li dl dt dd br));
|
||||
my %rules = (
|
||||
a => {
|
||||
href => qr{^https?://}i,
|
||||
'*' => 0,
|
||||
},
|
||||
);
|
||||
if ($options{allow_img}) {
|
||||
$rules{img} = {
|
||||
src => qr{^https?://}i,
|
||||
alt => 1,
|
||||
'*' => 0,
|
||||
},
|
||||
}
|
||||
$self->rules(
|
||||
%rules
|
||||
);
|
||||
bless $self, $class;
|
||||
return $self;
|
||||
}
|
||||
|
||||
1;
|
|
@ -0,0 +1,66 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use v5.14;
|
||||
|
||||
use XML::RAI;
|
||||
use XML::Atom::Client;
|
||||
use DBI;
|
||||
use CGI;
|
||||
use Rss2Html::Scrubber;
|
||||
use Time::HiRes qw(time);
|
||||
use POSIX qw(strftime);
|
||||
|
||||
$| = 1;
|
||||
|
||||
my $q = CGI->new();
|
||||
binmode STDOUT, ":encoding(UTF-8)";
|
||||
|
||||
my $feed_id = $q->param('id');
|
||||
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||
$dbh->{sqlite_unicode} = 1;
|
||||
|
||||
print "Content-Type: text/html; charset=utf-8\n";
|
||||
print "Refresh: 600\n";
|
||||
print "\n";
|
||||
|
||||
print "<link rel='stylesheet' type='text/css' href='rss2html.css'/>\n";
|
||||
print "<h1>RSS 2 HTML</h1>\n";
|
||||
print "<p>", strftime("%Y-%m-%d %H:%M:%S%z", localtime()), "</p>\n";
|
||||
|
||||
my $feed = $dbh->selectrow_hashref("select * from feeds where id=?", {}, $feed_id);
|
||||
|
||||
print "<span class='feed_info'>\n";
|
||||
print "<span class='feed_id'>", $q->escapeHTML($feed->{id}), "</span>\n";
|
||||
print "<span class='feed_title'>", $q->escapeHTML($feed->{title}), "</span>\n";
|
||||
print "<span class='feed_type'>", $q->escapeHTML($feed->{type}), "</span>\n";
|
||||
print "</span>\n";
|
||||
|
||||
my $now = time();
|
||||
my $seconds_per_week = 86400 * 7;
|
||||
my $start = $now - 4 * $seconds_per_week;
|
||||
my $items = $dbh->selectall_arrayref("select * from items where feed_id=? and issued >= ? order by issued",
|
||||
{ Slice => {} },
|
||||
$feed->{id}, $start,
|
||||
);
|
||||
for my $item (@$items) {
|
||||
my $t = $item->{issued};
|
||||
my $dt1 = $now - $t;
|
||||
my $dt2 = $dt1 % $seconds_per_week;
|
||||
$item->{dt2} = $dt2;
|
||||
}
|
||||
$items = [ sort { $a->{dt2} <=> $b->{dt2} } @$items ];
|
||||
print "<table>\n";
|
||||
for my $item (@$items) {
|
||||
my $t = $now - $item->{dt2};
|
||||
print"<tr>\n";
|
||||
print"<td>", $item->{dt2}, "</td>\n";
|
||||
print"<td>", strftime("%a %H:%M:%S", localtime($t)), "</td>\n";
|
||||
print"<td>", "<a href='", $q->escapeHTML($item->{link}), "'>", $q->escapeHTML($item->{title}), "</a>", "</td>\n";
|
||||
print"</tr>\n";
|
||||
|
||||
}
|
||||
print "</table>\n";
|
||||
# vim: expandtab
|
|
@ -0,0 +1,426 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# Parameters:
|
||||
#
|
||||
# mark=id
|
||||
# mark item id as read
|
||||
#
|
||||
# redir=id
|
||||
# redirect to url of item id and mark item as read
|
||||
#
|
||||
# fls=(all|unread)
|
||||
# "feed list show": Show all feeds or only feeds with unread items in feed list
|
||||
#
|
||||
# fis= comma separated list of feed ids
|
||||
# show only listed feeds
|
||||
# BUG: fis is only effective if sr is non-zero!
|
||||
#
|
||||
# sr=n
|
||||
# show last n read items. Note: Currently these are the n read items with the
|
||||
# highest ids, which may not be the last items which have been read. Need to
|
||||
# store the read date for that.
|
||||
#
|
||||
# so=1
|
||||
# also show old items.
|
||||
#
|
||||
# sl=1
|
||||
# show items marked as "later".
|
||||
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use v5.14;
|
||||
use utf8;
|
||||
use experimental 'smartmatch'; # for given/when
|
||||
use experimental 'autoderef';
|
||||
|
||||
use CGI;
|
||||
use Cache::Memcached;
|
||||
use DBI;
|
||||
use Data::Dumper;
|
||||
use Encode qw(encode_utf8 decode_utf8);
|
||||
use POSIX qw(strftime);
|
||||
use Rss2Html::Scrubber;
|
||||
use Time::HiRes qw(time);
|
||||
|
||||
$| = 1;
|
||||
|
||||
my $start = time();
|
||||
|
||||
my $q = CGI->new();
|
||||
binmode STDOUT, ":encoding(UTF-8)";
|
||||
my $mcd = Cache::Memcached->new(servers => ['127.0.0.1:11211']);
|
||||
|
||||
if ($q->param('mark')) {
|
||||
mark();
|
||||
} elsif ($q->param('redir')) {
|
||||
redirect();
|
||||
} else {
|
||||
list();
|
||||
}
|
||||
|
||||
sub redirect {
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "",
|
||||
{
|
||||
sqlite_use_immediate_transaction => 1,
|
||||
});
|
||||
$dbh->{sqlite_unicode} = 1;
|
||||
my $item_id = $q->param('redir');
|
||||
my $item = $dbh->selectrow_hashref("select * from items where id=?", {}, $item_id);
|
||||
if ($item) {
|
||||
print "Status: 302\n";
|
||||
print "Location: $item->{link}\n";
|
||||
print "\n";
|
||||
$dbh->do("insert into read(user, item_id) values(?, ?)", {}, $q->remote_user, $item_id);
|
||||
$mcd->delete(item_info_key($q->remote_user, $item->{feed_id}));
|
||||
exit(0);
|
||||
}
|
||||
print "Status: 404\n";
|
||||
print "Content-Type: text/html; charset=utf-8\n";
|
||||
print "\n";
|
||||
print "not found\n";
|
||||
|
||||
}
|
||||
|
||||
sub mark {
|
||||
print_log("mark start");
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||
$dbh->{sqlite_unicode} = 1;
|
||||
my $item_id = $q->param('mark');
|
||||
my $item = $dbh->selectrow_hashref("select * from items where id=?", {}, $item_id);
|
||||
if ($item) {
|
||||
my $q1 = CGI->new($q);
|
||||
$q1->delete('mark');
|
||||
print "Status: 302\n";
|
||||
print "Location: ", $q1->self_url, "\n";
|
||||
print "\n";
|
||||
$dbh->do("insert into read(user, item_id) values(?, ?)", {}, $q->remote_user, $item_id);
|
||||
$mcd->delete(item_info_key($q->remote_user, $item->{feed_id}));
|
||||
print_log("mark done");
|
||||
exit(0);
|
||||
}
|
||||
print "Status: 404\n";
|
||||
print "Content-Type: text/html; charset=utf-8\n";
|
||||
print "\n";
|
||||
print "not found\n";
|
||||
print_log("mark failed");
|
||||
|
||||
}
|
||||
|
||||
sub list {
|
||||
|
||||
my $feed_list_show = $q->param('fls') // "all";
|
||||
my $feed_item_show = $q->param('fis') // '';
|
||||
my $show_read = $q->param('sr');
|
||||
my $show_old = $q->param('so');
|
||||
my $show_later = $q->param('sl');
|
||||
my %feed_item_show = map(($_+0 => 1), split(/,/, $feed_item_show));
|
||||
|
||||
|
||||
print "Content-Type: text/html; charset=utf-8\n";
|
||||
print "Refresh: 600\n";
|
||||
print "\n";
|
||||
|
||||
print "<meta name='viewport' content='width=device-width, initial-scale=1' />\n";
|
||||
print "<link rel='stylesheet' type='text/css' href='rss2html.css'/>\n";
|
||||
print "<h1>RSS 2 HTML</h1>\n";
|
||||
print "<div class='lastupdate'>", strftime("%Y-%m-%d %H:%M:%S%z", localtime()), "</div>\n";
|
||||
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||
$dbh->{sqlite_unicode} = 1;
|
||||
|
||||
print_log("list_feeds start");
|
||||
list_feeds($dbh, $feed_list_show, $show_old);
|
||||
print_log("list_feeds done");
|
||||
|
||||
my $items;
|
||||
if ($show_read) {
|
||||
my $fields = "feeds.id as feed_id, feeds.title as feed_title, allow_img, link, items.title as item_title, content, items.id as item_id, issued, user, lang";
|
||||
my $tables = "items
|
||||
join feeds on items.feed_id=feeds.id
|
||||
left outer join read on (items.id=read.item_id and user=?)";
|
||||
my @where;
|
||||
if (%feed_item_show) {
|
||||
push @where, "feeds.id in (" . join(',', keys %feed_item_show) . ")";
|
||||
}
|
||||
if (!$show_old) {
|
||||
push @where, "(items.old is null or items.old == 0)"
|
||||
}
|
||||
|
||||
my $where = "where " . join(" and ", @where);
|
||||
|
||||
my $cmd = "select $fields from $tables $where order by issued";
|
||||
print_log("$cmd");
|
||||
my $items1 = $dbh->selectall_arrayref($cmd, { Slice => {} }, $q->remote_user);
|
||||
my $nrd = 0;
|
||||
while (@$items1) {
|
||||
my $item = pop @$items1;
|
||||
{
|
||||
no warnings 'uninitialized';
|
||||
print_log("id=$item->{item_id}, user=$item->{user}");
|
||||
}
|
||||
if (!$item->{user}) {
|
||||
print_log("id=$item->{item_id}: unread");
|
||||
unshift @$items, $item;
|
||||
} elsif ($nrd < $show_read) {
|
||||
print_log("id=$item->{item_id}: $nrd < $show_read");
|
||||
unshift @$items, $item;
|
||||
$nrd++;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
my @where = ("user is null");
|
||||
|
||||
if (!$show_old) {
|
||||
push @where, "(items.old is null or items.old == 0)"
|
||||
}
|
||||
|
||||
my $where = "where " . join(" and ", @where);
|
||||
|
||||
|
||||
$items = $dbh->selectall_arrayref(
|
||||
"select feeds.id as feed_id, feeds.title as feed_title, allow_img, link, items.title as item_title, content, items.id as item_id, issued, lang
|
||||
from items
|
||||
join feeds on items.feed_id=feeds.id
|
||||
left outer join read on (items.id=read.item_id and user=?)
|
||||
$where
|
||||
order by issued",
|
||||
{ Slice => {} }, $q->remote_user);
|
||||
}
|
||||
my $feeds = $dbh->selectall_arrayref(
|
||||
"select id, title from feeds where update_frequency is not null order by id",
|
||||
{ Slice => {} }
|
||||
);
|
||||
for my $f (@$feeds) {
|
||||
$f->{selected} = 1 if $feed_item_show{$f->{id}};
|
||||
}
|
||||
print_itemlist($items, $feeds);
|
||||
print_log("list done");
|
||||
}
|
||||
|
||||
sub print_itemlist {
|
||||
my ($items, $feeds) = @_;
|
||||
print "<div class='itemlist'>\n";
|
||||
print "<form action='./'>\n";
|
||||
print "<select name='fis'>\n";
|
||||
print "<option value=''>\n";
|
||||
for my $f (@$feeds) {
|
||||
print "<option value='" . $q->escapeHTML($f->{id}) . "' " . ($f->{selected} ? "selected='selected'" : "") .">" . $q->escapeHTML($f->{title}) . "</option>\n";
|
||||
}
|
||||
print "</select>\n";
|
||||
my $fls = $q->param('fls');
|
||||
print "<input type='hidden' name='fls' value='" . $q->escapeHTML($fls) . "'>\n" if defined $fls;
|
||||
print "<input type='hidden' name='sr' value='10'>\n";
|
||||
print "<input type='submit' value='»'>\n";
|
||||
print "</form>\n";
|
||||
my $n_items = 0;
|
||||
my $n_html_mcd = 0;
|
||||
my $n_scrub_mcd = 0;
|
||||
my $n_scrub = 0;
|
||||
print_log("print_itemlist: \$q=" . $q->self_url);
|
||||
my $q1 = CGI->new($q);
|
||||
print_log("print_itemlist: \$q1=" . $q1->self_url . " (before loop)");
|
||||
print_log(scalar @$items . " before remix");
|
||||
$items = remix($items);
|
||||
print_log(scalar @$items . " after remix");
|
||||
for my $item (@$items) {
|
||||
$n_items++;
|
||||
my $is_read = defined($item->{user});
|
||||
my $scrubbed_content = $mcd->get(scrubbed_content_key($item->{item_id}));
|
||||
if ($scrubbed_content) {
|
||||
$scrubbed_content = decode_utf8($scrubbed_content);
|
||||
$n_scrub_mcd++;
|
||||
} else {
|
||||
my $scrubber = Rss2Html::Scrubber->new(allow_img => $item->{allow_img});
|
||||
$scrubbed_content = $scrubber->scrub($item->{content});
|
||||
$mcd->set(scrubbed_content_key($item->{item_id}), encode_utf8($scrubbed_content), 3600);
|
||||
$n_scrub++;
|
||||
}
|
||||
$q1->param('mark', $item->{item_id});
|
||||
my $item_class = 'item' . ($is_read ? ' read' : '');
|
||||
my $langattr = defined $item->{lang} ? "lang='$item->{lang}'" : "";
|
||||
my $html = "";
|
||||
$html .= "<div class='$item_class' $langattr>\n";
|
||||
$html .= "<span class='itemno'>" . $item->{item_id} . "</span>\n";
|
||||
$html .= "<span class='issued'>" . strftime('%Y-%m-%d %H:%M:%S', localtime($item->{issued})) . "</span>\n";
|
||||
unless ($is_read) {
|
||||
print_log("print_itemlist: \$q1=" . $q1->self_url . " (in loop)");
|
||||
$html .= "<div class='op'><a href='" . $q->escapeHTML($q1->self_url) . "'>Mark read</a></div>\n";
|
||||
}
|
||||
$html .= "<div class='feed'>" . $q->escapeHTML($item->{feed_title}) . "</div>\n";
|
||||
$html .= "<h2><a href='./?redir=" . $q->escapeHTML($item->{item_id}) . "'>" . $q->escapeHTML($item->{item_title}) . "</a></h2>\n";
|
||||
$html .= "<div class='content'>" . $scrubbed_content . "</div>\n";
|
||||
unless ($is_read) {
|
||||
$html .= "<div class='op'><a href='" . $q->escapeHTML($q1->self_url) . "'>Mark read</a></div>\n";
|
||||
}
|
||||
$html .= "<div class='end'></div>\n";
|
||||
$html .= "</div\n>";
|
||||
print $html;
|
||||
|
||||
}
|
||||
print "</div>\n";
|
||||
print_log("itemlist: $n_items items ($n_html_mcd html cached, $n_scrub_mcd scrubbed content cached, $n_scrub scrubbed)");
|
||||
}
|
||||
|
||||
sub remix {
|
||||
my ($items) = @_;
|
||||
|
||||
my $queues = {};
|
||||
for my $item (@$items) {
|
||||
if (!defined $item->{feed_id}) {
|
||||
local $Data::Dumper::Indent = 0;
|
||||
print_log("item w/o feed_id: " . Dumper($item));
|
||||
}
|
||||
$queues->{$item->{feed_id}} //= [];
|
||||
push $queues->{$item->{feed_id}}, $item;
|
||||
}
|
||||
my @ordered_feeds = sort { $queues->{$a}[0]{issued} <=> $queues->{$b}[0]{issued} } keys $queues;
|
||||
my $new_items = [];
|
||||
my $found;
|
||||
do {
|
||||
$found = 0;
|
||||
for my $feed (@ordered_feeds) {
|
||||
my $item = shift $queues->{$feed};
|
||||
if ($item) {
|
||||
push $new_items, $item;
|
||||
$found++;
|
||||
print_log("found an item in feed $feed. " . scalar(@{$queues->{$feed}}) . " items left")
|
||||
}
|
||||
}
|
||||
} while ($found);
|
||||
|
||||
return $new_items;
|
||||
}
|
||||
|
||||
sub list_feeds {
|
||||
my ($dbh, $feed_list_show, $show_old) = @_;
|
||||
|
||||
no autovivification qw(fetch);
|
||||
|
||||
my $q1 = CGI->new($q);
|
||||
my $other = $feed_list_show eq 'all' ? 'unread' : 'all';
|
||||
$q1->param('fls', $other);
|
||||
print "<div><a href='", $q->escapeHTML($q1->self_url), "'>$other</a></div>\n";
|
||||
print "<div class='feedlist'>\n";
|
||||
$dbh->begin_work;
|
||||
|
||||
my $feeds = $dbh->selectall_arrayref("select * from feeds where update_frequency is not null", { Slice => {} });
|
||||
my $seconds_per_week = 86400 * 7;
|
||||
for my $feed (@$feeds) {
|
||||
$feed->{title} //= "-";
|
||||
print_item_info($dbh, $feed, $feed_list_show, $show_old);
|
||||
|
||||
}
|
||||
$dbh->commit;
|
||||
|
||||
print "</div>\n";
|
||||
}
|
||||
|
||||
|
||||
sub print_item_info {
|
||||
my ($dbh, $feed, $feed_list_show, $show_old) = @_;
|
||||
|
||||
my $item_info_key = item_info_key($q->remote_user, $feed->{id});
|
||||
my $item_info = $mcd->get($item_info_key);
|
||||
my $cmd
|
||||
= "select count(i.id) as nr_items, max(i.issued) as last_issued, max(i.seen) as last_seen, count(r.item_id) as nr_read
|
||||
from items i left outer join (select item_id from read where user=?) r on i.id=r.item_id
|
||||
where i.feed_id=?";
|
||||
unless ($show_old) {
|
||||
$cmd .= " and (old is null or old = 0)";
|
||||
}
|
||||
unless ($item_info) {
|
||||
$item_info
|
||||
= $dbh->selectrow_hashref(
|
||||
$cmd,
|
||||
{},
|
||||
$q->remote_user, $feed->{id}
|
||||
);
|
||||
$mcd->set($item_info_key, $item_info, 3600);
|
||||
}
|
||||
my $now = time();
|
||||
my $unread = $item_info->{nr_items} - $item_info->{nr_read};
|
||||
print_log("print_item_info: feed $feed->{id}, nr_items=$item_info->{nr_items}, read=$item_info->{nr_read}, unread=$unread");
|
||||
return unless ($unread || $feed_list_show eq 'all');
|
||||
|
||||
my $q1 = CGI->new($q);
|
||||
$q1->param('fis', $feed->{id});
|
||||
$q1->param('sr', 10);
|
||||
print "<span class='feed_info'>\n";
|
||||
print "<span class='feed_id'>", $q->escapeHTML($feed->{id}), "</span>\n";
|
||||
print "<span class='feed_title'>",
|
||||
"<a href='", $q1->escapeHTML($q1->self_url), "'>",
|
||||
$q->escapeHTML($feed->{title}),
|
||||
"</a>",
|
||||
"</span>\n";
|
||||
print "<span class='feed_items'>", $item_info->{nr_items}, "</span>\n";
|
||||
print "<span class='feed_unread'>", $unread, "</span>\n";
|
||||
print "<span class='feed_lastissued'>", delta_t($item_info->{last_issued}, $now), "</span>\n";
|
||||
print "<span class='feed_lastseen'>", delta_t($item_info->{last_seen}, $now), "</span>\n";
|
||||
print "</span>\n";
|
||||
}
|
||||
|
||||
|
||||
sub print_log {
|
||||
my $msg = "@_";
|
||||
my $now = time();
|
||||
printf STDERR "%s: %s.%06d %f: %s\n", $0, strftime("%H:%M:%S", localtime($now)), ($now - int($now)) * 1E6, $now - $start, $msg;
|
||||
}
|
||||
|
||||
sub delta_t {
|
||||
my ($t1, $t2) = @_;
|
||||
return "N/A" unless defined $t1 && defined $t2;
|
||||
my $dt = $t2 - $t1;
|
||||
my $s = "";
|
||||
if ($dt < 0) {
|
||||
$s .= "-";
|
||||
$dt = -$dt;
|
||||
}
|
||||
given ($dt) {
|
||||
when ($_ >= 86400 * 10) {
|
||||
$s .= sprintf("%.0fd", $_ / 86400);
|
||||
}
|
||||
when ($_ >= 86400) {
|
||||
$s .= sprintf("%.1fd", $_ / 86400);
|
||||
}
|
||||
when ($_ >= 3600 * 10) {
|
||||
$s .= sprintf("%.0fh", $_ / 3600);
|
||||
}
|
||||
when ($_ >= 3600) {
|
||||
$s .= sprintf("%.1fh", $_ / 3600);
|
||||
}
|
||||
when ($_ >= 60 * 10) {
|
||||
$s .= sprintf("%.0fm", $_ / 60);
|
||||
}
|
||||
when ($_ >= 60) {
|
||||
$s .= sprintf("%.1fm", $_ / 60);
|
||||
}
|
||||
when ($_ >= 10) {
|
||||
$s .= sprintf("%.0fs", $_);
|
||||
}
|
||||
default {
|
||||
$s .= sprintf("%.1fs", $_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
sub item_info_key {
|
||||
my ($user, $feed_id) = @_;
|
||||
return "rss2html/item_info/$user/$feed_id";
|
||||
}
|
||||
|
||||
|
||||
sub scrubbed_content_key {
|
||||
my ($item_id) = @_;
|
||||
return "rss2html/scrubbed_content/$item_id";
|
||||
}
|
||||
|
||||
sub item_html_key {
|
||||
my ($item_id, $is_read) = @_;
|
||||
return "rss2html/item_html/$item_id/$is_read";
|
||||
}
|
||||
|
||||
|
||||
# vim: tw=132 expandtab
|
|
@ -0,0 +1,12 @@
|
|||
log4perl.logger = DEBUG, RotFileApp
|
||||
log4perl.appender.FileApp = Log::Log4perl::Appender::File
|
||||
log4perl.appender.FileApp.filename = test.log
|
||||
log4perl.appender.FileApp.layout = PatternLayout
|
||||
log4perl.appender.FileApp.layout.ConversionPattern = %d [%p] %P %c %m%n
|
||||
log4perl.appender.RotFileApp = Log::Dispatch::FileRotate
|
||||
log4perl.appender.RotFileApp.filename = test.log
|
||||
log4perl.appender.RotFileApp.layout = PatternLayout
|
||||
log4perl.appender.RotFileApp.layout.ConversionPattern = %d [%p] %P %c %m%n
|
||||
log4perl.appender.RotFileApp.max = 7
|
||||
log4perl.appender.RotFileApp.mode = append
|
||||
log4perl.appender.RotFileApp.binmode = :encoding(UTF-8)
|
|
@ -0,0 +1,55 @@
|
|||
#!/usr/bin/perl
|
||||
use warnings;
|
||||
use strict;
|
||||
use TimeSeries;
|
||||
|
||||
my $wanted_feed_id = shift;
|
||||
|
||||
my $current_feed_id = -1;
|
||||
my @updates_in_interval;
|
||||
|
||||
my $ts = TimeSeries->new(style => 'impulses');
|
||||
$ts->legend('old', 'new');
|
||||
$ts->finalresolution(100);
|
||||
|
||||
my @files = glob("test.log*");
|
||||
@files = sort { -M $b <=> -M $a } @files;
|
||||
|
||||
my $current;
|
||||
my @updates;
|
||||
for my $file (@files) {
|
||||
open my $fh, '<', $file;
|
||||
while (<$fh>) {
|
||||
|
||||
if (/ Rss2Html.FeedList feed: (\d+) .*/) {
|
||||
$current_feed_id = $1;
|
||||
}
|
||||
|
||||
next unless $current_feed_id == $wanted_feed_id;
|
||||
|
||||
if (/Rss2Html.FeedList [ ]
|
||||
\d\d\d\d-\d\d-\d\d [ ] \w\w\w [ ] \d\d:\d\d:\d\d
|
||||
[ ] \.\. [ ]
|
||||
(\d\d\d\d-\d\d-\d\d) [ ] \w\w\w [ ] (\d\d:\d\d:\d\d)
|
||||
: [ ]
|
||||
(\d+) [ ] seconds
|
||||
/x
|
||||
) {
|
||||
$current = { time => "$1T$2", interval => $3 };
|
||||
}
|
||||
if (/Rss2Html.FeedList need to update/) {
|
||||
push @updates, $current;
|
||||
}
|
||||
if (/Rss2Html.Feed item .* new/) {
|
||||
$current->{new}++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (@updates) {
|
||||
$ts->add_timestring($_->{time},
|
||||
$_->{new} ? (0, $_->{interval})
|
||||
: ($_->{interval}, 0 )
|
||||
);
|
||||
}
|
||||
print $ts->plot;
|
|
@ -0,0 +1,47 @@
|
|||
#!/usr/bin/perl
|
||||
use warnings;
|
||||
use strict;
|
||||
use TimeSeries;
|
||||
|
||||
my $wanted_feed_id = shift;
|
||||
|
||||
my $current_feed_id = -1;
|
||||
my $current_time;
|
||||
my @updates_in_interval;
|
||||
my $update_count;
|
||||
|
||||
my $ts = TimeSeries->new(style => 'points');
|
||||
$ts->legend('raw', 'adjusted');
|
||||
$ts->finalresolution(100);
|
||||
|
||||
my @files = glob("/var/log/roxen/hjp/cgi.????-??-??");
|
||||
@files = @files[-7..-1];
|
||||
for my $file (@files) {
|
||||
open my $fh, '<', $file;
|
||||
while (<$fh>) {
|
||||
|
||||
if (/: \d+\.\d+: feed: (\d+) .*/) {
|
||||
$current_feed_id = $1;
|
||||
$update_count = 0;
|
||||
}
|
||||
|
||||
next unless $current_feed_id == $wanted_feed_id;
|
||||
|
||||
if (/: [ ]
|
||||
\d\d\d\d-\d\d-\d\d [ ] \w\w\w [ ] \d\d:\d\d:\d\d
|
||||
[ ][ ] \.\. [ ][ ]
|
||||
(\d\d\d\d-\d\d-\d\d) [ ] \w\w\w [ ] (\d\d:\d\d:\d\d)
|
||||
/x
|
||||
) {
|
||||
$current_time = "$1T$2";
|
||||
}
|
||||
if (/updates_in_interval: ([-+\d.e]+)/) {
|
||||
$updates_in_interval[$update_count++] = $1;
|
||||
if ($update_count == 3) {
|
||||
#print "$current_time\t$updates_in_interval[1]\t$updates_in_interval[2]\n";
|
||||
$ts->add_timestring($current_time, @updates_in_interval[1, 2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
print $ts->plot;
|
|
@ -0,0 +1,162 @@
|
|||
.item {
|
||||
border-style: none;
|
||||
border-width: 1px;
|
||||
background-color: #F6F6FF;
|
||||
margin: 1em;
|
||||
hyphens: auto;
|
||||
-moz-hyphens: auto;
|
||||
}
|
||||
|
||||
.item:after {
|
||||
display: block;
|
||||
clear: both;
|
||||
content: "";
|
||||
background-color: #00F;
|
||||
height: 6px;
|
||||
background: linear-gradient(#F6F6FF, #EEEEFF);
|
||||
}
|
||||
|
||||
.op {
|
||||
float: right;
|
||||
padding: 6px;
|
||||
}
|
||||
|
||||
.op a {
|
||||
border-style: none;
|
||||
border-radius: 3px;
|
||||
padding: 3px;
|
||||
background: linear-gradient(#88F, #008);
|
||||
color: #FFF;
|
||||
}
|
||||
|
||||
h2 a {
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-family: "Quercus", sans-serif;
|
||||
font-variant: small-caps;
|
||||
}
|
||||
|
||||
.itemno {
|
||||
font-size: 0.5em;
|
||||
}
|
||||
|
||||
.itemno:before {
|
||||
content: " (";
|
||||
color: #F00;
|
||||
}
|
||||
|
||||
.itemno:after {
|
||||
content: ") ";
|
||||
color: #F00;
|
||||
}
|
||||
|
||||
.feed_info {
|
||||
display: table-row;
|
||||
border-style: solid;
|
||||
border-width: 1px;
|
||||
margin: .1em;
|
||||
}
|
||||
|
||||
.feed_id {
|
||||
font-size: 0.7em;
|
||||
display: table-cell;
|
||||
text-align: right;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
|
||||
.feed_title {
|
||||
padding-right: 0.2em;
|
||||
}
|
||||
|
||||
.feed_items {
|
||||
background-color: #CCF;
|
||||
color: #888;
|
||||
display: table-cell;
|
||||
text-align: right;
|
||||
padding-left: 0.2em;
|
||||
padding-right: 0.2em;
|
||||
}
|
||||
|
||||
.feed_unread {
|
||||
background-color: #DDF;
|
||||
color: #080;
|
||||
display: table-cell;
|
||||
text-align: right;
|
||||
padding-left: 0.2em;
|
||||
padding-right: 0.2em;
|
||||
}
|
||||
|
||||
.feed_lastissued {
|
||||
background-color: #CCF;
|
||||
color: #000;
|
||||
display: table-cell;
|
||||
padding-left: 0.2em;
|
||||
padding-right: 0.2em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
.feed_lastseen {
|
||||
background-color: #DDF;
|
||||
color: #000;
|
||||
display: table-cell;
|
||||
padding-left: 0.2em;
|
||||
padding-right: 0.2em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
.feedlist {
|
||||
background-color: #BBF;
|
||||
float: left;
|
||||
display: table;
|
||||
margin-right: 0.5em;
|
||||
padding: 0.2em;
|
||||
}
|
||||
|
||||
.feedlist a {
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.op a {
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.itemlist {
|
||||
background-color: #EEF;
|
||||
max-width: 40em;
|
||||
float: left;
|
||||
font-family: "Quercus", sans-serif;
|
||||
}
|
||||
|
||||
@media screen and (min-width: 60em) and (max-width: 80em) {
|
||||
.itemlist {
|
||||
max-width: 48%;
|
||||
}
|
||||
.feedlist {
|
||||
max-width: 48%;
|
||||
}
|
||||
}
|
||||
|
||||
.item.read {
|
||||
background-color: #EEE;
|
||||
}
|
||||
|
||||
.lastupdate {
|
||||
position: absolute;
|
||||
top: 0em;
|
||||
right: 0em;
|
||||
}
|
||||
|
||||
h1 {
|
||||
background: linear-gradient(to right, #008, #CCF);
|
||||
margin: 0em;
|
||||
color: #FFF;
|
||||
}
|
||||
body {
|
||||
margin: 0em;
|
||||
}
|
||||
|
||||
img {
|
||||
max-width: 99%;
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# missing: Sanitize html
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use Rss2Html::Scrubber;
|
||||
|
||||
use DBI;
|
||||
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||
$dbh->{sqlite_unicode} = 1;
|
||||
my $scrubber = Rss2Html::Scrubber->new();
|
||||
my $item
|
||||
= $dbh->selectrow_hashref(
|
||||
"select link, items.title as item_title, content, items.id as item_id
|
||||
from items
|
||||
where item_id=?
|
||||
",
|
||||
{ Slice => {} }, $ARGV[0]);
|
||||
my $content = $scrubber->scrub($item->{content});
|
||||
open my $fh1, '>:encoding(UTF-8)', "$ARGV[0].raw.html";
|
||||
print $fh1 $item->{content};
|
||||
open my $fh2, '>:encoding(UTF-8)', "$ARGV[0].scrubbed.html";
|
||||
print $fh2 $content;
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use v5.14;
|
||||
|
||||
use DBI;
|
||||
|
||||
update2();
|
||||
|
||||
sub update2 {
|
||||
no autovivification qw(fetch);
|
||||
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||
$dbh->{sqlite_unicode} = 1;
|
||||
|
||||
my $feeds = $dbh->selectall_arrayref("select * from feeds where update_frequency is not null", { Slice => {} });
|
||||
my $seconds_per_week = 86400 * 7;
|
||||
for my $feed (@$feeds) {
|
||||
print "feed: $feed->{id} $feed->{title}\n";
|
||||
my $now = time();
|
||||
my $update_time = $now - $feed->{last_update};
|
||||
print "\t", "update_time: $update_time\n";
|
||||
my $start = $now - 4 * $seconds_per_week;
|
||||
my $items = $dbh->selectall_arrayref("select * from items where feed_id=? and issued >= ? order by issued",
|
||||
{ Slice => {} },
|
||||
$feed->{id}, $start,
|
||||
);
|
||||
my $first_issued = $items->[0]{issued} // $start;
|
||||
my $total_time = $now - $first_issued;
|
||||
print "\t", "total_time: $total_time\n";
|
||||
my $updates_in_interval = 0;
|
||||
for my $item (@$items) {
|
||||
my $t = $item->{issued};
|
||||
my $dt1 = $now - $t;
|
||||
my $dt2 = $dt1 % $seconds_per_week;
|
||||
if ($dt2 < $update_time) {
|
||||
$updates_in_interval++;
|
||||
}
|
||||
}
|
||||
# normalize to one week
|
||||
$updates_in_interval *= $seconds_per_week / $total_time;
|
||||
$updates_in_interval += $update_time / $seconds_per_week; # add one update per week
|
||||
print "\t", "updates_in_interval: $updates_in_interval\n";
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
# vim: tw=0
|
|
@ -0,0 +1,11 @@
|
|||
#!/usr/bin/perl
|
||||
use warnings;
|
||||
use strict;
|
||||
use Rss2Html::FeedList;
|
||||
use Log::Log4perl qw(:easy);
|
||||
|
||||
BEGIN { Log::Log4perl->init("log.conf") };
|
||||
|
||||
|
||||
my $feedlist = Rss2Html::FeedList->new();
|
||||
$feedlist->update_adaptive;
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use v5.14;
|
||||
|
||||
use DBI;
|
||||
use POSIX qw(strftime);
|
||||
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "");
|
||||
$dbh->{sqlite_unicode} = 1;
|
||||
my $feed_id = $ARGV[0];
|
||||
my $issued
|
||||
= $dbh->selectcol_arrayref(
|
||||
"select issued from items where feed_id=? order by issued", {}, $feed_id
|
||||
);
|
||||
|
||||
my @dt;
|
||||
for (1 .. $#{$issued}) {
|
||||
push @dt, $issued->[$_] - $issued->[$_-1];
|
||||
}
|
||||
@dt = sort { $a <=> $b } @dt;
|
||||
my $c = 0;
|
||||
for (@dt) {
|
||||
$c++;
|
||||
printf("%14s %8d %5.1f%%\n", hr($_), $_, $c * 100 / @dt);
|
||||
}
|
||||
|
||||
sub hr {
|
||||
my ($s) = $_;
|
||||
my $hr = "";
|
||||
if ($s > 86400) {
|
||||
my $d = int ($s / 86400);
|
||||
$hr .= $d . "d";
|
||||
$s -= $d * 86400;
|
||||
}
|
||||
if ($hr || $s > 3600) {
|
||||
my $h = int ($s / 3600);
|
||||
$hr .= sprintf("%02dh", $h);
|
||||
$s -= $h * 3600;
|
||||
}
|
||||
if ($hr || $s > 60) {
|
||||
my $m = int ($s / 60);
|
||||
$hr .= sprintf("%02dm", $m);
|
||||
$s -= $m * 60;
|
||||
}
|
||||
$hr .= sprintf("%02ds", $s);
|
||||
return $hr;
|
||||
}
|
Loading…
Reference in New Issue