2017-06-12 21:29:41 +02:00
|
|
|
package Rss2Html::FeedList;
|
|
|
|
use warnings;
|
|
|
|
use strict;
|
|
|
|
use Moose;
|
|
|
|
use namespace::autoclean;
|
|
|
|
use DBI;
|
|
|
|
use Rss2Html::Feed;
|
|
|
|
use POSIX qw(strftime);
|
|
|
|
|
|
|
|
with 'MooseX::Log::Log4perl';
|
|
|
|
|
|
|
|
|
|
|
|
has 'dbh' => (
|
|
|
|
is => 'ro',
|
|
|
|
writer => '_set_dbh',
|
|
|
|
);
|
|
|
|
|
|
|
|
sub BUILD {
|
|
|
|
my ($self, $args) = @_;
|
2017-10-02 15:26:48 +02:00
|
|
|
my $dbh = DBI->connect("dbi:Pg:dbname=rss2html", "", "");
|
2017-06-12 21:29:41 +02:00
|
|
|
$self->_set_dbh($dbh);
|
|
|
|
}
|
|
|
|
|
|
|
|
sub feeds {
|
|
|
|
my ($self) = @_;
|
|
|
|
my $dbh = $self->dbh;
|
|
|
|
|
|
|
|
no autovivification qw(fetch);
|
|
|
|
|
2017-10-02 15:26:48 +02:00
|
|
|
my $feeds_sql = $dbh->selectall_arrayref("select * from feeds where active", { Slice => {} });
|
2017-06-12 21:29:41 +02:00
|
|
|
my @feeds;
|
|
|
|
for (@$feeds_sql) {
|
|
|
|
push @feeds, Rss2Html::Feed->new(%$_, dbh => $dbh);
|
|
|
|
}
|
|
|
|
return \@feeds;
|
|
|
|
}
|
|
|
|
|
|
|
|
sub update_adaptive {
|
|
|
|
my ($self) = @_;
|
|
|
|
my $dbh = $self->dbh;
|
|
|
|
my $start = time();
|
|
|
|
|
|
|
|
no autovivification qw(fetch);
|
|
|
|
|
|
|
|
$dbh->begin_work;
|
|
|
|
|
|
|
|
my $feeds = $self->feeds;
|
|
|
|
my $seconds_per_week = 86400 * 7;
|
|
|
|
for my $feed (@$feeds) {
|
|
|
|
$feed->{title} //= "-";
|
|
|
|
#print "<p>feed: $feed->{id} $feed->{title}<br/>\n";
|
|
|
|
$self->log->info("feed: $feed->{id} $feed->{title}");
|
|
|
|
my $now = time();
|
|
|
|
my $run_time = $now - $start;
|
|
|
|
my $update_time = $now - $feed->{last_update};
|
|
|
|
$self->log->info(strftime("%Y-%m-%d %a %H:%M:%S", localtime($feed->{last_update})), " .. ",
|
|
|
|
strftime("%Y-%m-%d %a %H:%M:%S", localtime($now)), ": $update_time seconds");
|
|
|
|
#print "\t", "update_time: $update_time<br/>\n";
|
|
|
|
|
|
|
|
# We are guaranteed to have all items which were issued in the last
|
|
|
|
# int($feed->{expire} / 7) weeks since we expire only those not seen for
|
|
|
|
# more than $feed->{expire} days. Some feeds keep some items for a long
|
|
|
|
# time while removing others quite quickly, so statistics for older
|
|
|
|
# items may be severely skewed and are not safe to use.
|
|
|
|
my $interval = int($feed->{expire} / 7) * 7 * 86400;
|
|
|
|
|
|
|
|
# We measure those weeks backwards from the last update.
|
|
|
|
# If we measured from now we would almost always miss the update from
|
|
|
|
# exactly 4 weeks ago (one cycle we are too early, the next we are too
|
|
|
|
# late).
|
|
|
|
my $start = $feed->{last_update} - $interval;
|
|
|
|
my $items = $dbh->selectall_arrayref("select * from items where feed_id=? and issued >= ? order by issued",
|
|
|
|
{ Slice => {} },
|
|
|
|
$feed->{id}, $start,
|
|
|
|
);
|
|
|
|
my $first_issued = $items->[0]{issued} // $start;
|
|
|
|
my $total_time = $now - $first_issued;
|
|
|
|
my $updates_in_interval = 0;
|
|
|
|
for my $item (@$items) {
|
|
|
|
my $t = $item->{issued};
|
|
|
|
my $dt1 = $now - $t;
|
|
|
|
my $dt2 = $dt1 % $seconds_per_week;
|
|
|
|
if ($dt2 < $update_time) {
|
|
|
|
$updates_in_interval++;
|
|
|
|
$self->log->info("found $item->{id} ", strftime("%Y-%m-%d %a %H:%M:%S", localtime($t)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
$self->log->info("updates_in_interval: $updates_in_interval");
|
|
|
|
$updates_in_interval *= $seconds_per_week / $total_time; # normalize to one week
|
|
|
|
$self->log->info("updates_in_interval: $updates_in_interval");
|
|
|
|
$updates_in_interval += $update_time / $seconds_per_week; # add one update per week
|
|
|
|
$self->log->info("updates_in_interval: $updates_in_interval");
|
|
|
|
#print "\t", "updates_in_interval: $updates_in_interval<br/>\n";
|
|
|
|
if ($updates_in_interval > 0.5) {
|
|
|
|
$self->log->info($feed->{url});
|
|
|
|
$self->log->info("need to update " . $feed->{url});
|
|
|
|
$feed->update;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
$self->cleanup();
|
|
|
|
|
|
|
|
$dbh->commit;
|
|
|
|
|
|
|
|
$self->log->info("update done");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
sub cleanup {
|
|
|
|
my ($self) = @_;
|
|
|
|
my $dbh = $self->dbh;
|
|
|
|
|
|
|
|
my $now = time();
|
|
|
|
my $feeds = $dbh->selectall_arrayref("select * from feeds", { Slice => {} });
|
|
|
|
|
|
|
|
for my $feed (@$feeds) {
|
|
|
|
my $expire_days = $feed->{expire} // 30;
|
|
|
|
my $seen_limit = $now - $expire_days * 86400;
|
|
|
|
my $rc = $dbh->do("delete from items where feed_id=? and seen < ?", {}, $feed->{id}, $seen_limit);
|
|
|
|
$self->log->info("cleanup of feed $feed->{id} (limit $seen_limit) returned $rc");
|
|
|
|
}
|
|
|
|
$dbh->do("delete from read where not exists (select id from items where id=item_id)");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#---
|
|
|
|
__PACKAGE__->meta->make_immutable;
|
|
|
|
1;
|
2017-10-02 15:26:48 +02:00
|
|
|
# vim: tw=132
|