package Rss2Html::FeedList; use warnings; use strict; use Moose; use namespace::autoclean; use DBI; use Rss2Html::Feed; use POSIX qw(strftime); with 'MooseX::Log::Log4perl'; has 'dbh' => ( is => 'ro', writer => '_set_dbh', ); sub BUILD { my ($self, $args) = @_; my $dbh = DBI->connect("dbi:Pg:dbname=rss2html", "", ""); $self->_set_dbh($dbh); } sub feeds { my ($self) = @_; my $dbh = $self->dbh; no autovivification qw(fetch); my $feeds_sql = $dbh->selectall_arrayref("select * from feeds where active and url is not null", { Slice => {} }); my @feeds; for (@$feeds_sql) { push @feeds, Rss2Html::Feed->new(%$_, dbh => $dbh); } return \@feeds; } sub update_adaptive { my ($self) = @_; my $dbh = $self->dbh; my $start = time(); no autovivification qw(fetch); $dbh->begin_work; my $feeds = $self->feeds; my $seconds_per_week = 86400 * 7; for my $feed (@$feeds) { $feed->{title} //= "-"; #print "
feed: $feed->{id} $feed->{title}
\n";
$self->log->info("feed: $feed->{id} $feed->{title}");
my $now = time();
my $run_time = $now - $start;
my $update_time = $now - $feed->{last_update};
$self->log->info(strftime("%Y-%m-%d %a %H:%M:%S", localtime($feed->{last_update})), " .. ",
strftime("%Y-%m-%d %a %H:%M:%S", localtime($now)), ": $update_time seconds");
#print "\t", "update_time: $update_time
\n";
# We are guaranteed to have all items which were issued in the last
# int($feed->{expire} / 7) weeks since we expire only those not seen for
# more than $feed->{expire} days. Some feeds keep some items for a long
# time while removing others quite quickly, so statistics for older
# items may be severely skewed and are not safe to use.
my $interval = int($feed->{expire} / 7) * 7 * 86400;
# We measure those weeks backwards from the last update.
# If we measured from now we would almost always miss the update from
# exactly 4 weeks ago (one cycle we are too early, the next we are too
# late).
my $start = $feed->{last_update} - $interval;
my $items = $dbh->selectall_arrayref("select * from items where feed_id=? and issued >= ? order by issued",
{ Slice => {} },
$feed->{id}, $start,
);
my $first_issued = $items->[0]{issued} // $start;
my $total_time = $now - $first_issued;
my $updates_in_interval = 0;
for my $item (@$items) {
my $t = $item->{issued};
my $dt1 = $now - $t;
my $dt2 = $dt1 % $seconds_per_week;
if ($dt2 < $update_time) {
$updates_in_interval++;
$self->log->info("found $item->{id} ", strftime("%Y-%m-%d %a %H:%M:%S", localtime($t)));
}
}
$self->log->info("updates_in_interval: $updates_in_interval");
$updates_in_interval *= $seconds_per_week / $total_time; # normalize to one week
$self->log->info("updates_in_interval: $updates_in_interval");
$updates_in_interval += $update_time / $seconds_per_week; # add one update per week
$self->log->info("updates_in_interval: $updates_in_interval");
#print "\t", "updates_in_interval: $updates_in_interval
\n";
if ($updates_in_interval > 0.5) {
$self->log->info($feed->{url});
$self->log->info("need to update " . $feed->{url});
$feed->update;
}
}
$dbh->commit;
$self->log->info("update done");
}
sub cleanup {
my ($self) = @_;
my $dbh = $self->dbh;
my $now = time();
my $feeds = $dbh->selectall_arrayref("select * from feeds", { Slice => {} });
for my $feed (@$feeds) {
my $expire_days = $feed->{expire} // 30;
my $seen_limit = $now - $expire_days * 86400;
my $rc = $dbh->do("delete from items where feed_id=? and seen < ?", {}, $feed->{id}, $seen_limit);
$self->log->info("cleanup of feed $feed->{id} (limit $seen_limit) returned $rc");
# mark all but the last 100 entries as old, so they aren't shown by default.
$dbh->do("update items set old=1
where id in (select id from items
where feed_id=? and (old is null or old != 1)
order by issued desc offset 100)",
{},
$feed->{id});
}
# XXX - this shouldn't be necessary anymore, since PostgreSQL enforces foreign key constraints.
my $rc = $dbh->do("delete from read where not exists (select id from items where id=item_id)");
$self->log->info("cleanup of read items returned $rc");
}
#---
__PACKAGE__->meta->make_immutable;
1;
# vim: tw=132