package Rss2Html::FeedList; use warnings; use strict; use Moose; use namespace::autoclean; use DBI; use Rss2Html::Feed; use POSIX qw(strftime); with 'MooseX::Log::Log4perl'; has 'dbh' => ( is => 'ro', writer => '_set_dbh', ); sub BUILD { my ($self, $args) = @_; my $dbh = DBI->connect("dbi:Pg:dbname=rss2html", "", ""); $self->_set_dbh($dbh); } sub feeds { my ($self) = @_; my $dbh = $self->dbh; no autovivification qw(fetch); my $feeds_sql = $dbh->selectall_arrayref("select * from feeds where active and url is not null", { Slice => {} }); my @feeds; for (@$feeds_sql) { push @feeds, Rss2Html::Feed->new(%$_, dbh => $dbh); } return \@feeds; } sub update_adaptive { my ($self) = @_; my $dbh = $self->dbh; my $start = time(); no autovivification qw(fetch); $dbh->begin_work; my $feeds = $self->feeds; my $seconds_per_week = 86400 * 7; for my $feed (@$feeds) { $feed->{title} //= "-"; #print "

feed: $feed->{id} $feed->{title}
\n"; $self->log->info("feed: $feed->{id} $feed->{title}"); my $now = time(); my $run_time = $now - $start; my $update_time = $now - $feed->{last_update}; $self->log->info(strftime("%Y-%m-%d %a %H:%M:%S", localtime($feed->{last_update})), " .. ", strftime("%Y-%m-%d %a %H:%M:%S", localtime($now)), ": $update_time seconds"); #print "\t", "update_time: $update_time
\n"; # We are guaranteed to have all items which were issued in the last # int($feed->{expire} / 7) weeks since we expire only those not seen for # more than $feed->{expire} days. Some feeds keep some items for a long # time while removing others quite quickly, so statistics for older # items may be severely skewed and are not safe to use. my $interval = int($feed->{expire} / 7) * 7 * 86400; # We measure those weeks backwards from the last update. # If we measured from now we would almost always miss the update from # exactly 4 weeks ago (one cycle we are too early, the next we are too # late). my $start = $feed->{last_update} - $interval; my $items = $dbh->selectall_arrayref("select * from items where feed_id=? and issued >= ? order by issued", { Slice => {} }, $feed->{id}, $start, ); my $first_issued = $items->[0]{issued} // $start; my $total_time = $now - $first_issued; my $updates_in_interval = 0; for my $item (@$items) { my $t = $item->{issued}; my $dt1 = $now - $t; my $dt2 = $dt1 % $seconds_per_week; if ($dt2 < $update_time) { $updates_in_interval++; $self->log->info("found $item->{id} ", strftime("%Y-%m-%d %a %H:%M:%S", localtime($t))); } } $self->log->info("updates_in_interval: $updates_in_interval"); $updates_in_interval *= $seconds_per_week / $total_time; # normalize to one week $self->log->info("updates_in_interval: $updates_in_interval"); $updates_in_interval += $update_time / $seconds_per_week; # add one update per week $self->log->info("updates_in_interval: $updates_in_interval"); #print "\t", "updates_in_interval: $updates_in_interval
\n"; if ($updates_in_interval > 0.5) { $self->log->info($feed->{url}); $self->log->info("need to update " . $feed->{url}); $feed->update; } } $dbh->commit; $self->log->info("update done"); } sub cleanup { my ($self) = @_; my $dbh = $self->dbh; my $now = time(); my $feeds = $dbh->selectall_arrayref("select * from feeds", { Slice => {} }); for my $feed (@$feeds) { my $expire_days = $feed->{expire} // 30; my $seen_limit = $now - $expire_days * 86400; my $rc = $dbh->do("delete from items where feed_id=? and seen < ?", {}, $feed->{id}, $seen_limit); $self->log->info("cleanup of feed $feed->{id} (limit $seen_limit) returned $rc"); # mark all but the last 100 entries as old, so they aren't shown by default. $dbh->do("update items set old=1 where id in (select id from items where feed_id=? and (old is null or old != 1) order by issued desc offset 100)", {}, $feed->{id}); } # XXX - this shouldn't be necessary anymore, since PostgreSQL enforces foreign key constraints. my $rc = $dbh->do("delete from read where not exists (select id from items where id=item_id)"); $self->log->info("cleanup of read items returned $rc"); } #--- __PACKAGE__->meta->make_immutable; 1; # vim: tw=132