rss2html/lib/Rss2Html/FeedList.pm

132 lines
4.2 KiB
Perl
Raw Normal View History

package Rss2Html::FeedList;
use warnings;
use strict;
use Moose;
use namespace::autoclean;
use DBI;
use Rss2Html::Feed;
use POSIX qw(strftime);
with 'MooseX::Log::Log4perl';
has 'dbh' => (
is => 'ro',
writer => '_set_dbh',
);
sub BUILD {
my ($self, $args) = @_;
my $dbh = DBI->connect("dbi:SQLite:dbname=rss2html.sqlite", "", "",
{
sqlite_use_immediate_transaction => 1,
});
$dbh->{sqlite_unicode} = 1;
$self->_set_dbh($dbh);
}
sub feeds {
my ($self) = @_;
my $dbh = $self->dbh;
no autovivification qw(fetch);
my $feeds_sql = $dbh->selectall_arrayref("select * from feeds where update_frequency is not null", { Slice => {} });
my @feeds;
for (@$feeds_sql) {
push @feeds, Rss2Html::Feed->new(%$_, dbh => $dbh);
}
return \@feeds;
}
sub update_adaptive {
my ($self) = @_;
my $dbh = $self->dbh;
my $start = time();
no autovivification qw(fetch);
$dbh->begin_work;
my $feeds = $self->feeds;
my $seconds_per_week = 86400 * 7;
for my $feed (@$feeds) {
$feed->{title} //= "-";
#print "<p>feed: $feed->{id} $feed->{title}<br/>\n";
$self->log->info("feed: $feed->{id} $feed->{title}");
my $now = time();
my $run_time = $now - $start;
my $update_time = $now - $feed->{last_update};
$self->log->info(strftime("%Y-%m-%d %a %H:%M:%S", localtime($feed->{last_update})), " .. ",
strftime("%Y-%m-%d %a %H:%M:%S", localtime($now)), ": $update_time seconds");
#print "\t", "update_time: $update_time<br/>\n";
# We are guaranteed to have all items which were issued in the last
# int($feed->{expire} / 7) weeks since we expire only those not seen for
# more than $feed->{expire} days. Some feeds keep some items for a long
# time while removing others quite quickly, so statistics for older
# items may be severely skewed and are not safe to use.
my $interval = int($feed->{expire} / 7) * 7 * 86400;
# We measure those weeks backwards from the last update.
# If we measured from now we would almost always miss the update from
# exactly 4 weeks ago (one cycle we are too early, the next we are too
# late).
my $start = $feed->{last_update} - $interval;
my $items = $dbh->selectall_arrayref("select * from items where feed_id=? and issued >= ? order by issued",
{ Slice => {} },
$feed->{id}, $start,
);
my $first_issued = $items->[0]{issued} // $start;
my $total_time = $now - $first_issued;
my $updates_in_interval = 0;
for my $item (@$items) {
my $t = $item->{issued};
my $dt1 = $now - $t;
my $dt2 = $dt1 % $seconds_per_week;
if ($dt2 < $update_time) {
$updates_in_interval++;
$self->log->info("found $item->{id} ", strftime("%Y-%m-%d %a %H:%M:%S", localtime($t)));
}
}
$self->log->info("updates_in_interval: $updates_in_interval");
$updates_in_interval *= $seconds_per_week / $total_time; # normalize to one week
$self->log->info("updates_in_interval: $updates_in_interval");
$updates_in_interval += $update_time / $seconds_per_week; # add one update per week
$self->log->info("updates_in_interval: $updates_in_interval");
#print "\t", "updates_in_interval: $updates_in_interval<br/>\n";
if ($updates_in_interval > 0.5) {
$self->log->info($feed->{url});
$self->log->info("need to update " . $feed->{url});
$feed->update;
}
}
$self->cleanup();
$dbh->commit;
$self->log->info("update done");
}
sub cleanup {
my ($self) = @_;
my $dbh = $self->dbh;
my $now = time();
my $feeds = $dbh->selectall_arrayref("select * from feeds", { Slice => {} });
for my $feed (@$feeds) {
my $expire_days = $feed->{expire} // 30;
my $seen_limit = $now - $expire_days * 86400;
my $rc = $dbh->do("delete from items where feed_id=? and seen < ?", {}, $feed->{id}, $seen_limit);
$self->log->info("cleanup of feed $feed->{id} (limit $seen_limit) returned $rc");
}
$dbh->do("delete from read where not exists (select id from items where id=item_id)");
}
#---
__PACKAGE__->meta->make_immutable;
1;