From b22346079f652403d6a5797fd15a04fd3d27ed3d Mon Sep 17 00:00:00 2001
From: "Peter J. Holzer" <hjp@hjp.at>
Date: Wed, 18 Dec 2019 11:41:09 +0100
Subject: [PATCH] Implement simple search

---
 index.cgi    |   1 +
 rss2html.css |   6 ++
 search.cgi   | 216 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 223 insertions(+)
 create mode 100755 search.cgi

diff --git a/index.cgi b/index.cgi
index f45f21f..cfc3858 100755
--- a/index.cgi
+++ b/index.cgi
@@ -259,6 +259,7 @@ sub print_itemlist {
     print "<input type='hidden' name='fls' value='" . $q->escapeHTML($fls) . "'>\n" if defined $fls;
     print "<input type='hidden' name='sr' value='10'>\n";
     print "<input type='submit' value='»'>\n";
+    print "<a href='./search'>\x{1f50d}</a>\n";
     print "</form>\n";
     print "<div class='itemlist'>\n";
     my $n_items = 0;
diff --git a/rss2html.css b/rss2html.css
index 546bc7f..0e7df7f 100644
--- a/rss2html.css
+++ b/rss2html.css
@@ -171,6 +171,12 @@ h1 {
     margin: 0em;
     color: #FFF;
 }
+
+h1 a {
+    text-decoration: none;
+    color: inherit;
+}
+
 body {
     margin: 0em;
 }
diff --git a/search.cgi b/search.cgi
new file mode 100755
index 0000000..d0fc4ae
--- /dev/null
+++ b/search.cgi
@@ -0,0 +1,216 @@
+#!/usr/bin/perl
+
+# Parameters
+#
+# s=string
+#   search string: whitespace-separated terms which must all match
+#   (case-insensitively) in an item's title, link or content.
+#   A leading "-" negates a term; "?" matches any single character
+#   and "*" any sequence of characters.
+
+use v5.24;
+use warnings;
+use utf8;
+
+use CGI;
+use Cache::Memcached;
+use DBI;
+use Data::Dumper;
+use Encode qw(encode_utf8 decode_utf8);
+use POSIX qw(strftime);
+use Rss2Html::Scrubber;
+use Time::HiRes qw(time);
+
+my $db_conn = "dbi:Pg:dbname=rss2html";
+$| = 1;
+
+my $start = time();
+
+my $q = CGI->new();
+binmode STDOUT, ":encoding(UTF-8)";
+my $mcd = Cache::Memcached->new(servers => ['127.0.0.1:11211']);
+
+search();
+
+# Print the complete search page: HTTP header, page head, search form and,
+# if the "s" parameter contains at least one usable term, the matching
+# items ordered by issue date (newest first).
+sub search {
+    print "Content-Type: text/html; charset=utf-8\n";
+    print "Refresh: 600\n";
+    print "\n";
+
+    print "<meta name='viewport' content='width=device-width, initial-scale=1' />\n";
+    print "<link rel='stylesheet' type='text/css' href='rss2html.css'/>\n";
+    print "<h1><a href='/'>RSS 2 HTML</a></h1>\n";
+    print "<div class='lastupdate'>", strftime("%Y-%m-%d %H:%M:%S%z", localtime()), "</div>\n";
+
+    # Fail with a clear message instead of stumbling over an undefined handle.
+    my $dbh = DBI->connect($db_conn, "", "")
+        or die "Cannot connect to $db_conn: $DBI::errstr\n";
+    # NOTE(review): sqlite_unicode is a DBD::SQLite attribute, but the DSN
+    # above selects Pg - confirm whether this line is still needed.
+    $dbh->{sqlite_unicode} = 1;
+
+    my $fields = "feeds.id as feed_id, feeds.title as feed_title, allow_img, link, items.title as item_title, content, "
+               . "items.id as item_id, issued, read.username, lang";
+    my $tables = "items
+                  join feeds on items.feed_id=feeds.id
+                  left outer join read on (items.id=read.item_id and username=?)";
+    my @params = ($q->remote_user);
+    my @where;
+    my $search_expr = $q->param('s');
+    print_searchform();
+    # Guard against a missing parameter; a whitespace-only string yields no terms.
+    my @terms = defined $search_expr ? split(' ', $search_expr) : (); # XXX - quotes?
+    for my $t (@terms) {
+        my $t2 = $t;
+        my $not = "";
+        if ($t2 =~ /^-/) {
+            $not = "not ";
+            $t2 = substr($t2, 1);
+        }
+        # A lone "-" leaves nothing to search for.
+        next if $t2 eq "";
+        # Escape the LIKE metacharacters, including the escape character
+        # itself, before translating the user-level wildcards.
+        $t2 =~ s/([\\_%])/\\$1/g;
+        $t2 =~ s/\?/_/g;
+        $t2 =~ s/\*/%/g;
+        $t2 = "%$t2%";
+        push @where, "$not(items.title ilike ? or items.link ilike ? or items.content ilike ?)";
+        push @params, ($t2, $t2, $t2);
+    }
+    # Only query when there is at least one condition; an empty where clause
+    # would be a syntax error.
+    if (@where) {
+        my $where = join(" and ", @where);
+        my $cmd = "select $fields from $tables where $where order by issued desc";
+        print_log("$cmd");
+        my $items = $dbh->selectall_arrayref($cmd, { Slice => {} }, @params);
+        print_itemlist($items, []);
+    }
+    print_log("search done");
+}
+
+
+# Print the search form; it submits the "s" parameter to ./search.
+sub print_searchform {
+    print "<form action='./search'>\n";
+    print "<input name='s' size='50'>\n";
+    print "<input type='submit' value='\x{1f50d}'>\n";
+    print "</form>\n";
+}
+
+
+# Print a list of items as HTML.
+#
+# $items     - array ref of item rows (hash refs) as selected in search()
+# $feeds     - array ref of feeds (hash refs with id, title, selected),
+#              only used for the feed selection form
+# $show_form - if true, print the feed selection form first
+# $remix     - if true, pass the items through remix() before printing
+#
+# Scrubbed item content is cached in memcached for 3600 seconds.
+sub print_itemlist {
+    my ($items, $feeds, $show_form, $remix) = @_;
+
+    # Optionally show form to select feeds, show later, etc.
+    # This should probably be outside of this function
+    if ($show_form) {
+        print "<form action='./'>\n";
+        print "<select name='fis'>\n";
+        print "<option value=''>\n";
+        for my $f (@$feeds) {
+            print "<option value='" . $q->escapeHTML($f->{id}) . "' " . ($f->{selected} ? "selected='selected'" : "") .">" . $q->escapeHTML($f->{title}) . "</option>\n";
+        }
+        print "</select>\n";
+        print "<label><input type='checkbox' name='sl'" . ($q->param('sl') ? " checked" : "") . "> Show later</label>\n";
+        my $fls = $q->param('fls');
+        print "<input type='hidden' name='fls' value='" . $q->escapeHTML($fls) . "'>\n" if defined $fls;
+        print "<input type='hidden' name='sr' value='10'>\n";
+        print "<input type='submit' value='»'>\n";
+        print "</form>\n";
+    }
+    print "<div class='itemlist'>\n";
+    my $n_items = 0;
+    my $n_html_mcd = 0;
+    my $n_scrub_mcd = 0;
+    my $n_scrub = 0;
+    print_log("print_itemlist: \$q=" . $q->self_url);
+    my $q1 = CGI->new($q);
+    print_log("print_itemlist: \$q1=" . $q1->self_url . " (before loop)");
+    my $q_later = CGI->new($q);
+
+    # Optional remix. Does that have to be here or can we do it before
+    # calling print_itemlist?
+    # NOTE(review): no remix() is defined in this file, so this branch would
+    # fail if $remix were ever true; search() never passes it.
+    if ($remix) {
+        print_log(scalar @$items . " before remix");
+        $items = remix($items);
+        print_log(scalar @$items . " after remix");
+    }
+
+    for my $item (@$items) {
+        $n_items++;
+        my $is_read = defined($item->{username});
+        my $scrubbed_content = $mcd->get(scrubbed_content_key($item->{item_id}));
+        # Test for definedness so that empty content counts as a cache hit.
+        if (defined $scrubbed_content) {
+            $scrubbed_content = decode_utf8($scrubbed_content);
+            $n_scrub_mcd++;
+        } else {
+            my $scrubber = Rss2Html::Scrubber->new(allow_img => $item->{allow_img});
+            $scrubbed_content = $scrubber->scrub($item->{content});
+            $mcd->set(scrubbed_content_key($item->{item_id}), encode_utf8($scrubbed_content), 3600);
+            $n_scrub++;
+        }
+        $q1->param('mark', $item->{item_id});
+        $q_later->param('later', $item->{item_id});
+        my $item_class = 'item' . ($is_read ? ' read' : '');
+        # The language tag comes from the database, so escape it like all other values.
+        my $langattr = defined $item->{lang} ? "lang='" . $q->escapeHTML($item->{lang}) . "'" : "";
+        my $html = "";
+        $html .= "<div class='$item_class' $langattr>\n";
+        $html .= "<span class='itemno'>" . $item->{item_id} . "</span>\n";
+        $html .= "<span class='issued'>" . strftime('%Y-%m-%d %H:%M:%S', localtime($item->{issued})) . "</span>\n";
+        my $mark_read_button = "";
+        my $mark_later_button = "";
+        unless ($is_read) {
+            $mark_read_button = "<div class='op'><a href='" . $q->escapeHTML($q1->self_url) . "'>Mark read</a></div>\n";
+            $mark_later_button = "<div class='op'><a href='" . $q->escapeHTML($q_later->self_url) . "'>Show later</a></div>\n";
+        }
+        $html .= $mark_read_button;
+        $html .= $mark_later_button;
+        $html .= "<div class='feed'>" . $q->escapeHTML($item->{feed_title}) . "</div>\n";
+        $html .= "<h2><a href='./?redir=" . $q->escapeHTML($item->{item_id}) . "'>" . $q->escapeHTML($item->{item_title}) . "</a></h2>\n";
+        $html .= "<div class='content'>" . $scrubbed_content . "</div>\n";
+        #$html .= $mark_read_button;
+        #$html .= $mark_later_button;
+        $html .= "<div class='end'></div>\n";
+        $html .= "</div>\n";
+        print $html;
+
+    }
+    print "</div>\n";
+    print_log("itemlist: $n_items items ($n_html_mcd html cached, $n_scrub_mcd scrubbed content cached, $n_scrub scrubbed)");
+}
+
+
+# Write a message to STDERR, prefixed with the script name, the wall clock
+# time (with microseconds) and the seconds elapsed since script start.
+sub print_log {
+    my $msg = "@_";
+    my $now = time();
+    printf STDERR "%s: %s.%06d %f: %s\n", $0, strftime("%H:%M:%S", localtime($now)), ($now - int($now)) * 1E6, $now - $start, $msg;
+}
+
+
+# Return the memcached key under which the scrubbed content of the item
+# with the given id is cached.
+sub scrubbed_content_key {
+    my ($item_id) = @_;
+    return "rss2html/scrubbed_content/$item_id";
+}
+