From b22346079f652403d6a5797fd15a04fd3d27ed3d Mon Sep 17 00:00:00 2001
From: "Peter J. Holzer" <hjp@hjp.at>
Date: Wed, 18 Dec 2019 11:41:09 +0100
Subject: [PATCH] Implement simple search

---
 index.cgi    |   1 +
 rss2html.css |   6 ++
 search.cgi   | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 189 insertions(+)
 create mode 100755 search.cgi

diff --git a/index.cgi b/index.cgi
index f45f21f..cfc3858 100755
--- a/index.cgi
+++ b/index.cgi
@@ -259,6 +259,7 @@ sub print_itemlist {
     print "<input type='hidden' name='fls' value='" . $q->escapeHTML($fls) . "'>\n" if defined $fls;
     print "<input type='hidden' name='sr' value='10'>\n";
     print "<input type='submit' value='»'>\n";
+    print "<a href='./search'>\x{1f50d}</a>\n";
     print "</form>\n";
     print "<div class='itemlist'>\n";
     my $n_items = 0;
diff --git a/rss2html.css b/rss2html.css
index 546bc7f..0e7df7f 100644
--- a/rss2html.css
+++ b/rss2html.css
@@ -171,6 +171,12 @@ h1 {
     margin: 0em;
     color: #FFF;
 }
+
+h1 a {
+    text-decoration: none;
+    color: inherit;
+}
+
 body {
     margin: 0em;
 }
diff --git a/search.cgi b/search.cgi
new file mode 100755
index 0000000..d0fc4ae
--- /dev/null
+++ b/search.cgi
@@ -0,0 +1,182 @@
+#!/usr/bin/perl
+
+# Parameters
+#
+# s=string
+#     search string
+
+use v5.24;
+use warnings;
+use utf8;
+
+use CGI;
+use Cache::Memcached;
+use DBI;
+use Data::Dumper;
+use Encode qw(encode_utf8 decode_utf8);
+use POSIX qw(strftime);
+use Rss2Html::Scrubber;
+use Time::HiRes qw(time);
+
+my $db_conn = "dbi:Pg:dbname=rss2html";
+$| = 1;
+
+my $start = time();
+
+my $q = CGI->new();
+binmode STDOUT, ":encoding(UTF-8)";
+my $mcd = Cache::Memcached->new(servers => ['127.0.0.1:11211']);
+
+search();
+
+sub search {
+    print "Content-Type: text/html; charset=utf-8\n";
+    print "Refresh: 600\n";
+    print "\n";
+
+    print "<meta name='viewport' content='width=device-width, initial-scale=1' />\n";
+    print "<link rel='stylesheet' type='text/css' href='rss2html.css'/>\n";
+    print "<h1><a href='/'>RSS 2 HTML</a></h1>\n";
+    print "<div class='lastupdate'>", strftime("%Y-%m-%d %H:%M:%S%z", localtime()), "</div>\n";
+
+    my $dbh = DBI->connect($db_conn, "", "");
+    $dbh->{sqlite_unicode} = 1;
+
+    my $fields = "feeds.id as feed_id, feeds.title as feed_title, allow_img, link, items.title as item_title, content, "
+               . "items.id as item_id, issued, read.username, lang";
+    my $tables = "items
+            join feeds on items.feed_id=feeds.id
+            left outer join read on (items.id=read.item_id and username=?)";
+    my @params = ($q->remote_user);
+    my @where;
+    my $search_expr = $q->param('s');
+    print_searchform();
+    if ($search_expr) {
+        my @terms = split(' ', $search_expr); # XXX - quotes?
+        for my $t (@terms) {
+            my $t2 = $t;
+            my $not = "";
+            if ($t2 =~ /^-/) {
+                $not = "not ";
+                $t2 = substr($t2, 1);
+            }
+            $t2 =~ s/_/\\_/g;
+            $t2 =~ s/%/\\%/g;
+            $t2 =~ s/\?/_/g;
+            $t2 =~ s/\*/%/g;
+            $t2 = "%$t2%";
+            push @where, "$not(items.title ilike ? or items.link ilike ? or items.content ilike ?)";
+            push @params, ($t2, $t2, $t2)
+        }
+        my $where = join(" and ", @where);
+        my $cmd = "select $fields from $tables where $where order by issued desc";
+        print_log("$cmd");
+        my $items = $dbh->selectall_arrayref($cmd, { Slice => {} }, @params);
+        print_itemlist($items, []);
+    }
+    print_log("search done");
+}
+
+
+sub print_searchform {
+    print "<form action='./search'>\n";
+    print "<input name='s' size='50'>\n";
+    print "<input type='submit' value='\x{1f50d}'>\n";
+    print "</form>\n";
+}
+
+
+sub print_itemlist {
+    my ($items, $feeds, $show_form, $remix) = @_;
+
+    # Optionally show form to select feeds, show later, etc.
+    # This should probably be outside of this function
+    if ($show_form) {
+        print "<form action='./'>\n";
+        print "<select name='fis'>\n";
+        print "<option value=''>\n";
+        for my $f (@$feeds) {
+            print "<option value='" . $q->escapeHTML($f->{id}) . "' " . ($f->{selected} ? "selected='selected'" : "") .">" . $q->escapeHTML($f->{title}) . "</option>\n";
+        }
+        print "</select>\n";
+        print "<label><input type='checkbox' name='sl'" . ($q->param('sl') ? " checked" : "") . "> Show later</label>\n";
+        my $fls = $q->param('fls');
+        print "<input type='hidden' name='fls' value='" . $q->escapeHTML($fls) . "'>\n" if defined $fls;
+        print "<input type='hidden' name='sr' value='10'>\n";
+        print "<input type='submit' value='»'>\n";
+        print "</form>\n";
+    }
+    print "<div class='itemlist'>\n";
+    my $n_items = 0;
+    my $n_html_mcd = 0;
+    my $n_scrub_mcd = 0;
+    my $n_scrub = 0;
+    print_log("print_itemlist: \$q=" . $q->self_url);
+    my $q1 = CGI->new($q);
+    print_log("print_itemlist: \$q1=" . $q1->self_url . " (before loop)");
+    my $q_later = CGI->new($q);
+
+    # Optional remix. Does that have to be here or can we do it before
+    # calling print_itemlist? 
+    if ($remix) {
+        print_log(scalar @$items . " before remix");
+        $items = remix($items);
+        print_log(scalar @$items . " after remix");
+    }
+
+    for my $item (@$items) {
+        $n_items++;
+        my $is_read = defined($item->{username});
+        my $scrubbed_content = $mcd->get(scrubbed_content_key($item->{item_id}));
+        if ($scrubbed_content) {
+            $scrubbed_content = decode_utf8($scrubbed_content);
+            $n_scrub_mcd++;
+        } else {
+            my $scrubber = Rss2Html::Scrubber->new(allow_img => $item->{allow_img});
+            $scrubbed_content = $scrubber->scrub($item->{content});
+            $mcd->set(scrubbed_content_key($item->{item_id}), encode_utf8($scrubbed_content), 3600);
+            $n_scrub++;
+        }
+        $q1->param('mark', $item->{item_id});
+        $q_later->param('later', $item->{item_id});
+        my $item_class = 'item' . ($is_read ? ' read' : '');
+        my $langattr = defined $item->{lang} ? "lang='$item->{lang}'" : "";
+        my $html = "";
+        $html .= "<div class='$item_class' $langattr>\n";
+        $html .= "<span class='itemno'>" . $item->{item_id} . "</span>\n";
+        $html .= "<span class='issued'>" . strftime('%Y-%m-%d %H:%M:%S', localtime($item->{issued})) . "</span>\n";
+        my $mark_read_button = "";
+        my $mark_later_button = "";
+        unless ($is_read) {
+            $mark_read_button = "<div class='op'><a href='" . $q->escapeHTML($q1->self_url) . "'>Mark read</a></div>\n";
+            $mark_later_button = "<div class='op'><a href='" . $q->escapeHTML($q_later->self_url) . "'>Show later</a></div>\n";
+        }
+        $html .= $mark_read_button;
+        $html .= $mark_later_button;
+        $html .= "<div class='feed'>" . $q->escapeHTML($item->{feed_title}) . "</div>\n";
+        $html .= "<h2><a href='./?redir=" . $q->escapeHTML($item->{item_id}) . "'>" . $q->escapeHTML($item->{item_title}) . "</a></h2>\n";
+        $html .= "<div class='content'>" . $scrubbed_content . "</div>\n";
+        #$html .= $mark_read_button;
+        #$html .= $mark_later_button;
+        $html .= "<div class='end'></div>\n";
+        $html .= "</div\n>";
+        print $html;
+
+    }
+    print "</div>\n";
+    print_log("itemlist: $n_items items ($n_html_mcd html cached, $n_scrub_mcd scrubbed content cached, $n_scrub scrubbed)");
+}
+
+
+sub print_log {
+    my $msg = "@_";
+    my $now = time();
+    printf STDERR "%s: %s.%06d %f: %s\n", $0, strftime("%H:%M:%S", localtime($now)), ($now - int($now)) * 1E6, $now - $start, $msg;
+}
+
+
+sub scrubbed_content_key {
+    my ($item_id) = @_;
+    return "rss2html/scrubbed_content/$item_id";
+}
+