From c9a336e58f9ad4ac150b22bc07fbae19c4fccf57 Mon Sep 17 00:00:00 2001 From: "Peter J. Holzer" Date: Tue, 18 Jun 2019 22:11:23 +0200 Subject: [PATCH] Handle cite attribute --- mbox2web | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mbox2web b/mbox2web index 6dacbf1..d148017 100755 --- a/mbox2web +++ b/mbox2web @@ -469,12 +469,15 @@ class HTMLPart(html.parser.HTMLParser): if href: self.base = href[0] elif tag in self.allowed_tags: + cleaned_attrs, extra = self.clean_attrs(tag, attrs) attrstr = "".join( [' %s="%s"' % (a[0], html.escape(a[1])) if a[1] else ' %s' % (a[0]) - for a in self.clean_attrs(tag, attrs) + for a in cleaned_attrs ] ) self.content.append("<%s%s>" % ( tag, attrstr )) + if extra: + self.content.append(extra) elif tag in self.hide_tags: self.hide = True elif tag in self.ignore_tags: @@ -507,6 +510,7 @@ class HTMLPart(html.parser.HTMLParser): "noshade", "type", ] clean_attrs = [] + extra = None for a in attrs: if a[0] in safe_attrs: clean_attrs.append(a) @@ -527,9 +531,14 @@ class HTMLPart(html.parser.HTMLParser): print("Ignored src attribute", a, file=sys.stderr) elif a[0] == "target": pass + elif a[0] == "cite": + if a[1].startswith("mid:"): + mid = a[1][4:] + encmid = encode_message_id(mid) + extra = "\u2397" % encmid else: print("Encountered unknown attribute", a, file=sys.stderr) - return clean_attrs + return clean_attrs, extra class TextEnrichedPart: