Extract message ids from mbox files
Parses all mbox files on the command line and extracts message ids from Message-Id, In-Reply-To and References headers. Just a test script to see what message ids look like in practice.
This commit is contained in:
parent
56eccc3741
commit
80352f727f
|
@ -0,0 +1,29 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import mailbox
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Matches anything enclosed in angle brackets; the captured group is the
# bare message id without the surrounding "<" and ">".
MSGID_RE = re.compile(r'<(.*?)>')

# Headers that carry message ids, paired with the one-letter tag printed in
# front of every id found in that header.
HEADER_TAGS = (
    ("Message-ID", "M"),
    ("In-Reply-To", "I"),
    ("References", "R"),
)


def extract_ids(value):
    """Return all angle-bracketed message ids found in a header value.

    A header value is sometimes a plain string and sometimes an
    email.header.Header.  The latter's __str__ method returns something
    sensible, so the value is always forced to a string before matching.
    """
    return MSGID_RE.findall(str(value))


def message_ids(m):
    """Yield (tag, message id) pairs for every id mentioned in message m.

    m only needs to support ``header in m`` and ``m[header]``.  Tags are
    'M' (Message-ID), 'I' (In-Reply-To) and 'R' (References), produced in
    that order.

    Raises KeyError if the message has no Message-ID header at all; the
    other two headers are optional.
    """
    if "Message-ID" not in m:
        # Make the "no Message-ID" case an explicit error instead of relying
        # on re.findall() choking on None.
        raise KeyError("Message-ID")
    for header, tag in HEADER_TAGS:
        if header in m:
            for msgid in extract_ids(m[header]):
                yield tag, msgid


def main(paths):
    """Print the message ids found in each mbox file named in paths.

    For every file an "F <path>" line is printed, followed by one
    "<tag> <id>" line per message id.  If a message cannot be processed, the
    exception and the full message are written to stderr and the script
    exits with status 1.
    """
    for f in paths:
        print("F", f)
        # create=False: this script only reads, so a mistyped path should
        # raise mailbox.NoSuchMailboxError rather than create an empty file.
        mb = mailbox.mbox(f, create=False)
        try:
            for m in mb:
                try:
                    for tag, msgid in message_ids(m):
                        print(tag, msgid)
                except Exception as exc:
                    # Exception, not a bare except: Ctrl-C and SystemExit
                    # must not be reported as a broken message.
                    print("Error in message:", repr(exc), file=sys.stderr)
                    print(m.as_string(), file=sys.stderr)
                    sys.exit(1)
        finally:
            mb.close()


if __name__ == "__main__":
    main(sys.argv[1:])
|
||||||
|
|
Loading…
Reference in New Issue