Extract message ids from mbox files
Parses all mbox files given on the command line and extracts message IDs from the Message-ID, In-Reply-To and References headers. This is just a test script to see what message IDs look like in practice.
This commit is contained in:
parent
56eccc3741
commit
80352f727f
|
@ -0,0 +1,29 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import mailbox
|
||||
import re
|
||||
import sys
|
||||
|
||||
# Matches one angle-bracketed message id and captures the text between the
# brackets. Non-greedy, so several ids in one header value are returned
# separately instead of being merged into a single match.
MESSAGE_ID_RE = re.compile(r'<(.*?)>')

# (output tag, header name) pairs, in the order they are printed per message.
ID_HEADERS = (
    ('M', "Message-ID"),
    ('I', "In-Reply-To"),
    ('R', "References"),
)


def find_message_ids(value):
    """Return the list of message ids found in one header value.

    value is whatever a message lookup returned for the header: sometimes
    a string, sometimes an email.header.Header, or None when the header is
    absent. A Header's __str__ method returns something sensible, so the
    value is simply forced to a string before matching. None yields an
    empty list.
    """
    if value is None:
        return []
    return MESSAGE_ID_RE.findall(str(value))


def main(paths):
    """Print the message ids found in every mbox file in paths.

    For each file an "F <path>" line is printed, followed by one line per
    id: "M" for ids from Message-ID, "I" for In-Reply-To and "R" for
    References. If processing a message fails, the error and the offending
    message are written to stderr and the script exits with status 1.
    """
    for path in paths:
        print("F", path)
        mb = mailbox.mbox(path)
        try:
            for m in mb:
                try:
                    for tag, header in ID_HEADERS:
                        for match in find_message_ids(m[header]):
                            print(tag, match)
                except Exception as exc:
                    # Deliberately not a bare except: Ctrl-C and SystemExit
                    # must propagate instead of being reported as a broken
                    # message.
                    print("Error in message:", file=sys.stderr)
                    print(repr(exc), file=sys.stderr)
                    print(m.as_string(), file=sys.stderr)
                    sys.exit(1)
        finally:
            # Release the underlying file even when we bail out early.
            mb.close()


if __name__ == "__main__":
    main(sys.argv[1:])
|
||||
|
Loading…
Reference in New Issue