Extract message ids from mbox files

Parses all mbox files on the command line and extracts message ids from
Message-Id, In-Reply-To and References headers.

Just a test script to see what message ids look like in practice.
This commit is contained in:
Peter J. Holzer 2019-01-19 21:18:14 +01:00
parent 56eccc3741
commit 80352f727f
1 changed file with 29 additions and 0 deletions

29
get_message_ids Executable file
View File

@@ -0,0 +1,29 @@
#!/usr/bin/python3
import mailbox
import re
import sys
# Matches one angle-bracketed message id; group 1 is the id without the
# surrounding "<" and ">".
MSGID_RE = re.compile(r'<(.*?)>')


def extract_ids(value):
    """Return every angle-bracketed message id found in a header value.

    Args:
        value: The header value as returned by ``message[name]``: a string,
            an ``email.header.Header`` instance, or ``None`` when the header
            is absent.

    Returns:
        A list of the ids in order of appearance, without the enclosing
        angle brackets. Empty if the header is absent or contains no ids.
    """
    if value is None:
        return []
    # Header values are sometimes plain strings and sometimes
    # email.header.Header objects. The latter's __str__ returns something
    # sensible, so force everything to a string before matching.
    return MSGID_RE.findall(str(value))


def main(paths):
    """Print the message ids found in each mbox file in *paths*.

    For every file an ``F <path>`` line is printed, followed, per message,
    by ``M <id>`` lines (Message-ID), ``I <id>`` lines (In-Reply-To) and
    ``R <id>`` lines (References).

    Processing stops at the first message that cannot be handled (including
    one without a Message-ID header): the error and the full message are
    written to stderr so the offending message can be inspected.

    Args:
        paths: Iterable of mbox file names.

    Returns:
        0 on success, 1 if a file does not exist or a message caused an
        error.
    """
    for path in paths:
        print("F", path)
        try:
            # create=False: this script only reads, so a mistyped path must
            # not silently create an empty mbox file.
            mb = mailbox.mbox(path, create=False)
        except mailbox.NoSuchMailboxError:
            print("No such mbox file:", path, file=sys.stderr)
            return 1
        try:
            for msg in mb:
                try:
                    message_id = msg["Message-ID"]
                    if message_id is None:
                        raise ValueError("message has no Message-ID header")
                    headers = (
                        ("M", message_id),
                        ("I", msg["In-Reply-To"]),
                        ("R", msg["References"]),
                    )
                    for tag, value in headers:
                        for found in extract_ids(value):
                            print(tag, found)
                except Exception as exc:
                    # Deliberately fail fast: dump the offending message and
                    # stop. KeyboardInterrupt/SystemExit are not caught here.
                    print("Error in message:", file=sys.stderr)
                    print(repr(exc), file=sys.stderr)
                    print(msg.as_string(), file=sys.stderr)
                    return 1
        finally:
            mb.close()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))