From fdce7eb54532a0bdbd56ca3d56099b190eb7284c Mon Sep 17 00:00:00 2001 From: David Edmondson Date: Sat, 30 Apr 2016 07:51:47 +0100 Subject: [PATCH] emacs: Observe the charset of MIME parts when reading them. `notmuch--get-bodypart-raw' previously assumed that all non-binary MIME parts could be successfully read by assuming that they were UTF-8 encoded. This was demonstrated to be wrong, specifically when a part was marked as ISO8859-1 and included accented characters (which were incorrectly rendered as a result). Rather than assuming UTF-8, attempt to use the part's declared charset when reading it, falling back to US-ASCII if the declared charset is unknown, unsupported or invalid. --- emacs/notmuch-lib.el | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/emacs/notmuch-lib.el b/emacs/notmuch-lib.el index 78978ee3..f05ded6f 100644 --- a/emacs/notmuch-lib.el +++ b/emacs/notmuch-lib.el @@ -23,6 +23,7 @@ ;;; Code: +(require 'mm-util) (require 'mm-view) (require 'mm-decode) (require 'cl) @@ -572,7 +573,20 @@ the given type." ,@(when process-crypto '("--decrypt")) ,(notmuch-id-to-query (plist-get msg :id)))) (coding-system-for-read - (if binaryp 'no-conversion 'utf-8))) + (if binaryp 'no-conversion + (let ((coding-system (mm-charset-to-coding-system + (plist-get part :content-charset)))) + ;; Sadly, + ;; `mm-charset-to-coding-system' seems + ;; to return things that are not + ;; considered acceptable values for + ;; `coding-system-for-read'. + (if (coding-system-p coding-system) + coding-system + ;; RFC 2047 says that the default + ;; charset is US-ASCII. RFC6657 + ;; complicates this somewhat. + 'us-ascii))))) (apply #'call-process notmuch-command nil '(t nil) nil args) (buffer-string)))))) (when (and cache data)