notmuch/test/T300-encoding.sh
Sebastian Poeplau 0f08bf7166 lib: detect mislabeled Windows-1252 parts
Use GMime functionality to detect mislabeled messages and apply the
correct (Windows) encoding instead.
2018-08-29 06:34:39 -03:00

69 lines
3.1 KiB
Bash
Executable file
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
test_description="encoding issues"
. $(dirname "$0")/test-lib.sh || exit 1
test_begin_subtest "Message with text of unknown charset"
add_message '[content-type]="text/plain; charset=unknown-8bit"' \
"[body]=irrelevant"
output=$(notmuch show id:${gen_msg_id} 2>&1 | notmuch_show_sanitize_all)
test_expect_equal "$output" " message{ id:XXXXX depth:0 match:1 excluded:0 filename:XXXXX
header{
Notmuch Test Suite <test_suite@notmuchmail.org> (2001-01-05) (inbox unread)
Subject: Message with text of unknown charset
From: Notmuch Test Suite <test_suite@notmuchmail.org>
To: Notmuch Test Suite <test_suite@notmuchmail.org>
Date: GENERATED_DATE
header}
body{
part{ ID: 1, Content-type: text/plain
irrelevant
part}
body}
message}"
test_begin_subtest "Search for ISO-8859-2 encoded message"
add_message '[content-type]="text/plain; charset=iso-8859-2"' \
'[content-transfer-encoding]=8bit' \
'[subject]="ISO-8859-2 encoded message"' \
"[body]=$'Czech word tu\350\362\341\350\350\355 means pinguin\'s.'" # ISO-8859-2 characters are generated by shell's escape sequences
output=$(notmuch search tučňáččí 2>&1 | notmuch_show_sanitize_all)
test_expect_equal "$output" "thread:0000000000000002 2001-01-05 [1/1] Notmuch Test Suite; ISO-8859-2 encoded message (inbox unread)"
test_begin_subtest "RFC 2047 encoded word with spaces"
add_message '[subject]="=?utf-8?q?encoded word with spaces?="'
output=$(notmuch search id:${gen_msg_id} 2>&1 | notmuch_show_sanitize)
test_expect_equal "$output" "thread:0000000000000003 2001-01-05 [1/1] Notmuch Test Suite; encoded word with spaces (inbox unread)"
test_begin_subtest "RFC 2047 encoded words back to back"
add_message '[subject]="=?utf-8?q?encoded-words-back?==?utf-8?q?to-back?="'
output=$(notmuch search id:${gen_msg_id} 2>&1 | notmuch_show_sanitize)
test_expect_equal "$output" "thread:0000000000000004 2001-01-05 [1/1] Notmuch Test Suite; encoded-words-backto-back (inbox unread)"
test_begin_subtest "RFC 2047 encoded words without space before or after"
add_message '[subject]="=?utf-8?q?encoded?=word without=?utf-8?q?space?=" '
output=$(notmuch search id:${gen_msg_id} 2>&1 | notmuch_show_sanitize)
test_expect_equal "$output" "thread:0000000000000005 2001-01-05 [1/1] Notmuch Test Suite; encodedword withoutspace (inbox unread)"
test_begin_subtest "Mislabeled Windows-1252 encoding"
add_message '[content-type]="text/plain; charset=iso-8859-1"' \
"[body]=$'This text contains \x93Windows-1252\x94 character codes.'"
cat <<EOF > EXPECTED
message{ id:XXXXX depth:0 match:1 excluded:0 filename:XXXXX
header{
Notmuch Test Suite <test_suite@notmuchmail.org> (2001-01-05) (inbox unread)
Subject: Mislabeled Windows-1252 encoding
From: Notmuch Test Suite <test_suite@notmuchmail.org>
To: Notmuch Test Suite <test_suite@notmuchmail.org>
Date: GENERATED_DATE
header}
body{
part{ ID: 1, Content-type: text/plain
This text contains “Windows-1252” character codes.
part}
body}
message}
EOF
notmuch show id:${gen_msg_id} 2>&1 | notmuch_show_sanitize_all > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
test_done