notmuch/test/T300-encoding.sh
Sebastian Poeplau 7074bb8f80 test: add known broken test for mislabeled Windows-1252 encoding
Messages that contain Windows-1252 are frequently mislabeled as ISO
8859-1, which may result in non-printable characters when displaying
the message. The test asserts that such characters (in this case
curved quotes) are displayed correctly.
2018-08-29 06:28:13 -03:00

70 lines
3.1 KiB
Bash
Executable file
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# Subtests covering charset and header-encoding edge cases in notmuch.
test_description="encoding issues"

# Source test-lib.sh from the directory that holds this script. The path is
# quoted so that a location containing whitespace or glob characters is not
# word-split; bail out if the library cannot be loaded.
. "$(dirname "$0")/test-lib.sh" || exit 1
# A text/plain part labelled with the placeholder charset "unknown-8bit"
# must still be shown; the body itself is plain ASCII.
test_begin_subtest "Message with text of unknown charset"
add_message '[content-type]="text/plain; charset=unknown-8bit"' \
    "[body]=irrelevant"
# stderr is folded into the capture so any diagnostic breaks the comparison.
output=$(notmuch show "id:${gen_msg_id}" 2>&1 | notmuch_show_sanitize_all)
# Per notmuch-show(1), every marker of the text format starts with a
# Control-L (form feed). It is spelled out via $'\f' instead of being
# embedded as a raw control character, which is invisible and easily lost
# by editors and web viewers.
form_feed=$'\f'
test_expect_equal "$output" "${form_feed}message{ id:XXXXX depth:0 match:1 excluded:0 filename:XXXXX
${form_feed}header{
Notmuch Test Suite <test_suite@notmuchmail.org> (2001-01-05) (inbox unread)
Subject: Message with text of unknown charset
From: Notmuch Test Suite <test_suite@notmuchmail.org>
To: Notmuch Test Suite <test_suite@notmuchmail.org>
Date: GENERATED_DATE
${form_feed}header}
${form_feed}body{
${form_feed}part{ ID: 1, Content-type: text/plain
irrelevant
${form_feed}part}
${form_feed}body}
${form_feed}message}"
# An 8bit ISO-8859-2 body must be findable through a UTF-8 search term.
test_begin_subtest "Search for ISO-8859-2 encoded message"
# The octal escapes \350\362\341\350\350\355 are the ISO-8859-2 bytes for
# the letters that follow "tu" in the search term used below.
# NOTE(review): the $'...' sits inside double quotes, so this shell passes
# it through literally; presumably add_message evaluates it later - confirm
# against test-lib.sh.
add_message \
    '[content-type]="text/plain; charset=iso-8859-2"' \
    '[content-transfer-encoding]=8bit' \
    '[subject]="ISO-8859-2 encoded message"' \
    "[body]=$'Czech word tu\350\362\341\350\350\355 means pinguin\'s.'"
expected="thread:0000000000000002 2001-01-05 [1/1] Notmuch Test Suite; ISO-8859-2 encoded message (inbox unread)"
output=$(notmuch search tučňáččí 2>&1 | notmuch_show_sanitize_all)
test_expect_equal "$output" "$expected"
# RFC 2047 forbids raw spaces inside an encoded-word; the expectation below
# is that the subject is decoded regardless.
test_begin_subtest "RFC 2047 encoded word with spaces"
add_message '[subject]="=?utf-8?q?encoded word with spaces?="'
# The search term is quoted so the expanded id is never word-split or globbed.
output=$(notmuch search "id:${gen_msg_id}" 2>&1 | notmuch_show_sanitize)
test_expect_equal "$output" "thread:0000000000000003 2001-01-05 [1/1] Notmuch Test Suite; encoded word with spaces (inbox unread)"
# Two encoded-words with no whitespace between them; the expected subject is
# their decoded texts joined directly.
test_begin_subtest "RFC 2047 encoded words back to back"
add_message '[subject]="=?utf-8?q?encoded-words-back?==?utf-8?q?to-back?="'
# The search term is quoted so the expanded id is never word-split or globbed.
output=$(notmuch search "id:${gen_msg_id}" 2>&1 | notmuch_show_sanitize)
test_expect_equal "$output" "thread:0000000000000004 2001-01-05 [1/1] Notmuch Test Suite; encoded-words-backto-back (inbox unread)"
# Encoded-words that touch the surrounding plain text with no separating
# whitespace; the expected subject has them decoded in place.
test_begin_subtest "RFC 2047 encoded words without space before or after"
add_message '[subject]="=?utf-8?q?encoded?=word without=?utf-8?q?space?=" '
# The search term is quoted so the expanded id is never word-split or globbed.
output=$(notmuch search "id:${gen_msg_id}" 2>&1 | notmuch_show_sanitize)
test_expect_equal "$output" "thread:0000000000000005 2001-01-05 [1/1] Notmuch Test Suite; encodedword withoutspace (inbox unread)"
# The body carries bytes 0x93/0x94, which are curved double quotes in
# Windows-1252 but C1 control codes in the declared ISO-8859-1 charset.
# The subtest is flagged as known broken; EXPECTED records the desired
# output, with the quotes rendered as real curved quotes.
test_begin_subtest "Mislabeled Windows-1252 encoding"
test_subtest_known_broken
add_message '[content-type]="text/plain; charset=iso-8859-1"' \
    "[body]=$'This text contains \x93Windows-1252\x94 character codes.'"
# Per notmuch-show(1), every marker of the text format starts with a
# Control-L (form feed). It is injected through a variable (the here-doc
# delimiter is unquoted, so it expands) instead of being embedded as a raw,
# invisible control character.
form_feed=$'\f'
cat <<EOF > EXPECTED
${form_feed}message{ id:XXXXX depth:0 match:1 excluded:0 filename:XXXXX
${form_feed}header{
Notmuch Test Suite <test_suite@notmuchmail.org> (2001-01-05) (inbox unread)
Subject: Mislabeled Windows-1252 encoding
From: Notmuch Test Suite <test_suite@notmuchmail.org>
To: Notmuch Test Suite <test_suite@notmuchmail.org>
Date: GENERATED_DATE
${form_feed}header}
${form_feed}body{
${form_feed}part{ ID: 1, Content-type: text/plain
This text contains “Windows-1252” character codes.
${form_feed}part}
${form_feed}body}
${form_feed}message}
EOF
# stderr is folded into OUTPUT so any diagnostic breaks the comparison.
notmuch show "id:${gen_msg_id}" 2>&1 | notmuch_show_sanitize_all > OUTPUT
test_expect_equal_file EXPECTED OUTPUT

test_done