notmuch/test/T300-encoding.sh
David Bremner 126347b694 Import notmuch_0.38.2.orig.tar.xz
[dgit import orig notmuch_0.38.2.orig.tar.xz]
2023-12-01 07:51:09 -04:00

69 lines
3.1 KiB
Bash
Executable file
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# Encoding tests: charset handling for message bodies and decoding of
# RFC 2047 encoded words in subjects.
test_description="encoding issues"
# Load the shared test harness that sits next to this script.  The path is
# quoted so that a checkout directory containing whitespace is not word-split
# before being handed to the `.` builtin.  Abort if the harness cannot load.
. "$(dirname "$0")/test-lib.sh" || exit 1
# Subtest: a text/plain message labelled charset=unknown-8bit is added and
# then shown; the body text is expected to appear unchanged in the output.
test_begin_subtest "Message with text of unknown charset"
# The first argument is single-quoted so its inner double quotes reach
# add_message intact; presumably add_message evaluates each argument as a
# [key]=value pair -- confirm in test-lib.sh.
add_message '[content-type]="text/plain; charset=unknown-8bit"' \
    "[body]=irrelevant"
# stderr is merged into the captured text, so unexpected diagnostics show up
# in the comparison.  gen_msg_id is presumably set by add_message to the id
# of the message just added (confirm); the term is quoted so it cannot be
# word-split or glob-expanded.
output=$(notmuch show "id:${gen_msg_id}" 2>&1 | notmuch_show_sanitize_all)
# NOTE(review): the expected text is an exact-match literal and may contain
# non-printing characters; inspect with `cat -A` before editing it.
test_expect_equal "$output" " message{ id:XXXXX depth:0 match:1 excluded:0 filename:XXXXX
header{
Notmuch Test Suite <test_suite@notmuchmail.org> (2001-01-05) (inbox unread)
Subject: Message with text of unknown charset
From: Notmuch Test Suite <test_suite@notmuchmail.org>
To: Notmuch Test Suite <test_suite@notmuchmail.org>
Date: GENERATED_DATE
header}
body{
part{ ID: 1, Content-type: text/plain
irrelevant
part}
body}
message}"
# Subtest: a message whose 8bit body is declared as ISO-8859-2 must be found
# by searching for the same word typed as a non-ASCII term in this script.
test_begin_subtest "Search for ISO-8859-2 encoded message"
# The body is spelled as $'...' inside double quotes, so this shell passes
# the octal escapes through untouched; presumably add_message evaluates its
# arguments and that second pass turns them into raw ISO-8859-2 bytes --
# confirm in test-lib.sh.
add_message '[content-type]="text/plain; charset=iso-8859-2"' \
    '[content-transfer-encoding]=8bit' \
    '[subject]="ISO-8859-2 encoded message"' \
    "[body]=$'Czech word tu\350\362\341\350\350\355 means pinguin\'s.'"
# Use the search sanitizer, as the other search subtests in this file do, so
# the expectation carries the thread:XXX placeholder instead of a hard-coded
# thread id (which presumably depends on how many messages were added by
# earlier subtests).  stderr is merged in so diagnostics are compared too.
output=$(notmuch search tučňáččí 2>&1 | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; ISO-8859-2 encoded message (inbox unread)"
# Subtest: the subject is a single encoded word whose text contains spaces;
# the search result is expected to show it decoded.
test_begin_subtest "RFC 2047 encoded word with spaces"
add_message '[subject]="=?utf-8?q?encoded word with spaces?="'
# Quote the id: term so the expansion cannot be word-split or glob-expanded;
# stderr is merged in so diagnostics become part of the compared text.
output=$(notmuch search "id:${gen_msg_id}" 2>&1 | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; encoded word with spaces (inbox unread)"
# Subtest: two encoded words placed directly after one another, with nothing
# in between; the expected subject is their decoded texts joined together.
test_begin_subtest "RFC 2047 encoded words back to back"
add_message '[subject]="=?utf-8?q?encoded-words-back?==?utf-8?q?to-back?="'
# Quote the id: term so the expansion cannot be word-split or glob-expanded;
# stderr is merged in so diagnostics become part of the compared text.
output=$(notmuch search "id:${gen_msg_id}" 2>&1 | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; encoded-words-backto-back (inbox unread)"
# Subtest: encoded words that touch plain text with no separating whitespace;
# the expected subject has the decoded words fused to the neighbouring text.
test_begin_subtest "RFC 2047 encoded words without space before or after"
# NOTE(review): the blank between the closing double quote and the closing
# single quote is kept exactly as found; it looks unintentional -- confirm.
add_message '[subject]="=?utf-8?q?encoded?=word without=?utf-8?q?space?=" '
# Quote the id: term so the expansion cannot be word-split or glob-expanded;
# stderr is merged in so diagnostics become part of the compared text.
output=$(notmuch search "id:${gen_msg_id}" 2>&1 | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; encodedword withoutspace (inbox unread)"
# Subtest: the body is declared as iso-8859-1 but carries the bytes 0x93 and
# 0x94; the expected output shows them as curly double quotes, i.e. decoded
# as the subtest title says: Windows-1252.
test_begin_subtest "Mislabeled Windows-1252 encoding"
# The \x93/\x94 escapes sit in $'...' inside double quotes, so this shell
# leaves them alone; presumably add_message evaluates its arguments and
# produces the raw bytes -- confirm in test-lib.sh.
add_message '[content-type]="text/plain; charset=iso-8859-1"' \
    "[body]=$'This text contains \x93Windows-1252\x94 character codes.'"
# The delimiter is quoted so the expected text is written literally, with no
# parameter, command or backslash processing applied to it.
# NOTE(review): the expected text may contain non-printing characters;
# inspect with `cat -A` before editing it.
cat > EXPECTED <<'EOF'
message{ id:XXXXX depth:0 match:1 excluded:0 filename:XXXXX
header{
Notmuch Test Suite <test_suite@notmuchmail.org> (2001-01-05) (inbox unread)
Subject: Mislabeled Windows-1252 encoding
From: Notmuch Test Suite <test_suite@notmuchmail.org>
To: Notmuch Test Suite <test_suite@notmuchmail.org>
Date: GENERATED_DATE
header}
body{
part{ ID: 1, Content-type: text/plain
This text contains “Windows-1252” character codes.
part}
body}
message}
EOF
# Quote the id: term so the expansion cannot be word-split or glob-expanded;
# stderr is merged in so diagnostics end up in OUTPUT and fail the comparison.
notmuch show "id:${gen_msg_id}" 2>&1 | notmuch_show_sanitize_all > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
# Hand control back to the harness now that every subtest has run
# (test_done comes from test-lib.sh; presumably it reports the results and
# exits -- confirm there).
test_done