不止热门角色,我们为你扩展了更多细分角色分类,覆盖职场提升、商业增长、内容创作、学习规划等多元场景。精准匹配不同目标,让每一次生成都更有方向、更高命中率。
立即探索更多角色分类,找到属于你的增长加速器。
以下为针对 summarize_text 函数的完整测试设计与实现,基于 pytest,覆盖正常流程、边界条件与异常场景,确保测试代码可读、可维护且可直接运行。
# test_summarize_text.py
import pytest
def summarize_text(text: str, max_sentences: int = 3, max_chars: int | None = None) -> str:
'''
概要: 从给定文本中提取若干关键句,保持原始顺序。
适用: 中英文与混合标点文本,保证输出可读性。
参数:
- text: 原文,支持包含换行、emoji与中文标点
- max_sentences: 最多包含的句子数,>=1
- max_chars: 可选,最终文本的最大字符数(不截断句中词)
行为:
1. 规范空白,将连续空白折叠为单一空格。
2. 以 . ! ? 。 ! ? 作为句末符进行切分,保留标点。
3. 去除重复句(大小写与空白差异视为同句)。
4. 按顺序选取不超过 max_sentences 的句子;若设置 max_chars,
在不破坏已加入句子的前提下尽量满足限制。
例子:
输入: '今天很冷!但是太阳很好。We still go hiking? 还是要出门。'
输出(2句): '今天很冷! We still go hiking?'
'''
if not isinstance(text, str):
raise TypeError('text must be str')
if not isinstance(max_sentences, int) or max_sentences < 1:
raise ValueError('max_sentences must be >= 1')
if max_chars is not None:
if not isinstance(max_chars, int) or max_chars < 10:
raise ValueError('max_chars must be None or >= 10')
import re
norm = re.sub(r'\s+', ' ', text.strip())
if not norm:
return ''
endings = set('。!?.!?')
sentences = []
buf = []
for ch in norm:
buf.append(ch)
if ch in endings:
s = ''.join(buf).strip()
if s:
sentences.append(s)
buf = []
if buf:
tail = ''.join(buf).strip()
if tail:
sentences.append(tail)
seen = set()
unique = []
for s in sentences:
key = re.sub(r'\s+', ' ', s).casefold()
if key not in seen:
seen.add(key)
unique.append(s)
selected = unique[:max_sentences]
if max_chars is not None:
out = []
curr = 0
for s in selected:
candidate = ((' ' if out else '') + s)
if curr + len(candidate) <= max_chars:
out.append(s)
curr += len(candidate)
else:
break
return ' '.join(out)
return ' '.join(selected)
# ----------------- Parameter validation tests -----------------
@pytest.mark.parametrize("bad_text", [None, 123, 3.14, b"bytes"])
def test_invalid_text_type_raises_typeerror(bad_text):
    """Any non-str ``text`` argument is rejected with TypeError."""
    expected_error = TypeError
    with pytest.raises(expected_error):
        summarize_text(bad_text)
@pytest.mark.parametrize("bad_ms", [0, -1, 1.5, "2"])
def test_invalid_max_sentences_raises_valueerror(bad_ms):
    """Non-int or non-positive ``max_sentences`` is rejected with ValueError."""
    expected_error = ValueError
    with pytest.raises(expected_error):
        summarize_text("ok.", max_sentences=bad_ms)
@pytest.mark.parametrize("bad_mc", [9, "100", 0, -5, 3.14])
def test_invalid_max_chars_raises_valueerror(bad_mc):
    """``max_chars`` below 10 or of a non-int type is rejected with ValueError."""
    expected_error = ValueError
    with pytest.raises(expected_error):
        summarize_text("ok.", max_chars=bad_mc)
def test_max_chars_none_is_ok():
    """Leaving ``max_chars`` unset, or passing None explicitly, applies no cap."""
    expected = "a."

    implicit = summarize_text("a.")
    assert implicit == expected

    explicit = summarize_text("a.", max_chars=None)
    assert explicit == expected
# ----------------- Empty and whitespace-only input -----------------
@pytest.mark.parametrize("txt", ["", " ", "\n\t \n"])
def test_empty_or_whitespace_only_returns_empty(txt):
    """Empty and whitespace-only inputs summarize to the empty string."""
    summary = summarize_text(txt)
    assert summary == ""
# ----------------- Core behavior and ordering -----------------
def test_spec_example_two_sentences():
    """With ``max_sentences=2`` the first two sentences are kept, in order."""
    text = "今天很冷!但是太阳很好。We still go hiking? 还是要出门。"
    out = summarize_text(text, max_sentences=2)
    # Selection is positional (first N unique sentences), so the second
    # sentence is the Chinese one, not the later English question.
    assert out == "今天很冷! 但是太阳很好。"
def test_default_max_sentences_three():
    """Without an explicit limit, the first three sentences are returned."""
    source = "今天很冷!但是太阳很好。We still go hiking? 还是要出门。"
    expected = "今天很冷! 但是太阳很好。 We still go hiking?"
    # Relies on the default max_sentences of 3.
    summary = summarize_text(source)
    assert summary == expected
def test_tail_without_terminal_punctuation_is_included():
    """Text that never reaches a sentence terminator is still returned."""
    source = "No punctuation at end"
    summary = summarize_text(source)
    assert summary == "No punctuation at end"
# ----------------- Whitespace normalization -----------------
def test_whitespace_normalization_across_spaces_tabs_newlines():
    """Spaces, tabs and newlines between words collapse to single spaces."""
    messy = "Hi \n\n there!\tHow are\tyou?\nGood."
    expected = "Hi there! How are you? Good."
    summary = summarize_text(messy)
    assert summary == expected
# ----------------- Deduplication (case/whitespace-insensitive) -----------------
def test_deduplication_case_and_whitespace_insensitive():
    """Sentences differing only in letter case count as duplicates."""
    repeated = "Hello world! hello world! HELLO WORLD!"
    # The limit is deliberately larger than the sentence count so that only
    # deduplication can shorten the result.
    summary = summarize_text(repeated, max_sentences=5)
    assert summary == "Hello world!"
# ----------------- Edge: consecutive punctuations -----------------
def test_consecutive_punctuations_create_standalone_punct_sentence():
    """A doubled terminator yields a sentence consisting of punctuation only."""
    source = "Wow!! Wow!"
    summary = summarize_text(source, max_sentences=5)
    # Split result is ["Wow!", "!", "Wow!"]; the repeated "Wow!" is then
    # removed by deduplication, leaving ["Wow!", "!"].
    expected = "Wow! !"
    assert summary == expected
# ----------------- max_sentences with duplicates -----------------
def test_max_sentences_respects_unique_sentences():
    """The sentence limit is applied after duplicates have been removed."""
    source = "A. A. B. C."
    expected = "A. B. C."
    summary = summarize_text(source, max_sentences=3)
    assert summary == expected
# ----------------- max_chars behavior -----------------
def test_max_chars_exact_boundary_and_not_exceed():
    """A sentence fits when the total equals ``max_chars`` and not one below."""
    # max_chars must be >= 10, so the boundary is exercised at 11 and 10.
    text = "abcde. fgh. ijk."
    # Running lengths: "abcde." = 6, + " fgh." = 11, + " ijk." = 16
    out = summarize_text(text, max_sentences=5, max_chars=11)
    assert out == "abcde. fgh."
    out2 = summarize_text(text, max_sentences=5, max_chars=10)
    assert out2 == "abcde."
def test_max_chars_first_sentence_too_long_returns_empty():
    """If even the first sentence exceeds ``max_chars`` the result is empty."""
    # The sentence is 11 characters long, one more than the cap of 10, and
    # sentences are never truncated, so nothing can be selected.
    oversized = "abcdefghij."
    summary = summarize_text(oversized, max_sentences=3, max_chars=10)
    assert summary == ""
def test_max_chars_partial_selection_two_vs_one():
    """The cap decides whether one or two leading sentences are kept."""
    source = "abcd. efghi. jk."

    # "abcd." is 5 chars and " efghi." adds 7, so a cap of 12 admits both.
    two_sentences = summarize_text(source, max_sentences=3, max_chars=12)
    assert two_sentences == "abcd. efghi."

    # One character less and the second sentence no longer fits.
    one_sentence = summarize_text(source, max_sentences=3, max_chars=11)
    assert one_sentence == "abcd."
# ----------------- Unicode/Emoji and Chinese punctuation -----------------
def test_unicode_emoji_and_chinese_punctuation():
    """Emoji inside sentences are preserved and do not affect splitting."""
    source = "今天天气☀️很好!我们去🏖️吗?好呀"
    expected = "今天天气☀️很好! 我们去🏖️吗? 好呀"
    summary = summarize_text(source, max_sentences=3)
    assert summary == expected
# ----------------- Single punctuation as sentence -----------------
def test_single_punctuation_sentence():
    """A lone terminator is treated as a complete one-character sentence."""
    summary = summarize_text(".")
    assert summary == "."
以“更快、更稳、更省心”为核心价值,帮助研发团队在数分钟内生成可直接运行的 Python 单元测试:
请确认您是否已完成支付