不止热门角色,我们还为你扩展了更多细分角色分类,覆盖职场提升、商业增长、内容创作、学习规划等多元场景。精准匹配不同目标,让每一次生成都方向更明确、命中率更高。
立即探索更多角色分类,找到属于你的增长加速器。
# -*- coding: utf-8 -*-
import re
# Rules enforced by EMAIL_PATTERN (summary):
# - local: ASCII letters, digits, "_", "+" and "."; it may not start or end
#   with a dot and may not contain consecutive dots (guaranteed by the
#   "segment, then dot-separated segments" structure).
# - domain: 1-10 labels of 1-63 characters each; every label starts and ends
#   with a letter or digit and may contain "-" in between.
# - tld: 2-10 ASCII letters.
# - Matching is case-insensitive.
#
# Named groups: local, domain, tld.
#
# Note: the "domain" group holds only the labels before the TLD, never the
# TLD itself. For "sub-example.domain.io", domain captures
# "sub-example.domain" and tld captures "io".
#
# re.ASCII is combined with re.IGNORECASE on purpose: for str patterns,
# IGNORECASE alone makes [A-Za-z] also match four non-ASCII characters
# (U+0130, U+0131, U+017F and U+212A), which would break the
# "ASCII letters only" rules above.
#
# The notes inside the verbose pattern are part of the pattern string and are
# intentionally left untouched.
EMAIL_PATTERN = re.compile(r"""
^
(?P<local>
[A-Za-z0-9_+]+ # 第一段:不允许以点开头
(?:\.[A-Za-z0-9_+]+)* # 后续段:由点连接,避免连续点与结尾点
)
@
(?P<domain>
# 1–10 个标签
(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?) # 第一个标签:1–63,首尾字母/数字,中间可含连字符
(?:\.(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?)){0,9} # 追加 0–9 个
)
\.
(?P<tld>[A-Za-z]{2,10})
$
""", re.IGNORECASE | re.VERBOSE | re.ASCII)
def is_valid_email(email: str) -> bool:
    """Return True when *email* matches EMAIL_PATTERN in full, else False.

    Meant for data-validation call sites that only need a yes/no answer.
    """
    matched = EMAIL_PATTERN.fullmatch(email)
    return matched is not None
def parse_email(email: str):
    """Split *email* into its named groups.

    Returns:
        A dict with the keys ``local``, ``domain`` and ``tld`` when the whole
        string matches EMAIL_PATTERN; ``None`` when it does not.

    It is suited to being called after ``is_valid_email`` has accepted the
    address, so the groups can be read safely.
    """
    match = EMAIL_PATTERN.fullmatch(email)
    if not match:
        return None
    return match.groupdict()
if __name__ == "__main__":
    # Demo: match the sample address and show its named groups.
    demo = "user.name+tag_1@sub-example.domain.io"
    m = EMAIL_PATTERN.fullmatch(demo)
    print("Demo matched:", bool(m))
    if m:
        print("Groups:", m.groupdict())

    # Addresses that must be accepted.
    ok_cases = [
        "user.name+tag_1@sub-example.domain.io",
        "A_b+c@x-1.io",
        "a@a.bc",  # shortest domain label + shortest TLD
        # Domain label of exactly 63 characters (16 + 47). It is built
        # programmatically so the upper length boundary is hit precisely
        # instead of relying on a hand-typed run of letters.
        "name@very-long-label-" + "a" * 47 + ".com",
        # 10 domain labels (the TLD is not counted)
        "u@a.a.a.a.a.a.a.a.a.a.zz",
    ]

    # Addresses that must be rejected (boundary and illegal cases).
    bad_cases = [
        ".abc@domain.com",  # local part starts with a dot
        "abc.@domain.com",  # local part ends with a dot
        "a..b@domain.com",  # two consecutive dots in the local part
        "user@-host.com",  # domain label starts with a hyphen
        "user@host-.com",  # domain label ends with a hyphen
        "user@sub_domain.com",  # underscore is not allowed in a domain label
        "user@x." + "a" * 1,  # 1-letter TLD does not satisfy the 2-10 rule
        "user@host.c",  # TLD shorter than 2 letters
        "user@host.comm3",  # TLD contains a non-letter character
        # Domain label of 64 characters (one over the limit)
        "user@" + "a" * 64 + ".com",
        # More than 10 domain labels (11 labels before the TLD)
        "u@" + ".".join(["a"] * 11) + ".com",
    ]

    for s in ok_cases:
        assert is_valid_email(s), f"Should be valid: {s}"
    for s in bad_cases:
        assert not is_valid_email(s), f"Should be invalid: {s}"
    print("All tests passed.")
详细功能说明
local 分组:
domain 分组:
tld 分组:
完整匹配:
使用示例与测试用例
演示示例(题中示例)
快速验证
边界测试(见代码断言):
常见匹配场景说明
性能注意事项
import re
from typing import Optional, Dict
# Regex: match one Nginx access-log line and extract the named groups
# ip, ts, method, path, status and rt_ms.
#
# re.ASCII keeps \d to 0-9 and \s to ASCII whitespace. Without it, \d accepts
# any Unicode decimal digit, so a status or response time written with
# non-ASCII digits would be parsed as if it were valid. As a consequence,
# fields are considered to be separated by ASCII whitespace only.
NGINX_LOG_RE = re.compile(r"""
^
(?P<ip>(?:\d{1,3}\.){3}\d{1,3}) # IPv4 address (not range-validated)
\s+-\s+-\s+ # literal " - - " with spaces
\[
(?P<ts>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2})
\]
\s+
(?P<method>[A-Z]+) # HTTP method in uppercase
\s+
(?P<path>\S+) # path without spaces
\s+
HTTP/\d\.\d # HTTP version like 1.1, 2.0
\s+
(?P<status>\d{3}) # 3-digit status code
\s+ # at least one space after status
(?:.*\s)? # optional middle fields (size, referrer, UA, etc.)
(?P<rt_ms>\d+)ms # integer response time followed by "ms"
$
""", re.VERBOSE | re.ASCII)
def parse_nginx_line(line: str) -> Optional[Dict[str, str]]:
    """Extract the named fields from one Nginx access-log line.

    Returns:
        A dict keyed by ip, ts, method, path, status and rt_ms when the line
        matches NGINX_LOG_RE; ``None`` when it does not.
    """
    match = NGINX_LOG_RE.match(line)
    return match.groupdict() if match else None
Detailed explanation
Usage examples and tests
def _show(line: str) -> None:
    """Print ("OK", fields) when *line* parses, otherwise ("NO MATCH", line)."""
    fields = parse_nginx_line(line)
    if fields:
        outcome = ("OK", fields)
    else:
        outcome = ("NO MATCH", line)
    print(outcome)
# Lines that are expected to parse.
pos_lines = [
    "203.0.113.12 - - [2024-08-12 14:23:11] GET /api/v1/items?id=42 HTTP/1.1 200 512 - Mozilla/5.0 18ms",
    "198.51.100.99 - - [2025-01-01 00:00:00] POST /index.html HTTP/1.0 404 1024 \"-\" curl/8.4.0 123ms",
    # No optional middle fields between the status and the response time.
    "192.0.2.5 - - [2024-12-31 23:59:59] HEAD /ping HTTP/1.1 200 18ms",
]

# Lines that are expected to be rejected, one reason each.
neg_lines = [
    # Timestamp uses "/" instead of "-".
    "203.0.113.12 - - [2024/08/12 14:23:11] GET /api/v1 HTTP/1.1 200 18ms",
    # Method is not uppercase.
    "203.0.113.12 - - [2024-08-12 14:23:11] get /api HTTP/1.1 200 18ms",
    # Path contains a space.
    "203.0.113.12 - - [2024-08-12 14:23:11] GET /api v1 HTTP/1.1 200 18ms",
    # Status has 4 digits.
    "203.0.113.12 - - [2024-08-12 14:23:11] GET /api HTTP/1.1 2000 18ms",
    # Space between the number and "ms".
    "203.0.113.12 - - [2024-08-12 14:23:11] GET /api HTTP/1.1 200 18 ms",
    # IPv6 client address, which this pattern does not support.
    "2001:db8::1 - - [2024-08-12 14:23:11] GET /api HTTP/1.1 200 18ms",
]

# Show the outcome for every sample, positives first.
for line in pos_lines + neg_lines:
    _show(line)

# Minimal assertion checks: two positives are compared field by field, and
# every negative sample must yield None.
assert parse_nginx_line(pos_lines[0]) == {
    'ip': '203.0.113.12',
    'ts': '2024-08-12 14:23:11',
    'method': 'GET',
    'path': '/api/v1/items?id=42',
    'status': '200',
    'rt_ms': '18',
}
assert parse_nginx_line(pos_lines[2]) == {
    'ip': '192.0.2.5',
    'ts': '2024-12-31 23:59:59',
    'method': 'HEAD',
    'path': '/ping',
    'status': '200',
    'rt_ms': '18',
}
assert all(parse_nginx_line(bad) is None for bad in neg_lines)
Common matching scenarios
Performance notes
import re
from typing import Optional, Dict
# ISO 8601 extended format (YYYY-MM-DDTHH:MM:SS) with optional milliseconds
# (1-3 digits); the timezone is mandatory and must be "Z" or ±HH:MM.
# What the pattern validates:
# - Leap-year rule (the only way to match 02-29): the year is divisible by
#   400, or divisible by 4 but not by 100.
# - Month 01-12, the correct maximum day for each month, hour 00-23,
#   minute and second 00-59.
#
# re.ASCII keeps \d to 0-9. Without it, \d accepts any Unicode decimal digit,
# so a timestamp written with non-ASCII digits would pass validation.
#
# The notes inside the verbose pattern are part of the pattern string and are
# intentionally left untouched.
ISO8601_DT_PATTERN = re.compile(
    r"""
^
(?P<date>
(?:
# 非闰年的通用日期(含2月1-28)
(?:\d{4})-
(?:
(?:01|03|05|07|08|10|12)-(?:0[1-9]|[12]\d|3[01]) # 31天的月
|
(?:04|06|09|11)-(?:0[1-9]|[12]\d|30) # 30天的月
|
(?:02-(?:0[1-9]|1\d|2[0-8])) # 2月1-28
)
|
# 闰年专用的 2-29
(?:
(?:
(?:\d{2}(?:0[48]|[2468][048]|[13579][26])) # 非整百年,后两位 %4==0(不含00)
|
(?:[02468][048]|[13579][26])00 # 整百年且 %400==0
)
-02-29
)
)
)
T
(?P<time>
(?:[01]\d|2[0-3]) # 小时 00-23
:
[0-5]\d # 分 00-59
:
[0-5]\d # 秒 00-59
)
(?:\.(?P<ms>\d{1,3}))? # 可选毫秒 1-3 位
(?P<tz>
Z
|
[+\-](?:[01]\d|2[0-3]):[0-5]\d # 时区 ±HH:MM(此处允许 00:00–23:59)
)
$
""",
    re.VERBOSE | re.ASCII,
)
def is_iso8601_dt(s: str) -> bool:
    """Return True when *s* matches ISO8601_DT_PATTERN in full, else False."""
    matched = ISO8601_DT_PATTERN.fullmatch(s)
    return matched is not None
def parse_iso8601_dt(s: str) -> Optional[Dict[str, Optional[str]]]:
    """Split *s* into the named groups of ISO8601_DT_PATTERN.

    Returns:
        ``None`` when *s* does not match in full; otherwise a dict with:
        - date: YYYY-MM-DD
        - time: HH:MM:SS
        - ms: the millisecond digits (1-3 of them), or ``None`` when absent
        - tz: the timezone string ("Z" or ±HH:MM)
    """
    match = ISO8601_DT_PATTERN.fullmatch(s)
    if not match:
        return None
    return match.groupdict()
详细的功能说明
使用示例和测试用例
# Inputs that must be accepted.
positives = [
    "2023-12-31T23:59:59Z",
    "2020-02-29T00:00:00+08:00",
    "2000-02-29T23:59:59-00:00",  # century leap year
    "1996-02-29T12:30:15Z",  # ordinary leap year
    "2024-02-29T00:00:00Z",
    "2023-01-31T00:00:00+00:00",
    "2023-12-31T23:59:59.1Z",  # 1 millisecond digit
    "2023-12-31T23:59:59.12+14:00",  # 2 millisecond digits
    "2023-12-31T23:59:59.123-05:30",  # 3 millisecond digits
]

# Inputs that must be rejected.
negatives = [
    "2021-02-29T00:00:00Z",  # Feb 29 in a non-leap year
    "1900-02-29T12:00:00Z",  # century year that is not a leap year
    "2023-04-31T00:00:00Z",  # April has no 31st
    "2023-13-01T00:00:00Z",  # invalid month
    "2023-00-10T00:00:00Z",  # invalid month
    "2023-12-00T00:00:00Z",  # invalid day
    "2023-12-31T24:00:00Z",  # invalid hour
    "2023-12-31T23:60:00Z",  # invalid minute
    "2023-12-31T23:59:60Z",  # invalid second (leap seconds unsupported)
    "2023-12-31T23:59:59.1234Z",  # more than 3 millisecond digits
    "2023-12-31T23:59:59",  # timezone missing
    "2023-12-31 23:59:59Z",  # 'T' separator missing
    "2023-12-31T23:59:59z",  # lowercase z is not allowed
    "2023-12-31T23:59:59+0800",  # timezone offset without a colon
    "2023-12-31T23:59:59+25:00",  # timezone hour out of range
    "2023-12-31T23:59:59+23:60",  # timezone minute out of range
]

# Every positive sample must validate; its parsed groups are printed.
print("=== 正例测试 ===")
for s in positives:
    assert is_iso8601_dt(s), f"应匹配但未通过:{s}"
    print(s, "->", parse_iso8601_dt(s))

# Every negative sample must fail validation.
print("\n=== 反例测试 ===")
for s in negatives:
    assert not is_iso8601_dt(s), f"不应匹配但通过了:{s}"
    print(s, "-> 不匹配")

# Parse a single value. For this sample the result is:
#   {'date': '2020-02-29', 'time': '00:00:00', 'ms': None, 'tz': '+08:00'}
sample = "2020-02-29T00:00:00+08:00"
parts = parse_iso8601_dt(sample)
print("\n解析示例:", sample, "->", parts)
常见匹配场景说明
性能注意事项
将自然语言的匹配需求,快速转化为可直接粘贴到项目中的 Python 正则与可运行示例;覆盖数据清洗、日志解析、表单校验、文本抽取等高频场景,显著缩短从需求到上线的周期;自动补齐边界条件与正反例测试,降低误判风险与性能隐患;以统一清晰的结构输出表达式、用途说明、示例和注意事项,便于团队协作与复用;让初中级开发也能稳定产出“专家级”正则,实现效率与质量双提升。