对话偏好提取器从用户对话中自动识别偏好、约束、风格等信息,支持中英文关键词矩阵。
pkg/memory/dialog/
├── extractor.go # 关键词提取器
├── keywords_zh.go # 中文关键词矩阵
└── keywords_en.go # 英文关键词矩阵
// PreferenceCategory names the kind of preference an extracted item belongs to.
type PreferenceCategory string

// Categories that an extracted preference can be assigned to.
const (
	// CategoryPreference marks a general preference.
	CategoryPreference PreferenceCategory = "preference"
	// CategoryConstraint marks a constraint or hard requirement.
	CategoryConstraint PreferenceCategory = "constraint"
	// CategoryStyle marks a style preference.
	CategoryStyle PreferenceCategory = "style"
	// CategoryAudience marks an audience preference.
	CategoryAudience PreferenceCategory = "audience"
	// CategoryLength marks a length preference.
	CategoryLength PreferenceCategory = "length"
	// CategoryFormat marks a format preference.
	CategoryFormat PreferenceCategory = "format"
)
// ExtractedPreference is a single preference extracted from a dialog message.
type ExtractedPreference struct {
	// Category is the classification of the preference.
	Category PreferenceCategory
	// Content is the sentence that contains the matched keywords.
	Content string
	// Keywords lists the keywords that matched.
	Keywords []string
	// Confidence is the confidence score of the extraction.
	Confidence float64
	// Timestamp is the time at which the preference was extracted.
	Timestamp time.Time
}
import (
	"fmt"

	"github.com/astercloud/aster/pkg/memory/dialog"
)
// The three constructors below are alternatives: each one declares extractor,
// so use exactly one of them.
// Use the default Chinese keywords.
extractor := dialog.NewExtractor(dialog.DefaultChineseKeywords(), nil)
// Use the writing-style keywords (extended set).
extractor := dialog.NewExtractor(dialog.WritingStyleKeywords(), nil)
// Use the English keywords.
extractor := dialog.NewExtractor(dialog.DefaultEnglishKeywords(), nil)
// Sample message that mentions a preference, a length requirement and a constraint.
message := "我想要轻松一点的风格,字数控制在2000字以内,不要使用网络流行语"
// Extract the preferences from the message and print one line per result.
preferences := extractor.Extract(message)
for _, pref := range preferences {
fmt.Printf("[%s] %s (置信度: %.2f)\n",
pref.Category, pref.Content, pref.Confidence)
}
// Output:
// [preference] 我想要轻松一点的风格 (置信度: 0.60)
// [length] 字数控制在2000字以内 (置信度: 0.60)
// [constraint] 不要使用网络流行语 (置信度: 0.80)
// NOTE(review): the clauses of the message are separated only by commas, which
// are not among the sentence terminators this document lists, and the first
// clause also contains the style keywords "风格" and "轻松" from
// DefaultChineseKeywords — confirm this sample output against the real extractor.
// Pre-check: run the full extraction only when the message contains a
// preference keyword.
if extractor.HasPreferenceKeyword(message) {
	// The extracted preferences are consumed directly; Go rejects a local
	// variable that is declared but never used, so the result must not be
	// left in an unused variable.
	for _, pref := range extractor.Extract(message) {
		fmt.Printf("[%s] %s\n", pref.Category, pref.Content)
	}
}
// DefaultChineseKeywords returns the default Chinese keyword matrix, holding
// one keyword list per preference category.
func DefaultChineseKeywords() KeywordMatrix {
	matrix := KeywordMatrix{
		// General preference wording.
		CategoryPreference: {
			"想要", "希望", "喜欢", "偏好",
			"倾向", "更喜欢", "觉得", "认为",
		},
		// Constraint and prohibition wording.
		CategoryConstraint: {
			"必须", "一定要", "不要", "禁止",
			"不能", "避免", "不允许", "务必",
		},
		// Style and tone wording.
		CategoryStyle: {
			"风格", "语气", "调性", "口吻", "正式",
			"轻松", "幽默", "严肃", "专业",
		},
		// Audience wording.
		CategoryAudience: {
			"读者", "受众", "目标人群",
			"给谁看", "用户", "客户",
		},
		// Length wording.
		CategoryLength: {
			"字数", "长度", "篇幅", "简短",
			"简洁", "详细", "控制在",
		},
		// Format and layout wording.
		CategoryFormat: {
			"格式", "排版", "列表",
			"段落", "标题", "markdown",
		},
	}
	return matrix
}
// WritingStyleKeywords returns the writing-style keyword matrix (extended set).
func WritingStyleKeywords() KeywordMatrix {
// Built as an extension of DefaultChineseKeywords.
// Adds keywords such as: 文风, 笔触, 叙述方式, 第一人称, 第三人称, 主题, 题材, 重点, etc.
// NOTE(review): the function body is elided in this document; only this summary is shown.
}
// ContentPlatformKeywords returns the content-platform keyword matrix.
func ContentPlatformKeywords() KeywordMatrix {
// Adds platform names such as: 微信, 公众号, 小红书, 抖音, 知乎, 微博, B站, etc.
// NOTE(review): the function body is elided in this document; only this summary is shown.
}
// DefaultEnglishKeywords returns the default English keyword matrix, holding
// one keyword list per preference category.
func DefaultEnglishKeywords() KeywordMatrix {
	matrix := KeywordMatrix{
		// General preference wording.
		CategoryPreference: {
			"want", "prefer", "like", "hope",
			"wish", "would like", "I'd like",
		},
		// Constraint and prohibition wording.
		CategoryConstraint: {
			"must", "have to", "don't", "do not",
			"never", "avoid", "should not",
		},
		// Style and tone wording.
		CategoryStyle: {
			"style", "tone", "voice", "formal",
			"casual", "professional", "friendly",
		},
		// Audience wording.
		CategoryAudience: {
			"reader", "audience", "target",
			"user", "customer", "intended for",
		},
		// Length wording.
		CategoryLength: {
			"words", "length", "short",
			"brief", "detailed", "comprehensive",
		},
		// Format and layout wording.
		CategoryFormat: {
			"format", "layout", "list", "bullet",
			"paragraph", "heading", "markdown",
		},
	}
	return matrix
}
// TechnicalWritingKeywords returns the technical-writing keyword matrix.
func TechnicalWritingKeywords() KeywordMatrix {
// Extends the keywords with terms such as: technical, documentation, tutorial, guide, step-by-step, code block, etc.
// NOTE(review): the function body is elided in this document; only this summary is shown.
}
// MarketingContentKeywords returns the marketing-content keyword matrix.
func MarketingContentKeywords() KeywordMatrix {
// Extends the keywords with terms such as: persuasive, engaging, call to action, benefits, prospect, etc.
// NOTE(review): the function body is elided in this document; only this summary is shown.
}
// Create an extractor without keywords or config; the matrix is supplied below.
// NOTE(review): confirm how NewExtractor treats a nil keyword matrix and a nil config.
extractor := dialog.NewExtractor(nil, nil)
// Set the complete keyword matrix.
extractor.SetKeywords(dialog.KeywordMatrix{
dialog.CategoryPreference: {"want", "need", "prefer"},
dialog.CategoryConstraint: {"must", "avoid", "don't"},
})
// Add keywords to an existing category.
// NOTE(review): CategoryStyle is not part of the matrix set just above — confirm
// that AddKeywords also works for a category that has no entry yet.
extractor.AddKeywords(dialog.CategoryStyle, []string{
"vintage", "modern", "minimalist",
})
// Extractor configuration passed as the second argument of NewExtractor.
config := &dialog.ExtractorConfig{
MinConfidence: 0.5, // minimum confidence; results below this value are not returned
MaxExtractLength: 200, // maximum extract length, guarding against overly long sentences
}
// NOTE(review): keywords is not defined in this snippet; presumably a
// dialog.KeywordMatrix such as the ones built in the earlier examples.
extractor := dialog.NewExtractor(keywords, config)
提取器正确处理中文字符(rune),支持中文标点符号作为句子分隔符:
// 支持的句子结束符
// 中文: 。!?
// 英文: . ! ?
// 通用: \n
import (
"github.com/astercloud/aster/pkg/memory/dialog"
"github.com/astercloud/aster/pkg/memory/project"
)
// Extractor that uses the writing-style keyword matrix.
extractor := dialog.NewExtractor(dialog.WritingStyleKeywords(), nil)
// File-backed project store that receives the extracted preferences.
// NOTE(review): config is not defined in this snippet — supply the store configuration here.
store := project.NewFileStore(config)
// recordPreferences extracts the preferences found in message and appends each
// one to the preferences section of the project identified by projectID.
//
// It relies on extractor, store and ctx being available in the enclosing
// scope; none of them is declared inside the function. It stops at, and
// returns, the first error reported by store.AppendEntry, and returns nil once
// every extracted preference has been appended.
func recordPreferences(projectID, message string) error {
	for _, p := range extractor.Extract(message) {
		// Every entry is tagged with the "dialog" source.
		entry := &project.Entry{
			Category: string(p.Category),
			Content:  p.Content,
			Source:   "dialog",
		}
		if err := store.AppendEntry(ctx, projectID, project.SectionPreferences, entry); err != nil {
			return err
		}
	}
	return nil
}
import (
"github.com/astercloud/aster/pkg/memory/dialog"
"github.com/astercloud/aster/pkg/memory/auto"
)
// Use dialog.Extractor as a pre-filter in front of auto.Capturer.
extractor := dialog.NewExtractor(dialog.DefaultChineseKeywords(), nil)
if extractor.HasPreferenceKeyword(message) {
// A preference keyword is present, so hand the message to the Capturer.
// NOTE(review): capturer, ctx, projectID and sessionID are not defined in this
// snippet, and any value returned by CaptureFromDialog is discarded here —
// confirm whether it returns an error that should be handled.
capturer.CaptureFromDialog(ctx, projectID, sessionID, "user", message)
}