first commit

This commit is contained in:
2026-06-09 14:50:53 +08:00
commit 6aebc60bfd
55 changed files with 3126 additions and 0 deletions

56
src/analysis/__test__.js Normal file
View File

@@ -0,0 +1,56 @@
import assert from 'assert'
import fs from 'fs'
import os from 'os'
import path from 'path'
import { analyzeWechatMessages, buildWechatStats } from './wechatAnalyzer.js'
// Smoke test for the WeChat analysis module: checks the pure statistics
// builder first, then the stats-only path of analyzeWechatMessages against a
// throwaway data directory.

// Fixture: two messages in the room '研发群' and one message with an empty room name.
const records = [
  {
    timestamp: '2026-05-12T08:00:00.000Z',
    roomName: '研发群',
    talkerName: 'Alice',
    talkerAlias: 'Alice',
    receiverName: '',
    text: '今天排查登录问题',
    typeName: 'Text',
  },
  {
    timestamp: '2026-05-12T09:00:00.000Z',
    roomName: '研发群',
    talkerName: 'Bob',
    talkerAlias: 'Bob',
    receiverName: '',
    text: '我来补日志',
    typeName: 'Text',
  },
  {
    timestamp: '2026-05-12T10:00:00.000Z',
    roomName: '',
    talkerName: 'Carol',
    talkerAlias: 'Carol',
    receiverName: 'me',
    text: '周会改到下午',
    typeName: 'Text',
  },
]

const stats = buildWechatStats(records)
assert.equal(stats.totalMessages, 3)
assert.equal(stats.textMessages, 3)
assert.equal(stats.topSpeakers[0].name, 'Alice')

// mkdtempSync already creates the directory, so no extra mkdir is needed.
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'wechat-bot-analysis-'))
try {
  const jsonl = records.map((record) => JSON.stringify(record)).join('\n')
  fs.writeFileSync(path.join(tmpDir, 'messages.jsonl'), jsonl, 'utf8')

  const result = await analyzeWechatMessages({
    dataDir: tmpDir,
    room: '研发群',
    statsOnly: true,
  })
  assert.equal(result.target, '群聊「研发群」')
  assert.equal(result.stats.totalMessages, 2)
  assert.equal(result.analysis, '')
} finally {
  // Always remove the fixture directory, even when an assertion fails,
  // so repeated runs do not accumulate temp directories.
  fs.rmSync(tmpDir, { recursive: true, force: true })
}

console.log('analysis tests passed')

View File

@@ -0,0 +1,95 @@
import { getServe } from '../wechaty/serve.js'
import { filterWechatMessages, loadWechatMessages } from '../platforms/wechat/messageStore.js'
/**
 * Add `step` to the counter stored under `key` in `map`.
 * Falsy keys are ignored; a missing (or falsy) current value counts as 0.
 *
 * @param {Map<any, number>} map - counter map, mutated in place
 * @param {*} key - counter key; nothing happens when it is falsy
 * @param {number} [step=1] - amount to add
 */
function increment(map, key, step = 1) {
  if (key) {
    const current = map.get(key) || 0
    map.set(key, current + step)
  }
}
/**
 * Rank the entries of a counter map by count, highest first, and return the
 * leading `limit` of them as `{ name, count }` objects.
 *
 * @param {Map<string, number>} map - counter map (not modified)
 * @param {number} [limit=10] - maximum number of entries to return
 * @returns {Array<{name: string, count: number}>}
 */
function topEntries(map, limit = 10) {
  const ranked = Array.from(map.entries())
  ranked.sort((left, right) => right[1] - left[1])

  const top = []
  for (const [name, count] of ranked.slice(0, limit)) {
    top.push({ name, count })
  }
  return top
}
/**
 * Aggregate basic statistics over a list of message records.
 *
 * Each record is read for `talkerAlias`, `talkerName`, `roomName`,
 * `timestamp` and `text`. Missing speakers are counted as 'unknown' and
 * records without a room name are counted under 'private'.
 *
 * @param {Array<object>} records - message records to summarise
 * @returns {{
 *   totalMessages: number,
 *   textMessages: number,
 *   averageTextLength: number,
 *   topSpeakers: Array<{name: string, count: number}>,
 *   topRooms: Array<{name: string, count: number}>,
 *   hourly: Array<{name: string, count: number}>
 * }} `hourly` is keyed by zero-padded local hour ('00'..'23') and sorted by hour.
 */
export function buildWechatStats(records) {
  const speakers = new Map()
  const rooms = new Map()
  const hourly = new Map()
  let textMessages = 0
  let totalTextLength = 0

  for (const record of records) {
    increment(speakers, record.talkerAlias || record.talkerName || 'unknown')
    increment(rooms, record.roomName || 'private')

    if (record.timestamp) {
      // getHours() uses the local time zone and yields NaN for an unparsable
      // timestamp; skip those so no bogus "NaN" bucket shows up in the histogram.
      const hour = new Date(record.timestamp).getHours()
      if (!Number.isNaN(hour)) {
        increment(hourly, String(hour).padStart(2, '0'))
      }
    }

    if (record.text) {
      textMessages += 1
      // Length is measured in UTF-16 code units.
      totalTextLength += record.text.length
    }
  }

  return {
    totalMessages: records.length,
    textMessages,
    // Rounded to one decimal place; 0 when there are no text messages.
    averageTextLength: textMessages ? Number((totalTextLength / textMessages).toFixed(1)) : 0,
    topSpeakers: topEntries(speakers),
    topRooms: topEntries(rooms),
    // Take all (at most 24) hour buckets, then order them chronologically.
    hourly: topEntries(hourly, 24).sort((a, b) => a.name.localeCompare(b.name)),
  }
}
/**
 * Assemble the text prompt sent to the reply service for a chat analysis.
 *
 * The prompt consists of fixed instruction paragraphs, the analysis target,
 * the pretty-printed stats, and a sample made of the last 120 records, one
 * line per record. A record without text is shown as `[typeName]`.
 *
 * @param {object} params
 * @param {Array<object>} params.records - message records, oldest first is presumed (the tail is sampled)
 * @param {object} params.stats - statistics object, serialised with JSON.stringify
 * @param {string} params.target - human-readable description of what is analysed
 * @returns {string} prompt sections joined by blank lines
 */
export function buildWechatAnalysisPrompt({ records, stats, target }) {
  const sampleLines = []
  for (const record of records.slice(-120)) {
    const speaker = record.talkerAlias || record.talkerName || 'unknown'
    const body = record.text || `[${record.typeName}]`
    sampleLines.push(`[${record.timestamp}] ${speaker}: ${body}`)
  }
  const recentMessages = sampleLines.join('\n')

  const sections = [
    '你是一个严谨的中文聊天数据分析助手。',
    '请基于用户显式提供的本地微信聊天记录做分析,不要编造记录之外的事实。',
    '输出结构1. 关键统计2. 主要话题3. 互动模式4. 风险或误读提醒5. 可执行建议。',
    `分析对象:${target}`,
    `基础统计:${JSON.stringify(stats, null, 2)}`,
    '最近消息样本:',
    // Fallback text when there is nothing to sample.
    recentMessages || '无文本消息样本。',
  ]
  return sections.join('\n\n')
}
/**
 * Load locally stored WeChat messages, filter them, compute statistics and,
 * unless only statistics are requested, ask the configured reply service for
 * a written analysis.
 *
 * @param {object} [options]
 * @param {string} [options.dataDir] - passed through to loadWechatMessages
 * @param {number} [options.limit] - passed to loadWechatMessages; a falsy value falls back to 5000
 * @param {string} [options.room] - room filter; also used in the target label
 * @param {string} [options.friend] - friend filter; used in the target label when no room is given
 * @param {string} [options.query] - passed through to filterWechatMessages
 * @param {*} [options.start] - passed through to filterWechatMessages
 * @param {*} [options.end] - passed through to filterWechatMessages
 * @param {boolean} [options.statsOnly] - when truthy, the reply service is not called
 * @param {string} [options.serviceType] - service name handed to getServe; defaults to 'ChatGPT'
 * @returns {Promise<{target: string, stats: object, analysis: string}>}
 *   `analysis` is '' in stats-only mode, a fixed notice when no record matched,
 *   and otherwise whatever the reply service returned.
 */
export async function analyzeWechatMessages(options = {}) {
  const allRecords = loadWechatMessages({
    dataDir: options.dataDir,
    limit: options.limit || 5000,
  })
  const records = filterWechatMessages(allRecords, {
    room: options.room,
    friend: options.friend,
    query: options.query,
    start: options.start,
    end: options.end,
  })
  const stats = buildWechatStats(records)

  // Human-readable label for what is being analysed. The room takes precedence
  // over the friend; the corner brackets must be closed on both labels.
  let target = '全部本地记录'
  if (options.room) {
    target = `群聊「${options.room}」`
  } else if (options.friend) {
    target = `好友「${options.friend}」`
  }

  // Skip the reply service when only stats were requested or nothing matched.
  if (options.statsOnly || !records.length) {
    return {
      target,
      stats,
      analysis: records.length ? '' : '没有匹配到可分析的本地微信消息。',
    }
  }

  const getReply = getServe(options.serviceType || 'ChatGPT')
  const prompt = buildWechatAnalysisPrompt({ records, stats, target })
  const analysis = await getReply(prompt)
  return { target, stats, analysis }
}