压缩内部机制深入分析

深入分析 OpenClaw 的会话历史压缩系统(src/agents/compaction.ts,406 行)。

核心常量

export const BASE_CHUNK_RATIO = 0.4;  // Default share of the context window per chunk (40%)
export const MIN_CHUNK_RATIO = 0.15;  // Floor applied when average messages are large (15%)
export const SAFETY_MARGIN = 1.2;     // Buffer for token-estimation error (20%)
export const SUMMARIZATION_OVERHEAD_TOKENS = 4096;  // Tokens reserved for the summarization prompt

关键函数

computeAdaptiveChunkRatio()

根据平均消息大小动态调整块比率:

/**
 * Pick a per-chunk context ratio based on the average message size.
 * When the safety-adjusted average message exceeds 10% of the context
 * window, the ratio shrinks from BASE_CHUNK_RATIO toward MIN_CHUNK_RATIO,
 * scaled by how large the average is.
 */
export function computeAdaptiveChunkRatio(messages: AgentMessage[], contextWindow: number): number {
  if (messages.length === 0) {
    return BASE_CHUNK_RATIO;
  }

  const meanTokens = estimateMessagesTokens(messages) / messages.length;
  const paddedRatio = (meanTokens * SAFETY_MARGIN) / contextWindow;

  // Small average messages keep the default chunk ratio.
  if (paddedRatio <= 0.1) {
    return BASE_CHUNK_RATIO;
  }

  // Scale the reduction with the average size, clamped to the allowed band.
  const reduction = Math.min(paddedRatio * 2, BASE_CHUNK_RATIO - MIN_CHUNK_RATIO);
  return Math.max(MIN_CHUNK_RATIO, BASE_CHUNK_RATIO - reduction);
}

行为:

  • 默认:每个块占上下文的 40%
  • 大消息(> 上下文的 10%):比率随平均消息大小逐步降低,最低降至 15% 下限
  • 防止超大块导致溢出

chunkMessagesByMaxTokens()

将消息分割成尊重令牌限制的块:

/**
 * Split messages into consecutive chunks whose estimated token totals stay
 * within maxTokens, deflated by SAFETY_MARGIN to absorb estimator error.
 * A single message larger than the effective limit is emitted as its own
 * chunk (which may exceed the limit) so chunks cannot grow unbounded.
 */
export function chunkMessagesByMaxTokens(
  messages: AgentMessage[],
  maxTokens: number,
): AgentMessage[][] {
  // Shrink the budget to compensate for estimateTokens() undercounting.
  const effectiveMax = Math.max(1, Math.floor(maxTokens / SAFETY_MARGIN));

  const result: AgentMessage[][] = [];
  let pending: AgentMessage[] = [];
  let pendingTokens = 0;

  const flush = () => {
    result.push(pending);
    pending = [];
    pendingTokens = 0;
  };

  for (const msg of messages) {
    const tokens = estimateCompactionMessageTokens(msg);

    // Close out the current chunk before this message would overflow it.
    if (pending.length > 0 && pendingTokens + tokens > effectiveMax) {
      flush();
    }

    pending.push(msg);
    pendingTokens += tokens;

    // Flush an oversized message immediately so it stands alone.
    if (tokens > effectiveMax) {
      flush();
    }
  }

  if (pending.length > 0) {
    flush();
  }

  return result;
}

关键特性:

  • 应用 SAFETY_MARGIN 补偿令牌估计不准确性
  • 立即分割超大单条消息
  • 除单条超大消息会自成一块(可能超过有效最大值)外,其余块均不超过有效最大值

splitMessagesByTokenShare()

将消息分配到 N 个块,目标是相等的令牌份额:

/**
 * Distribute messages across `parts` consecutive chunks, aiming for an
 * equal token share per chunk. The final chunk absorbs any remainder.
 */
export function splitMessagesByTokenShare(
  messages: AgentMessage[],
  parts = 2,
): AgentMessage[][] {
  const wanted = Math.min(Math.max(1, Math.floor(parts)), messages.length);
  if (wanted <= 1) {
    return [messages];
  }

  const perChunkTarget = estimateMessagesTokens(messages) / wanted;
  const result: AgentMessage[][] = [];
  let bucket: AgentMessage[] = [];
  let bucketTokens = 0;

  for (const msg of messages) {
    const tokens = estimateCompactionMessageTokens(msg);

    // Start a new chunk once the target share is reached — except for the
    // last chunk, which takes everything left over.
    if (
      result.length < wanted - 1 &&
      bucket.length > 0 &&
      bucketTokens + tokens > perChunkTarget
    ) {
      result.push(bucket);
      bucket = [];
      bucketTokens = 0;
    }

    bucket.push(msg);
    bucketTokens += tokens;
  }

  if (bucket.length > 0) {
    result.push(bucket);
  }

  return result;
}

策略:

  • 目标是跨块的相等令牌分配
  • 最后一个块吸收剩余消息
  • summarizeInStages() 用它将历史切分后逐段摘要(各部分依次摘要,再合并)

summarizeInStages()

带多阶段合并的渐进式摘要:

/**
 * Progressive summarization with a multi-stage merge.
 *
 * Small inputs are summarized in a single pass. Larger inputs are split into
 * roughly equal token shares, each part is summarized independently, and the
 * partial summaries are then merged into one final summary.
 */
export async function summarizeInStages(params: {
  messages: AgentMessage[];
  maxChunkTokens: number;
  minMessagesForSplit?: number;
  parts?: number;
  previousSummary?: string;
  customInstructions?: string;
  // ... other params
}): Promise<string> {
  // Fix: `messages` was read throughout but never declared from `params`.
  const { messages } = params;
  const minMessagesForSplit = Math.max(2, params.minMessagesForSplit ?? 4);
  const parts = normalizeParts(params.parts ?? 2, messages.length);
  const totalTokens = estimateMessagesTokens(messages);

  // Single-pass conditions:
  // - only 1 part requested
  // - too few messages (< minMessagesForSplit, default 4)
  // - total tokens fit within the maximum chunk size
  if (parts <= 1 || messages.length < minMessagesForSplit || totalTokens <= params.maxChunkTokens) {
    return summarizeWithFallback(params);
  }

  // Split into parts and summarize each independently.
  const splits = splitMessagesByTokenShare(messages, parts).filter(chunk => chunk.length > 0);
  if (splits.length <= 1) {
    return summarizeWithFallback(params);
  }

  // NOTE(review): parts are summarized sequentially; presumably this avoids
  // provider rate limits — confirm before parallelizing with Promise.all.
  const partialSummaries: string[] = [];
  for (const chunk of splits) {
    partialSummaries.push(
      await summarizeWithFallback({
        ...params,
        messages: chunk,
        previousSummary: undefined,  // each part is summarized independently
      })
    );
  }

  // Merge the partial summaries into the final summary.
  const summaryMessages: AgentMessage[] = partialSummaries.map(summary => ({
    role: "user",
    content: summary,
    timestamp: Date.now(),
  }));

  return summarizeWithFallback({
    ...params,
    messages: summaryMessages,
    customInstructions: MERGE_SUMMARIES_INSTRUCTIONS,
  });
}

决策逻辑:优先单次摘要;仅当请求的部分数 > 1、消息数达到 minMessagesForSplit、且总令牌超过 maxChunkTokens 时才分阶段摘要并合并。

pruneHistoryForContextShare()

基于预算的修剪,带 tool_use/tool_result 配对修复:

export function pruneHistoryForContextShare(params: {
  messages: AgentMessage[];
  maxContextTokens: number;
  maxHistoryShare?: number;  // 默认 50%
  parts?: number;
}): {
  messages: AgentMessage[];
  droppedMessagesList: AgentMessage[];
  droppedChunks: number;
  droppedMessages: number;
  droppedTokens: number;
  keptTokens: number;
  budgetTokens: number;
} {
  const maxHistoryShare = params.maxHistoryShare ?? 0.5;
  const budgetTokens = Math.floor(params.maxContextTokens * maxHistoryShare);
  let keptMessages = params.messages;
  const allDroppedMessages: AgentMessage[] = [];
  let droppedChunks = 0;

  while (keptMessages.length > 0 && estimateMessagesTokens(keptMessages) > budgetTokens) {
    const chunks = splitMessagesByTokenShare(keptMessages, parts);
    if (chunks.length <= 1) break;  // 不能再丢弃更多

    const [dropped, ...rest] = chunks;  // 丢弃最旧的块
    const flatRest = rest.flat();

    // 丢弃后修复 tool_use/tool_result 配对
    const repairReport = repairToolUseResultPairing(flatRest);
    keptMessages = repairReport.messages;

    droppedChunks += 1;
    droppedMessages += dropped.length + repairReport.droppedOrphanCount;
    allDroppedMessages.push(...dropped);
  }

  return { messages: keptMessages, droppedMessagesList: allDroppedMessages, ... };
}

关键特性:

  • 分配预算:maxHistoryShare * maxContextTokens(默认 50%)
  • 首先丢弃最旧的块
  • 每次丢弃后调用 repairToolUseResultPairing() 处理孤立的 tool_results
  • 在满足预算或没有更多块可丢弃时停止

优化见解

针对不同用例调整块比率

大上下文模型(例如 Claude Opus 4.6 200k):

  • 使用更高的 BASE_CHUNK_RATIO(0.5-0.6)
  • 更少的摘要轮次
  • 更好地保留细微差别

较小上下文模型(例如 32k):

  • 保持 BASE_CHUNK_RATIO 在 0.4 或更低
  • 更激进的自适应降低
  • 优先考虑在约束内适配

令牌密集型领域(代码、日志):

  • 更低的 BASE_CHUNK_RATIO(0.3)
  • 提高 MIN_CHUNK_RATIO 下限(0.2)
  • 补偿更高的令牌密度

安全边际的理由

SAFETY_MARGIN = 1.2(20% 缓冲)补偿:

  • 字符/4 启发式遗漏多字节字符
  • 特殊令牌(BOS、EOS 等)
  • 代码令牌(高于平均密度)
  • 格式化令牌(markdown、XML 标签)

交叉引用