Compaction Internals Deep Dive

Deep analysis of OpenClaw's session history compaction system (src/agents/compaction.ts, 406 lines).

Core Constants

export const BASE_CHUNK_RATIO = 0.4;  // 40% of context for each chunk (default)
export const MIN_CHUNK_RATIO = 0.15;  // 15% minimum when avg message is large
export const SAFETY_MARGIN = 1.2;     // 20% buffer for token estimation errors
export const SUMMARIZATION_OVERHEAD_TOKENS = 4096;  // Reserve for summarization prompts
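
As a worked example (numbers chosen for illustration, not taken from the source), here is how these constants combine for a 200k-token context window:

// Illustrative arithmetic only; compaction.ts composes these values inside
// its own functions rather than at the top level.
const contextWindow = 200_000;
const defaultChunk = Math.floor(contextWindow * BASE_CHUNK_RATIO);  // 80_000 tokens per chunk
const effectiveMax = Math.floor(defaultChunk / SAFETY_MARGIN);      // 66_666 after the margin
const floorChunk = Math.floor(contextWindow * MIN_CHUNK_RATIO);     // 30_000 at the floor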

Key Functions

computeAdaptiveChunkRatio()

Dynamically adjusts chunk ratio based on average message size:

export function computeAdaptiveChunkRatio(messages: AgentMessage[], contextWindow: number): number {
  if (messages.length === 0) {
    return BASE_CHUNK_RATIO;
  }

  const totalTokens = estimateMessagesTokens(messages);
  const avgTokens = totalTokens / messages.length;
  const safeAvgTokens = avgTokens * SAFETY_MARGIN;
  const avgRatio = safeAvgTokens / contextWindow;

  // If average message > 10% of context, reduce chunk ratio toward MIN_CHUNK_RATIO
  if (avgRatio > 0.1) {
    const reduction = Math.min(avgRatio * 2, BASE_CHUNK_RATIO - MIN_CHUNK_RATIO);
    return Math.max(MIN_CHUNK_RATIO, BASE_CHUNK_RATIO - reduction);
  }

  return BASE_CHUNK_RATIO;
}

Behavior:

  • Default: 40% of context per chunk
  • Large messages (> 10% of context): ratio reduces toward 15%
  • Prevents oversized chunks that could cause overflow
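
To make the reduction concrete, here is an illustrative trace of the arithmetic above (values invented for the example):

// Assume a 200k context and an average of ~30k estimated tokens per message:
//   safeAvgTokens = 30_000 * SAFETY_MARGIN = 36_000
//   avgRatio      = 36_000 / 200_000       = 0.18   (> 0.1, so reduce)
//   reduction     = min(0.18 * 2, 0.40 - 0.15) = min(0.36, 0.25) = 0.25
//   result        = max(0.15, 0.40 - 0.25) = 0.15   (clamped to MIN_CHUNK_RATIO)
const ratio = computeAdaptiveChunkRatio(largeMessages, 200_000);  // -> 0.15 here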

chunkMessagesByMaxTokens()

Splits messages into chunks respecting token limits:

export function chunkMessagesByMaxTokens(
  messages: AgentMessage[],
  maxTokens: number,
): AgentMessage[][] {
  // Apply SAFETY_MARGIN to compensate for estimateTokens() underestimation
  const effectiveMax = Math.max(1, Math.floor(maxTokens / SAFETY_MARGIN));

  const chunks: AgentMessage[][] = [];
  let currentChunk: AgentMessage[] = [];
  let currentTokens = 0;

  for (const message of messages) {
    const messageTokens = estimateCompactionMessageTokens(message);

    // Start new chunk if adding this message would exceed limit
    if (currentChunk.length > 0 && currentTokens + messageTokens > effectiveMax) {
      chunks.push(currentChunk);
      currentChunk = [];
      currentTokens = 0;
    }

    currentChunk.push(message);
    currentTokens += messageTokens;

    // Flush immediately when this single message alone exceeds the limit,
    // isolating it in its own (over-limit) chunk rather than growing further
    if (messageTokens > effectiveMax) {
      chunks.push(currentChunk);
      currentChunk = [];
      currentTokens = 0;
    }
  }

  if (currentChunk.length > 0) {
    chunks.push(currentChunk);
  }

  return chunks;
}

Key Features:

  • Applies SAFETY_MARGIN to compensate for token estimation inaccuracy
  • Isolates an oversized individual message into its own single-message chunk
  • Guarantees no multi-message chunk exceeds the effective max (a lone oversized message still yields one over-limit chunk)
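
A minimal usage sketch (the history variable is assumed, not from the source):

// Chunk a session history so each chunk fits ~20k estimated tokens
// (effectively ~16_666 after the safety margin is applied internally).
const chunks = chunkMessagesByMaxTokens(history, 20_000);
for (const [i, chunk] of chunks.entries()) {
  console.log(`chunk ${i}: ${chunk.length} messages, ~${estimateMessagesTokens(chunk)} tokens`);
}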

splitMessagesByTokenShare()

Distributes messages across N chunks targeting equal token share:

export function splitMessagesByTokenShare(
  messages: AgentMessage[],
  parts = 2,
): AgentMessage[][] {
  const normalizedParts = Math.min(Math.max(1, Math.floor(parts)), messages.length);
  if (normalizedParts <= 1) {
    return [messages];
  }

  const totalTokens = estimateMessagesTokens(messages);
  const targetTokens = totalTokens / normalizedParts;
  const chunks: AgentMessage[][] = [];
  let current: AgentMessage[] = [];
  let currentTokens = 0;

  for (const message of messages) {
    const messageTokens = estimateCompactionMessageTokens(message);

    // Start new chunk when target reached (but not for last chunk)
    if (
      chunks.length < normalizedParts - 1 &&
      current.length > 0 &&
      currentTokens + messageTokens > targetTokens
    ) {
      chunks.push(current);
      current = [];
      currentTokens = 0;
    }

    current.push(message);
    currentTokens += messageTokens;
  }

  if (current.length > 0) {
    chunks.push(current);
  }

  return chunks;
}

Strategy:

  • Targets equal token distribution across chunks
  • Last chunk absorbs remaining messages
  • Used by summarizeInStages() to summarize each share independently before merging
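
For illustration (history is assumed), splitting a session into three roughly equal token shares:

// Greedy pass: each of the first two chunks closes once it reaches
// ~totalTokens / 3; the last chunk absorbs whatever remains.
const [oldest, middle, newest] = splitMessagesByTokenShare(history, 3);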

summarizeInStages()

Progressive summarization with multi-stage merging:

export async function summarizeInStages(params: {
  messages: AgentMessage[];
  minMessagesForSplit?: number;
  parts?: number;
  // ... other params
}): Promise<string> {
  const messages = params.messages;
  const minMessagesForSplit = Math.max(2, params.minMessagesForSplit ?? 4);
  const parts = normalizeParts(params.parts ?? 2, messages.length);
  const totalTokens = estimateMessagesTokens(messages);

  // Single summarization conditions:
  // - Only 1 part requested
  // - Too few messages (< minMessagesForSplit, default 4)
  // - Total tokens fit in max chunk size
  if (parts <= 1 || messages.length < minMessagesForSplit || totalTokens <= params.maxChunkTokens) {
    return summarizeWithFallback(params);
  }

  // Split into parts and summarize each independently
  const splits = splitMessagesByTokenShare(messages, parts).filter(chunk => chunk.length > 0);
  if (splits.length <= 1) {
    return summarizeWithFallback(params);
  }

  const partialSummaries: string[] = [];
  for (const chunk of splits) {
    partialSummaries.push(
      await summarizeWithFallback({
        ...params,
        messages: chunk,
        previousSummary: undefined,  // Each part summarized independently
      })
    );
  }

  // Merge partial summaries into final summary
  const summaryMessages: AgentMessage[] = partialSummaries.map(summary => ({
    role: "user",
    content: summary,
    timestamp: Date.now(),
  }));

  return summarizeWithFallback({
    ...params,
    messages: summaryMessages,
    customInstructions: MERGE_SUMMARIES_INSTRUCTIONS,
  });
}

Decision Logic:

  • Single pass when parts <= 1, when fewer than minMessagesForSplit messages exist, or when the total fits in one chunk (totalTokens <= maxChunkTokens)
  • Otherwise: split by token share, summarize each part independently (previousSummary is cleared), then merge the partial summaries under MERGE_SUMMARIES_INSTRUCTIONS
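
A hypothetical invocation (parameters beyond those shown in the excerpt are assumed):

const summary = await summarizeInStages({
  messages: session.history,   // assumed session object
  parts: 3,                    // three independent partial summaries
  minMessagesForSplit: 6,      // require at least 6 messages before splitting
  maxChunkTokens: 16_000,      // feeds the single-pass check above
  // ...model/prompt params elided in the excerpt
});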

pruneHistoryForContextShare()

Budget-based pruning with tool_use/tool_result pairing repair:

export function pruneHistoryForContextShare(params: {
  messages: AgentMessage[];
  maxContextTokens: number;
  maxHistoryShare?: number;  // Default 50%
  parts?: number;
}): {
  messages: AgentMessage[];
  droppedMessagesList: AgentMessage[];
  droppedChunks: number;
  droppedMessages: number;
  droppedTokens: number;
  keptTokens: number;
  budgetTokens: number;
} {
  const maxHistoryShare = params.maxHistoryShare ?? 0.5;
  const parts = params.parts ?? 2;
  const budgetTokens = Math.floor(params.maxContextTokens * maxHistoryShare);
  let keptMessages = params.messages;
  const allDroppedMessages: AgentMessage[] = [];
  let droppedChunks = 0;
  let droppedMessages = 0;

  while (keptMessages.length > 0 && estimateMessagesTokens(keptMessages) > budgetTokens) {
    const chunks = splitMessagesByTokenShare(keptMessages, parts);
    if (chunks.length <= 1) break;  // Can't drop more

    const [dropped, ...rest] = chunks;  // Drop oldest chunk
    const flatRest = rest.flat();

    // Repair tool_use/tool_result pairing after dropping
    const repairReport = repairToolUseResultPairing(flatRest);
    keptMessages = repairReport.messages;

    droppedChunks += 1;
    droppedMessages += dropped.length + repairReport.droppedOrphanCount;
    allDroppedMessages.push(...dropped);
  }

  // droppedTokens counts whole dropped chunks; orphans removed during
  // pairing repair are reflected only in droppedMessages
  return {
    messages: keptMessages,
    droppedMessagesList: allDroppedMessages,
    droppedChunks,
    droppedMessages,
    droppedTokens: estimateMessagesTokens(allDroppedMessages),
    keptTokens: estimateMessagesTokens(keptMessages),
    budgetTokens,
  };
}

Key Features:

  • Allocates budget: maxHistoryShare * maxContextTokens (default 50%)
  • Drops oldest chunk first
  • Calls repairToolUseResultPairing() after each drop to handle orphaned tool_results
  • Stops when budget satisfied or no more chunks to drop
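
A usage sketch reading the returned report (session.history is assumed):

// Keep history within 50% of a 200k context (budget = 100_000 tokens).
const report = pruneHistoryForContextShare({
  messages: session.history,
  maxContextTokens: 200_000,
});
console.log(
  `dropped ${report.droppedChunks} chunk(s), ${report.droppedMessages} message(s); ` +
  `kept ~${report.keptTokens} of ${report.budgetTokens} budget tokens`,
);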

Optimization Insights

Tuning Chunk Ratio for Different Use Cases

Large Context Models (e.g., Claude Opus 4.6 with a 200k-token window):

  • Use higher BASE_CHUNK_RATIO (0.5-0.6)
  • Fewer summarization rounds
  • Better preservation of nuance

Smaller Context Models (e.g., 32k):

  • Keep BASE_CHUNK_RATIO at 0.4 or lower
  • More aggressive adaptive reduction
  • Prioritize fitting within constraints

Token-Heavy Domains (code, logs):

  • Lower BASE_CHUNK_RATIO (0.3)
  • Increase MIN_CHUNK_RATIO floor (0.2)
  • Compensate for higher token density
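
The constants are exported as fixed values, so these profiles are not a real knob in compaction.ts; the sketch below only restates the guidance above as data:

// Hypothetical tuning table; compaction.ts does not expose this API.
const CHUNK_PROFILES = {
  largeContext: { baseChunkRatio: 0.55, minChunkRatio: 0.15 },  // 200k-class models
  smallContext: { baseChunkRatio: 0.40, minChunkRatio: 0.15 },  // 32k-class models
  tokenHeavy:   { baseChunkRatio: 0.30, minChunkRatio: 0.20 },  // code/log-heavy sessions
} as const;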

Safety Margin Rationale

The SAFETY_MARGIN = 1.2 (20% buffer) compensates for:

  • The chars/4 heuristic undercounting multi-byte characters
  • Special tokens (BOS, EOS, etc.)
  • Code (higher-than-average token density)
  • Formatting tokens (markdown, XML tags)
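
A minimal sketch of the heuristic the margin compensates for (the real estimator lives elsewhere in the codebase; its shape here is an assumption):

// Rough chars/4 estimator; undercounts dense code and multi-byte text,
// which is exactly what the 1.2 SAFETY_MARGIN buffers against.
function estimateTokensRough(text: string): number {
  return Math.ceil(text.length / 4);
}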

Cross-References