google · jcking · May 19, 2026 · Apr 11, 2026 · May 1, 2026 · May 1, 2026
diff --git a/cel/cel_test.go b/cel/cel_test.go
@@ -22,6 +22,7 @@ import (
 	"fmt"
 	"os"
 	"reflect"
+	"runtime"
 	"strings"
 	"sync"
 	"testing"
@@ -3694,3 +3695,55 @@ func interpret(t testing.TB, env *Env, expr string, vars any) (ref.Val, error) {
 	}
 	return out, nil
 }
+
+func TestExpressionSizeLimitEarlyEnforcement(t *testing.T) {
+	env, err := NewEnv(ParserExpressionSizeLimit(1000))
+	if err != nil {
+		t.Fatalf("NewEnv() failed: %v", err)
+	}
+
+	tests := []struct {
+		name string
+		mode string
+	}{
+		{name: "compile_rejects_oversized", mode: "compile"},
+		{name: "parse_rejects_oversized", mode: "parse"},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			payload := strings.Repeat("a", 10_000_000)
+
+			var m1, m2 runtime.MemStats
+			runtime.GC()
+			runtime.ReadMemStats(&m1)
+
+			switch tc.mode {
+			case "compile":
+				_, iss := env.Compile(payload)
+				if iss == nil || iss.Err() == nil {
+					t.Fatal("expected size limit error, got nil")
+				}
+				if !strings.Contains(iss.Err().Error(), "expression code point size exceeds limit") {
+					t.Fatalf("unexpected error: %v", iss.Err())
+				}
+			case "parse":
+				_, iss := env.Parse(payload)
+				if iss == nil || iss.Err() == nil {
+					t.Fatal("expected size limit error, got nil")
+				}
+				if !strings.Contains(iss.Err().Error(), "expression code point size exceeds limit") {
+					t.Fatalf("unexpected error: %v", iss.Err())
+				}
+			}
+
+			runtime.ReadMemStats(&m2)
+			allocDelta := (m2.TotalAlloc - m1.TotalAlloc) / (1024 * 1024)
+			if allocDelta > 5 {
+				t.Errorf("excessive memory allocation: %dMiB during %s (expected <5MiB with early enforcement)",
+					allocDelta, tc.mode)
+			}
+			t.Logf("[%s] memory delta: %dMiB", tc.mode, allocDelta)
+		})
+	}
+}
diff --git a/cel/env.go b/cel/env.go
@@ -436,6 +436,15 @@ func (e *Env) Check(ast *Ast) (*Ast, *Issues) {
 	return ast, nil
 }
 
+// configuredExpressionSizeLimit returns the effective expression size code point limit.
+// A zero value means "use the parser default".
+func (e *Env) configuredExpressionSizeLimit() int {
+	if l := e.limits[limitCodePointSize]; l != 0 {
+		return l
+	}
+	return 100_000
+}
+
 // Compile combines the Parse and Check phases CEL program compilation to produce an Ast and
 // associated issues.
 //
@@ -445,7 +454,11 @@ func (e *Env) Check(ast *Ast) (*Ast, *Issues) {
 //
 // Note, for parse-only uses of CEL use Parse.
 func (e *Env) Compile(txt string) (*Ast, *Issues) {
-	return e.CompileSource(common.NewTextSource(txt))
+	src, err := common.NewTextSourceWithLimit(txt, e.configuredExpressionSizeLimit())
+	if err != nil {
+		return nil, ErrorAsIssues(err)
+	}
+	return e.CompileSource(src)
 }
 
 // CompileSource combines the Parse and Check phases CEL program compilation to produce an Ast and
@@ -649,7 +662,10 @@ func (e *Env) Validators() []ASTValidator {
 // This form of Parse creates a Source value for the input `txt` and forwards to the
 // ParseSource method.
 func (e *Env) Parse(txt string) (*Ast, *Issues) {
-	src := common.NewTextSource(txt)
+	src, err := common.NewTextSourceWithLimit(txt, e.configuredExpressionSizeLimit())
+	if err != nil {
+		return nil, ErrorAsIssues(err)
+	}
 	return e.ParseSource(src)
 }
 

diff --git a/common/runes/buffer.go b/common/runes/buffer.go
@@ -16,6 +16,7 @@
 package runes
 
 import (
+	"fmt"
 	"strings"
 	"unicode/utf8"
 )
@@ -113,45 +114,64 @@ var _ Buffer = &supplementalBuffer{}
 
 var nilBuffer = &emptyBuffer{}
 
+// SizeLimitError indicates that the input exceeded the configured code point limit.
+type SizeLimitError struct {
+	Size  int
+	Limit int
+}
+
+func (e *SizeLimitError) Error() string {
+	return fmt.Sprintf("expression code point size exceeds limit: size: %d, limit %d", e.Size, e.Limit)
+}
+
 // NewBuffer returns an efficient implementation of Buffer for the given text based on the ranges of
 // the encoded code points contained within.
-//
-// Code points are represented as an array of byte, uint16, or rune. This approach ensures that
-// each index represents a code point by itself without needing to use an array of rune. At first
-// we assume all code points are less than or equal to '\u007f'. If this holds true, the
-// underlying storage is a byte array containing only ASCII characters. If we encountered a code
-// point above this range but less than or equal to '\uffff' we allocate a uint16 array, copy the
-// elements of previous byte array to the uint16 array, and continue. If this holds true, the
-// underlying storage is a uint16 array containing only Unicode characters in the Basic Multilingual
-// Plane. If we encounter a code point above '\uffff' we allocate an rune array, copy the previous
-// elements of the byte or uint16 array, and continue. The underlying storage is an rune array
-// containing any Unicode character.
 func NewBuffer(data string) Buffer {
-	buf, _ := newBuffer(data, false)
+	buf, _, _ := newBufferWithLimit(data, false, -1)
 	return buf
 }
 
 // NewBufferAndLineOffsets returns an efficient implementation of Buffer for the given text based on
 // the ranges of the encoded code points contained within, as well as returning the line offsets.
-//
-// Code points are represented as an array of byte, uint16, or rune. This approach ensures that
-// each index represents a code point by itself without needing to use an array of rune. At first
-// we assume all code points are less than or equal to '\u007f'. If this holds true, the
-// underlying storage is a byte array containing only ASCII characters. If we encountered a code
-// point above this range but less than or equal to '\uffff' we allocate a uint16 array, copy the
-// elements of previous byte array to the uint16 array, and continue. If this holds true, the
-// underlying storage is a uint16 array containing only Unicode characters in the Basic Multilingual
-// Plane. If we encounter a code point above '\uffff' we allocate an rune array, copy the previous
-// elements of the byte or uint16 array, and continue. The underlying storage is an rune array
-// containing any Unicode character.
 func NewBufferAndLineOffsets(data string) (Buffer, []int32) {
-	return newBuffer(data, true)
+	buf, offs, _ := newBufferWithLimit(data, true, -1)
+	return buf, offs
+}
+
+// NewBufferAndLineOffsetsWithLimit returns an efficient implementation of Buffer for the given text
+// and enforces a code point limit while constructing the buffer.
+func NewBufferAndLineOffsetsWithLimit(data string, limit int) (Buffer, []int32, error) {
+	if limit < 0 || len(data) <= limit {
+		return newBufferWithLimit(data, true, -1)
+	}
+	return newBufferWithLimit(data, true, limit)
+}
+
+func countRemainingCodePoints(data string, idx int, count int) int {
+	for idx < len(data) {
+		_, s := utf8.DecodeRuneInString(data[idx:])
+		idx += s
+		count++
+	}
+	return count
 }
 
-func newBuffer(data string, lines bool) (Buffer, []int32) {
+func newBufferWithLimit(data string, lines bool, limit int) (Buffer, []int32, error) {
 	if len(data) == 0 {
-		return nilBuffer, []int32{0}
+		return nilBuffer, []int32{0}, nil
+	}
+	if limit >= 0 && len(data) > limit {
+		size := countRemainingCodePoints(data, 0, 0)
+		if size > limit {
+			return nil, nil, &SizeLimitError{
+				Size:  size,
+				Limit: limit,
+			}
+		}
 	}
+
+	// The resulting buffers store one element per code point, so the worst case
+	// element count never exceeds len(data).
 	var (
 		idx         = 0
 		off   int32 = 0
@@ -195,7 +215,8 @@ func newBuffer(data string, lines bool) (Buffer, []int32) {
 	}
 	return &asciiBuffer{
 		arr: buf8,
-	}, offs
+	}, offs, nil
+
 copy16:
 	for idx < len(data) {
 		r, s := utf8.DecodeRuneInString(data[idx:])
@@ -222,7 +243,8 @@ copy16:
 	}
 	return &basicBuffer{
 		arr: buf16,
-	}, offs
+	}, offs, nil
+
 copy32:
 	for idx < len(data) {
 		r, s := utf8.DecodeRuneInString(data[idx:])
@@ -238,5 +260,5 @@ copy32:
 	}
 	return &supplementalBuffer{
 		arr: buf32,
-	}, offs
+	}, offs, nil
 }
diff --git a/common/runes/buffer_test.go b/common/runes/buffer_test.go
@@ -100,3 +100,30 @@ func TestNewBuffer_Empty(t *testing.T) {
 		t.Errorf("type mismatch: got %T, want %T", rb, &emptyBuffer{})
 	}
 }
+
+func TestNewBufferAndLineOffsetsWithLimit_Exceeded(t *testing.T) {
+	_, _, err := NewBufferAndLineOffsetsWithLimit("greetings", 5)
+	if err == nil {
+		t.Fatalf("expected size limit error, got nil")
+	}
+	if got, want := err.Error(), "expression code point size exceeds limit: size: 9, limit 5"; got != want {
+		t.Fatalf("got %q, want %q", got, want)
+	}
+}
+
+func TestNewBufferAndLineOffsetsWithLimit_MultibyteWithinLimit(t *testing.T) {
+	data := "🙂🙂"
+	rb, offs, err := NewBufferAndLineOffsetsWithLimit(data, 2)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got, want := rb.Len(), 2; got != want {
+		t.Fatalf("length mismatch: got %d, want %d", got, want)
+	}
+	if got, want := rb.Slice(0, rb.Len()), data; got != want {
+		t.Fatalf("slice mismatch: got %q, want %q", got, want)
+	}
+	if got, want := len(offs), 1; got != want {
+		t.Fatalf("line offsets mismatch: got %d, want %d", got, want)
+	}
+}
diff --git a/common/source.go b/common/source.go
@@ -74,6 +74,12 @@ func NewTextSource(text string) Source {
 	return NewStringSource(text, "<input>")
 }
 
+// NewTextSourceWithLimit creates a new Source from the input text string while
+// enforcing a maximum code point count when needed.
+func NewTextSourceWithLimit(text string, limit int) (Source, error) {
+	return NewStringSourceWithLimit(text, "<input>", limit)
+}
+
 // NewStringSource creates a new Source from the given contents and description.
 func NewStringSource(contents string, description string) Source {
 	// Compute line offsets up front as they are referred to frequently.
@@ -85,6 +91,23 @@ func NewStringSource(contents string, description string) Source {
 	}
 }
 
+// NewStringSourceWithLimit creates a new Source from the given contents and
+// description while enforcing a maximum code point count when needed.
+func NewStringSourceWithLimit(contents string, description string, limit int) (Source, error) {
+	if limit < 0 || len(contents) <= limit {
+		return NewStringSource(contents, description), nil
+	}
+	buf, offs, err := runes.NewBufferAndLineOffsetsWithLimit(contents, limit)
+	if err != nil {
+		return nil, err
+	}
+	return &sourceImpl{
+		Buffer:      buf,
+		description: description,
+		lineOffsets: offs,
+	}, nil
+}
+
 // NewInfoSource creates a new Source from a SourceInfo.
 func NewInfoSource(info *exprpb.SourceInfo) Source {
 	return &sourceImpl{

diff --git a/common/source_test.go b/common/source_test.go
@@ -16,6 +16,7 @@
 package common
 
 import (
+	"strings"
 	"testing"
 )
 
@@ -135,3 +136,24 @@ func TestNewInfoSource_NoPanicOnNil(t *testing.T) {
 	// Ensure there is no panic when passing nil, NewInfoSource should use proto v2 style accessors.
 	_ = NewInfoSource(nil)
 }
+
+func TestNewTextSourceWithLimit_Exceeded(t *testing.T) {
+	_, err := NewTextSourceWithLimit("greetings", 5)
+	if err == nil {
+		t.Fatalf("expected size limit error, got nil")
+	}
+	if !strings.Contains(err.Error(), "size exceeds limit") {
+		t.Fatalf("unexpected error: %v", err)
+	}
+}
+
+func TestNewTextSourceWithLimit_MultibyteWithinLimit(t *testing.T) {
+	data := "🙂🙂"
+	src, err := NewTextSourceWithLimit(data, 2)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got, want := src.Content(), data; got != want {
+		t.Fatalf("got %q, want %q", got, want)
+	}
+}