Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions cel/cel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"fmt"
"os"
"reflect"
"runtime"
"strings"
"sync"
"testing"
Expand Down Expand Up @@ -3694,3 +3695,55 @@ func interpret(t testing.TB, env *Env, expr string, vars any) (ref.Val, error) {
}
return out, nil
}

func TestExpressionSizeLimitEarlyEnforcement(t *testing.T) {
env, err := NewEnv(ParserExpressionSizeLimit(1000))
if err != nil {
t.Fatalf("NewEnv() failed: %v", err)
}

tests := []struct {
name string
mode string
}{
{name: "compile_rejects_oversized", mode: "compile"},
{name: "parse_rejects_oversized", mode: "parse"},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
payload := strings.Repeat("a", 10_000_000)

var m1, m2 runtime.MemStats
runtime.GC()
runtime.ReadMemStats(&m1)

switch tc.mode {
case "compile":
_, iss := env.Compile(payload)
if iss == nil || iss.Err() == nil {
t.Fatal("expected size limit error, got nil")
}
if !strings.Contains(iss.Err().Error(), "expression code point size exceeds limit") {
t.Fatalf("unexpected error: %v", iss.Err())
}
case "parse":
_, iss := env.Parse(payload)
if iss == nil || iss.Err() == nil {
t.Fatal("expected size limit error, got nil")
}
if !strings.Contains(iss.Err().Error(), "expression code point size exceeds limit") {
t.Fatalf("unexpected error: %v", iss.Err())
}
}

runtime.ReadMemStats(&m2)
allocDelta := (m2.TotalAlloc - m1.TotalAlloc) / (1024 * 1024)
if allocDelta > 5 {
t.Errorf("excessive memory allocation: %dMiB during %s (expected <5MiB with early enforcement)",
allocDelta, tc.mode)
}
t.Logf("[%s] memory delta: %dMiB", tc.mode, allocDelta)
})
}
}
20 changes: 18 additions & 2 deletions cel/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,15 @@ func (e *Env) Check(ast *Ast) (*Ast, *Issues) {
return ast, nil
}

// configuredExpressionSizeLimit returns the effective expression size code point limit.
// A zero value means "use the parser default".
func (e *Env) configuredExpressionSizeLimit() int {
if l := e.limits[limitCodePointSize]; l != 0 {
return l
}
return 100_000
}

// Compile combines the Parse and Check phases CEL program compilation to produce an Ast and
// associated issues.
//
Expand All @@ -445,7 +454,11 @@ func (e *Env) Check(ast *Ast) (*Ast, *Issues) {
//
// Note, for parse-only uses of CEL use Parse.
func (e *Env) Compile(txt string) (*Ast, *Issues) {
return e.CompileSource(common.NewTextSource(txt))
src, err := common.NewTextSourceWithLimit(txt, e.configuredExpressionSizeLimit())
if err != nil {
return nil, ErrorAsIssues(err)
}
return e.CompileSource(src)
}

// CompileSource combines the Parse and Check phases CEL program compilation to produce an Ast and
Expand Down Expand Up @@ -649,7 +662,10 @@ func (e *Env) Validators() []ASTValidator {
// This form of Parse creates a Source value for the input `txt` and forwards to the
// ParseSource method.
func (e *Env) Parse(txt string) (*Ast, *Issues) {
src := common.NewTextSource(txt)
src, err := common.NewTextSourceWithLimit(txt, e.configuredExpressionSizeLimit())
if err != nil {
return nil, ErrorAsIssues(err)
}
return e.ParseSource(src)
}

Expand Down
80 changes: 51 additions & 29 deletions common/runes/buffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package runes

import (
"fmt"
"strings"
"unicode/utf8"
)
Expand Down Expand Up @@ -113,45 +114,64 @@ var _ Buffer = &supplementalBuffer{}

var nilBuffer = &emptyBuffer{}

// SizeLimitError indicates that the input exceeded the configured code point limit.
type SizeLimitError struct {
Size int
Limit int
}

func (e *SizeLimitError) Error() string {
return fmt.Sprintf("expression code point size exceeds limit: size: %d, limit %d", e.Size, e.Limit)
}

// NewBuffer returns an efficient implementation of Buffer for the given text based on the ranges of
// the encoded code points contained within.
//
// Code points are represented as an array of byte, uint16, or rune. This approach ensures that
// each index represents a code point by itself without needing to use an array of rune. At first
// we assume all code points are less than or equal to '\u007f'. If this holds true, the
// underlying storage is a byte array containing only ASCII characters. If we encountered a code
// point above this range but less than or equal to '\uffff' we allocate a uint16 array, copy the
// elements of previous byte array to the uint16 array, and continue. If this holds true, the
// underlying storage is a uint16 array containing only Unicode characters in the Basic Multilingual
// Plane. If we encounter a code point above '\uffff' we allocate an rune array, copy the previous
// elements of the byte or uint16 array, and continue. The underlying storage is an rune array
// containing any Unicode character.
func NewBuffer(data string) Buffer {
buf, _ := newBuffer(data, false)
buf, _, _ := newBufferWithLimit(data, false, -1)
return buf
}

// NewBufferAndLineOffsets returns an efficient implementation of Buffer for the given text based on
// the ranges of the encoded code points contained within, as well as returning the line offsets.
//
// Code points are represented as an array of byte, uint16, or rune. This approach ensures that
// each index represents a code point by itself without needing to use an array of rune. At first
// we assume all code points are less than or equal to '\u007f'. If this holds true, the
// underlying storage is a byte array containing only ASCII characters. If we encountered a code
// point above this range but less than or equal to '\uffff' we allocate a uint16 array, copy the
// elements of previous byte array to the uint16 array, and continue. If this holds true, the
// underlying storage is a uint16 array containing only Unicode characters in the Basic Multilingual
// Plane. If we encounter a code point above '\uffff' we allocate an rune array, copy the previous
// elements of the byte or uint16 array, and continue. The underlying storage is an rune array
// containing any Unicode character.
func NewBufferAndLineOffsets(data string) (Buffer, []int32) {
return newBuffer(data, true)
buf, offs, _ := newBufferWithLimit(data, true, -1)
return buf, offs
}

// NewBufferAndLineOffsetsWithLimit returns an efficient implementation of Buffer for the given text
// and enforces a code point limit while constructing the buffer.
func NewBufferAndLineOffsetsWithLimit(data string, limit int) (Buffer, []int32, error) {
if limit < 0 || len(data) <= limit {
return newBufferWithLimit(data, true, -1)
}
return newBufferWithLimit(data, true, limit)
}

func countRemainingCodePoints(data string, idx int, count int) int {
for idx < len(data) {
_, s := utf8.DecodeRuneInString(data[idx:])
idx += s
count++
}
return count
}

func newBuffer(data string, lines bool) (Buffer, []int32) {
func newBufferWithLimit(data string, lines bool, limit int) (Buffer, []int32, error) {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we still get here if len(data) > limit, which means we will allocate the buffer even though we know we are going to go past it. I would still add the following after this if statement.

if limit >= 0 && len(data) > limit {
		return nil, nil, &SizeLimitError{
			Size:  countRemainingCodePoints(data, 0, 0),
			Limit: limit,
		}
}

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I updated it so we return before any buffer allocation on the oversized path. I kept the code point count there so multibyte input still follows the same limit semantics.

I reran the tests after the change.

if len(data) == 0 {
return nilBuffer, []int32{0}
return nilBuffer, []int32{0}, nil
}
if limit >= 0 && len(data) > limit {
size := countRemainingCodePoints(data, 0, 0)
if size > limit {
return nil, nil, &SizeLimitError{
Size: size,
Limit: limit,
}
}
}

// The resulting buffers store one element per code point, so the worst case
// element count never exceeds len(data).
var (
idx = 0
off int32 = 0
Expand Down Expand Up @@ -195,7 +215,8 @@ func newBuffer(data string, lines bool) (Buffer, []int32) {
}
return &asciiBuffer{
arr: buf8,
}, offs
}, offs, nil

copy16:
for idx < len(data) {
r, s := utf8.DecodeRuneInString(data[idx:])
Expand All @@ -222,7 +243,8 @@ copy16:
}
return &basicBuffer{
arr: buf16,
}, offs
}, offs, nil

copy32:
for idx < len(data) {
r, s := utf8.DecodeRuneInString(data[idx:])
Expand All @@ -238,5 +260,5 @@ copy32:
}
return &supplementalBuffer{
arr: buf32,
}, offs
}, offs, nil
}
27 changes: 27 additions & 0 deletions common/runes/buffer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,30 @@ func TestNewBuffer_Empty(t *testing.T) {
t.Errorf("type mismatch: got %T, want %T", rb, &emptyBuffer{})
}
}

func TestNewBufferAndLineOffsetsWithLimit_Exceeded(t *testing.T) {
_, _, err := NewBufferAndLineOffsetsWithLimit("greetings", 5)
if err == nil {
t.Fatalf("expected size limit error, got nil")
}
if got, want := err.Error(), "expression code point size exceeds limit: size: 9, limit 5"; got != want {
t.Fatalf("got %q, want %q", got, want)
}
}

func TestNewBufferAndLineOffsetsWithLimit_MultibyteWithinLimit(t *testing.T) {
data := "🙂🙂"
rb, offs, err := NewBufferAndLineOffsetsWithLimit(data, 2)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got, want := rb.Len(), 2; got != want {
t.Fatalf("length mismatch: got %d, want %d", got, want)
}
if got, want := rb.Slice(0, rb.Len()), data; got != want {
t.Fatalf("slice mismatch: got %q, want %q", got, want)
}
if got, want := len(offs), 1; got != want {
t.Fatalf("line offsets mismatch: got %d, want %d", got, want)
}
}
23 changes: 23 additions & 0 deletions common/source.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ func NewTextSource(text string) Source {
return NewStringSource(text, "<input>")
}

// NewTextSourceWithLimit creates a new Source from the input text string while
// enforcing a maximum code point count when needed.
func NewTextSourceWithLimit(text string, limit int) (Source, error) {
return NewStringSourceWithLimit(text, "<input>", limit)
}

// NewStringSource creates a new Source from the given contents and description.
func NewStringSource(contents string, description string) Source {
// Compute line offsets up front as they are referred to frequently.
Expand All @@ -85,6 +91,23 @@ func NewStringSource(contents string, description string) Source {
}
}

// NewStringSourceWithLimit creates a new Source from the given contents and
// description while enforcing a maximum code point count when needed.
func NewStringSourceWithLimit(contents string, description string, limit int) (Source, error) {
if limit < 0 || len(contents) <= limit {
return NewStringSource(contents, description), nil
}
buf, offs, err := runes.NewBufferAndLineOffsetsWithLimit(contents, limit)
if err != nil {
return nil, err
}
return &sourceImpl{
Buffer: buf,
description: description,
lineOffsets: offs,
}, nil
}

// NewInfoSource creates a new Source from a SourceInfo.
func NewInfoSource(info *exprpb.SourceInfo) Source {
return &sourceImpl{
Expand Down
22 changes: 22 additions & 0 deletions common/source_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package common

import (
"strings"
"testing"
)

Expand Down Expand Up @@ -135,3 +136,24 @@ func TestNewInfoSource_NoPanicOnNil(t *testing.T) {
// Ensure there is no panic when passing nil, NewInfoSource should use proto v2 style accessors.
_ = NewInfoSource(nil)
}

func TestNewTextSourceWithLimit_Exceeded(t *testing.T) {
_, err := NewTextSourceWithLimit("greetings", 5)
if err == nil {
t.Fatalf("expected size limit error, got nil")
}
if !strings.Contains(err.Error(), "size exceeds limit") {
t.Fatalf("unexpected error: %v", err)
}
}

func TestNewTextSourceWithLimit_MultibyteWithinLimit(t *testing.T) {
data := "🙂🙂"
src, err := NewTextSourceWithLimit(data, 2)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got, want := src.Content(), data; got != want {
t.Fatalf("got %q, want %q", got, want)
}
}