Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions contentstream/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type Op struct {
type Scanner struct {
// lx is the underlying lexer; readInlineImage repositions it with SetPos
// once it has located the end of an inline image.
lx *lex.Lexer
// stack holds Operand values.
// NOTE(review): presumably the operands gathered ahead of the next
// operator — confirm against Next.
stack []Operand
// depth is the current array/dict nesting level. readArray and readDict
// increment it on entry, decrement it on return, and fail once it exceeds
// maxNestDepth.
depth int
// done: NOTE(review): presumably set once the scanner has reached end of
// stream or hit an error — confirm against Next.
done bool
}

Expand All @@ -45,6 +46,10 @@ func New(src []byte) *Scanner {
// operation (e.g. inside a dictionary, or while looking for EI).
var ErrUnexpectedEOF = errors.New("pdfdisassembler/contentstream: unexpected EOF")

// maxNestDepth is the deepest array/dict nesting that readArray and readDict
// accept; as soon as the nesting level exceeds it they return an error, so a
// hostile content stream can't recurse the scanner into a stack overflow.
const maxNestDepth = 1000

// Next returns the next operation. At end of stream it returns io.EOF.
// Any other error indicates malformed input; the scanner is not safe
// to keep using after an error.
Expand Down Expand Up @@ -148,6 +153,11 @@ func (s *Scanner) nextToken() (lex.Token, error) {
}

func (s *Scanner) readArray() ([]Operand, error) {
s.depth++
defer func() { s.depth-- }()
if s.depth > maxNestDepth {
return nil, fmt.Errorf("pdfdisassembler/contentstream: nesting too deep (> %d)", maxNestDepth)
}
var out []Operand
for {
tok, err := s.nextToken()
Expand Down Expand Up @@ -196,6 +206,11 @@ func (s *Scanner) readArray() ([]Operand, error) {
}

func (s *Scanner) readDict() (Dict, error) {
s.depth++
defer func() { s.depth-- }()
if s.depth > maxNestDepth {
return nil, fmt.Errorf("pdfdisassembler/contentstream: nesting too deep (> %d)", maxNestDepth)
}
out := Dict{}
for {
tok, err := s.nextToken()
Expand Down Expand Up @@ -326,6 +341,9 @@ func (s *Scanner) readInlineImage() ([]byte, error) {
// Check trailing boundary.
if pos+2 == len(src) || lex.IsWhitespace(src[pos+2]) || lex.IsDelimiter(src[pos+2]) {
imgEnd := pos - 1 // strip the whitespace separator
if imgEnd < imgStart {
imgEnd = imgStart // empty image: no data between ID and EI
}
s.lx.SetPos(pos + 2)
return append([]byte(nil), src[imgStart:imgEnd]...), nil
}
Expand Down
63 changes: 63 additions & 0 deletions contentstream/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"io"
"reflect"
"strings"
"testing"

"github.com/speedata/pdfdisassembler/contentstream"
Expand Down Expand Up @@ -201,3 +202,65 @@ func TestAllIteratorStopsOnError(t *testing.T) {
}
}

// TestInlineImageEmptyNoPanic feeds the scanner inline images that carry no
// bytes between ID and EI. Every input must scan through to io.EOF without an
// error and yield an EI op whose image payload is empty, instead of panicking
// on a reversed slice bound.
func TestInlineImageEmptyNoPanic(t *testing.T) {
	inputs := []string{"BI ID EI", "BI /W 1 /H 1 ID EI", "q BI ID\nEI Q"}
	for _, input := range inputs {
		t.Run(input, func(t *testing.T) {
			scanner := contentstream.New([]byte(input))
			foundEI := false
			// Pull ops until the scanner reports end of stream.
			for op, err := scanner.Next(); !errors.Is(err, io.EOF); op, err = scanner.Next() {
				if err != nil {
					t.Fatalf("unexpected error: %v", err)
				}
				if op.Operator != "EI" {
					continue
				}
				foundEI = true
				if n := len(op.Image); n != 0 {
					t.Errorf("empty inline image: got %d image bytes", n)
				}
			}
			if !foundEI {
				t.Error("no EI op produced")
			}
		})
	}
}

// TestDeeplyNestedArrayRejected opens 5000 nested arrays in a single operand
// and expects the first Next call to fail with an error instead of recursing
// until the goroutine stack overflows.
func TestDeeplyNestedArrayRejected(t *testing.T) {
	const levels = 5000
	input := strings.Repeat("[", levels) + strings.Repeat("]", levels) + " n"

	_, err := contentstream.New([]byte(input)).Next()
	if err == nil {
		t.Fatal("expected a nesting-depth error, got nil")
	}
}

// TestDeeplyNestedDictRejected is the dictionary counterpart of the array
// test: the property operand of a BDC operator is a dict nested 5000 levels
// deep, and the first Next call must report an error.
func TestDeeplyNestedDictRejected(t *testing.T) {
	const levels = 5000
	opening := strings.Repeat("<< /K ", levels)
	closing := strings.Repeat(" >>", levels)
	input := "/P " + opening + "0" + closing + " BDC"

	_, err := contentstream.New([]byte(input)).Next()
	if err == nil {
		t.Fatal("expected a nesting-depth error, got nil")
	}
}

// TestModeratelyNestedArrayResolves is the control case for the nesting
// limit: an array nested 100 levels deep must still scan cleanly and arrive as
// the single array operand of the following "n" operator.
func TestModeratelyNestedArrayResolves(t *testing.T) {
	const depth = 100
	input := strings.Repeat("[", depth) + strings.Repeat("]", depth) + " n"

	op, err := contentstream.New([]byte(input)).Next()
	if err != nil {
		t.Fatalf("unexpected error at depth %d: %v", depth, err)
	}

	wellFormed := op.Operator == "n" &&
		len(op.Operands) == 1 &&
		op.Operands[0].Kind == contentstream.KindArray
	if !wellFormed {
		t.Fatalf("want n op with one array operand, got %+v", op)
	}
}
16 changes: 3 additions & 13 deletions crypt.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package pdfdisassembler

import (
"errors"
"fmt"

"github.com/speedata/pdfdisassembler/internal/crypt"
Expand All @@ -11,8 +10,6 @@ import (
// unencrypted.
type encryptCtx struct {
handler *crypt.Handler
// password is retained for re-derivation if needed.
password []byte
}

// initEncrypt reads the trailer /Encrypt entry, builds the crypt.Handler
Expand Down Expand Up @@ -117,15 +114,8 @@ func encryptParamsFromDict(r *Reader, d *Dict) (crypt.Params, error) {
return p, nil
}

func (e *encryptCtx) decryptStream(data []byte, objNum, objGen int, dict *Dict) ([]byte, error) {
// Per-stream /Filter chain may contain /Crypt with a parameter dict;
// for now we use the default stream cipher.
func (e *encryptCtx) decryptStream(data []byte, objNum, objGen int) ([]byte, error) {
// V4 streams may carry an inline /Crypt filter overriding the cipher; it
// is not yet honored — the default stream cipher is always used.
return e.handler.DecryptStream(data, objNum, objGen, "")
}

func (e *encryptCtx) decryptString(data []byte, objNum, objGen int) ([]byte, error) {
return e.handler.DecryptString(data, objNum, objGen)
}

// guard against accidental nil deref in callers
var _ = errors.New
191 changes: 191 additions & 0 deletions crypt_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
package pdfdisassembler

import (
"bytes"
"crypto/md5"
"crypto/rc4"
"encoding/hex"
"fmt"
"strings"
"testing"
)

// buildEncryptedPDF returns the bytes of a minimal three-object PDF (catalog,
// empty page tree, /Encrypt dictionary) followed by a classic xref table and
// trailer. The /Encrypt dictionary selects the /Standard handler with /V 2
// /R 3 and declares the given /Length (in bits) verbatim, so callers can
// inject out-of-range key sizes.
//
// /O and /U are 32-byte placeholders, not valid credentials. The fixture
// relies on Open running the empty-password key derivation whether or not
// they validate — NOTE(review): Open is not visible here; confirm.
func buildEncryptedPDF(t *testing.T, length int) []byte {
	t.Helper()

	// 64 hex digits each, i.e. 32 bytes once the PDF hex strings are decoded.
	placeholderO := strings.Repeat("ab", 32)
	placeholderU := strings.Repeat("cd", 32)

	// Objects 1..3 in file order; the slice index is the object number minus one.
	objects := []string{
		"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
		"2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n",
		fmt.Sprintf(
			"3 0 obj\n<< /Filter /Standard /V 2 /R 3 /Length %d /O <%s> /U <%s> /P -44 >>\nendobj\n",
			length, placeholderO, placeholderU),
	}

	var pdf bytes.Buffer
	pdf.WriteString("%PDF-1.7\n%\xE2\xE3\xCF\xD3\n")

	// Record where each object starts so the xref table can point at it.
	starts := make([]int, 0, len(objects))
	for _, body := range objects {
		starts = append(starts, pdf.Len())
		pdf.WriteString(body)
	}

	xrefStart := pdf.Len()
	pdf.WriteString("xref\n0 4\n")
	fmt.Fprintf(&pdf, "%010d %05d f \n", 0, 65535) // entry for free object 0
	for _, start := range starts {
		fmt.Fprintf(&pdf, "%010d %05d n \n", start, 0)
	}

	const fileID = "<00112233445566778899aabbccddeeff>"
	fmt.Fprintf(&pdf,
		"trailer\n<< /Size 4 /Root 1 0 R /Encrypt 3 0 R /ID [%s %s] >>\n", fileID, fileID)
	fmt.Fprintf(&pdf, "startxref\n%d\n%%%%EOF\n", xrefStart)
	return pdf.Bytes()
}

// TestEncryptHostileKeyLengthNoPanic opens fixtures whose /Encrypt dictionary
// declares a /Length that maps to a key larger than the 16-byte MD5 digest
// (256 and 4096 bits) or to a negative size (-8). Open must return an error
// for each of them rather than panic.
func TestEncryptHostileKeyLengthNoPanic(t *testing.T) {
	hostileLengths := []int{256, 4096, -8}
	for _, bits := range hostileLengths {
		name := fmt.Sprintf("length_%d", bits)
		t.Run(name, func(t *testing.T) {
			pdf := buildEncryptedPDF(t, bits)
			_, err := Open(bytes.NewReader(pdf))
			if err == nil {
				t.Fatal("expected an error for hostile /Length, got nil")
			}
		})
	}
}

// stdPassPad is the 32-byte password padding string from PDF 32000-1:2008,
// Algorithm 2. The fixture helpers in this file (emptyPwRC4Key, emptyPwU)
// hash it where a padded password would go, which models the empty user
// password of a V2/R3 document. It must not be modified.
var stdPassPad = []byte{
0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41,
0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08,
0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a,
}

// emptyPwRC4Key derives the V2/R3 file encryption key for the empty user
// password.
//
// owner is the raw /O value, id0 the first element of the trailer /ID, p the
// /P permission flags and bits the key size in bits. The returned key is
// bits/8 bytes long; bits/8 must not exceed the 16-byte MD5 digest size, as
// larger values make the digest slicing panic.
func emptyPwRC4Key(owner, id0 []byte, p int32, bits int) []byte {
	keyLen := bits / 8
	perm := uint32(p)

	// Hash input: padding string (standing in for the empty password), /O,
	// /P as four little-endian bytes, then the first /ID element.
	seed := make([]byte, 0, len(stdPassPad)+len(owner)+4+len(id0))
	seed = append(seed, stdPassPad...)
	seed = append(seed, owner...)
	seed = append(seed, byte(perm), byte(perm>>8), byte(perm>>16), byte(perm>>24))
	seed = append(seed, id0...)
	digest := md5.Sum(seed)

	// Re-hash the first keyLen bytes of the digest 50 times.
	for round := 0; round < 50; round++ {
		digest = md5.Sum(digest[:keyLen])
	}

	key := make([]byte, keyLen)
	copy(key, digest[:keyLen])
	return key
}

// emptyPwU computes the /U entry (Algorithm 5, R >= 3) that corresponds to
// the empty user password, so that a fixture carrying it passes Open's
// password validation.
//
// key is the file encryption key and id0 the first element of the trailer
// /ID. The result is 32 bytes: the 16 computed bytes followed by 16 zero
// bytes of padding.
func emptyPwU(key, id0 []byte) []byte {
	seed := make([]byte, 0, len(stdPassPad)+len(id0))
	seed = append(seed, stdPassPad...)
	seed = append(seed, id0...)
	digest := md5.Sum(seed)

	u := make([]byte, 32)
	body := u[:16] // the meaningful half; the tail stays zero

	// NewCipher's error is ignored as in the rest of this fixture code: it
	// only rejects key sizes outside 1..256 bytes, and the caller in this
	// file passes a 16-byte key.
	base, _ := rc4.NewCipher(key)
	base.XORKeyStream(body, digest[:])

	// 19 further in-place passes, each keyed with the file key XORed
	// byte-wise with the pass number.
	passKey := make([]byte, len(key))
	for pass := 1; pass <= 19; pass++ {
		for j := range key {
			passKey[j] = key[j] ^ byte(pass)
		}
		c, _ := rc4.NewCipher(passKey)
		c.XORKeyStream(body, body)
	}
	return u
}

// objKeyRC4 derives the per-object RC4 key (Algorithm 1): the MD5 digest of
// the file key followed by the low three bytes of the object number and the
// low two bytes of the generation number (both little-endian), truncated to
// len(fileKey)+5 bytes with a ceiling of 16. fileKey is not modified.
func objKeyRC4(fileKey []byte, num, gen int) []byte {
	salted := make([]byte, 0, len(fileKey)+5)
	salted = append(salted, fileKey...)
	salted = append(salted,
		byte(num), byte(num>>8), byte(num>>16), // object number
		byte(gen), byte(gen>>8), // generation number
	)

	digest := md5.Sum(salted)
	keyLen := len(salted) // len(fileKey) + 5
	if keyLen > md5.Size {
		keyLen = md5.Size
	}
	return digest[:keyLen]
}

// rc4Crypt returns data encrypted (or, RC4 being symmetric, decrypted) with
// key. The input slice is left untouched; the result is a fresh slice of the
// same length.
//
// It panics with an error wrapping the crypto/rc4 key-size error when key is
// not a valid RC4 key (empty or longer than 256 bytes). This helper has no
// *testing.T to report through, and discarding the error would only surface
// later as an opaque nil-pointer panic.
func rc4Crypt(key, data []byte) []byte {
	c, err := rc4.NewCipher(key)
	if err != nil {
		panic(fmt.Errorf("rc4Crypt: %w", err))
	}
	out := make([]byte, len(data))
	c.XORKeyStream(out, data)
	return out
}

// buildRC4EncryptedStreamPDF returns the bytes of a V2/R3, 128-bit RC4
// encrypted PDF protected by the empty user password. Objects 1-3 are the
// catalog, an empty page tree and the /Encrypt dictionary (with a /U value
// computed for the empty password); object 4 is a stream whose body is
// plaintext encrypted under the per-object key for "4 0".
func buildRC4EncryptedStreamPDF(t *testing.T, plaintext []byte) []byte {
	t.Helper()

	const keyBits = 128
	perms := int32(-44)
	ownerEntry := bytes.Repeat([]byte{0x5a}, 32)
	firstID := bytes.Repeat([]byte{0x7c}, 16)

	// Key material: file key -> /U entry and per-object key for object 4.
	fileKey := emptyPwRC4Key(ownerEntry, firstID, perms, keyBits)
	userEntry := emptyPwU(fileKey, firstID)
	ciphertext := rc4Crypt(objKeyRC4(fileKey, 4, 0), plaintext)

	var pdf bytes.Buffer
	pdf.WriteString("%PDF-1.7\n%\xE2\xE3\xCF\xD3\n")

	// starts[i] is the byte offset of object i+1.
	var starts []int
	beginObject := func() { starts = append(starts, pdf.Len()) }

	beginObject()
	pdf.WriteString("1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n")

	beginObject()
	pdf.WriteString("2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n")

	beginObject()
	fmt.Fprintf(&pdf,
		"3 0 obj\n<< /Filter /Standard /V 2 /R 3 /Length %d /O <%s> /U <%s> /P %d >>\nendobj\n",
		keyBits, hex.EncodeToString(ownerEntry), hex.EncodeToString(userEntry), perms)

	beginObject()
	fmt.Fprintf(&pdf, "4 0 obj\n<< /Length %d >>\nstream\n", len(ciphertext))
	pdf.Write(ciphertext)
	pdf.WriteString("\nendstream\nendobj\n")

	xrefStart := pdf.Len()
	pdf.WriteString("xref\n0 5\n")
	fmt.Fprintf(&pdf, "%010d %05d f \n", 0, 65535) // entry for free object 0
	for _, start := range starts {
		fmt.Fprintf(&pdf, "%010d %05d n \n", start, 0)
	}

	idHex := hex.EncodeToString(firstID)
	fmt.Fprintf(&pdf,
		"trailer\n<< /Size 5 /Root 1 0 R /Encrypt 3 0 R /ID [<%s> <%s>] >>\n", idHex, idHex)
	fmt.Fprintf(&pdf, "startxref\n%d\n%%%%EOF\n", xrefStart)
	return pdf.Bytes()
}

// Open must accept an RC4-encrypted PDF secured with the empty user password
// and decrypt its stream content end-to-end.
func TestOpenDecryptsRC4Stream(t *testing.T) {
plaintext := []byte("BT (top secret invoice) Tj ET")
data := buildRC4EncryptedStreamPDF(t, plaintext)
r, err := Open(bytes.NewReader(data))
if err != nil {
t.Fatalf("Open: %v", err)
}
defer r.Close()
got, err := r.DecodeStream(Reference{Number: 4, Generation: 0})
if err != nil {
t.Fatalf("DecodeStream: %v", err)
}
if !bytes.Equal(got, plaintext) {
t.Fatalf("decrypted stream mismatch:\n got %q\nwant %q", got, plaintext)
}
}
2 changes: 1 addition & 1 deletion filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func (r *Reader) applyFilters(s *Stream, raw []byte, encrypted bool) ([]byte, er
if encrypted && r.encrypt != nil {
// Cross-reference streams are themselves unencrypted; callers
// must pass encrypted=false for those.
dec, err := r.encrypt.decryptStream(data, s.objNumber, s.objGeneration, s.Dict)
dec, err := r.encrypt.decryptStream(data, s.objNumber, s.objGeneration)
if err != nil {
return nil, fmt.Errorf("pdfdisassembler: decrypt stream %d %d R: %w", s.objNumber, s.objGeneration, err)
}
Expand Down
5 changes: 5 additions & 0 deletions internal/crypt/crypt.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,11 @@ func computeRC4Key(p Params, password []byte) ([]byte, error) {
if keyLen == 0 {
keyLen = 5 // V1 default
}
// /Length is attacker-controlled; the key is sliced from a 16-byte MD5
// digest, so anything outside [1, md5.Size] would slice/make out of range.
if keyLen < 1 || keyLen > md5.Size {
return nil, fmt.Errorf("crypt: invalid key length %d bits", p.Length)
}
if p.R >= 3 {
for i := 0; i < 50; i++ {
s := md5.Sum(sum[:keyLen])
Expand Down
Loading