diff --git a/contentstream/scanner.go b/contentstream/scanner.go index 0345e1d..90d477e 100644 --- a/contentstream/scanner.go +++ b/contentstream/scanner.go @@ -30,6 +30,7 @@ type Op struct { type Scanner struct { lx *lex.Lexer stack []Operand + depth int done bool } @@ -45,6 +46,10 @@ func New(src []byte) *Scanner { // operation (e.g. inside a dictionary, or while looking for EI). var ErrUnexpectedEOF = errors.New("pdfdisassembler/contentstream: unexpected EOF") +// maxNestDepth bounds array/dict nesting so a hostile content stream can't +// recurse the scanner into a stack overflow. +const maxNestDepth = 1000 + // Next returns the next operation. At end of stream it returns io.EOF. // Any other error indicates malformed input; the scanner is not safe // to keep using after an error. @@ -148,6 +153,11 @@ func (s *Scanner) nextToken() (lex.Token, error) { } func (s *Scanner) readArray() ([]Operand, error) { + s.depth++ + defer func() { s.depth-- }() + if s.depth > maxNestDepth { + return nil, fmt.Errorf("pdfdisassembler/contentstream: nesting too deep (> %d)", maxNestDepth) + } var out []Operand for { tok, err := s.nextToken() @@ -196,6 +206,11 @@ func (s *Scanner) readArray() ([]Operand, error) { } func (s *Scanner) readDict() (Dict, error) { + s.depth++ + defer func() { s.depth-- }() + if s.depth > maxNestDepth { + return nil, fmt.Errorf("pdfdisassembler/contentstream: nesting too deep (> %d)", maxNestDepth) + } out := Dict{} for { tok, err := s.nextToken() @@ -326,6 +341,9 @@ func (s *Scanner) readInlineImage() ([]byte, error) { // Check trailing boundary. 
if pos+2 == len(src) || lex.IsWhitespace(src[pos+2]) || lex.IsDelimiter(src[pos+2]) { imgEnd := pos - 1 // strip the whitespace separator + if imgEnd < imgStart { + imgEnd = imgStart // empty image: no data between ID and EI + } s.lx.SetPos(pos + 2) return append([]byte(nil), src[imgStart:imgEnd]...), nil } diff --git a/contentstream/scanner_test.go b/contentstream/scanner_test.go index e3a5bea..0bba78f 100644 --- a/contentstream/scanner_test.go +++ b/contentstream/scanner_test.go @@ -4,6 +4,7 @@ import ( "errors" "io" "reflect" + "strings" "testing" "github.com/speedata/pdfdisassembler/contentstream" @@ -201,3 +202,65 @@ func TestAllIteratorStopsOnError(t *testing.T) { } } +// An inline image with no data between ID and EI must yield an empty-image EI +// op, not panic on a reversed slice bound. +func TestInlineImageEmptyNoPanic(t *testing.T) { + for _, src := range []string{"BI ID EI", "BI /W 1 /H 1 ID EI", "q BI ID\nEI Q"} { + t.Run(src, func(t *testing.T) { + sc := contentstream.New([]byte(src)) + var sawEI bool + for { + op, err := sc.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if op.Operator == "EI" { + sawEI = true + if len(op.Image) != 0 { + t.Errorf("empty inline image: got %d image bytes", len(op.Image)) + } + } + } + if !sawEI { + t.Error("no EI op produced") + } + }) + } +} + +// Deeply nested arrays must be rejected with an error rather than recursing +// until the goroutine stack overflows. +func TestDeeplyNestedArrayRejected(t *testing.T) { + src := strings.Repeat("[", 5000) + strings.Repeat("]", 5000) + " n" + sc := contentstream.New([]byte(src)) + if _, err := sc.Next(); err == nil { + t.Fatal("expected a nesting-depth error, got nil") + } +} + +// Deeply nested dicts (via inline-image / BDC bodies) must likewise be bounded. 
+func TestDeeplyNestedDictRejected(t *testing.T) { + src := "/P " + strings.Repeat("<< /K ", 5000) + "0" + strings.Repeat(" >>", 5000) + " BDC" + sc := contentstream.New([]byte(src)) + if _, err := sc.Next(); err == nil { + t.Fatal("expected a nesting-depth error, got nil") + } +} + +// Control: moderate nesting must still resolve, proving the limit doesn't +// reject legitimate content. +func TestModeratelyNestedArrayResolves(t *testing.T) { + const depth = 100 + src := strings.Repeat("[", depth) + strings.Repeat("]", depth) + " n" + sc := contentstream.New([]byte(src)) + op, err := sc.Next() + if err != nil { + t.Fatalf("unexpected error at depth %d: %v", depth, err) + } + if op.Operator != "n" || len(op.Operands) != 1 || op.Operands[0].Kind != contentstream.KindArray { + t.Fatalf("want n op with one array operand, got %+v", op) + } +} diff --git a/crypt.go b/crypt.go index 8ea0ccc..5624c4c 100644 --- a/crypt.go +++ b/crypt.go @@ -1,7 +1,6 @@ package pdfdisassembler import ( - "errors" "fmt" "github.com/speedata/pdfdisassembler/internal/crypt" @@ -11,8 +10,6 @@ import ( // unencrypted. type encryptCtx struct { handler *crypt.Handler - // password is retained for re-derivation if needed. - password []byte } // initEncrypt reads the trailer /Encrypt entry, builds the crypt.Handler @@ -117,15 +114,8 @@ func encryptParamsFromDict(r *Reader, d *Dict) (crypt.Params, error) { return p, nil } -func (e *encryptCtx) decryptStream(data []byte, objNum, objGen int, dict *Dict) ([]byte, error) { - // Per-stream /Filter chain may contain /Crypt with a parameter dict; - // for now we use the default stream cipher. +func (e *encryptCtx) decryptStream(data []byte, objNum, objGen int) ([]byte, error) { + // V4 streams may carry an inline /Crypt filter overriding the cipher; it + // is not yet honored — the default stream cipher is always used. 
return e.handler.DecryptStream(data, objNum, objGen, "") } - -func (e *encryptCtx) decryptString(data []byte, objNum, objGen int) ([]byte, error) { - return e.handler.DecryptString(data, objNum, objGen) -} - -// guard against accidental nil deref in callers -var _ = errors.New diff --git a/crypt_test.go b/crypt_test.go new file mode 100644 index 0000000..c0262f5 --- /dev/null +++ b/crypt_test.go @@ -0,0 +1,191 @@ +package pdfdisassembler + +import ( + "bytes" + "crypto/md5" + "crypto/rc4" + "encoding/hex" + "fmt" + "strings" + "testing" +) + +// buildEncryptedPDF constructs a PDF secured with the /Standard handler +// (V2/R3 RC4) whose /Encrypt dict declares the given /Length in bits. /O and +// /U are 32-byte placeholders; the empty-password key derivation runs during +// Open regardless of whether they validate. +func buildEncryptedPDF(t *testing.T, length int) []byte { + t.Helper() + var buf bytes.Buffer + off := func() int { return buf.Len() } + fmt.Fprint(&buf, "%PDF-1.7\n%\xE2\xE3\xCF\xD3\n") + + offsets := make([]int, 4) // index 1..3 + + offsets[1] = off() + fmt.Fprint(&buf, "1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n") + offsets[2] = off() + fmt.Fprint(&buf, "2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n") + + o := strings.Repeat("ab", 32) // 32 bytes, hex-encoded + u := strings.Repeat("cd", 32) + offsets[3] = off() + fmt.Fprintf(&buf, + "3 0 obj\n<< /Filter /Standard /V 2 /R 3 /Length %d /O <%s> /U <%s> /P -44 >>\nendobj\n", + length, o, u) + + xrefOff := off() + fmt.Fprint(&buf, "xref\n0 4\n") + fmt.Fprintf(&buf, "%010d %05d f \n", 0, 65535) + for i := 1; i <= 3; i++ { + fmt.Fprintf(&buf, "%010d %05d n \n", offsets[i], 0) + } + id := "<00112233445566778899aabbccddeeff>" + fmt.Fprintf(&buf, + "trailer\n<< /Size 4 /Root 1 0 R /Encrypt 3 0 R /ID [%s %s] >>\n", id, id) + fmt.Fprintf(&buf, "startxref\n%d\n%%%%EOF\n", xrefOff) + return buf.Bytes() +} + +// A malicious /Encrypt dict can declare a /Length whose key size exceeds the +// 
16-byte MD5 digest (or is negative). Open must surface an error, not panic. +func TestEncryptHostileKeyLengthNoPanic(t *testing.T) { + for _, length := range []int{256, 4096, -8} { + t.Run(fmt.Sprintf("length_%d", length), func(t *testing.T) { + data := buildEncryptedPDF(t, length) + if _, err := Open(bytes.NewReader(data)); err == nil { + t.Fatal("expected an error for hostile /Length, got nil") + } + }) + } +} + +// stdPassPad is the 32-byte padding string from PDF 32000-1:2008 algorithm 2, +// used to build an empty-password V2/R3 fixture. +var stdPassPad = []byte{ + 0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41, + 0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08, + 0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80, + 0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a, +} + +// emptyPwRC4Key derives the V2/R3 file key for the empty user password. +func emptyPwRC4Key(owner, id0 []byte, p int32, bits int) []byte { + h := md5.New() + h.Write(stdPassPad) + h.Write(owner) + h.Write([]byte{byte(uint32(p)), byte(uint32(p) >> 8), byte(uint32(p) >> 16), byte(uint32(p) >> 24)}) + h.Write(id0) + sum := h.Sum(nil) + keyLen := bits / 8 + for i := 0; i < 50; i++ { + s := md5.Sum(sum[:keyLen]) + sum = s[:] + } + key := make([]byte, keyLen) + copy(key, sum[:keyLen]) + return key +} + +// emptyPwU computes the /U value (algorithm 5, R>=3) for the empty password, +// so Open's password validation accepts the fixture. +func emptyPwU(key, id0 []byte) []byte { + h := md5.New() + h.Write(stdPassPad) + h.Write(id0) + digest := h.Sum(nil) + out := make([]byte, 16) + c, _ := rc4.NewCipher(key) + c.XORKeyStream(out, digest) + for i := 1; i <= 19; i++ { + tweaked := make([]byte, len(key)) + for j, b := range key { + tweaked[j] = b ^ byte(i) + } + c2, _ := rc4.NewCipher(tweaked) + c2.XORKeyStream(out, out) + } + u := make([]byte, 32) + copy(u, out) + return u +} + +// objKeyRC4 derives the per-object RC4 key (algorithm 1). 
+func objKeyRC4(fileKey []byte, num, gen int) []byte { + buf := append([]byte{}, fileKey...) + buf = append(buf, byte(num), byte(num>>8), byte(num>>16), byte(gen), byte(gen>>8)) + sum := md5.Sum(buf) + n := len(fileKey) + 5 + if n > 16 { + n = 16 + } + return sum[:n] +} + +func rc4Crypt(key, data []byte) []byte { + out := make([]byte, len(data)) + c, _ := rc4.NewCipher(key) + c.XORKeyStream(out, data) + return out +} + +// buildRC4EncryptedStreamPDF builds a V2/R3 RC4-encrypted PDF (empty password) +// whose object 4 is a stream carrying RC4-encrypted plaintext. +func buildRC4EncryptedStreamPDF(t *testing.T, plaintext []byte) []byte { + t.Helper() + owner := bytes.Repeat([]byte{0x5a}, 32) + id0 := bytes.Repeat([]byte{0x7c}, 16) + const bits = 128 + var p int32 = -44 + fileKey := emptyPwRC4Key(owner, id0, p, bits) + u := emptyPwU(fileKey, id0) + enc := rc4Crypt(objKeyRC4(fileKey, 4, 0), plaintext) + + var buf bytes.Buffer + off := func() int { return buf.Len() } + fmt.Fprint(&buf, "%PDF-1.7\n%\xE2\xE3\xCF\xD3\n") + offsets := make([]int, 5) // 1..4 + offsets[1] = off() + fmt.Fprint(&buf, "1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n") + offsets[2] = off() + fmt.Fprint(&buf, "2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n") + offsets[3] = off() + fmt.Fprintf(&buf, + "3 0 obj\n<< /Filter /Standard /V 2 /R 3 /Length %d /O <%s> /U <%s> /P %d >>\nendobj\n", + bits, hex.EncodeToString(owner), hex.EncodeToString(u), p) + offsets[4] = off() + fmt.Fprintf(&buf, "4 0 obj\n<< /Length %d >>\nstream\n", len(enc)) + buf.Write(enc) + fmt.Fprint(&buf, "\nendstream\nendobj\n") + + xrefOff := off() + fmt.Fprint(&buf, "xref\n0 5\n") + fmt.Fprintf(&buf, "%010d %05d f \n", 0, 65535) + for i := 1; i <= 4; i++ { + fmt.Fprintf(&buf, "%010d %05d n \n", offsets[i], 0) + } + id := hex.EncodeToString(id0) + fmt.Fprintf(&buf, + "trailer\n<< /Size 5 /Root 1 0 R /Encrypt 3 0 R /ID [<%s> <%s>] >>\n", id, id) + fmt.Fprintf(&buf, "startxref\n%d\n%%%%EOF\n", xrefOff) + return 
buf.Bytes() +} + +// Open must accept an RC4-encrypted PDF secured with the empty user password +// and decrypt its stream content end-to-end. +func TestOpenDecryptsRC4Stream(t *testing.T) { + plaintext := []byte("BT (top secret invoice) Tj ET") + data := buildRC4EncryptedStreamPDF(t, plaintext) + r, err := Open(bytes.NewReader(data)) + if err != nil { + t.Fatalf("Open: %v", err) + } + defer r.Close() + got, err := r.DecodeStream(Reference{Number: 4, Generation: 0}) + if err != nil { + t.Fatalf("DecodeStream: %v", err) + } + if !bytes.Equal(got, plaintext) { + t.Fatalf("decrypted stream mismatch:\n got %q\nwant %q", got, plaintext) + } +} diff --git a/filter.go b/filter.go index 420dde4..03a7879 100644 --- a/filter.go +++ b/filter.go @@ -24,7 +24,7 @@ func (r *Reader) applyFilters(s *Stream, raw []byte, encrypted bool) ([]byte, er if encrypted && r.encrypt != nil { // Cross-reference streams are themselves unencrypted; callers // must pass encrypted=false for those. - dec, err := r.encrypt.decryptStream(data, s.objNumber, s.objGeneration, s.Dict) + dec, err := r.encrypt.decryptStream(data, s.objNumber, s.objGeneration) if err != nil { return nil, fmt.Errorf("pdfdisassembler: decrypt stream %d %d R: %w", s.objNumber, s.objGeneration, err) } diff --git a/internal/crypt/crypt.go b/internal/crypt/crypt.go index a02266c..34fb773 100644 --- a/internal/crypt/crypt.go +++ b/internal/crypt/crypt.go @@ -264,6 +264,11 @@ func computeRC4Key(p Params, password []byte) ([]byte, error) { if keyLen == 0 { keyLen = 5 // V1 default } + // /Length is attacker-controlled; the key is sliced from a 16-byte MD5 + // digest, so anything outside [1, md5.Size] would slice/make out of range. 
+ if keyLen < 1 || keyLen > md5.Size { + return nil, fmt.Errorf("crypt: invalid key length %d bits", p.Length) + } if p.R >= 3 { for i := 0; i < 50; i++ { s := md5.Sum(sum[:keyLen]) diff --git a/internal/crypt/crypt_test.go b/internal/crypt/crypt_test.go new file mode 100644 index 0000000..b514775 --- /dev/null +++ b/internal/crypt/crypt_test.go @@ -0,0 +1,317 @@ +package crypt + +import ( + "bytes" + "crypto/aes" + "crypto/cipher" + "crypto/md5" + "testing" +) + +// New must reject an /Encrypt /Length whose derived key size (Length/8) falls +// outside [1, 16] — the RC4/AESV2 file key is sliced from a 16-byte MD5 digest, +// so a hostile large or negative /Length would slice out of range and panic. +func TestNewRejectsHostileKeyLength(t *testing.T) { + for _, length := range []int{136, 256, 4096, -8} { + base := Params{ + V: 2, + R: 3, + Length: length, + OwnerEntry: make([]byte, 32), + UserEntry: make([]byte, 32), + ID0: make([]byte, 16), + } + if _, err := New(base, nil); err == nil { + t.Fatalf("Length=%d: expected error, got nil", length) + } + } +} + +// fixedIV is a deterministic 16-byte IV for reproducible AES test vectors. +var fixedIV = []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} + +// aesCBCEncryptStream is the inverse of aesCBCDecrypt: PKCS#7-pad, CBC-encrypt, +// and prepend the IV, producing a blob the handler should decrypt back. +func aesCBCEncryptStream(t *testing.T, key, iv, plaintext []byte) []byte { + t.Helper() + padLen := aes.BlockSize - len(plaintext)%aes.BlockSize + padded := append(append([]byte{}, plaintext...), bytes.Repeat([]byte{byte(padLen)}, padLen)...) + block, err := aes.NewCipher(key) + if err != nil { + t.Fatalf("aes.NewCipher: %v", err) + } + body := make([]byte, len(padded)) + cipher.NewCBCEncrypter(block, iv).CryptBlocks(body, padded) + return append(append([]byte{}, iv...), body...) 
+} + +func TestDecryptRC4RoundTrip(t *testing.T) { + h := &Handler{FileKey: bytes.Repeat([]byte{0x33}, 16), StreamAlg: AlgRC4, StringAlg: AlgRC4} + plaintext := []byte("the quick brown fox / RC4") + // RC4 is symmetric: decrypting plaintext yields ciphertext. + ct, err := h.DecryptStream(plaintext, 12, 0, "") + if err != nil { + t.Fatalf("encrypt: %v", err) + } + if bytes.Equal(ct, plaintext) { + t.Fatal("ciphertext equals plaintext") + } + got, err := h.DecryptStream(ct, 12, 0, "") + if err != nil { + t.Fatalf("decrypt: %v", err) + } + if !bytes.Equal(got, plaintext) { + t.Fatalf("round-trip mismatch: %q", got) + } + // Per-object keying: the same bytes under a different object number must + // not decrypt to the plaintext. + if other, _ := h.DecryptStream(ct, 99, 0, ""); bytes.Equal(other, plaintext) { + t.Fatal("ciphertext decrypted under wrong object number") + } +} + +func TestDecryptAES128RoundTrip(t *testing.T) { + h := &Handler{FileKey: bytes.Repeat([]byte{0x11}, 16), StreamAlg: AlgAES128, StringAlg: AlgAES128} + plaintext := []byte("attachment bytes under AESV2") + key := h.objKeyRC4orAES(7, 0, true) + ct := aesCBCEncryptStream(t, key, fixedIV, plaintext) + got, err := h.DecryptStream(ct, 7, 0, "") + if err != nil { + t.Fatalf("decrypt: %v", err) + } + if !bytes.Equal(got, plaintext) { + t.Fatalf("round-trip mismatch: %q", got) + } +} + +func TestDecryptAES256RoundTrip(t *testing.T) { + // V5/AESV3 keys streams and strings directly with the file key (no per-object key); the string path is exercised here. 
+ h := &Handler{FileKey: bytes.Repeat([]byte{0x22}, 32), StreamAlg: AlgAES256, StringAlg: AlgAES256} + plaintext := []byte("AES-256 stream content for V5") + ct := aesCBCEncryptStream(t, h.FileKey, fixedIV, plaintext) + got, err := h.DecryptString(ct, 5, 0) + if err != nil { + t.Fatalf("decrypt: %v", err) + } + if !bytes.Equal(got, plaintext) { + t.Fatalf("round-trip mismatch: %q", got) + } +} + +// Attacker-supplied AES blobs (too short for the IV, not block-aligned, empty) +// must surface an error or empty output — never panic. +func TestDecryptAESMalformedNoPanic(t *testing.T) { + h := &Handler{FileKey: bytes.Repeat([]byte{0x11}, 16), StreamAlg: AlgAES128} + cases := map[string][]byte{ + "empty": {}, + "shorter_than_iv": make([]byte, aes.BlockSize-1), + "iv_only": make([]byte, aes.BlockSize), + "unaligned_body": make([]byte, aes.BlockSize+aes.BlockSize-1), + "one_byte": {0x00}, + } + for name, data := range cases { + t.Run(name, func(t *testing.T) { + // Must not panic; result is ignored, the point is robustness. + _, _ = h.DecryptStream(data, 1, 0, "") + }) + } +} + +func TestDecryptIdentityPassthrough(t *testing.T) { + h := &Handler{StreamAlg: AlgIdentity, StringAlg: AlgIdentity} + data := []byte{0xde, 0xad, 0xbe, 0xef} + got, err := h.DecryptStream(data, 1, 0, "") + if err != nil { + t.Fatalf("identity: %v", err) + } + if !bytes.Equal(got, data) { + t.Fatal("identity altered data") + } +} + +// deriveRC4Key mirrors computeRC4Key's derivation (without the /U validation), +// so a test can compute the matching /U for an empty-password fixture. 
+func deriveRC4Key(p Params, password []byte) []byte { + pad := padPassword(password) + h := md5.New() + h.Write(pad) + h.Write(p.OwnerEntry) + h.Write([]byte{ + byte(uint32(p.P)), byte(uint32(p.P) >> 8), + byte(uint32(p.P) >> 16), byte(uint32(p.P) >> 24), + }) + h.Write(p.ID0) + if p.R >= 4 && !p.EncryptMeta { + h.Write([]byte{0xff, 0xff, 0xff, 0xff}) + } + sum := h.Sum(nil) + keyLen := p.Length / 8 + if keyLen == 0 { + keyLen = 5 + } + if p.R >= 3 { + for i := 0; i < 50; i++ { + s := md5.Sum(sum[:keyLen]) + sum = s[:] + } + } + key := make([]byte, keyLen) + copy(key, sum[:keyLen]) + return key +} + +// New must reconstruct the V2/V4 file key from a correct empty-password /U. +func TestNewV2V4KeyDerivationRoundTrip(t *testing.T) { + cases := []struct { + name string + V, R, bits int + }{ + {"V2R2", 2, 2, 40}, + {"V2R3", 2, 3, 128}, + {"V4R4", 4, 4, 128}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + password := []byte{} + p := Params{ + V: tc.V, R: tc.R, Length: tc.bits, + OwnerEntry: bytes.Repeat([]byte{0x5a}, 32), + ID0: bytes.Repeat([]byte{0x7c}, 16), + P: -3904, + EncryptMeta: true, + StmF: "StdCF", StrF: "StdCF", + CryptFilters: map[string]string{"StdCF": "V2"}, + } + key := deriveRC4Key(p, password) + u, err := computeU(p, key) + if err != nil { + t.Fatalf("computeU: %v", err) + } + p.UserEntry = u + h, err := New(p, password) + if err != nil { + t.Fatalf("New: %v", err) + } + if !bytes.Equal(h.FileKey, key) { + t.Fatalf("file key mismatch:\n got %x\nwant %x", h.FileKey, key) + } + }) + } +} + +// A wrong /U must be rejected, not accepted with a garbage key. 
+func TestNewV2RejectsWrongUserEntry(t *testing.T) { + p := Params{ + V: 2, R: 3, Length: 128, + OwnerEntry: bytes.Repeat([]byte{0x5a}, 32), + ID0: bytes.Repeat([]byte{0x7c}, 16), + UserEntry: bytes.Repeat([]byte{0x00}, 32), // not the real /U + } + if _, err := New(p, []byte{}); err == nil { + t.Fatal("expected password-incorrect error, got nil") + } +} + +// aesCBCEncryptRaw is the inverse of v5DecryptKey: CBC-encrypt block-aligned +// data as-is, adding no padding and not prepending the IV. +func aesCBCEncryptRaw(t *testing.T, key, iv, plaintext []byte) []byte { + t.Helper() + block, err := aes.NewCipher(key) + if err != nil { + t.Fatalf("aes.NewCipher: %v", err) + } + out := make([]byte, len(plaintext)) + cipher.NewCBCEncrypter(block, iv).CryptBlocks(out, plaintext) + return out +} + +// computeV5Key must recover the AES-256 file key from a correct user-password +// /U, /UE for both R=5 (SHA-256) and R=6 (the iterated r6Hash). +func TestComputeV5KeyRoundTrip(t *testing.T) { + for _, R := range []int{5, 6} { + t.Run(map[int]string{5: "R5", 6: "R6"}[R], func(t *testing.T) { + password := []byte("user-pw") + fileKey := bytes.Repeat([]byte{0x42}, 32) + uVS := bytes.Repeat([]byte{0x01}, 8) + uKS := bytes.Repeat([]byte{0x02}, 8) + + uValHash, err := v5Hash(password, uVS, nil, R) + if err != nil { + t.Fatalf("v5Hash(validation): %v", err) + } + kHash, err := v5Hash(password, uKS, nil, R) + if err != nil { + t.Fatalf("v5Hash(key): %v", err) + } + ue := aesCBCEncryptRaw(t, kHash, make([]byte, aes.BlockSize), fileKey) + + userEntry := append(append(append([]byte{}, uValHash...), uVS...), uKS...) 
+ p := Params{ + V: 5, R: R, + UserEntry: userEntry, // 48 bytes + OwnerEntry: make([]byte, 48), // present but unused (user path matches first) + UE: ue, // 32 bytes + OE: make([]byte, 32), + } + key, err := computeV5Key(p, password) + if err != nil { + t.Fatalf("computeV5Key: %v", err) + } + if !bytes.Equal(key, fileKey) { + t.Fatalf("V5 key mismatch:\n got %x\nwant %x", key, fileKey) + } + }) + } +} + +// New must map each V4 /CF crypt-filter method to a cipher and reject unknown +// ones, for a valid empty-password setup. +func TestNewV4CryptFilterMethods(t *testing.T) { + cases := []struct { + cfm string + wantErr bool + }{ + {"V2", false}, {"AESV2", false}, {"AESV3", false}, {"None", false}, {"Bogus", true}, + } + for _, tc := range cases { + t.Run(tc.cfm, func(t *testing.T) { + p := Params{ + V: 4, R: 4, Length: 128, + OwnerEntry: bytes.Repeat([]byte{0x5a}, 32), + ID0: bytes.Repeat([]byte{0x7c}, 16), + P: -3904, + EncryptMeta: true, + StmF: "StdCF", StrF: "StdCF", + CryptFilters: map[string]string{"StdCF": tc.cfm}, + } + key := deriveRC4Key(p, nil) + u, err := computeU(p, key) + if err != nil { + t.Fatalf("computeU: %v", err) + } + p.UserEntry = u + _, err = New(p, nil) + if tc.wantErr != (err != nil) { + t.Fatalf("CFM %q: wantErr=%v, got err=%v", tc.cfm, tc.wantErr, err) + } + }) + } +} + +// computeV5Key must reject short /U, /O, /UE, /OE entries rather than slicing +// out of range. 
+func TestComputeV5KeyRejectsShortEntries(t *testing.T) { + cases := map[string]Params{ + "short_user": {V: 5, R: 6, UserEntry: make([]byte, 47), OwnerEntry: make([]byte, 48), UE: make([]byte, 32), OE: make([]byte, 32)}, + "short_owner": {V: 5, R: 6, UserEntry: make([]byte, 48), OwnerEntry: make([]byte, 47), UE: make([]byte, 32), OE: make([]byte, 32)}, + "short_ue": {V: 5, R: 6, UserEntry: make([]byte, 48), OwnerEntry: make([]byte, 48), UE: make([]byte, 31), OE: make([]byte, 32)}, + "short_oe": {V: 5, R: 6, UserEntry: make([]byte, 48), OwnerEntry: make([]byte, 48), UE: make([]byte, 32), OE: make([]byte, 31)}, + } + for name, p := range cases { + t.Run(name, func(t *testing.T) { + if _, err := computeV5Key(p, []byte{}); err == nil { + t.Fatal("expected error for short entry, got nil") + } + }) + } +}