Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions object.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,3 +318,11 @@ type DocInfo struct {
ModDate time.Time
Custom map[string]string
}

// EmbeddedFile is one entry from the catalog's EmbeddedFiles name tree (a PDF
// attachment). Spec is the /Filespec dictionary; its /EF stream holds the
// bytes.
type EmbeddedFile struct {
// Name is the entry's key string taken from a name-tree node's /Names array.
Name string
// Spec is the dictionary that the entry's value resolved to.
Spec *Dict
}
56 changes: 56 additions & 0 deletions reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -464,3 +464,59 @@ func (r *Reader) DecodeStream(obj Object) ([]byte, error) {
}
return s.Content()
}

// maxNameTreeDepth is the deepest /Kids nesting level walkNameTree descends
// to (the root is depth 0); nodes below that level are skipped.
const maxNameTreeDepth = 1000

// EmbeddedFiles lists the PDF attachments recorded in the catalog's
// /Names /EmbeddedFiles name tree, in the order the tree walk visits them.
//
// The result is nil when the catalog cannot be read, when it has no /Names
// dictionary, when /Names has no /EmbeddedFiles entry, or when the tree yields
// no usable entries.
func (r *Reader) EmbeddedFiles() []EmbeddedFile {
	catalog, err := r.Catalog()
	if err != nil {
		return nil
	}

	nameDict, found := catalog.Dict("Names")
	if !found {
		return nil
	}

	tree, found := nameDict.Dict("EmbeddedFiles")
	if !found {
		return nil
	}

	// files stays nil unless the walk appends at least one entry.
	var files []EmbeddedFile
	visited := map[Reference]struct{}{}
	r.walkNameTree(tree, visited, 0, &files)
	return files
}

// walkNameTree appends to out every (name, dictionary) pair reachable from
// node. A node's /Kids are descended into first, then its own /Names pairs are
// collected.
//
// seen holds the /Kids references that have already been followed; a
// reference found there is skipped, so a cyclic /Kids graph terminates. Kids
// that are not references are always visited. depth is the current nesting
// level and the walk stops once it exceeds maxNameTreeDepth, which bounds the
// recursion.
//
// Pairs whose key is not a String, and kids or values that ResolveDict
// rejects, are skipped without reporting an error.
func (r *Reader) walkNameTree(node *Dict, seen map[Reference]struct{}, depth int, out *[]EmbeddedFile) {
	if depth > maxNameTreeDepth || node == nil {
		return
	}

	if kids, hasKids := node.Array("Kids"); hasKids {
		for _, kid := range kids {
			ref, isRef := kid.(Reference)
			if isRef {
				if _, visited := seen[ref]; visited {
					continue
				}
				// Mark before resolving so a failed resolve is not retried.
				seen[ref] = struct{}{}
			}
			child, err := r.ResolveDict(kid)
			if err != nil {
				continue
			}
			r.walkNameTree(child, seen, depth+1, out)
		}
	}

	pairs, hasNames := node.Array("Names")
	if !hasNames {
		return
	}
	// /Names is a flat [key value key value ...] array; i indexes the value
	// of each pair, so a trailing unpaired key is never read.
	for i := 1; i < len(pairs); i += 2 {
		key, isString := pairs[i-1].(String)
		if !isString {
			continue
		}
		spec, err := r.ResolveDict(pairs[i])
		if err != nil {
			continue
		}
		*out = append(*out, EmbeddedFile{Name: string(key), Spec: spec})
	}
}
96 changes: 96 additions & 0 deletions reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,3 +247,99 @@ func TestParseDate(t *testing.T) {
t.Fatalf("hour %d", d.Hour())
}
}

// buildDictPDF serializes a minimal PDF with a classical cross-reference
// table. objs[i] becomes the body of object number i+1 (generation 0), and the
// trailer names object 1 as /Root, so objs[0] should be the catalog. Bodies
// are written verbatim between "obj" and "endobj"; no stream framing is added.
func buildDictPDF(t *testing.T, objs []string) []byte {
	t.Helper()

	var pdf bytes.Buffer
	pdf.WriteString("%PDF-1.7\n%\xE2\xE3\xCF\xD3\n")

	// Remember the byte offset at which each object begins for the xref table.
	starts := make([]int, 0, len(objs))
	for idx, body := range objs {
		starts = append(starts, pdf.Len())
		fmt.Fprintf(&pdf, "%d 0 obj\n%s\nendobj\n", idx+1, body)
	}

	// The table has one extra entry: object 0, the head of the free list.
	size := len(objs) + 1
	xrefStart := pdf.Len()
	fmt.Fprintf(&pdf, "xref\n0 %d\n%010d %05d f \n", size, 0, 65535)
	for _, off := range starts {
		fmt.Fprintf(&pdf, "%010d %05d n \n", off, 0)
	}

	fmt.Fprintf(&pdf, "trailer\n<< /Size %d /Root 1 0 R >>\nstartxref\n%d\n%%%%EOF\n",
		size, xrefStart)
	return pdf.Bytes()
}

func TestEmbeddedFiles(t *testing.T) {
data := buildDictPDF(t, []string{
"<< /Type /Catalog /Pages 2 0 R /Names << /EmbeddedFiles 3 0 R >> >>",
"<< /Type /Pages /Kids [] /Count 0 >>",
"<< /Names [ (a.xml) 4 0 R (b.xml) 5 0 R ] >>",
"<< /Type /Filespec /F (a.xml) >>",
"<< /Type /Filespec /F (b.xml) >>",
})
r, err := Open(bytes.NewReader(data))
if err != nil {
t.Fatalf("Open: %v", err)
}
defer r.Close()
ef := r.EmbeddedFiles()
if len(ef) != 2 {
t.Fatalf("got %d files, want 2", len(ef))
}
if ef[0].Name != "a.xml" || ef[1].Name != "b.xml" {
t.Fatalf("names %q, %q", ef[0].Name, ef[1].Name)
}
if f, ok := ef[0].Spec.String("F"); !ok || f != "a.xml" {
t.Fatalf("spec /F %q ok=%v", f, ok)
}
}

func TestEmbeddedFilesNestedKids(t *testing.T) {
data := buildDictPDF(t, []string{
"<< /Type /Catalog /Pages 2 0 R /Names << /EmbeddedFiles 3 0 R >> >>",
"<< /Type /Pages /Kids [] /Count 0 >>",
"<< /Kids [ 4 0 R ] >>",
"<< /Names [ (a.xml) 5 0 R ] >>",
"<< /Type /Filespec /F (a.xml) >>",
})
r, err := Open(bytes.NewReader(data))
if err != nil {
t.Fatalf("Open: %v", err)
}
defer r.Close()
if ef := r.EmbeddedFiles(); len(ef) != 1 || ef[0].Name != "a.xml" {
t.Fatalf("got %+v, want one a.xml", ef)
}
}

func TestEmbeddedFilesCyclicKidsTerminates(t *testing.T) {
// obj 3's /Kids references itself; the walk must terminate, not overflow.
data := buildDictPDF(t, []string{
"<< /Type /Catalog /Pages 2 0 R /Names << /EmbeddedFiles 3 0 R >> >>",
"<< /Type /Pages /Kids [] /Count 0 >>",
"<< /Kids [ 3 0 R ] >>",
})
r, err := Open(bytes.NewReader(data))
if err != nil {
t.Fatalf("Open: %v", err)
}
defer r.Close()
if ef := r.EmbeddedFiles(); len(ef) != 0 {
t.Fatalf("got %d files, want 0", len(ef))
}
}

func TestEmbeddedFilesNone(t *testing.T) {
data := buildDictPDF(t, []string{
"<< /Type /Catalog /Pages 2 0 R >>",
"<< /Type /Pages /Kids [] /Count 0 >>",
})
r, err := Open(bytes.NewReader(data))
if err != nil {
t.Fatalf("Open: %v", err)
}
defer r.Close()
if ef := r.EmbeddedFiles(); ef != nil {
t.Fatalf("got %+v, want nil", ef)
}
}