From 8f33189f2a713801f113077ce750f00c6f95a004 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Tue, 26 May 2026 11:49:46 +0200 Subject: [PATCH 01/11] Add multiline pattern handling. --- embedding/embedding_test.go | 30 +++++++++++ embedding/parsing/instruction.go | 50 ++++++++++++++++--- embedding/parsing/pattern.go | 32 ++++++++++++ .../org/example/MultiLinePatternSample.java | 23 +++++++++ .../docs/escaped-newline-exact-pattern.md | 7 +++ .../resources/docs/escaped-newline-pattern.md | 7 +++ 6 files changed, 143 insertions(+), 6 deletions(-) create mode 100644 test/resources/code/java/org/example/MultiLinePatternSample.java create mode 100644 test/resources/docs/escaped-newline-exact-pattern.md create mode 100644 test/resources/docs/escaped-newline-pattern.md diff --git a/embedding/embedding_test.go b/embedding/embedding_test.go index f2857c3..881d226 100644 --- a/embedding/embedding_test.go +++ b/embedding/embedding_test.go @@ -197,6 +197,36 @@ var _ = Describe("Embedding", func() { Expect(processor.IsUpToDate()).Should(BeTrue()) }) + It("should embed a method with escaped newline patterns", func() { + config.DocIncludes = []string{"escaped-newline-pattern.md"} + docPath := fmt.Sprintf("%s/escaped-newline-pattern.md", config.DocumentationRoot) + processor := embedding.NewProcessor(docPath, config) + + Expect(processor.Embed()).Error().ShouldNot(HaveOccurred()) + + docContent, err := os.ReadFile(docPath) + Expect(err).ShouldNot(HaveOccurred()) + Expect(string(docContent)).Should(ContainSubstring("@Test\n" + + "@DisplayName(\"adds two values\")")) + Expect(string(docContent)).Should(ContainSubstring("assertEquals(2, value);\n}")) + Expect(string(docContent)).ShouldNot(ContainSubstring("subtractsTwoValues")) + }) + + It("should embed a method with exact escaped newline patterns", func() { + config.DocIncludes = []string{"escaped-newline-exact-pattern.md"} + docPath := fmt.Sprintf("%s/escaped-newline-exact-pattern.md", config.DocumentationRoot) + processor := 
embedding.NewProcessor(docPath, config) + + Expect(processor.Embed()).Error().ShouldNot(HaveOccurred()) + + docContent, err := os.ReadFile(docPath) + Expect(err).ShouldNot(HaveOccurred()) + Expect(string(docContent)).Should(ContainSubstring("@Test\n" + + "@DisplayName(\"adds two values\")")) + Expect(string(docContent)).Should(ContainSubstring("assertEquals(2, value);\n}")) + Expect(string(docContent)).ShouldNot(ContainSubstring("subtractsTwoValues")) + }) + It("should report a missing closing tag", func() { docPath := fmt.Sprintf("%s/missing-closing-tag.md", config.DocumentationRoot) processor := embedding.NewProcessor(docPath, config) diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index 8628455..10b6359 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -238,19 +238,57 @@ func (e Instruction) matchingLines(lines []string, codeFileReference string) ([] // startFrom — an index from which to start searching. func (e Instruction) matchGlob(pattern *Pattern, lines []string, startFrom int, kind string, codeFileReference string) (int, error) { + if kind != "line" && pattern.HasLineSeparator() { + start, end, found := matchLineSequence(pattern, lines, startFrom) + if found { + if kind == "end" { + return end, nil + } + return start, nil + } + return 0, PatternNotFoundError{ + Line: e.DocumentationLine, + CodeFileReference: codeFileReference, + Kind: kind, + Pattern: pattern, + } + } + if line, found := matchSingleLine(pattern, lines, startFrom); found { + return line, nil + } + return 0, PatternNotFoundError{ + Line: e.DocumentationLine, + CodeFileReference: codeFileReference, + Kind: kind, + Pattern: pattern, + } +} + +// matchSingleLine returns the first source line matching the pattern. 
+func matchSingleLine(pattern *Pattern, lines []string, startFrom int) (int, bool) { lineCount := len(lines) resultLine := startFrom for resultLine < lineCount { line := lines[resultLine] if pattern.Match(line) { - return resultLine, nil + return resultLine, true } resultLine++ } - return 0, PatternNotFoundError{ - Line: e.DocumentationLine, - CodeFileReference: codeFileReference, - Kind: kind, - Pattern: pattern, + + return 0, false +} + +// matchLineSequence returns the first source-line range matching an escaped-line pattern. +func matchLineSequence(pattern *Pattern, lines []string, startFrom int) (int, int, bool) { + lineCount := len(pattern.linePatterns()) + lastStart := len(lines) - lineCount + for start := startFrom; start <= lastStart; start++ { + end := start + lineCount + if pattern.MatchLineSequence(lines[start:end]) { + return start, end - 1, true + } } + + return 0, 0, false } diff --git a/embedding/parsing/pattern.go b/embedding/parsing/pattern.go index 07a6424..f991065 100644 --- a/embedding/parsing/pattern.go +++ b/embedding/parsing/pattern.go @@ -39,6 +39,7 @@ type Pattern struct { const ( anyCharacterSequence = "*" + escapedLineSeparator = `\n` lineStart = "^" lineEnd = "$" ) @@ -100,6 +101,37 @@ func (p Pattern) Match(line string) bool { return g.Match(line) } +// HasLineSeparator reports whether the pattern contains an escaped line separator. +func (p Pattern) HasLineSeparator() bool { + return strings.Contains(p.sourceGlob, escapedLineSeparator) +} + +// MatchLineSequence reports whether source lines match the escaped-line-separated pattern. +func (p Pattern) MatchLineSequence(lines []string) bool { + patternLines := p.linePatterns() + if len(patternLines) != len(lines) { + return false + } + for i, patternLine := range patternLines { + pattern := NewPattern(patternLine) + if !pattern.Match(lines[i]) { + return false + } + } + + return true +} + +// linePatterns returns trimmed pattern lines separated by an escaped newline. 
+func (p Pattern) linePatterns() []string { + patternLines := strings.Split(p.sourceGlob, escapedLineSeparator) + for i, line := range patternLines { + patternLines[i] = strings.TrimSpace(line) + } + + return patternLines +} + // Returns string representation of Pattern. func (p Pattern) String() string { return fmt.Sprintf("Pattern %s", p.sourceGlob) diff --git a/test/resources/code/java/org/example/MultiLinePatternSample.java b/test/resources/code/java/org/example/MultiLinePatternSample.java new file mode 100644 index 0000000..c193057 --- /dev/null +++ b/test/resources/code/java/org/example/MultiLinePatternSample.java @@ -0,0 +1,23 @@ +package org.example; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +class MultiLinePatternSample { + + @Test + @DisplayName("adds two values") + void addsTwoValues() { + int value = 1 + 1; + + assertEquals(2, value); + } + + @Test + @DisplayName("subtracts two values") + void subtractsTwoValues() { + int value = 2 - 1; + + assertEquals(1, value); + } +} diff --git a/test/resources/docs/escaped-newline-exact-pattern.md b/test/resources/docs/escaped-newline-exact-pattern.md new file mode 100644 index 0000000..04417e4 --- /dev/null +++ b/test/resources/docs/escaped-newline-exact-pattern.md @@ -0,0 +1,7 @@ +# Escaped-newline exact pattern + + +```java +``` diff --git a/test/resources/docs/escaped-newline-pattern.md b/test/resources/docs/escaped-newline-pattern.md new file mode 100644 index 0000000..01081f6 --- /dev/null +++ b/test/resources/docs/escaped-newline-pattern.md @@ -0,0 +1,7 @@ +# Escaped-newline pattern + + +```java +``` From 9564edd358e2950d8a4441a94de16a6d54b9e380 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Tue, 26 May 2026 12:12:29 +0200 Subject: [PATCH 02/11] Add escaping. 
--- embedding/embedding_test.go | 14 ++++++++ embedding/parsing/instruction.go | 3 +- embedding/parsing/instruction_test.go | 9 +++++ embedding/parsing/pattern.go | 35 ++++++++++++++----- embedding/parsing/xml_parse.go | 8 ++++- .../org/example/MultiLinePatternSample.java | 2 ++ .../docs/escaped-newline-literal-pattern.md | 6 ++++ 7 files changed, 67 insertions(+), 10 deletions(-) create mode 100644 test/resources/docs/escaped-newline-literal-pattern.md diff --git a/embedding/embedding_test.go b/embedding/embedding_test.go index 881d226..3179860 100644 --- a/embedding/embedding_test.go +++ b/embedding/embedding_test.go @@ -227,6 +227,20 @@ var _ = Describe("Embedding", func() { Expect(string(docContent)).ShouldNot(ContainSubstring("subtractsTwoValues")) }) + It("should embed a line with an escaped newline literal pattern", func() { + config.DocIncludes = []string{"escaped-newline-literal-pattern.md"} + docPath := fmt.Sprintf("%s/escaped-newline-literal-pattern.md", config.DocumentationRoot) + processor := embedding.NewProcessor(docPath, config) + + Expect(processor.Embed()).Error().ShouldNot(HaveOccurred()) + + docContent, err := os.ReadFile(docPath) + Expect(err).ShouldNot(HaveOccurred()) + Expect(string(docContent)).Should(ContainSubstring( + "private static final String LINE_SEPARATOR = \"\\n\";", + )) + }) + It("should report a missing closing tag", func() { docPath := fmt.Sprintf("%s/missing-closing-tag.md", config.DocumentationRoot) processor := embedding.NewProcessor(docPath, config) diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index 10b6359..7b0b0d1 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -281,7 +281,8 @@ func matchSingleLine(pattern *Pattern, lines []string, startFrom int) (int, bool // matchLineSequence returns the first source-line range matching an escaped-line pattern. 
func matchLineSequence(pattern *Pattern, lines []string, startFrom int) (int, int, bool) { - lineCount := len(pattern.linePatterns()) + patternLines, _ := pattern.linePatterns() + lineCount := len(patternLines) lastStart := len(lines) - lineCount for start := startFrom; start <= lastStart; start++ { end := start + lineCount diff --git a/embedding/parsing/instruction_test.go b/embedding/parsing/instruction_test.go index 0425f98..cbfac22 100644 --- a/embedding/parsing/instruction_test.go +++ b/embedding/parsing/instruction_test.go @@ -85,6 +85,15 @@ var _ = Describe("Instruction", func() { Expect(parsing.FromXML(xmlString, config)).Error().ShouldNot(HaveOccurred()) }) + It("should parse backslash-escaped quotes in XML attributes", func() { + xmlString := `` + + attributes, err := parsing.ParseXMLLine(xmlString) + + Expect(err).ShouldNot(HaveOccurred()) + Expect(attributes["line"]).Should(Equal(`println("Hello world")`)) + }) + It("should have an error for unsupported comments mode", func() { instructionParams := TestInstructionParams{ comments: "summary", diff --git a/embedding/parsing/pattern.go b/embedding/parsing/pattern.go index f991065..dd5df72 100644 --- a/embedding/parsing/pattern.go +++ b/embedding/parsing/pattern.go @@ -39,9 +39,10 @@ type Pattern struct { const ( anyCharacterSequence = "*" - escapedLineSeparator = `\n` + escapedLineSeparator = `\\n` lineStart = "^" lineEnd = "$" + patternLineSeparator = `\n` ) // NewPattern creates a new Pattern based on provided glob string. @@ -103,12 +104,14 @@ func (p Pattern) Match(line string) bool { // HasLineSeparator reports whether the pattern contains an escaped line separator. func (p Pattern) HasLineSeparator() bool { - return strings.Contains(p.sourceGlob, escapedLineSeparator) + _, hasSeparator := p.linePatterns() + + return hasSeparator } // MatchLineSequence reports whether source lines match the escaped-line-separated pattern. 
func (p Pattern) MatchLineSequence(lines []string) bool { - patternLines := p.linePatterns() + patternLines, _ := p.linePatterns() if len(patternLines) != len(lines) { return false } @@ -123,13 +126,29 @@ func (p Pattern) MatchLineSequence(lines []string) bool { } // linePatterns returns trimmed pattern lines separated by an escaped newline. -func (p Pattern) linePatterns() []string { - patternLines := strings.Split(p.sourceGlob, escapedLineSeparator) - for i, line := range patternLines { - patternLines[i] = strings.TrimSpace(line) +func (p Pattern) linePatterns() ([]string, bool) { + var patternLines []string + var line strings.Builder + hasSeparator := false + for i := 0; i < len(p.sourceGlob); { + remaining := p.sourceGlob[i:] + switch { + case strings.HasPrefix(remaining, escapedLineSeparator): + line.WriteString(escapedLineSeparator) + i += len(escapedLineSeparator) + case strings.HasPrefix(remaining, patternLineSeparator): + patternLines = append(patternLines, strings.TrimSpace(line.String())) + line.Reset() + hasSeparator = true + i += len(patternLineSeparator) + default: + line.WriteByte(p.sourceGlob[i]) + i++ + } } + patternLines = append(patternLines, strings.TrimSpace(line.String())) - return patternLines + return patternLines, hasSeparator } // Returns string representation of Pattern. diff --git a/embedding/parsing/xml_parse.go b/embedding/parsing/xml_parse.go index 9d4dcda..6716242 100644 --- a/embedding/parsing/xml_parse.go +++ b/embedding/parsing/xml_parse.go @@ -22,6 +22,7 @@ import ( "embed-code/embed-code-go/configuration" "encoding/xml" "fmt" + "strings" ) // Item needed for xml.Unmarshal parsing. The fields are filling up during the parsing. @@ -69,7 +70,7 @@ func FromXML(line string, config configuration.Configuration) (Instruction, erro // Returns a map of key-value pairs. If the provided line is not valid, returns an error. 
func ParseXMLLine(xmlLine string) (map[string]string, error) { var root Item - err := xml.Unmarshal([]byte(xmlLine), &root) + err := xml.Unmarshal([]byte(quoteEscapedXMLLine(xmlLine)), &root) if err != nil { return map[string]string{}, err } @@ -86,3 +87,8 @@ func ParseXMLLine(xmlLine string) (map[string]string, error) { return attributes, nil } + +// quoteEscapedXMLLine converts backslash-escaped quotes into XML entities. +func quoteEscapedXMLLine(xmlLine string) string { + return strings.ReplaceAll(xmlLine, `\"`, "&quot;") +} diff --git a/test/resources/code/java/org/example/MultiLinePatternSample.java b/test/resources/code/java/org/example/MultiLinePatternSample.java index c193057..bcf6e19 100644 --- a/test/resources/code/java/org/example/MultiLinePatternSample.java +++ b/test/resources/code/java/org/example/MultiLinePatternSample.java @@ -5,6 +5,8 @@ class MultiLinePatternSample { + private static final String LINE_SEPARATOR = "\n"; + @Test @DisplayName("adds two values") void addsTwoValues() { diff --git a/test/resources/docs/escaped-newline-literal-pattern.md b/test/resources/docs/escaped-newline-literal-pattern.md new file mode 100644 index 0000000..9a50459 --- /dev/null +++ b/test/resources/docs/escaped-newline-literal-pattern.md @@ -0,0 +1,6 @@ +# Escaped-newline literal pattern + + +```java +``` From 88bb61dc9c11b982bc797d402fe65a57be9128c8 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Tue, 26 May 2026 12:30:26 +0200 Subject: [PATCH 03/11] Improve documentation. 
--- EMBEDDING.md | 71 ++++++++++++++++++- embedding/parsing/instruction_test.go | 39 ++++++++++ test/resources/code/java/literal-patterns.txt | 4 ++ 3 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 test/resources/code/java/literal-patterns.txt diff --git a/EMBEDDING.md b/EMBEDDING.md index 327e923..5777d65 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -111,8 +111,75 @@ The tool supports an extended glob syntax for matching lines: By default, patterns imply a wildcard (`*`) at both the start and end. Use `^` and `$` to disable this behavior and match the exact line start or end. -If you need to match a literal `^` at the start of a line, use `^^`. -Similarly, use `$$` to match a literal `$` at the end of a line. +#### Multi-line patterns + +Use `\n` inside a `start` or `end` pattern to match consecutive source lines. +Spaces around `\n` are ignored, and each pattern line uses the same glob syntax as a +regular one-line pattern. + +````markdown + +```java +``` +```` + +This matches a source range like: + +```java +@Test +@DisplayName("adds two values") +void addsTwoValues() { + int value = 1 + 1; + + assertEquals(2, value); +} +``` + +The `start` pattern above is interpreted as two consecutive line patterns: +`Test` and `adds two values`. Because ordinary patterns imply `*` at both ends, +these match `@Test` and `@DisplayName("adds two values")`. + +Use `^` and `$` on each pattern line when you need exact line matching: + +````markdown + +```java +``` +```` + +Without `\n`, a `start`, `end`, or `line` pattern matches only one source line. + +#### Escaping + +Use a backslash to match glob control characters literally. For example: + +- `\*` matches a literal `*`. +- `\?` matches a literal `?`. +- `\[` matches a literal `[`. + +Since `^` is only special at the start of a pattern, use `^^` to match a literal +`^` there. Since `$` is only special at the end of a pattern, use `$$` to match a +literal `$` there. 
+ +To match literal `\n` text in a source line, write it as `\\n` in the pattern. + +````markdown + +```java +``` +```` + +You may write quote characters in patterns as `\"` instead of the XML entity `"`. +For example, `line="println(\"Hello\")"` is equivalent to +`line="println("Hello")"`. ## Comment filtering diff --git a/embedding/parsing/instruction_test.go b/embedding/parsing/instruction_test.go index cbfac22..1fd0d2c 100644 --- a/embedding/parsing/instruction_test.go +++ b/embedding/parsing/instruction_test.go @@ -310,6 +310,45 @@ var _ = Describe("Instruction", func() { })) }) + It("should embed a line with an escaped asterisk pattern", func() { + instructionParams := TestInstructionParams{ + lineGlob: `Use \* to multiply`, + } + + actualLines := getXMLExtractionContent( + "literal-patterns.txt", instructionParams, config) + + Expect(actualLines).Should(Equal([]string{ + "Use * to multiply", + })) + }) + + It("should embed a line starting with a literal caret pattern", func() { + instructionParams := TestInstructionParams{ + lineGlob: "^^ starts with caret", + } + + actualLines := getXMLExtractionContent( + "literal-patterns.txt", instructionParams, config) + + Expect(actualLines).Should(Equal([]string{ + "^ starts with caret", + })) + }) + + It("should embed a line ending with a literal dollar pattern", func() { + instructionParams := TestInstructionParams{ + lineGlob: "The value ends with $$", + } + + actualLines := getXMLExtractionContent( + "literal-patterns.txt", instructionParams, config) + + Expect(actualLines).Should(Equal([]string{ + "The value ends with $", + })) + }) + It("should successfully parse XML by only end glob", func() { instructionParams := TestInstructionParams{ endGlob: "package*", diff --git a/test/resources/code/java/literal-patterns.txt b/test/resources/code/java/literal-patterns.txt new file mode 100644 index 0000000..ca45d08 --- /dev/null +++ b/test/resources/code/java/literal-patterns.txt @@ -0,0 +1,4 @@ +Use * to multiply +The 
total is $5 +The value ends with $ +^ starts with caret From f8a81d048b0761b7baf36f970d07dd328f413144 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Tue, 26 May 2026 13:05:48 +0200 Subject: [PATCH 04/11] Add multiline handling for `line`. --- EMBEDDING.md | 2 +- embedding/embedding_test.go | 15 ++++++ embedding/parsing/instruction.go | 49 ++++++++++++++----- .../docs/escaped-newline-line-pattern.md | 6 +++ 4 files changed, 59 insertions(+), 13 deletions(-) create mode 100644 test/resources/docs/escaped-newline-line-pattern.md diff --git a/EMBEDDING.md b/EMBEDDING.md index 5777d65..469ab27 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -113,7 +113,7 @@ Use `^` and `$` to disable this behavior and match the exact line start or end. #### Multi-line patterns -Use `\n` inside a `start` or `end` pattern to match consecutive source lines. +Use `\n` inside a `start`, `end`, or `line` pattern to match consecutive source lines. Spaces around `\n` are ignored, and each pattern line uses the same glob syntax as a regular one-line pattern. 
diff --git a/embedding/embedding_test.go b/embedding/embedding_test.go index 3179860..8fc7119 100644 --- a/embedding/embedding_test.go +++ b/embedding/embedding_test.go @@ -227,6 +227,21 @@ var _ = Describe("Embedding", func() { Expect(string(docContent)).ShouldNot(ContainSubstring("subtractsTwoValues")) }) + It("should embed matching lines with an escaped newline line pattern", func() { + config.DocIncludes = []string{"escaped-newline-line-pattern.md"} + docPath := fmt.Sprintf("%s/escaped-newline-line-pattern.md", config.DocumentationRoot) + processor := embedding.NewProcessor(docPath, config) + + Expect(processor.Embed()).Error().ShouldNot(HaveOccurred()) + + docContent, err := os.ReadFile(docPath) + Expect(err).ShouldNot(HaveOccurred()) + Expect(string(docContent)).Should(ContainSubstring("@Test\n" + + "@DisplayName(\"adds two values\")")) + Expect(string(docContent)).ShouldNot(ContainSubstring("void addsTwoValues")) + Expect(string(docContent)).ShouldNot(ContainSubstring("subtractsTwoValues")) + }) + It("should embed a line with an escaped newline literal pattern", func() { config.DocIncludes = []string{"escaped-newline-literal-pattern.md"} docPath := fmt.Sprintf("%s/escaped-newline-literal-pattern.md", config.DocumentationRoot) diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index 7b0b0d1..8cc23c8 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -191,6 +191,18 @@ func (e Instruction) String() string { // lines — a list of strings representing the input lines. 
func (e Instruction) matchingLines(lines []string, codeFileReference string) ([]string, error) { if e.LinePattern != nil { + if e.LinePattern.HasLineSeparator() { + startPosition, endPosition, err := e.matchLineSequence( + e.LinePattern, lines, 0, "line", codeFileReference, + ) + if err != nil { + return nil, err + } + requiredLines := lines[startPosition : endPosition+1] + indentation := indent.MaxCommonIndentation(requiredLines) + + return indent.CutIndent(requiredLines, indentation), nil + } linePosition, err := e.matchGlob( e.LinePattern, lines, 0, "line", codeFileReference, ) @@ -238,20 +250,17 @@ func (e Instruction) matchingLines(lines []string, codeFileReference string) ([] // startFrom — an index from which to start searching. func (e Instruction) matchGlob(pattern *Pattern, lines []string, startFrom int, kind string, codeFileReference string) (int, error) { - if kind != "line" && pattern.HasLineSeparator() { - start, end, found := matchLineSequence(pattern, lines, startFrom) - if found { - if kind == "end" { - return end, nil - } - return start, nil + if pattern.HasLineSeparator() { + start, end, err := e.matchLineSequence( + pattern, lines, startFrom, kind, codeFileReference, + ) + if err != nil { + return 0, err } - return 0, PatternNotFoundError{ - Line: e.DocumentationLine, - CodeFileReference: codeFileReference, - Kind: kind, - Pattern: pattern, + if kind == "end" { + return end, nil } + return start, nil } if line, found := matchSingleLine(pattern, lines, startFrom); found { return line, nil @@ -279,6 +288,22 @@ func matchSingleLine(pattern *Pattern, lines []string, startFrom int) (int, bool return 0, false } +// matchLineSequence returns the first line range matching the pattern or a not-found error. 
+func (e Instruction) matchLineSequence(pattern *Pattern, lines []string, startFrom int, + kind string, codeFileReference string) (int, int, error) { + start, end, found := matchLineSequence(pattern, lines, startFrom) + if found { + return start, end, nil + } + + return 0, 0, PatternNotFoundError{ + Line: e.DocumentationLine, + CodeFileReference: codeFileReference, + Kind: kind, + Pattern: pattern, + } +} + // matchLineSequence returns the first source-line range matching an escaped-line pattern. func matchLineSequence(pattern *Pattern, lines []string, startFrom int) (int, int, bool) { patternLines, _ := pattern.linePatterns() diff --git a/test/resources/docs/escaped-newline-line-pattern.md b/test/resources/docs/escaped-newline-line-pattern.md new file mode 100644 index 0000000..8fd0a33 --- /dev/null +++ b/test/resources/docs/escaped-newline-line-pattern.md @@ -0,0 +1,6 @@ +# Escaped-newline line pattern + + +```java +``` From a58a8a468a706de6e691986d7e5d40b97fa33873 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Tue, 26 May 2026 13:24:45 +0200 Subject: [PATCH 05/11] Improve readability. --- embedding/parsing/pattern.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/embedding/parsing/pattern.go b/embedding/parsing/pattern.go index dd5df72..a754b54 100644 --- a/embedding/parsing/pattern.go +++ b/embedding/parsing/pattern.go @@ -39,10 +39,10 @@ type Pattern struct { const ( anyCharacterSequence = "*" - escapedLineSeparator = `\\n` lineStart = "^" lineEnd = "$" - patternLineSeparator = `\n` + lineSeparator = `\n` + escapedLineSeparator = `\\n` ) // NewPattern creates a new Pattern based on provided glob string. @@ -53,6 +53,12 @@ const ( // The modified pattern is the original one, but enclosed with the "*" wildcards, // unless start of the line or end of the line wildcards were specified. // +// A multi-line pattern uses "\n" as a separator between consecutive source-line +// patterns. 
For example, "Test \n adds two values" matches a line matching "Test" +// followed by a line matching "adds two values". Each part separated by "\n" is +// converted to Pattern separately and follows the same wildcard rules. +// Use "\\n" to match literal "\n" text instead of starting the next pattern line. +// // glob — a string that represents a pattern that can include such wildcards: // - "*" — matches any sequence of characters; // - "^" — matches the start of the line; @@ -136,11 +142,11 @@ func (p Pattern) linePatterns() ([]string, bool) { case strings.HasPrefix(remaining, escapedLineSeparator): line.WriteString(escapedLineSeparator) i += len(escapedLineSeparator) - case strings.HasPrefix(remaining, patternLineSeparator): + case strings.HasPrefix(remaining, lineSeparator): patternLines = append(patternLines, strings.TrimSpace(line.String())) line.Reset() hasSeparator = true - i += len(patternLineSeparator) + i += len(lineSeparator) default: line.WriteByte(p.sourceGlob[i]) i++ From fa57289a813427d819a5c1f206658e880786aec6 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Tue, 26 May 2026 13:31:29 +0200 Subject: [PATCH 06/11] Improve doc. --- EMBEDDING.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/EMBEDDING.md b/EMBEDDING.md index 469ab27..70a5528 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -177,9 +177,8 @@ To match literal `\n` text in a source line, write it as `\\n` in the pattern. ``` ```` -You may write quote characters in patterns as `\"` instead of the XML entity `"`. -For example, `line="println(\"Hello\")"` is equivalent to -`line="println("Hello")"`. +It s possible to write quote characters in patterns as `\"` instead of the XML entity `"`. +For example, `line="println(\"Hello\")"` is equivalent to `line="println("Hello")"`. ## Comment filtering From af9f863278643a3152ff193adcc6bc4aac699560 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Tue, 26 May 2026 14:06:02 +0200 Subject: [PATCH 07/11] Update version. 
--- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index b08fad5..c09df6e 100644 --- a/main.go +++ b/main.go @@ -28,7 +28,7 @@ import ( ) // Version of the embed-code application. -const Version = "1.2.0" +const Version = "1.2.1" // The entry point for embed-code. // From 3cf28707db51d7ae2f4dfd9f9cefbfbe5869ecc5 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Thu, 28 May 2026 09:49:46 +0200 Subject: [PATCH 08/11] Rename test variable. --- EMBEDDING.md | 4 ++-- embedding/embedding_test.go | 2 +- .../code/java/org/example/MultiLinePatternSample.java | 2 +- test/resources/docs/escaped-newline-literal-pattern.md | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/EMBEDDING.md b/EMBEDDING.md index 70a5528..11fc16f 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -171,8 +171,8 @@ To match literal `\n` text in a source line, write it as `\\n` in the pattern. ````markdown + file="src/test/java/example/MessageTemplate.java" + line="MY_STRING = \"\\n\""> ```java ``` ```` diff --git a/embedding/embedding_test.go b/embedding/embedding_test.go index 8fc7119..3ae82c4 100644 --- a/embedding/embedding_test.go +++ b/embedding/embedding_test.go @@ -252,7 +252,7 @@ var _ = Describe("Embedding", func() { docContent, err := os.ReadFile(docPath) Expect(err).ShouldNot(HaveOccurred()) Expect(string(docContent)).Should(ContainSubstring( - "private static final String LINE_SEPARATOR = \"\\n\";", + "private static final String MY_STRING = \"\\n\";", )) }) diff --git a/test/resources/code/java/org/example/MultiLinePatternSample.java b/test/resources/code/java/org/example/MultiLinePatternSample.java index bcf6e19..000a4d6 100644 --- a/test/resources/code/java/org/example/MultiLinePatternSample.java +++ b/test/resources/code/java/org/example/MultiLinePatternSample.java @@ -5,7 +5,7 @@ class MultiLinePatternSample { - private static final String LINE_SEPARATOR = "\n"; + private static final String MY_STRING = "\n"; @Test 
@DisplayName("adds two values") diff --git a/test/resources/docs/escaped-newline-literal-pattern.md b/test/resources/docs/escaped-newline-literal-pattern.md index 9a50459..4c45ddf 100644 --- a/test/resources/docs/escaped-newline-literal-pattern.md +++ b/test/resources/docs/escaped-newline-literal-pattern.md @@ -1,6 +1,6 @@ # Escaped-newline literal pattern + line="MY_STRING = \"\\n\""/> ```java ``` From 54a24971e382212838c2ee6b5e857c1b2331f727 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Thu, 28 May 2026 09:56:32 +0200 Subject: [PATCH 09/11] Extract pattern compilation. --- embedding/parsing/instruction.go | 6 ++-- embedding/parsing/pattern.go | 49 +++++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index 8cc23c8..06a384d 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -306,12 +306,12 @@ func (e Instruction) matchLineSequence(pattern *Pattern, lines []string, startFr // matchLineSequence returns the first source-line range matching an escaped-line pattern. func matchLineSequence(pattern *Pattern, lines []string, startFrom int) (int, int, bool) { - patternLines, _ := pattern.linePatterns() - lineCount := len(patternLines) + patterns := pattern.lineSequencePatterns() + lineCount := len(patterns) lastStart := len(lines) - lineCount for start := startFrom; start <= lastStart; start++ { end := start + lineCount - if pattern.MatchLineSequence(lines[start:end]) { + if matchLineSequencePatterns(patterns, lines[start:end]) { return start, end - 1, true } } diff --git a/embedding/parsing/pattern.go b/embedding/parsing/pattern.go index a754b54..4d86b6e 100644 --- a/embedding/parsing/pattern.go +++ b/embedding/parsing/pattern.go @@ -27,7 +27,8 @@ import ( // Pattern represents a glob-like pattern to match a line of a source file. // -// Contains both original glob string and modified pattern suitable for matching. 
+// Contains both original glob string, modified pattern suitable for matching, +// and a compiled matcher for the modified pattern. // // sourceGlob — a glob-like string, e.g. "*main*" or "^main". // @@ -35,6 +36,7 @@ import ( type Pattern struct { sourceGlob string pattern string + matcher glob.Glob } const ( @@ -73,19 +75,19 @@ const ( // p := NewPattern("^.txt") // fmt.Println("Original glob:", p.sourceGlob) // "*.txt" // fmt.Println("Modified pattern:", p.pattern) // ".txt*" -func NewPattern(glob string) Pattern { - pattern := glob +func NewPattern(globString string) Pattern { + pattern := globString - startOfLine := strings.HasPrefix(glob, lineStart) - if !startOfLine && !strings.HasPrefix(glob, anyCharacterSequence) { + startOfLine := strings.HasPrefix(globString, lineStart) + if !startOfLine && !strings.HasPrefix(globString, anyCharacterSequence) { pattern = anyCharacterSequence + pattern } if startOfLine { pattern = pattern[1:] } - endOfLine := strings.HasSuffix(glob, lineEnd) - if !endOfLine && !strings.HasSuffix(glob, anyCharacterSequence) { + endOfLine := strings.HasSuffix(globString, lineEnd) + if !endOfLine && !strings.HasSuffix(globString, anyCharacterSequence) { pattern += anyCharacterSequence } if endOfLine { @@ -94,8 +96,9 @@ func NewPattern(glob string) Pattern { } return Pattern{ - sourceGlob: glob, + sourceGlob: globString, pattern: pattern, + matcher: glob.MustCompile(pattern), } } @@ -103,9 +106,11 @@ func NewPattern(glob string) Pattern { // // line — a line to check the match for. func (p Pattern) Match(line string) bool { - g := glob.MustCompile(p.pattern) + if p.matcher == nil { + return glob.MustCompile(p.pattern).Match(line) + } - return g.Match(line) + return p.matcher.Match(line) } // HasLineSeparator reports whether the pattern contains an escaped line separator. @@ -117,12 +122,17 @@ func (p Pattern) HasLineSeparator() bool { // MatchLineSequence reports whether source lines match the escaped-line-separated pattern. 
func (p Pattern) MatchLineSequence(lines []string) bool { - patternLines, _ := p.linePatterns() - if len(patternLines) != len(lines) { + patterns := p.lineSequencePatterns() + + return matchLineSequencePatterns(patterns, lines) +} + +// matchLineSequencePatterns reports whether compiled Patterns match source lines in order. +func matchLineSequencePatterns(patterns []Pattern, lines []string) bool { + if len(patterns) != len(lines) { return false } - for i, patternLine := range patternLines { - pattern := NewPattern(patternLine) + for i, pattern := range patterns { if !pattern.Match(lines[i]) { return false } @@ -131,6 +141,17 @@ func (p Pattern) MatchLineSequence(lines []string) bool { return true } +// lineSequencePatterns returns the Patterns for each part of a multi-line pattern. +func (p Pattern) lineSequencePatterns() []Pattern { + patternLines, _ := p.linePatterns() + patterns := make([]Pattern, 0, len(patternLines)) + for _, patternLine := range patternLines { + patterns = append(patterns, NewPattern(patternLine)) + } + + return patterns +} + // linePatterns returns trimmed pattern lines separated by an escaped newline. func (p Pattern) linePatterns() ([]string, bool) { var patternLines []string From 65ab941a3e7301fd8f92eff6f8aa0d47dec040e0 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Thu, 28 May 2026 10:25:43 +0200 Subject: [PATCH 10/11] Fix typo. --- EMBEDDING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EMBEDDING.md b/EMBEDDING.md index 11fc16f..83d24a1 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -177,7 +177,7 @@ To match literal `\n` text in a source line, write it as `\\n` in the pattern. ``` ```` -It s possible to write quote characters in patterns as `\"` instead of the XML entity `&quot;`. +It's possible to write quote characters in patterns as `\"` instead of the XML entity `&quot;`. For example, `line="println(\"Hello\")"` is equivalent to `line="println(&quot;Hello&quot;)"`. 
## Comment filtering From 1401c464eeef40997eb5b8be5e21a6d953c784b9 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Thu, 28 May 2026 10:49:02 +0200 Subject: [PATCH 11/11] Improve space trimming. --- embedding/parsing/instruction_test.go | 14 ++++++++++++++ embedding/parsing/pattern.go | 18 ++++++++++++++++-- test/resources/code/java/literal-patterns.txt | 1 + 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/embedding/parsing/instruction_test.go b/embedding/parsing/instruction_test.go index 1fd0d2c..f89c1a8 100644 --- a/embedding/parsing/instruction_test.go +++ b/embedding/parsing/instruction_test.go @@ -349,6 +349,20 @@ var _ = Describe("Instruction", func() { })) }) + It("should preserve pattern spaces that are not adjacent to a line separator", func() { + instructionParams := TestInstructionParams{ + lineGlob: "^ padded text $ \\n ^Use \\* to multiply$", + } + + actualLines := getXMLExtractionContent( + "literal-patterns.txt", instructionParams, config) + + Expect(actualLines).Should(Equal([]string{ + " padded text ", + "Use * to multiply", + })) + }) + It("should successfully parse XML by only end glob", func() { instructionParams := TestInstructionParams{ endGlob: "package*", diff --git a/embedding/parsing/pattern.go b/embedding/parsing/pattern.go index 4d86b6e..fba743c 100644 --- a/embedding/parsing/pattern.go +++ b/embedding/parsing/pattern.go @@ -21,6 +21,8 @@ package parsing import ( "fmt" "strings" + "unicode" + "unicode/utf8" "github.com/gobwas/glob" ) @@ -157,6 +159,7 @@ func (p Pattern) linePatterns() ([]string, bool) { var patternLines []string var line strings.Builder hasSeparator := false + trimLeft := false for i := 0; i < len(p.sourceGlob); { remaining := p.sourceGlob[i:] switch { @@ -164,16 +167,27 @@ func (p Pattern) linePatterns() ([]string, bool) { line.WriteString(escapedLineSeparator) i += len(escapedLineSeparator) case strings.HasPrefix(remaining, lineSeparator): - patternLines = append(patternLines, 
strings.TrimSpace(line.String())) + patternLines = append(patternLines, strings.TrimRightFunc(line.String(), unicode.IsSpace)) line.Reset() hasSeparator = true + trimLeft = true i += len(lineSeparator) + case trimLeft: + r, size := utf8.DecodeRuneInString(remaining) + if !unicode.IsSpace(r) { + trimLeft = false + line.WriteByte(p.sourceGlob[i]) + i++ + continue + } + i += size default: + trimLeft = false line.WriteByte(p.sourceGlob[i]) i++ } } - patternLines = append(patternLines, strings.TrimSpace(line.String())) + patternLines = append(patternLines, line.String()) return patternLines, hasSeparator } diff --git a/test/resources/code/java/literal-patterns.txt b/test/resources/code/java/literal-patterns.txt index ca45d08..034ba90 100644 --- a/test/resources/code/java/literal-patterns.txt +++ b/test/resources/code/java/literal-patterns.txt @@ -1,3 +1,4 @@ + padded text Use * to multiply The total is $5 The value ends with $