From ea01cd0e000191d35ae9109e802083d7d1ab4503 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 8 Jun 2026 14:47:37 +0200 Subject: [PATCH 1/5] Improve readability. --- EMBEDDING.md | 20 ++++++++++---------- README.md | 23 ++++++++++++++--------- configuration/configuration.go | 10 ++++++---- fragmentation/fragment.go | 8 +++++--- main.go | 2 ++ 5 files changed, 37 insertions(+), 26 deletions(-) diff --git a/EMBEDDING.md b/EMBEDDING.md index 83d24a1..4b091d8 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -22,8 +22,8 @@ Use glob-style patterns to match the start and end lines of the fragment. ## Embedding instruction format -An `` instruction must always be followed by a Markdown code fence -(opening and closing triple backticks). +An `` instruction must always be followed by a Markdown code fence +(opening and closing triple backticks). ````markdown @@ -47,7 +47,7 @@ To define a named fragment in your source code, wrap the desired lines with ```java public final class String implements java.io.Serializable, Comparable, CharSequence { - + // #docfragment "Constructor" public String() { this.value = new char[0]; @@ -205,7 +205,7 @@ Supported values: Unknown extensions are embedded unchanged. -Not all languages has difference between documentation/regular or inline/block comments. +Not all languages distinguish documentation from regular comments or inline from block comments. The table below lists the supported languages and supported `comments` modes for them: @@ -213,7 +213,7 @@ The table below lists the supported languages and supported `comments` modes for |------------------------|---------------------------------------------------------|--------------------------------------------------------------| | Java, Kotlin, Groovy | `.java`, `.kt`, `.kts`, `.groovy` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | | C# | `.cs` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | -| C, C++ | `.c`, `.h`, `.cc`, `.cpp`, `.cxx`,`.hh`, `.hpp`, `.hxx` | `all`, `none`, `inline`, `block` | +| C, C++ | `.c`, `.h`, `.cc`, `.cpp`, `.cxx`, `.hh`, `.hpp`, `.hxx` | `all`, `none`, `inline`, `block` | | JavaScript, TypeScript | `.js`, `.jsx`, `.ts`, `.tsx` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | | Go | `.go` | `all`, `none`, `inline`, `block` | | Protobuf | `.proto` | `all`, `none`, `inline`, `block` | @@ -226,9 +226,9 @@ The table below lists the supported languages and supported `comments` modes for ### Joining several parts of code into one fragment -A named fragment may consist of one or several pieces declared in a single file. -When rendered, the pieces that belong to a single fragment are joined together. -It is possible to specify a separator between the joined pieces, +A named fragment may consist of one or several pieces declared in a single file. +When rendered, the pieces that belong to a single fragment are joined together. +It is possible to specify a separator between the joined pieces, see [Configuration](./README.md#arguments) for the corresponding parameter. Here is an example of how a multi-piece fragment is rendered. @@ -245,7 +245,7 @@ public final class String return hash; } // #enddocfragment "Standard Object methods" - + /* Here goes irrelevant code */ // #docfragment "Standard Object methods" @@ -313,7 +313,7 @@ public final class String } // #enddocfragment "All methods" } -``` +``` ### Usage with other languages diff --git a/README.md b/README.md index 9b6436c..bf33995 100644 --- a/README.md +++ b/README.md @@ -60,8 +60,11 @@ The available arguments are: * `-docs-path`: (Optional) Path to the documentation root directory. * `-config-path`: (Optional) Path to a YAML configuration file containing `code-path` and `docs-path`. * `-doc-includes`: (Optional) Comma-separated glob patterns for documentation files to include. Defaults to `"**/*.md,**/*.html"`. + * `-doc-excludes`: (Optional) Comma-separated glob patterns for documentation files to exclude. * `-separator`: (Optional) String used to separate joined code fragments. Defaults to `...`. - + * `-info`: (Optional) Enables info-level logging when set to `true`. + * `-stacktrace`: (Optional) Prints stack traces for panics when set to `true`. + Even though the `code-path`, `docs-path`, and `config-path` arguments are optional, Embed Code still requires the root directories for code and documentation to be set. This can be done in one of two ways: @@ -108,19 +111,19 @@ The available fields for the configuration file are: ``` * multiple named paths: ```yaml - code-path: + code-path: - name: examples path: path/to/code/root1 - name: production path: path/to/code/root2 ``` - When a named path is specified, fragments must be referenced in the embedding instructions + When a named path is specified, fragments must be referenced in the embedding instructions using the corresponding path name: ```md ``` **Do not forget the dollar sign (`$`) before the path name.** - + Code source names must be unique. A configuration may use either one unnamed code source or one or more named code sources, but named and unnamed sources cannot be mixed. @@ -131,6 +134,8 @@ The available fields for the configuration file are: * `doc-includes`: (Optional) Glob patterns for documentation files to include. It may be represented as a comma-separated string list or as a YAML sequence. * `separator`: (Optional) Separator for fragments. + * `info`: (Optional) Enables info-level logging. + * `stacktrace`: (Optional) Prints stack traces for panics. * `embeddings`: (Optional) A list of complete embedding configurations for multiple documentation targets. When `embeddings` is set, do not set root-level `code-path` or `docs-path`. Define `code-path`, `docs-path`, and optional settings inside each entry. @@ -153,19 +158,19 @@ However, you can also compile the utility manually if Go is [installed](#install Navigate to the project root and run: ```bash -go build -trimpath main.go +go build -trimpath -o embed-code main.go ``` There may be issues when running `go build` outside of the directory containing `main.go`, even if the path is specified correctly. -This command creates an executable named `embed-code` (or `embed-code.exe` on Windows). +This command creates an executable named `embed-code`. For further information, please refer to the [docs](https://pkg.go.dev/cmd/go#hdr-Compile_packages_and_dependencies). -Without the `-trimpath` flag, Go includes absolute file paths in stack traces -based on the system where the binary was built. +Without the `-trimpath` flag, Go includes absolute file paths in stack traces +based on the system where the binary was built. -Run following command to build binaries for macOS, Windows and Ubuntu: +Run the following command to build binaries for macOS, Windows and Ubuntu: ```bash mkdir -p bin && \ GOOS=darwin GOARCH=amd64 go build -trimpath -o bin/embed-code-macos main.go && chmod +x bin/embed-code-macos && \ diff --git a/configuration/configuration.go b/configuration/configuration.go index c62484a..6ac88fb 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -24,9 +24,11 @@ import ( ) const ( + // DefaultSeparator joins multiple partitions of a single fragment. DefaultSeparator = "..." ) +// DefaultDocIncludes contains the default documentation glob patterns. var DefaultDocIncludes = []string{"**/*.md", "**/*.html"} // Configuration contains the settings for the plugin to work. @@ -57,16 +59,16 @@ type Configuration struct { // The default value is ["**/*.md", "**/*.html"]. DocIncludes []string - // DocExcludes is a list of patterns for filtering which we should not include for embedding - // instructions. + // DocExcludes is a list of patterns for documentation files that should not be + // processed for embedding instructions. // // The patterns are resolved relatively to the `documentation_root`. // - // By the such a pattern, it can be both directories and files. + // A pattern can match both directories and files. // // For example, ["old-docs/**/*.md", "old-docs-v1/**/*"] // - // Be the default, it is not set. + // By default, it is not set. DocExcludes []string // Separator is a string that's inserted between multiple partitions of a single fragment. diff --git a/fragmentation/fragment.go b/fragmentation/fragment.go index 3eb6dc6..07efdbf 100644 --- a/fragmentation/fragment.go +++ b/fragmentation/fragment.go @@ -24,6 +24,7 @@ import ( "embed-code/embed-code-go/indent" ) +// DefaultFragmentName identifies the whole-file fragment. const DefaultFragmentName = "_default" // Fragment is a single fragment in a file. @@ -44,11 +45,12 @@ func CreateDefaultFragment() Fragment { } } +// isDefault reports whether this fragment represents the whole source file. func (f Fragment) isDefault() bool { return f.Name == DefaultFragmentName } -// Obtains the text for the fragment. +// text returns the rendered text for the fragment. // // lines — a list with every line of the file. // @@ -82,7 +84,7 @@ func (f Fragment) text(lines []string, separator string) (string, error) { return text, nil } -// Calculates and returns a list which contains corresponding lines for every partition. +// obtainPartitionTexts returns source lines selected for every partition. // // lines — a list with every line of the file. // @@ -100,7 +102,7 @@ func (f Fragment) obtainPartitionTexts(lines []string) ([][]string, error) { return partitionLines, nil } -// Returns string indent for separator. +// separatorIndent returns the indentation to use before a partition separator. func separatorIndent(lines []string) string { if len(lines) > 0 { firstLine := lines[0] diff --git a/main.go b/main.go index 94547d7..7292053 100644 --- a/main.go +++ b/main.go @@ -79,6 +79,8 @@ const Version = "1.2.2" // "old-docs/**/*.md,old-guides/*.html". It is not set by default; // - separator — a string which is used as a separator between code fragments. Default value // is "...". +// - info — a flag that enables info-level logs; +// - stacktrace — a flag that enables stack traces in panic logs. func main() { fmt.Printf("Running embed-code v%s.\n", Version) userArgs := cli.ReadArgs() From 7daa5acb6775026548cc3077bd3c8eb544802c3a Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 8 Jun 2026 14:50:24 +0200 Subject: [PATCH 2/5] Improve glob pattern error handling. --- embedding/parsing/instruction.go | 48 +++++++++++++++++++-------- embedding/parsing/instruction_test.go | 11 ++++++ embedding/parsing/pattern.go | 38 ++++++++++++++------- 3 files changed, 72 insertions(+), 25 deletions(-) diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index e65df22..d899efb 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -120,12 +120,25 @@ func NewInstruction( return Instruction{}, err } + startPattern, err := patternFromValue("start", startValue) + if err != nil { + return Instruction{}, err + } + endPattern, err := patternFromValue("end", endValue) + if err != nil { + return Instruction{}, err + } + linePattern, err := patternFromValue("line", lineValue) + if err != nil { + return Instruction{}, err + } + return Instruction{ CodeFile: codeFile, Fragment: fragment, - StartPattern: patternFromValue(startValue), - EndPattern: patternFromValue(endValue), - LinePattern: patternFromValue(lineValue), + StartPattern: startPattern, + EndPattern: endPattern, + LinePattern: linePattern, CommentMode: commentMode, Configuration: config, }, nil @@ -148,13 +161,16 @@ func validateExclusiveAttributes(fragment string, start string, end string, line } // patternFromValue creates a Pattern pointer for a non-empty attribute value. -func patternFromValue(value string) *Pattern { +func patternFromValue(attribute string, value string) (*Pattern, error) { if value == "" { - return nil + return nil, nil + } + pattern, err := NewPattern(value) + if err != nil { + return nil, fmt.Errorf("invalid %s pattern `%s`: %w", attribute, value, err) } - pattern := NewPattern(value) - return &pattern + return &pattern, nil } // Content reads and returns the lines for specified fragment from the code. @@ -226,7 +242,7 @@ func patternLabel(kind string, pattern *Pattern) string { return fmt.Sprintf("%s pattern `%s`", kind, pattern.sourceGlob) } -// Returns string representation of Instruction. +// String returns a string representation of Instruction. func (e Instruction) String() string { return fmt.Sprintf( "EmbeddingInstruction[file=`%s`, fragment=`%s`, start=`%s`, end=`%s`, line=`%s`, comments=`%s`]", @@ -341,7 +357,10 @@ func matchSingleLine(pattern *Pattern, lines []string, startFrom int) (int, bool // matchLineSequence returns the first line range matching the pattern or a not-found error. func (e Instruction) matchLineSequence(pattern *Pattern, lines []string, startFrom int, kind string, codeFileReference string) (int, int, error) { - start, end, found := matchLineSequence(pattern, lines, startFrom) + start, end, found, err := matchLineSequence(pattern, lines, startFrom) + if err != nil { + return 0, 0, err + } if found { return start, end, nil } @@ -355,16 +374,19 @@ func (e Instruction) matchLineSequence(pattern *Pattern, lines []string, startFr } // matchLineSequence returns the first source-line range matching an escaped-line pattern. -func matchLineSequence(pattern *Pattern, lines []string, startFrom int) (int, int, bool) { - patterns := pattern.lineSequencePatterns() +func matchLineSequence(pattern *Pattern, lines []string, startFrom int) (int, int, bool, error) { + patterns, err := pattern.lineSequencePatterns() + if err != nil { + return 0, 0, false, err + } lineCount := len(patterns) lastStart := len(lines) - lineCount for start := startFrom; start <= lastStart; start++ { end := start + lineCount if matchLineSequencePatterns(patterns, lines[start:end]) { - return start, end - 1, true + return start, end - 1, true, nil } } - return 0, 0, false + return 0, 0, false, nil } diff --git a/embedding/parsing/instruction_test.go b/embedding/parsing/instruction_test.go index 531dcb6..5af31d2 100644 --- a/embedding/parsing/instruction_test.go +++ b/embedding/parsing/instruction_test.go @@ -104,6 +104,17 @@ var _ = Describe("Instruction", func() { Expect(parsing.FromXML(xmlString, config)).Error().Should(HaveOccurred()) }) + It("should have an error for an invalid glob pattern", func() { + instructionParams := TestInstructionParams{ + startGlob: "[", + } + xmlString := buildInstruction("org/example/Hello.java", instructionParams) + + _, err := parsing.FromXML(xmlString, config) + + Expect(err).Should(MatchError(ContainSubstring("invalid start pattern `[`"))) + }) + It("should successfully read source content", func() { instructionParams := TestInstructionParams{ closeTag: true, diff --git a/embedding/parsing/pattern.go b/embedding/parsing/pattern.go index c891473..e924053 100644 --- a/embedding/parsing/pattern.go +++ b/embedding/parsing/pattern.go @@ -70,14 +70,16 @@ const ( // // Example usage: // -// p := NewPattern("*.txt") +// p, err := NewPattern("*.txt") // fmt.Println("Original glob:", p.sourceGlob) // "*.txt" // fmt.Println("Modified pattern:", p.pattern) // "*.txt*" // -// p := NewPattern("^.txt") -// fmt.Println("Original glob:", p.sourceGlob) // "*.txt" +// p, err = NewPattern("^.txt") +// fmt.Println("Original glob:", p.sourceGlob) // "^.txt" // fmt.Println("Modified pattern:", p.pattern) // ".txt*" -func NewPattern(globString string) Pattern { +// +// Returns an error if the modified glob pattern cannot be compiled. +func NewPattern(globString string) (Pattern, error) { pattern := globString startOfLine := strings.HasPrefix(globString, lineStart) @@ -97,11 +99,16 @@ func NewPattern(globString string) Pattern { pattern = pattern[:lastIndex] } + matcher, err := glob.Compile(pattern) + if err != nil { + return Pattern{}, err + } + return Pattern{ sourceGlob: globString, pattern: pattern, - matcher: glob.MustCompile(pattern), - } + matcher: matcher, + }, nil } // Match reports whether given line matches the pattern. @@ -109,7 +116,7 @@ func NewPattern(globString string) Pattern { // line — a line to check the match for. func (p Pattern) Match(line string) bool { if p.matcher == nil { - return glob.MustCompile(p.pattern).Match(line) + return false } return p.matcher.Match(line) @@ -124,7 +131,10 @@ func (p Pattern) HasLineSeparator() bool { // MatchLineSequence reports whether source lines match the escaped-line-separated pattern. func (p Pattern) MatchLineSequence(lines []string) bool { - patterns := p.lineSequencePatterns() + patterns, err := p.lineSequencePatterns() + if err != nil { + return false + } return matchLineSequencePatterns(patterns, lines) } @@ -144,14 +154,18 @@ func matchLineSequencePatterns(patterns []Pattern, lines []string) bool { } // lineSequencePatterns returns the Patterns for each part of a multi-line pattern. -func (p Pattern) lineSequencePatterns() []Pattern { +func (p Pattern) lineSequencePatterns() ([]Pattern, error) { patternLines, _ := p.linePatterns() patterns := make([]Pattern, 0, len(patternLines)) for _, patternLine := range patternLines { - patterns = append(patterns, NewPattern(patternLine)) + pattern, err := NewPattern(patternLine) + if err != nil { + return nil, err + } + patterns = append(patterns, pattern) } - return patterns + return patterns, nil } // linePatterns returns trimmed pattern lines separated by an escaped newline. @@ -193,7 +207,7 @@ func (p Pattern) linePatterns() ([]string, bool) { return patternLines, hasSeparator } -// Returns string representation of Pattern. +// String returns a string representation of Pattern. func (p Pattern) String() string { return fmt.Sprintf("Pattern %s", p.sourceGlob) } From 9847ef1d03403c3ba3e084b8e1d9df5df8e78b05 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 8 Jun 2026 16:51:52 +0200 Subject: [PATCH 3/5] Improve `Pattern` behaviour. --- embedding/parsing/instruction.go | 96 +++------------------ embedding/parsing/pattern.go | 140 +++++++++++-------------------- 2 files changed, 61 insertions(+), 175 deletions(-) diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index d899efb..bc4831d 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -66,7 +66,7 @@ type Instruction struct { Configuration configuration.Configuration } -// PatternNotFoundError reports that a start or end pattern did not match the code file. +// PatternNotFoundError reports that an instruction pattern did not match the code file. type PatternNotFoundError struct { Line int CodeFileReference string @@ -255,25 +255,13 @@ func (e Instruction) String() string { // lines — a list of strings representing the input lines. func (e Instruction) matchingLines(lines []string, codeFileReference string) ([]string, error) { if e.LinePattern != nil { - if e.LinePattern.HasLineSeparator() { - startPosition, endPosition, err := e.matchLineSequence( - e.LinePattern, lines, 0, "line", codeFileReference, - ) - if err != nil { - return nil, err - } - requiredLines := lines[startPosition : endPosition+1] - indentation := indent.MaxCommonIndentation(requiredLines) - - return indent.CutIndent(requiredLines, indentation), nil - } - linePosition, err := e.matchGlob( + startPosition, endPosition, err := e.matchPattern( e.LinePattern, lines, 0, "line", codeFileReference, ) if err != nil { return nil, err } - requiredLines := []string{lines[linePosition]} + requiredLines := lines[startPosition : endPosition+1] indentation := indent.MaxCommonIndentation(requiredLines) return indent.CutIndent(requiredLines, indentation), nil @@ -282,7 +270,7 @@ func (e Instruction) matchingLines(lines []string, codeFileReference string) ([] startPosition := 0 if e.StartPattern != nil { var err error - startPosition, err = e.matchGlob( + startPosition, _, err = e.matchPattern( e.StartPattern, lines, 0, "start", codeFileReference, ) if err != nil { @@ -292,7 +280,7 @@ func (e Instruction) matchingLines(lines []string, codeFileReference string) ([] endPosition := len(lines) - 1 if e.EndPattern != nil { var err error - endPosition, err = e.matchGlob( + _, endPosition, err = e.matchPattern( e.EndPattern, lines, startPosition, "end", codeFileReference, ) if err != nil { @@ -305,63 +293,17 @@ func (e Instruction) matchingLines(lines []string, codeFileReference string) ([] return indent.CutIndent(requiredLines, indentation), nil } -// Returns the index of a first line that matches given pattern. +// matchPattern returns the first line range that matches given pattern. // // pattern — a pattern to search in lines for. // // lines — a list of lines to search in. // // startFrom — an index from which to start searching. -func (e Instruction) matchGlob(pattern *Pattern, lines []string, startFrom int, - kind string, codeFileReference string) (int, error) { - if pattern.HasLineSeparator() { - start, end, err := e.matchLineSequence( - pattern, lines, startFrom, kind, codeFileReference, - ) - if err != nil { - return 0, err - } - if kind == "end" { - return end, nil - } - - return start, nil - } - if line, found := matchSingleLine(pattern, lines, startFrom); found { - return line, nil - } - - return 0, PatternNotFoundError{ - Line: e.DocumentationLine, - CodeFileReference: codeFileReference, - Kind: kind, - Pattern: pattern, - } -} - -// matchSingleLine returns the first source line matching the pattern. -func matchSingleLine(pattern *Pattern, lines []string, startFrom int) (int, bool) { - lineCount := len(lines) - resultLine := startFrom - for resultLine < lineCount { - line := lines[resultLine] - if pattern.Match(line) { - return resultLine, true - } - resultLine++ - } - - return 0, false -} - -// matchLineSequence returns the first line range matching the pattern or a not-found error. -func (e Instruction) matchLineSequence(pattern *Pattern, lines []string, startFrom int, - kind string, codeFileReference string) (int, int, error) { - start, end, found, err := matchLineSequence(pattern, lines, startFrom) - if err != nil { - return 0, 0, err - } - if found { +func (e Instruction) matchPattern( + pattern *Pattern, lines []string, startFrom int, kind string, codeFileReference string, +) (int, int, error) { + if start, end, found := pattern.FindIn(lines, startFrom); found { return start, end, nil } @@ -372,21 +314,3 @@ func (e Instruction) matchLineSequence(pattern *Pattern, lines []string, startFr Pattern: pattern, } } - -// matchLineSequence returns the first source-line range matching an escaped-line pattern. -func matchLineSequence(pattern *Pattern, lines []string, startFrom int) (int, int, bool, error) { - patterns, err := pattern.lineSequencePatterns() - if err != nil { - return 0, 0, false, err - } - lineCount := len(patterns) - lastStart := len(lines) - lineCount - for start := startFrom; start <= lastStart; start++ { - end := start + lineCount - if matchLineSequencePatterns(patterns, lines[start:end]) { - return start, end - 1, true, nil - } - } - - return 0, 0, false, nil -} diff --git a/embedding/parsing/pattern.go b/embedding/parsing/pattern.go index e924053..469882f 100644 --- a/embedding/parsing/pattern.go +++ b/embedding/parsing/pattern.go @@ -27,18 +27,14 @@ import ( "github.com/gobwas/glob" ) -// Pattern represents a glob-like pattern to match a line of a source file. +// Pattern represents a glob-like pattern to match consecutive source lines. // -// Contains both original glob string, modified pattern suitable for matching, -// and a compiled matcher for the modified pattern. +// Contains the original glob string and compiled matchers for each source-line pattern. // // sourceGlob — a glob-like string, e.g. "*main*" or "^main". -// -// pattern — a pattern to search for. type Pattern struct { sourceGlob string - pattern string - matcher glob.Glob + matchers []glob.Glob } const ( @@ -51,16 +47,13 @@ const ( // NewPattern creates a new Pattern based on provided glob string. // -// The resulting Pattern struct contains both original glob string and -// modified pattern suitable for matching. -// // The modified pattern is the original one, but enclosed with the "*" wildcards, // unless start of the line or end of the line wildcards were specified. // // A multi-line pattern uses "\n" as a separator between consecutive source-line // patterns. For example, "Test \n adds two values" matches a line matching "Test" // followed by a line matching "adds two values". Each part separated by "\n" is -// converted to Pattern separately and follows the same wildcard rules. +// compiled separately and follows the same wildcard rules. // Use "\\n" to match literal "\n" text instead of starting the next pattern line. // // glob — a string that represents a pattern that can include such wildcards: @@ -68,30 +61,38 @@ const ( // - "^" — matches the start of the line; // - "$" — matches the end of the line. // -// Example usage: -// -// p, err := NewPattern("*.txt") -// fmt.Println("Original glob:", p.sourceGlob) // "*.txt" -// fmt.Println("Modified pattern:", p.pattern) // "*.txt*" -// -// p, err = NewPattern("^.txt") -// fmt.Println("Original glob:", p.sourceGlob) // "^.txt" -// fmt.Println("Modified pattern:", p.pattern) // ".txt*" -// -// Returns an error if the modified glob pattern cannot be compiled. +// Returns an error if any modified glob pattern cannot be compiled. func NewPattern(globString string) (Pattern, error) { - pattern := globString + patternLines := splitPatternLines(globString) + matchers := make([]glob.Glob, 0, len(patternLines)) + for _, patternLine := range patternLines { + matcher, err := compileLineMatcher(patternLine) + if err != nil { + return Pattern{}, err + } + matchers = append(matchers, matcher) + } - startOfLine := strings.HasPrefix(globString, lineStart) - if !startOfLine && !strings.HasPrefix(globString, anyCharacterSequence) { + return Pattern{ + sourceGlob: globString, + matchers: matchers, + }, nil +} + +// compileLineMatcher compiles one source-line pattern into a glob matcher. +func compileLineMatcher(patternLine string) (glob.Glob, error) { + pattern := patternLine + + startOfLine := strings.HasPrefix(patternLine, lineStart) + if !startOfLine && !strings.HasPrefix(patternLine, anyCharacterSequence) { pattern = anyCharacterSequence + pattern } if startOfLine { pattern = pattern[1:] } - endOfLine := strings.HasSuffix(globString, lineEnd) - if !endOfLine && !strings.HasSuffix(globString, anyCharacterSequence) { + endOfLine := strings.HasSuffix(patternLine, lineEnd) + if !endOfLine && !strings.HasSuffix(patternLine, anyCharacterSequence) { pattern += anyCharacterSequence } if endOfLine { @@ -99,53 +100,31 @@ func NewPattern(globString string) (Pattern, error) { pattern = pattern[:lastIndex] } - matcher, err := glob.Compile(pattern) - if err != nil { - return Pattern{}, err - } - - return Pattern{ - sourceGlob: globString, - pattern: pattern, - matcher: matcher, - }, nil + return glob.Compile(pattern) } -// Match reports whether given line matches the pattern. -// -// line — a line to check the match for. -func (p Pattern) Match(line string) bool { - if p.matcher == nil { - return false +// FindIn returns the first source-line range matching the pattern. +func (p Pattern) FindIn(lines []string, startFrom int) (int, int, bool) { + if len(p.matchers) == 0 || startFrom < 0 { + return 0, 0, false } - - return p.matcher.Match(line) -} - -// HasLineSeparator reports whether the pattern contains an escaped line separator. -func (p Pattern) HasLineSeparator() bool { - _, hasSeparator := p.linePatterns() - - return hasSeparator -} - -// MatchLineSequence reports whether source lines match the escaped-line-separated pattern. -func (p Pattern) MatchLineSequence(lines []string) bool { - patterns, err := p.lineSequencePatterns() - if err != nil { - return false + lastStart := len(lines) - len(p.matchers) + for start := startFrom; start <= lastStart; start++ { + if p.matchesAt(lines, start) { + return start, start + len(p.matchers) - 1, true + } } - return matchLineSequencePatterns(patterns, lines) + return 0, 0, false } -// matchLineSequencePatterns reports whether compiled Patterns match source lines in order. -func matchLineSequencePatterns(patterns []Pattern, lines []string) bool { - if len(patterns) != len(lines) { +// matchesAt reports whether the compiled matchers match source lines at start. +func (p Pattern) matchesAt(lines []string, start int) bool { + if len(p.matchers) == 0 || start < 0 || start+len(p.matchers) > len(lines) { return false } - for i, pattern := range patterns { - if !pattern.Match(lines[i]) { + for i, matcher := range p.matchers { + if matcher == nil || !matcher.Match(lines[start+i]) { return false } } @@ -153,29 +132,13 @@ func matchLineSequencePatterns(patterns []Pattern, lines []string) bool { return true } -// lineSequencePatterns returns the Patterns for each part of a multi-line pattern. -func (p Pattern) lineSequencePatterns() ([]Pattern, error) { - patternLines, _ := p.linePatterns() - patterns := make([]Pattern, 0, len(patternLines)) - for _, patternLine := range patternLines { - pattern, err := NewPattern(patternLine) - if err != nil { - return nil, err - } - patterns = append(patterns, pattern) - } - - return patterns, nil -} - -// linePatterns returns trimmed pattern lines separated by an escaped newline. -func (p Pattern) linePatterns() ([]string, bool) { +// splitPatternLines returns trimmed pattern lines separated by an escaped newline. +func splitPatternLines(sourceGlob string) []string { var patternLines []string var line strings.Builder - hasSeparator := false trimLeft := false - for cursor := 0; cursor < len(p.sourceGlob); { - remaining := p.sourceGlob[cursor:] + for cursor := 0; cursor < len(sourceGlob); { + remaining := sourceGlob[cursor:] switch { case strings.HasPrefix(remaining, escapedLineSeparator): line.WriteString(escapedLineSeparator) @@ -183,14 +146,13 @@ func (p Pattern) linePatterns() ([]string, bool) { case strings.HasPrefix(remaining, lineSeparator): patternLines = append(patternLines, strings.TrimRightFunc(line.String(), unicode.IsSpace)) line.Reset() - hasSeparator = true trimLeft = true cursor += len(lineSeparator) case trimLeft: r, size := utf8.DecodeRuneInString(remaining) if !unicode.IsSpace(r) { trimLeft = false - line.WriteByte(p.sourceGlob[cursor]) + line.WriteByte(sourceGlob[cursor]) cursor++ continue @@ -198,13 +160,13 @@ func (p Pattern) linePatterns() ([]string, bool) { cursor += size default: trimLeft = false - line.WriteByte(p.sourceGlob[cursor]) + line.WriteByte(sourceGlob[cursor]) cursor++ } } patternLines = append(patternLines, line.String()) - return patternLines, hasSeparator + return patternLines } // String returns a string representation of Pattern. From ee3c1857dc340642b39098c7b444d320e2ca29aa Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 8 Jun 2026 17:03:49 +0200 Subject: [PATCH 4/5] Divide long method. --- embedding/parsing/instruction.go | 52 ++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index bc4831d..0147741 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -250,23 +250,44 @@ func (e Instruction) String() string { ) } -// Filters and returns a subset of input lines based on start, end, or line patterns. +// matchingLines filters and returns input lines based on start, end, or line patterns. // // lines — a list of strings representing the input lines. func (e Instruction) matchingLines(lines []string, codeFileReference string) ([]string, error) { + var selectedLines []string + var err error if e.LinePattern != nil { - startPosition, endPosition, err := e.matchPattern( - e.LinePattern, lines, 0, "line", codeFileReference, - ) - if err != nil { - return nil, err - } - requiredLines := lines[startPosition : endPosition+1] - indentation := indent.MaxCommonIndentation(requiredLines) + selectedLines, err = e.matchLinePattern(lines, codeFileReference) + } else { + selectedLines, err = e.matchRangePattern(lines, codeFileReference) + } + if err != nil { + return nil, err + } + + return removeCommonIndent(selectedLines), nil +} - return indent.CutIndent(requiredLines, indentation), nil +// matchLinePattern returns the source lines matched by the instruction line pattern. +func (e Instruction) matchLinePattern( + lines []string, + codeFileReference string, +) ([]string, error) { + startPosition, endPosition, err := e.matchPattern( + e.LinePattern, lines, 0, "line", codeFileReference, + ) + if err != nil { + return nil, err } + return lines[startPosition : endPosition+1], nil +} + +// matchRangePattern returns the source lines matched by the instruction start/end patterns. +func (e Instruction) matchRangePattern( + lines []string, + codeFileReference string, +) ([]string, error) { startPosition := 0 if e.StartPattern != nil { var err error @@ -287,10 +308,15 @@ func (e Instruction) matchingLines(lines []string, codeFileReference string) ([] return nil, err } } - requiredLines := lines[startPosition : endPosition+1] - indentation := indent.MaxCommonIndentation(requiredLines) - return indent.CutIndent(requiredLines, indentation), nil + return lines[startPosition : endPosition+1], nil +} + +// removeCommonIndent removes shared indentation from the selected source lines. +func removeCommonIndent(lines []string) []string { + indentation := indent.MaxCommonIndentation(lines) + + return indent.CutIndent(lines, indentation) } // matchPattern returns the first line range that matches given pattern. From 9169457d611f7c29a1b997730fca0504e6a86d53 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 8 Jun 2026 21:01:43 +0200 Subject: [PATCH 5/5] Fix linting. --- embedding/parsing/instruction.go | 60 ++++++++++++++++++++++---------- embedding/parsing/pattern.go | 25 ++++++++++--- 2 files changed, 62 insertions(+), 23 deletions(-) diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index 0147741..7f98dbc 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -120,15 +120,7 @@ func NewInstruction( return Instruction{}, err } - startPattern, err := patternFromValue("start", startValue) - if err != nil { - return Instruction{}, err - } - endPattern, err := patternFromValue("end", endValue) - if err != nil { - return Instruction{}, err - } - linePattern, err := patternFromValue("line", lineValue) + patterns, err := parseInstructionPatterns(startValue, endValue, lineValue) if err != nil { return Instruction{}, err } @@ -136,9 +128,9 @@ func NewInstruction( return Instruction{ CodeFile: codeFile, Fragment: fragment, - StartPattern: startPattern, - EndPattern: endPattern, - LinePattern: linePattern, + StartPattern: patterns.start, + EndPattern: patterns.end, + LinePattern: patterns.line, CommentMode: commentMode, Configuration: config, }, nil @@ -160,17 +152,49 @@ func validateExclusiveAttributes(fragment string, start string, end string, line return nil } -// patternFromValue creates a Pattern pointer for a non-empty attribute value. -func patternFromValue(attribute string, value string) (*Pattern, error) { - if value == "" { - return nil, nil +// instructionPatterns holds the optional source-line patterns from instruction attributes. +type instructionPatterns struct { + start *Pattern + end *Pattern + line *Pattern +} + +// parseInstructionPatterns parses all optional source-line pattern attributes. +func parseInstructionPatterns(start string, end string, line string) (instructionPatterns, error) { + var patterns instructionPatterns + if start != "" { + pattern, err := parseInstructionPattern("start", start) + if err != nil { + return instructionPatterns{}, err + } + patterns.start = &pattern } + if end != "" { + pattern, err := parseInstructionPattern("end", end) + if err != nil { + return instructionPatterns{}, err + } + patterns.end = &pattern + } + if line != "" { + pattern, err := parseInstructionPattern("line", line) + if err != nil { + return instructionPatterns{}, err + } + patterns.line = &pattern + } + + return patterns, nil +} + +// parseInstructionPattern parses one non-empty source-line pattern attribute. +func parseInstructionPattern(attribute string, value string) (Pattern, error) { pattern, err := NewPattern(value) if err != nil { - return nil, fmt.Errorf("invalid %s pattern `%s`: %w", attribute, value, err) + return Pattern{}, fmt.Errorf("invalid %s pattern `%s`: %w", attribute, value, err) } - return &pattern, nil + return pattern, nil } // Content reads and returns the lines for specified fragment from the code. diff --git a/embedding/parsing/pattern.go b/embedding/parsing/pattern.go index 469882f..0edd4ab 100644 --- a/embedding/parsing/pattern.go +++ b/embedding/parsing/pattern.go @@ -34,7 +34,7 @@ import ( // sourceGlob — a glob-like string, e.g. "*main*" or "^main". type Pattern struct { sourceGlob string - matchers []glob.Glob + matchers []lineMatcher } const ( @@ -45,6 +45,11 @@ const ( escapedLineSeparator = `\\n` ) +// lineMatcher matches a single source line using the compiled glob pattern. +type lineMatcher struct { + compiled glob.Glob +} + // NewPattern creates a new Pattern based on provided glob string. // // The modified pattern is the original one, but enclosed with the "*" wildcards, @@ -64,7 +69,7 @@ const ( // Returns an error if any modified glob pattern cannot be compiled. func NewPattern(globString string) (Pattern, error) { patternLines := splitPatternLines(globString) - matchers := make([]glob.Glob, 0, len(patternLines)) + matchers := make([]lineMatcher, 0, len(patternLines)) for _, patternLine := range patternLines { matcher, err := compileLineMatcher(patternLine) if err != nil { @@ -80,7 +85,7 @@ func NewPattern(globString string) (Pattern, error) { } // compileLineMatcher compiles one source-line pattern into a glob matcher. -func compileLineMatcher(patternLine string) (glob.Glob, error) { +func compileLineMatcher(patternLine string) (lineMatcher, error) { pattern := patternLine startOfLine := strings.HasPrefix(patternLine, lineStart) @@ -100,7 +105,17 @@ func compileLineMatcher(patternLine string) (glob.Glob, error) { pattern = pattern[:lastIndex] } - return glob.Compile(pattern) + compiledGlob, err := glob.Compile(pattern) + if err != nil { + return lineMatcher{}, err + } + + return lineMatcher{compiled: compiledGlob}, nil +} + +// matches reports whether the source line matches the compiled pattern. +func (m lineMatcher) matches(line string) bool { + return m.compiled != nil && m.compiled.Match(line) } // FindIn returns the first source-line range matching the pattern. @@ -124,7 +139,7 @@ func (p Pattern) matchesAt(lines []string, start int) bool { return false } for i, matcher := range p.matchers { - if matcher == nil || !matcher.Match(lines[start+i]) { + if !matcher.matches(lines[start+i]) { return false } }