Skip to content

Commit c0f3544

Browse files
authored
Merge pull request #742 from ChrisJr404/feature/read-file-split
refactor(file): composable Lines iterator; deprecate channel-based ReadFile helpers
2 parents e122ca9 + 4ce2b0a commit c0f3544

3 files changed

Lines changed: 338 additions & 38 deletions

File tree

file/file.go

Lines changed: 44 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -200,83 +200,89 @@ func HasStdin() bool {
200200
return isPipedFromChrDev || isPipedFromFIFO
201201
}
202202

203-
// ReadFileWithReader and stream on a channel
203+
// ReadFileWithReader streams r line by line on a channel.
204+
//
205+
// Deprecated: use LinesReader, which returns an iter.Seq2[string, error] and
206+
// surfaces scanner errors. Equivalent invocation:
207+
//
208+
// for line, err := range fileutil.LinesReader(r) { ... }
204209
func ReadFileWithReader(r io.Reader) (chan string, error) {
205210
out := make(chan string)
206211
go func() {
207212
defer close(out)
208-
scanner := bufio.NewScanner(r)
209-
for scanner.Scan() {
210-
out <- scanner.Text()
213+
for line, err := range LinesReader(r) {
214+
if err != nil {
215+
return
216+
}
217+
out <- line
211218
}
212219
}()
213-
214220
return out, nil
215221
}
216222

217-
// ReadFileWithReader with specific buffer size and stream on a channel
223+
// ReadFileWithReaderAndBufferSize streams r line by line on a channel using
224+
// the given scanner buffer size.
225+
//
226+
// Deprecated: use LinesReader with WithBufferSize. Equivalent invocation:
227+
//
228+
// for line, err := range fileutil.LinesReader(r, fileutil.WithBufferSize(n)) { ... }
218229
func ReadFileWithReaderAndBufferSize(r io.Reader, maxCapacity int) (chan string, error) {
219230
out := make(chan string)
220231
go func() {
221232
defer close(out)
222-
scanner := bufio.NewScanner(r)
223-
buf := make([]byte, maxCapacity)
224-
scanner.Buffer(buf, maxCapacity)
225-
for scanner.Scan() {
226-
out <- scanner.Text()
233+
for line, err := range LinesReader(r, WithBufferSize(maxCapacity)) {
234+
if err != nil {
235+
return
236+
}
237+
out <- line
227238
}
228239
}()
229-
230240
return out, nil
231241
}
232242

233-
// ReadFile with filename
243+
// ReadFile streams the file at filename line by line on a channel.
244+
//
245+
// Deprecated: use Lines, which returns an iter.Seq2[string, error] and
246+
// surfaces open / scanner errors. Equivalent invocation:
247+
//
248+
// for line, err := range fileutil.Lines(filename) { ... }
234249
func ReadFile(filename string) (chan string, error) {
235250
if !FileExists(filename) {
236251
return nil, errors.New("file doesn't exist")
237252
}
238253
out := make(chan string)
239254
go func() {
240255
defer close(out)
241-
f, err := os.Open(filename)
242-
if err != nil {
243-
return
244-
}
245-
defer func() {
246-
_ = f.Close()
247-
}()
248-
scanner := bufio.NewScanner(f)
249-
for scanner.Scan() {
250-
out <- scanner.Text()
256+
for line, err := range Lines(filename) {
257+
if err != nil {
258+
return
259+
}
260+
out <- line
251261
}
252262
}()
253-
254263
return out, nil
255264
}
256265

257-
// ReadFile with filename and specific buffer size
266+
// ReadFileWithBufferSize streams the file at filename line by line on a
267+
// channel using the given scanner buffer size.
268+
//
269+
// Deprecated: use Lines with WithBufferSize. Equivalent invocation:
270+
//
271+
// for line, err := range fileutil.Lines(filename, fileutil.WithBufferSize(n)) { ... }
258272
func ReadFileWithBufferSize(filename string, maxCapacity int) (chan string, error) {
259273
if !FileExists(filename) {
260274
return nil, errors.New("file doesn't exist")
261275
}
262276
out := make(chan string)
263277
go func() {
264278
defer close(out)
265-
f, err := os.Open(filename)
266-
if err != nil {
267-
return
268-
}
269-
defer func() {
270-
_ = f.Close()
271-
}()
272-
scanner := bufio.NewScanner(f)
273-
buf := make([]byte, maxCapacity)
274-
scanner.Buffer(buf, maxCapacity)
275-
for scanner.Scan() {
276-
out <- scanner.Text()
279+
for line, err := range Lines(filename, WithBufferSize(maxCapacity)) {
280+
if err != nil {
281+
return
282+
}
283+
out <- line
277284
}
278285
}()
279-
280286
return out, nil
281287
}
282288

file/lines.go

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
package fileutil
2+
3+
import (
4+
"bufio"
5+
"io"
6+
"iter"
7+
"os"
8+
"strings"
9+
)
10+
11+
// LineOption configures the line iterator returned by Lines / LinesReader.
// Options are applied in the order they are passed, so a later option
// overrides an earlier one that sets the same field.
type LineOption func(*lineConfig)

// lineConfig collects the settings selected through LineOption values. Its
// zero value leaves every transform disabled.
type lineConfig struct {
	// bufferSize is the scanner buffer size; values <= 0 keep the
	// bufio.Scanner defaults.
	bufferSize int
	// splitSet holds the separator runes used when hasSplit is true.
	splitSet string
	// hasSplit reports whether WithSplit was applied.
	hasSplit bool
	// trimSpace enables trimming surrounding whitespace from each value.
	trimSpace bool
	// skipEmpty enables dropping values that are empty after trimming.
	skipEmpty bool
	// filter, when non-nil, must return true for a value to be emitted.
	filter func(string) bool
}
22+
23+
// WithBufferSize sets the underlying bufio.Scanner buffer. A non-positive
24+
// value leaves the scanner default (64 KiB) in place.
25+
func WithBufferSize(n int) LineOption {
26+
return func(c *lineConfig) { c.bufferSize = n }
27+
}
28+
29+
// WithSplit splits each scanned line on any of the given runes
30+
// (strings.FieldsFunc semantics: runs of separator runes are collapsed and
31+
// empty pieces are not produced). Each piece becomes its own emitted value.
32+
func WithSplit(separators ...rune) LineOption {
33+
return func(c *lineConfig) {
34+
c.hasSplit = true
35+
c.splitSet = string(separators)
36+
}
37+
}
38+
39+
// WithTrimSpace trims leading/trailing whitespace from each emitted value.
40+
func WithTrimSpace() LineOption {
41+
return func(c *lineConfig) { c.trimSpace = true }
42+
}
43+
44+
// WithSkipEmpty drops empty values, evaluated after WithTrimSpace.
45+
func WithSkipEmpty() LineOption {
46+
return func(c *lineConfig) { c.skipEmpty = true }
47+
}
48+
49+
// WithFilter keeps only values for which keep returns true. The filter runs
50+
// after split / trim / skip-empty so it sees the final value that would be
51+
// yielded.
52+
func WithFilter(keep func(string) bool) LineOption {
53+
return func(c *lineConfig) { c.filter = keep }
54+
}
55+
56+
// Lines streams lines from the file at filename, applying any configured
57+
// transforms. With no options it emits raw scanner lines.
58+
//
59+
// The file is opened lazily on first iteration and closed when iteration
60+
// ends (including via break). Open and scanner errors are surfaced as a
61+
// final ("", err) pair, after which iteration stops.
62+
//
63+
// Typical use:
64+
//
65+
// for v, err := range fileutil.Lines(path,
66+
// fileutil.WithSplit(','),
67+
// fileutil.WithTrimSpace(),
68+
// fileutil.WithSkipEmpty(),
69+
// ) {
70+
// if err != nil { return err }
71+
// // use v
72+
// }
73+
func Lines(filename string, opts ...LineOption) iter.Seq2[string, error] {
74+
return func(yield func(string, error) bool) {
75+
f, err := os.Open(filename)
76+
if err != nil {
77+
yield("", err)
78+
return
79+
}
80+
defer func() { _ = f.Close() }()
81+
scanLines(f, opts, yield)
82+
}
83+
}
84+
85+
// LinesReader is the io.Reader variant of Lines. The reader is consumed but
86+
// not closed; the caller owns its lifecycle.
87+
func LinesReader(r io.Reader, opts ...LineOption) iter.Seq2[string, error] {
88+
return func(yield func(string, error) bool) {
89+
scanLines(r, opts, yield)
90+
}
91+
}
92+
93+
func scanLines(r io.Reader, opts []LineOption, yield func(string, error) bool) {
94+
var cfg lineConfig
95+
for _, o := range opts {
96+
o(&cfg)
97+
}
98+
scanner := bufio.NewScanner(r)
99+
if cfg.bufferSize > 0 {
100+
scanner.Buffer(make([]byte, cfg.bufferSize), cfg.bufferSize)
101+
}
102+
for scanner.Scan() {
103+
line := scanner.Text()
104+
if !cfg.hasSplit {
105+
if !emitLine(line, &cfg, yield) {
106+
return
107+
}
108+
continue
109+
}
110+
for _, piece := range strings.FieldsFunc(line, func(r rune) bool {
111+
return strings.ContainsRune(cfg.splitSet, r)
112+
}) {
113+
if !emitLine(piece, &cfg, yield) {
114+
return
115+
}
116+
}
117+
}
118+
if err := scanner.Err(); err != nil {
119+
yield("", err)
120+
}
121+
}
122+
123+
func emitLine(v string, cfg *lineConfig, yield func(string, error) bool) bool {
124+
if cfg.trimSpace {
125+
v = strings.TrimSpace(v)
126+
}
127+
if cfg.skipEmpty && v == "" {
128+
return true
129+
}
130+
if cfg.filter != nil && !cfg.filter(v) {
131+
return true
132+
}
133+
return yield(v, nil)
134+
}

0 commit comments

Comments
 (0)