gg/file.go

310 lines
6.3 KiB
Go

/*
gg is a recursive grep written in Go, with some shortcuts for everyday use.
*/
package main
import (
"bytes"
"errors"
"fmt"
"strings"
"unicode"
"unicode/utf8"
)
const (
// longLine is the rune count at which a matching line is considered too
// long to print whole: a line with fewer than longLine runes is printed
// in full, a line with longLine runes or more is printed in a truncated
// form.
longLine = 256
// minifiedLine is the rune count at which a single line marks its file
// as minified: once any examined line reaches this many runes the file
// is counted as being minified.
minifiedLine = 1024
// bytesToExamine is the number of bytes at the start / end of each file
// to examine for binary or minified file detection. The end of the file
// is only examined when the file holds at least twice this many bytes.
bytesToExamine = 8192
)
// notPlainTextBehaviour selects what file() does with a file that is not
// plain text (i.e. one detected as binary or minified).
type notPlainTextBehaviour int

const (
	// notPlainTextShort reports the file with a single "... matches." line.
	notPlainTextShort notPlainTextBehaviour = iota
	// notPlainTextFull prints the file's matching lines like any other file.
	notPlainTextFull
	// notPlainTextSkip leaves the file out: file() returns without
	// searching it.
	notPlainTextSkip
)

// notPlainTextFlag wraps a notPlainTextBehaviour with String, Set and Type
// methods so that it can be read from and written to as text.
// NOTE(review): presumably registered as a command-line flag value elsewhere
// in the program - confirm against the flag setup code.
type notPlainTextFlag struct {
	b notPlainTextBehaviour
}

// String returns the textual name of the current behaviour ("short", "full"
// or "skip"), or "???" if the stored value is not one of the known
// behaviours.
func (nptf *notPlainTextFlag) String() string {
	names := [...]string{
		notPlainTextShort: "short",
		notPlainTextFull:  "full",
		notPlainTextSkip:  "skip",
	}
	if nptf.b < 0 || int(nptf.b) >= len(names) {
		return "???"
	}
	return names[nptf.b]
}

// Set parses s, which must be exactly "short", "full" or "skip", and stores
// the corresponding behaviour. Any other input leaves the stored behaviour
// untouched and returns an error.
func (nptf *notPlainTextFlag) Set(s string) error {
	behaviours := map[string]notPlainTextBehaviour{
		"short": notPlainTextShort,
		"full":  notPlainTextFull,
		"skip":  notPlainTextSkip,
	}
	parsed, ok := behaviours[s]
	if !ok {
		return errors.New("must be one of short|full|skip")
	}
	nptf.b = parsed
	return nil
}

// Type returns a fixed string listing the accepted values.
// NOTE(review): presumably used as the value placeholder in flag help
// output - confirm; the original author was unsure about it too.
func (nptf *notPlainTextFlag) Type() string {
	const accepted = "short|full|skip"
	return accepted
}
var (
// printedFull is set by file() when it prints a full file's matches
// (i.e. a header line, then the match lines). It is cleared when file()
// instead prints a one-line "Binary file <foo> matches." or
// "Minified file <foo> matches." summary. While it is set, file() prints
// an empty line before its next output, which gives a nice one-line
// separator after each full file while consecutive one-line summaries
// are shown compactly.
printedFull bool
)
// file searches data, the contents of the file at path, line by line and
// prints the matching lines to standard output.
//
// Files that notPlainText reports as binary or minified are treated as
// configured by binaryFile / minifiedFile (binary takes precedence when both
// apply): they are skipped without being searched, reported by a single
// "<Binary|Minified> file <path> matches." line as soon as the first match is
// found, or printed in full like any other file.
//
// For a file printed in full, a header naming the file is written before the
// first matching line, and each matching line is written with its line
// number and with the first match found by findMatches highlighted. Lines of
// longLine runes or more are abbreviated: at most 64 runes are kept on each
// side of the match, and a match longer than 64 runes is reduced to its
// first and last 32 runes; every removed stretch is replaced by a truncation
// marker and a count of the removed runes.
//
// A segment of exactly 64 runes is printed whole. (Previously such a segment
// took the truncating path and was decorated with a marker and a count of
// zero even though nothing had been removed.)
//
// file reads and updates the package-level printedFull to decide whether an
// empty separator line is needed before its output.
func file(path string, data []byte) {
	const (
		// contextRunes is the most runes shown of the text before the
		// match, of the match itself, and of the text after the match when
		// a long line is abbreviated.
		contextRunes = 64
		// matchEdgeRunes is how many runes are kept at each end of a match
		// that is too long to be shown whole.
		matchEdgeRunes = contextRunes / 2
	)

	// short is non-empty when the file is only to be reported by a one-line
	// summary; it holds the word that starts that summary.
	var short string
	isBinary, isMinified := notPlainText(data)
	switch {
	case isBinary:
		switch binaryFile.b {
		case notPlainTextShort:
			short = "Binary"
		case notPlainTextSkip:
			return
		}
	case isMinified:
		switch minifiedFile.b {
		case notPlainTextShort:
			short = "Minified"
		case notPlainTextSkip:
			return
		}
	}

	var (
		lineNum       int
		printedHeader bool
		b, b2         strings.Builder
	)

	// writeMatch appends s to the output line, escaped and highlighted as
	// matched text. b2 is scratch space for the escaped form.
	writeMatch := func(s []byte) {
		b2.Reset()
		escape(&b2, s)
		b.WriteString(display.Match(b2.String()).String())
	}

	// split into lines
	for len(data) > 0 {
		lineNum++
		var line []byte
		if eol := bytes.IndexByte(data, '\n'); eol == -1 {
			// final line without a terminating newline
			line, data = data, nil
		} else {
			line, data = data[:eol], data[eol+1:]
		}

		loc := findMatches(line)
		if loc == nil {
			continue
		}

		if !printedHeader {
			if short != "" {
				// One-line summary: the first match is all we need.
				if printedFull {
					fmt.Println("")
					printedFull = false
				}
				fmt.Printf("%s file %s matches.\n", short, path)
				return
			}
			if printedFull {
				fmt.Println("")
			}
			printedFull = true
			fmt.Println(display.Filename(path))
			printedHeader = true
		}

		b.Reset()
		fmt.Fprintf(&b, "%4d: ", display.LineNumber(lineNum))
		before, matched, after := line[:loc[0]], line[loc[0]:loc[1]], line[loc[1]:]

		if utf8.RuneCount(line) < longLine {
			escape(&b, before)
			writeMatch(matched)
			escape(&b, after)
		} else {
			// Text before the match: keep its tail.
			if n := utf8.RuneCount(before); n <= contextRunes {
				escape(&b, before)
			} else {
				b.WriteString(display.TruncatedChars(n - contextRunes).String())
				b.WriteString(display.TruncatedMarker().String())
				escape(&b, trailingRunes(before, contextRunes))
			}

			// The match itself: keep both ends.
			if n := utf8.RuneCount(matched); n <= contextRunes {
				writeMatch(matched)
			} else {
				writeMatch(leadingRunes(matched, matchEdgeRunes))
				b.WriteString(display.TruncatedMarker().String())
				b.WriteString(display.TruncatedChars(n - contextRunes).String())
				b.WriteString(display.TruncatedMarker().String())
				writeMatch(trailingRunes(matched, matchEdgeRunes))
			}

			// Text after the match: keep its head.
			if n := utf8.RuneCount(after); n <= contextRunes {
				escape(&b, after)
			} else {
				escape(&b, leadingRunes(after, contextRunes))
				b.WriteString(display.TruncatedMarker().String())
				b.WriteString(display.TruncatedChars(n - contextRunes).String())
			}
		}
		b.WriteRune('\n')
		fmt.Print(b.String())
	}
}

// leadingRunes returns the prefix of s that holds its first n runes, or all
// of s if it has fewer. Each byte of invalid UTF-8 counts as one rune.
func leadingRunes(s []byte, n int) []byte {
	end := 0
	for i := 0; i < n; i++ {
		_, size := utf8.DecodeRune(s[end:])
		end += size
	}
	return s[:end]
}

// trailingRunes returns the suffix of s that holds its last n runes, or all
// of s if it has fewer. Each byte of invalid UTF-8 counts as one rune.
func trailingRunes(s []byte, n int) []byte {
	start := len(s)
	for i := 0; i < n; i++ {
		_, size := utf8.DecodeLastRune(s[:start])
		start -= size
	}
	return s[start:]
}
// notPlainText reports whether data looks like binary content and whether it
// looks minified, judging by the start of the data and, for data of at least
// 2*bytesToExamine bytes whose start did not look binary, also by its final
// bytesToExamine bytes.
func notPlainText(data []byte) (isBinary, isMinified bool) {
	headBinary, headMinified := notPlainTextAux(data)
	switch {
	case headBinary:
		// already known to be binary; no need to look further
		return headBinary, headMinified
	case len(data) < 2*bytesToExamine:
		// too small for the trailer to be worth a separate look
		return headBinary, headMinified
	}

	// Some files, like .a files, start with a header that passes for plain
	// text, but their trailer can be expected to be binary.
	tail := data[len(data)-bytesToExamine:]

	// The cut may have landed inside a multi-byte character: skip up to
	// five bytes looking for one that can start a UTF-8 sequence.
	skip := 0
	for skip < 5 && !utf8.RuneStart(tail[skip]) {
		skip++
	}

	tailBinary, tailMinified := notPlainTextAux(tail[skip:])
	// headBinary is false here, so the tail alone decides isBinary.
	return tailBinary, headMinified || tailMinified
}
// notPlainTextAux scans the runes at the start of data, stopping once
// bytesToExamine bytes have been consumed or the data runs out.
//
// isBinary is reported, and the scan stops at once, on the first byte that
// is not valid UTF-8 or the first control character below ' ' other than
// newline, carriage return, tab and vertical tab. isMinified is reported if
// any line seen before the scan stopped reached minifiedLine runes.
func notPlainTextAux(data []byte) (isBinary, isMinified bool) {
	runesOnLine := 0
	for remaining := bytesToExamine; remaining > 0; {
		r, size := utf8.DecodeRune(data)
		if size == 0 {
			// ran out of data
			break
		}
		if r == utf8.RuneError && size == 1 {
			// invalid UTF-8
			return true, isMinified
		}

		switch r {
		case '\n':
			// a new line starts; the increment below brings the count to 0
			runesOnLine = -1
		case '\r', '\t', '\v':
			// control characters that are common in plain text
		default:
			if r < ' ' {
				// any other control character is not expected in text
				return true, isMinified
			}
		}

		data = data[size:]
		remaining -= size
		runesOnLine++
		if runesOnLine >= minifiedLine {
			isMinified = true
		}
	}
	return false, isMinified
}
// findMatches tries each pattern in regexps in order against data and
// returns the location of the leftmost match of the first pattern that
// matches, as a two-element [start, end) slice of byte offsets into data.
// It returns nil if no pattern matches.
func findMatches(data []byte) (loc []int) {
	for i := range regexps {
		if found := regexps[i].FindIndex(data); found != nil {
			return found
		}
	}
	return nil
}
// escape appends a display-safe rendering of s to b. Tabs and printable
// runes are copied through unchanged; a byte that is not valid UTF-8, a
// carriage return, and any other unprintable rune are each replaced by the
// corresponding placeholder supplied by display.
func escape(b *strings.Builder, s []byte) {
	for pos := 0; pos < len(s); {
		r, width := utf8.DecodeRune(s[pos:])
		pos += width

		if r == utf8.RuneError && width == 1 {
			// width 1 distinguishes a bad byte from an encoded U+FFFD
			b.WriteString(display.BadUTF8Char().String())
			continue
		}
		if r == '\r' {
			b.WriteString(display.CarriageReturn().String())
			continue
		}
		if r == '\t' || unicode.IsPrint(r) {
			b.WriteRune(r)
			continue
		}
		b.WriteString(display.UnprintableChar().String())
	}
}