2023-05-13 12:10:05 +01:00
|
|
|
/*
|
|
|
|
gg is a recursive grep written in Go, with some shortcuts for everyday use.
|
|
|
|
*/
|
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"strings"
|
|
|
|
"unicode"
|
|
|
|
"unicode/utf8"
|
|
|
|
)
|
|
|
|
|
2023-05-13 13:17:54 +01:00
|
|
|
const (
	// longLine is the length, in runes, from which a matching line is
	// considered too long to print whole: a line of longLine runes or more
	// is printed in a truncated form.
	longLine = 256

	// minifiedLine is the length, in runes, from which a single line marks
	// its file as minified: if any examined line reaches this many runes
	// the file is counted as minified.
	minifiedLine = 1024

	// bytesToExamine is the number of bytes at the start / end of each
	// file that are examined for binary or minified file detection.
	bytesToExamine = 8 * 1024
)
|
|
|
|
|
2023-05-13 12:10:05 +01:00
|
|
|
// notPlainTextBehaviour selects what to do with a file that is not plain
// text (one detected as binary or minified).
type notPlainTextBehaviour int

// The available behaviours. notPlainTextShort is the zero value.
const (
	notPlainTextShort notPlainTextBehaviour = iota
	notPlainTextFull
	notPlainTextSkip
)

// notPlainTextFlag is a command line flag value holding a
// notPlainTextBehaviour. Its String / Set / Type methods have the shape of
// a flag value interface (presumably pflag.Value — confirm against the flag
// registration, which is not in this file).
type notPlainTextFlag struct {
	b notPlainTextBehaviour
}

// String returns the textual name of the current behaviour ("short",
// "full" or "skip"), or "???" if the stored value is not one of the known
// behaviours.
func (nptf *notPlainTextFlag) String() string {
	names := [...]string{
		notPlainTextShort: "short",
		notPlainTextFull:  "full",
		notPlainTextSkip:  "skip",
	}
	if i := int(nptf.b); i >= 0 && i < len(names) {
		return names[i]
	}
	return "???"
}

// Set parses s as a behaviour name and stores the result. Any value other
// than "short", "full" or "skip" yields an error and leaves the flag
// unchanged.
func (nptf *notPlainTextFlag) Set(s string) error {
	behaviours := map[string]notPlainTextBehaviour{
		"short": notPlainTextShort,
		"full":  notPlainTextFull,
		"skip":  notPlainTextSkip,
	}
	chosen, known := behaviours[s]
	if !known {
		return errors.New("must be one of short|full|skip")
	}
	nptf.b = chosen
	return nil
}

// Type returns a description of the values the flag accepts.
func (nptf *notPlainTextFlag) Type() string {
	const accepted = "short|full|skip"
	return accepted
}
|
|
|
|
|
2023-07-07 11:21:41 +01:00
|
|
|
// printedFull remembers what file() printed last. file() sets it when it
// prints a file's matches in full (a header line followed by the matching
// lines) and clears it when it prints a one-line "<kind> file <path>
// matches." summary instead. While it is set, the next output from file()
// is preceded by an empty line, which gives a one-line separator between
// full files while keeping consecutive summary lines compact.
var printedFull bool
|
|
|
|
|
2023-05-13 12:10:05 +01:00
|
|
|
func file(path string, data []byte) {
|
|
|
|
var short string
|
2023-05-13 13:17:54 +01:00
|
|
|
isBinary, isMinified := notPlainText(data)
|
2023-05-13 12:10:05 +01:00
|
|
|
switch {
|
2023-05-13 13:17:54 +01:00
|
|
|
case isBinary:
|
2023-05-13 12:10:05 +01:00
|
|
|
switch binaryFile.b {
|
|
|
|
case notPlainTextShort:
|
|
|
|
short = "Binary"
|
|
|
|
case notPlainTextSkip:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-05-13 13:17:54 +01:00
|
|
|
case isMinified:
|
2023-05-13 12:10:05 +01:00
|
|
|
switch minifiedFile.b {
|
|
|
|
case notPlainTextShort:
|
|
|
|
short = "Minified"
|
|
|
|
case notPlainTextSkip:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var (
|
|
|
|
lineNum int
|
|
|
|
printedHeader bool
|
|
|
|
b, b2 strings.Builder
|
|
|
|
)
|
|
|
|
|
|
|
|
// split into lines
|
|
|
|
for len(data) > 0 {
|
|
|
|
eol := bytes.IndexByte(data, '\n')
|
|
|
|
lineNum++
|
|
|
|
var line []byte
|
|
|
|
if eol == -1 {
|
|
|
|
line = data
|
|
|
|
data = nil
|
|
|
|
} else {
|
|
|
|
line = data[:eol]
|
|
|
|
data = data[eol+1:]
|
|
|
|
}
|
|
|
|
|
|
|
|
loc := findMatches(line)
|
|
|
|
if loc == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
switch {
|
|
|
|
case !printedHeader && short != "":
|
|
|
|
if printedFull {
|
|
|
|
fmt.Println("")
|
|
|
|
printedFull = false
|
|
|
|
}
|
|
|
|
fmt.Printf("%s file %s matches.\n", short, path)
|
|
|
|
return
|
|
|
|
|
|
|
|
case !printedHeader:
|
|
|
|
if printedFull {
|
|
|
|
fmt.Println("")
|
|
|
|
}
|
|
|
|
printedFull = true
|
|
|
|
fmt.Println(display.Filename(path))
|
|
|
|
printedHeader = true
|
|
|
|
}
|
|
|
|
|
|
|
|
b.Reset()
|
|
|
|
fmt.Fprintf(&b, "%4d: ", display.LineNumber(lineNum))
|
|
|
|
|
2023-05-13 12:45:29 +01:00
|
|
|
before := line[0:loc[0]]
|
|
|
|
matched := line[loc[0]:loc[1]]
|
|
|
|
after := line[loc[1]:]
|
|
|
|
if utf8.RuneCount(line) < longLine {
|
|
|
|
b2.Reset()
|
|
|
|
escape(&b2, matched)
|
|
|
|
|
|
|
|
escape(&b, before)
|
|
|
|
b.WriteString(display.Match(b2.String()).String())
|
|
|
|
escape(&b, after)
|
2023-05-13 12:10:05 +01:00
|
|
|
} else {
|
2023-05-13 12:45:29 +01:00
|
|
|
n := utf8.RuneCount(before)
|
|
|
|
if n < 64 {
|
|
|
|
escape(&b, before)
|
|
|
|
} else {
|
|
|
|
var nbytes int
|
|
|
|
for i := 0; i < 64; i++ {
|
|
|
|
_, s := utf8.DecodeLastRune(before[:len(before)-nbytes])
|
|
|
|
nbytes += s
|
2023-05-13 12:10:05 +01:00
|
|
|
}
|
2023-05-13 12:45:29 +01:00
|
|
|
b.WriteString(display.TruncatedChars(n - 64).String())
|
|
|
|
b.WriteString(display.TruncatedMarker().String())
|
|
|
|
escape(&b, before[len(before)-nbytes:])
|
2023-05-13 12:10:05 +01:00
|
|
|
}
|
|
|
|
|
2023-05-13 12:45:29 +01:00
|
|
|
n = utf8.RuneCount(matched)
|
|
|
|
if n < 64 {
|
|
|
|
b2.Reset()
|
|
|
|
escape(&b2, matched)
|
|
|
|
b.WriteString(display.Match(b2.String()).String())
|
2023-05-13 12:10:05 +01:00
|
|
|
} else {
|
2023-05-13 12:45:29 +01:00
|
|
|
var nbytes int
|
|
|
|
for i := 0; i < 32; i++ {
|
|
|
|
_, s := utf8.DecodeRune(matched[nbytes:])
|
|
|
|
nbytes += s
|
2023-05-13 12:10:05 +01:00
|
|
|
}
|
2023-05-13 12:45:29 +01:00
|
|
|
b2.Reset()
|
|
|
|
escape(&b2, matched[:nbytes])
|
|
|
|
b.WriteString(display.Match(b2.String()).String())
|
|
|
|
|
|
|
|
b.WriteString(display.TruncatedMarker().String())
|
|
|
|
b.WriteString(display.TruncatedChars(n - 64).String())
|
2023-05-13 12:10:05 +01:00
|
|
|
b.WriteString(display.TruncatedMarker().String())
|
|
|
|
|
2023-05-13 12:45:29 +01:00
|
|
|
nbytes = 0
|
|
|
|
for i := 0; i < 32; i++ {
|
|
|
|
_, s := utf8.DecodeLastRune(matched[:len(matched)-nbytes])
|
|
|
|
nbytes += s
|
2023-05-13 12:10:05 +01:00
|
|
|
}
|
2023-05-13 12:45:29 +01:00
|
|
|
b2.Reset()
|
|
|
|
escape(&b2, matched[len(matched)-nbytes:])
|
|
|
|
b.WriteString(display.Match(b2.String()).String())
|
2023-05-13 12:10:05 +01:00
|
|
|
}
|
|
|
|
|
2023-05-13 12:45:29 +01:00
|
|
|
n = utf8.RuneCount(after)
|
|
|
|
if n < 64 {
|
|
|
|
escape(&b, after)
|
|
|
|
} else {
|
|
|
|
var nbytes int
|
|
|
|
for i := 0; i < 64; i++ {
|
|
|
|
_, s := utf8.DecodeRune(after[nbytes:])
|
|
|
|
nbytes += s
|
|
|
|
}
|
|
|
|
escape(&b, after[:nbytes])
|
|
|
|
b.WriteString(display.TruncatedMarker().String())
|
|
|
|
b.WriteString(display.TruncatedChars(n - 64).String())
|
|
|
|
}
|
2023-05-13 12:10:05 +01:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
b.WriteRune('\n')
|
|
|
|
fmt.Print(b.String())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-13 13:17:54 +01:00
|
|
|
func notPlainText(data []byte) (isBinary, isMinified bool) {
|
|
|
|
// examine bytes at the start of the file for binary data
|
|
|
|
b, m := notPlainTextAux(data)
|
|
|
|
if b || len(data) < bytesToExamine*2 {
|
|
|
|
return b, m
|
|
|
|
}
|
2023-05-13 12:10:05 +01:00
|
|
|
|
2023-05-13 13:17:54 +01:00
|
|
|
// some files, like .a files, have a header which passes plaintext
|
|
|
|
// detection but we can expect the trailer to be binary
|
|
|
|
data = data[len(data)-bytesToExamine:]
|
|
|
|
for i := 0; i < 5; i++ { // attempt to align to UTF-8 char boundary
|
|
|
|
if utf8.RuneStart(data[0]) {
|
|
|
|
break
|
2023-05-13 12:10:05 +01:00
|
|
|
}
|
2023-05-13 13:17:54 +01:00
|
|
|
data = data[1:]
|
2023-05-13 12:10:05 +01:00
|
|
|
}
|
2023-05-13 13:17:54 +01:00
|
|
|
b2, m2 := notPlainTextAux(data)
|
|
|
|
return (b || b2), (m || m2)
|
2023-05-13 12:10:05 +01:00
|
|
|
}
|
|
|
|
|
2023-05-13 13:17:54 +01:00
|
|
|
func notPlainTextAux(data []byte) (isBinary, isMinified bool) {
|
|
|
|
n := bytesToExamine
|
2023-05-13 12:10:05 +01:00
|
|
|
var lineLength int
|
2023-05-13 13:17:54 +01:00
|
|
|
for n > 0 {
|
2023-05-13 12:10:05 +01:00
|
|
|
r, s := utf8.DecodeRune(data)
|
|
|
|
switch {
|
|
|
|
case s == 0:
|
|
|
|
// end of string
|
2023-05-13 13:17:54 +01:00
|
|
|
return
|
|
|
|
|
|
|
|
case s == 1 && r == utf8.RuneError:
|
|
|
|
// invalid UTF-8
|
|
|
|
isBinary = true
|
|
|
|
return
|
|
|
|
|
2023-05-13 12:10:05 +01:00
|
|
|
case r == '\n':
|
2023-05-13 13:17:54 +01:00
|
|
|
// newline
|
|
|
|
lineLength = -1
|
|
|
|
|
|
|
|
case r == '\r', r == '\t', r == '\v':
|
|
|
|
// valid control chars often present in text
|
|
|
|
|
|
|
|
case r < ' ':
|
|
|
|
// control chars not expected in plain text
|
|
|
|
isBinary = true
|
|
|
|
return
|
2023-05-13 12:10:05 +01:00
|
|
|
}
|
2023-05-13 13:17:54 +01:00
|
|
|
|
2023-05-13 12:10:05 +01:00
|
|
|
data = data[s:]
|
2023-05-13 13:17:54 +01:00
|
|
|
n -= s
|
|
|
|
lineLength++
|
|
|
|
if lineLength >= minifiedLine {
|
|
|
|
isMinified = true
|
|
|
|
}
|
2023-05-13 12:10:05 +01:00
|
|
|
}
|
2023-05-13 13:17:54 +01:00
|
|
|
return
|
2023-05-13 12:10:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func findMatches(data []byte) (loc []int) {
|
|
|
|
for _, re := range regexps {
|
|
|
|
loc := re.FindIndex(data)
|
|
|
|
if loc != nil {
|
|
|
|
return loc
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func escape(b *strings.Builder, s []byte) {
|
|
|
|
for len(s) > 0 {
|
|
|
|
r, size := utf8.DecodeRune(s)
|
|
|
|
s = s[size:]
|
|
|
|
|
|
|
|
switch {
|
|
|
|
case r == utf8.RuneError && size == 1:
|
|
|
|
b.WriteString(display.BadUTF8Char().String())
|
|
|
|
|
|
|
|
case r == '\r':
|
|
|
|
b.WriteString(display.CarriageReturn().String())
|
|
|
|
|
|
|
|
case r == '\t',
|
|
|
|
unicode.IsPrint(r):
|
|
|
|
b.WriteRune(r)
|
|
|
|
|
|
|
|
default:
|
|
|
|
b.WriteString(display.UnprintableChar().String())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|