/* gg is a recursive grep written in Go, with some shortcuts for everyday use. */ package main import ( "bytes" "errors" "fmt" "strings" "unicode" "unicode/utf8" ) const ( // if a matching line is longer than longLine runes then we will print // a truncated form longLine = 256 // if any line in the file is detected to be longer than this many runes // we will count the file as being minified minifiedLine = 1024 // number of bytes at the start / end of each file to examine for binary // or minified file detection. bytesToExamine = 8192 ) type notPlainTextBehaviour int const ( notPlainTextShort notPlainTextBehaviour = iota notPlainTextFull notPlainTextSkip ) type notPlainTextFlag struct { b notPlainTextBehaviour } func (nptf *notPlainTextFlag) String() string { switch nptf.b { case notPlainTextShort: return "short" case notPlainTextFull: return "full" case notPlainTextSkip: return "skip" } return "???" } func (nptf *notPlainTextFlag) Set(s string) error { switch s { case "short": nptf.b = notPlainTextShort case "full": nptf.b = notPlainTextFull case "skip": nptf.b = notPlainTextSkip default: return errors.New("must be one of short|full|skip") } return nil } func (nptf *notPlainTextFlag) Type() string { return "short|full|skip" // ??? } var ( // printedFull is set by file() if it prints a full file's matches // (i.e. a header line, then the match lines). It is cleared if we // just printed a "Binary file matches" line. It lets us have a // nice one-line separator between full files, but show the binary // matches compactly. printedFull bool ) func file(path string, data []byte) { var short string isBinary, isMinified := notPlainText(data) switch { case isBinary: switch binaryFile.b { case notPlainTextShort: short = "Binary" case notPlainTextSkip: return } case isMinified: switch minifiedFile.b { case notPlainTextShort: short = "Minified" case notPlainTextSkip: return } } var ( lineNum int printedHeader bool b, b2 strings.Builder ) // split into lines for len(data) > 0 { eol := bytes.IndexByte(data, '\n') lineNum++ var line []byte if eol == -1 { line = data data = nil } else { line = data[:eol] data = data[eol+1:] } loc := findMatches(line) if loc == nil { continue } switch { case !printedHeader && short != "": if printedFull { fmt.Println("") printedFull = false } fmt.Printf("%s file %s matches.\n", short, path) return case !printedHeader: if printedFull { fmt.Println("") } printedFull = true fmt.Println(display.Filename(path)) printedHeader = true } b.Reset() fmt.Fprintf(&b, "%4d: ", display.LineNumber(lineNum)) before := line[0:loc[0]] matched := line[loc[0]:loc[1]] after := line[loc[1]:] if utf8.RuneCount(line) < longLine { b2.Reset() escape(&b2, matched) escape(&b, before) b.WriteString(display.Match(b2.String()).String()) escape(&b, after) } else { n := utf8.RuneCount(before) if n < 64 { escape(&b, before) } else { var nbytes int for i := 0; i < 64; i++ { _, s := utf8.DecodeLastRune(before[:len(before)-nbytes]) nbytes += s } b.WriteString(display.TruncatedChars(n - 64).String()) b.WriteString(display.TruncatedMarker().String()) escape(&b, before[len(before)-nbytes:]) } n = utf8.RuneCount(matched) if n < 64 { b2.Reset() escape(&b2, matched) b.WriteString(display.Match(b2.String()).String()) } else { var nbytes int for i := 0; i < 32; i++ { _, s := utf8.DecodeRune(matched[nbytes:]) nbytes += s } b2.Reset() escape(&b2, matched[:nbytes]) b.WriteString(display.Match(b2.String()).String()) b.WriteString(display.TruncatedMarker().String()) b.WriteString(display.TruncatedChars(n - 64).String()) b.WriteString(display.TruncatedMarker().String()) nbytes = 0 for i := 0; i < 32; i++ { _, s := utf8.DecodeLastRune(matched[:len(matched)-nbytes]) nbytes += s } b2.Reset() escape(&b2, matched[len(matched)-nbytes:]) b.WriteString(display.Match(b2.String()).String()) } n = utf8.RuneCount(after) if n < 64 { escape(&b, after) } else { var nbytes int for i := 0; i < 64; i++ { _, s := utf8.DecodeRune(after[nbytes:]) nbytes += s } escape(&b, after[:nbytes]) b.WriteString(display.TruncatedMarker().String()) b.WriteString(display.TruncatedChars(n - 64).String()) } } b.WriteRune('\n') fmt.Print(b.String()) } } func notPlainText(data []byte) (isBinary, isMinified bool) { // examine bytes at the start of the file for binary data b, m := notPlainTextAux(data) if b || len(data) < bytesToExamine*2 { return b, m } // some files, like .a files, have a header which passes plaintext // detection but we can expect the trailer to be binary data = data[len(data)-bytesToExamine:] for i := 0; i < 5; i++ { // attempt to align to UTF-8 char boundary if utf8.RuneStart(data[0]) { break } data = data[1:] } b2, m2 := notPlainTextAux(data) return (b || b2), (m || m2) } func notPlainTextAux(data []byte) (isBinary, isMinified bool) { n := bytesToExamine var lineLength int for n > 0 { r, s := utf8.DecodeRune(data) switch { case s == 0: // end of string return case s == 1 && r == utf8.RuneError: // invalid UTF-8 isBinary = true return case r == '\n': // newline lineLength = -1 case r == '\r', r == '\t', r == '\v': // valid control chars often present in text case r < ' ': // control chars not expected in plain text isBinary = true return } data = data[s:] n -= s lineLength++ if lineLength >= minifiedLine { isMinified = true } } return } func findMatches(data []byte) (loc []int) { for _, re := range regexps { loc := re.FindIndex(data) if loc != nil { return loc } } return nil } func escape(b *strings.Builder, s []byte) { for len(s) > 0 { r, size := utf8.DecodeRune(s) s = s[size:] switch { case r == utf8.RuneError && size == 1: b.WriteString(display.BadUTF8Char().String()) case r == '\r': b.WriteString(display.CarriageReturn().String()) case r == '\t', unicode.IsPrint(r): b.WriteRune(r) default: b.WriteString(display.UnprintableChar().String()) } } }