gg/file.go

/*
gg is a recursive grep written in Go, with some shortcuts for everyday use.
*/
package main

import (
	"bytes"
	"errors"
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// Line-length thresholds, both measured in runes.
const (
	// longLine is the cut-off for printing a matching line whole: a
	// matching line with fewer runes than this is shown in full, any
	// other matching line is shown in a truncated form.
	longLine = 256

	// minifiedLine is the length at which a line marks its file as
	// minified: a single examined line reaching this many runes is
	// enough.
	minifiedLine = 1024
)

// bytesToExamine is the number of bytes at the start / end of each file
// that are inspected for binary or minified file detection.
const bytesToExamine = 8 * 1024

// notPlainTextBehaviour selects what file() does with a file that
// notPlainText reports as binary or minified.
type notPlainTextBehaviour int

const (
	// notPlainTextShort (the zero value) reports a match as a single
	// "<Kind> file <path> matches." line.
	notPlainTextShort notPlainTextBehaviour = iota

	// notPlainTextFull prints the file's matches like any other file.
	notPlainTextFull

	// notPlainTextSkip leaves the file out entirely.
	notPlainTextSkip
)

// notPlainTextFlag holds a notPlainTextBehaviour that can be read and set
// through its textual name: "short", "full" or "skip".
type notPlainTextFlag struct {
	b notPlainTextBehaviour
}

// String returns the name of the stored behaviour, or "???" when the stored
// value is not one of the three known behaviours.
func (f *notPlainTextFlag) String() string {
	names := [...]string{
		notPlainTextShort: "short",
		notPlainTextFull:  "full",
		notPlainTextSkip:  "skip",
	}
	if f.b < 0 || int(f.b) >= len(names) {
		return "???"
	}
	return names[f.b]
}

// Set stores the behaviour named by s. Any name other than "short", "full"
// or "skip" is rejected with an error and the stored behaviour is left as
// it was.
func (f *notPlainTextFlag) Set(s string) error {
	byName := map[string]notPlainTextBehaviour{
		"short": notPlainTextShort,
		"full":  notPlainTextFull,
		"skip":  notPlainTextSkip,
	}
	mode, known := byName[s]
	if !known {
		return errors.New("must be one of short|full|skip")
	}
	f.b = mode
	return nil
}

// Type returns the fixed string "short|full|skip".
// NOTE(review): presumably this completes a flag-value interface (such as
// pflag's Value) and the string is shown in usage text; confirm against the
// code that registers the flag.
func (f *notPlainTextFlag) Type() string {
	return "short|full|skip"
}

// printedFull records whether the most recent thing file() printed was a
// full listing, i.e. a filename header followed by that file's matching
// lines. file() sets it when it prints such a header, and clears it again
// when it prints a one-line "<Kind> file <path> matches." summary straight
// after a full listing. The effect is a blank separator line after every
// full listing, while consecutive one-line summaries stay compact.
var printedFull bool

// file searches data, the contents of the file at path, one line at a time
// and prints what it finds to standard output.
//
// The data is first classified with notPlainText. For binary data the
// binaryFile setting applies; otherwise, for minified data, the
// minifiedFile setting applies:
//
//   - notPlainTextSkip: the file is not searched and nothing is printed;
//   - notPlainTextShort: at the first matching line a single
//     "<Kind> file <path> matches." line is printed and the search stops;
//   - notPlainTextFull, and every plain text file: a filename header is
//     printed before the first matching line, and each matching line
//     follows with its line number and the first match found on it
//     highlighted.
//
// A matching line of longLine runes or more is abbreviated: at most 64
// runes of context are kept on each side of the match, and a match longer
// than 64 runes is reduced to its first and last 32 runes. Each cut is
// shown with a truncation marker and the number of runes left out.
//
// printedFull is read and updated so that a blank line separates a full
// listing from whatever is printed after it.
func file(path string, data []byte) {
	const (
		// contextRunes is the most unmatched text, in runes, shown on
		// each side of the match when a long line is abbreviated.
		contextRunes = 64

		// matchEdgeRunes is how many runes are kept at each end of a
		// match that is too long to show whole.
		matchEdgeRunes = 32
	)

	var short string
	isBinary, isMinified := notPlainText(data)
	switch {
	case isBinary:
		switch binaryFile.b {
		case notPlainTextShort:
			short = "Binary"
		case notPlainTextSkip:
			return
		}

	case isMinified:
		switch minifiedFile.b {
		case notPlainTextShort:
			short = "Minified"
		case notPlainTextSkip:
			return
		}
	}

	var (
		lineNum       int
		printedHeader bool
		b, b2         strings.Builder
	)

	// writeMatch appends s to b, escaped and styled as matched text. The
	// escaping goes through b2 so the styling wraps the escaped form.
	writeMatch := func(s []byte) {
		b2.Reset()
		escape(&b2, s)
		b.WriteString(display.Match(b2.String()).String())
	}

	// split into lines
	for len(data) > 0 {
		eol := bytes.IndexByte(data, '\n')
		lineNum++
		var line []byte
		if eol == -1 {
			// final line without a trailing newline
			line = data
			data = nil
		} else {
			line = data[:eol]
			data = data[eol+1:]
		}

		loc := findMatches(line)
		if loc == nil {
			continue
		}

		switch {
		case !printedHeader && short != "":
			// one-line summary: report the first match and stop
			if printedFull {
				fmt.Println()
				printedFull = false
			}
			fmt.Printf("%s file %s matches.\n", short, path)
			return

		case !printedHeader:
			if printedFull {
				fmt.Println()
			}
			printedFull = true
			fmt.Println(display.Filename(path))
			printedHeader = true
		}

		b.Reset()
		fmt.Fprintf(&b, "%4d: ", display.LineNumber(lineNum))

		before := line[:loc[0]]
		matched := line[loc[0]:loc[1]]
		after := line[loc[1]:]
		if utf8.RuneCount(line) < longLine {
			escape(&b, before)
			writeMatch(matched)
			escape(&b, after)
		} else {
			// Leading context: keep only the runes nearest the match.
			// The comparisons below use <= so that a segment of exactly
			// the limit is shown whole rather than with markers that
			// announce zero truncated runes.
			if n := utf8.RuneCount(before); n <= contextRunes {
				escape(&b, before)
			} else {
				keep := runeSuffixLen(before, contextRunes)
				b.WriteString(display.TruncatedChars(n - contextRunes).String())
				b.WriteString(display.TruncatedMarker().String())
				escape(&b, before[len(before)-keep:])
			}

			// The match itself: keep both ends, drop the middle.
			if n := utf8.RuneCount(matched); n <= 2*matchEdgeRunes {
				writeMatch(matched)
			} else {
				writeMatch(matched[:runePrefixLen(matched, matchEdgeRunes)])

				b.WriteString(display.TruncatedMarker().String())
				b.WriteString(display.TruncatedChars(n - 2*matchEdgeRunes).String())
				b.WriteString(display.TruncatedMarker().String())

				keep := runeSuffixLen(matched, matchEdgeRunes)
				writeMatch(matched[len(matched)-keep:])
			}

			// Trailing context: keep only the runes nearest the match.
			if n := utf8.RuneCount(after); n <= contextRunes {
				escape(&b, after)
			} else {
				escape(&b, after[:runePrefixLen(after, contextRunes)])
				b.WriteString(display.TruncatedMarker().String())
				b.WriteString(display.TruncatedChars(n - contextRunes).String())
			}
		}

		b.WriteRune('\n')
		fmt.Print(b.String())
	}
}

// runePrefixLen returns the number of bytes taken up by the first n runes
// of s, or len(s) if s holds fewer than n runes. A byte that is not valid
// UTF-8 counts as one rune, as it does for utf8.DecodeRune.
func runePrefixLen(s []byte, n int) int {
	var nbytes int
	for i := 0; i < n; i++ {
		_, size := utf8.DecodeRune(s[nbytes:])
		nbytes += size
	}
	return nbytes
}

// runeSuffixLen returns the number of bytes taken up by the last n runes of
// s, or len(s) if s holds fewer than n runes. A byte that is not valid
// UTF-8 counts as one rune, as it does for utf8.DecodeLastRune.
func runeSuffixLen(s []byte, n int) int {
	var nbytes int
	for i := 0; i < n; i++ {
		_, size := utf8.DecodeLastRune(s[:len(s)-nbytes])
		nbytes += size
	}
	return nbytes
}

// notPlainText reports whether data looks binary and/or minified.
//
// The start of data is always checked with notPlainTextAux. The end is
// checked as well only when the start did not look binary and data is at
// least twice bytesToExamine long; the minified result is then true if
// either end looked minified.
func notPlainText(data []byte) (isBinary, isMinified bool) {
	headBinary, headMinified := notPlainTextAux(data)
	if headBinary {
		return headBinary, headMinified
	}
	if len(data) < 2*bytesToExamine {
		return headBinary, headMinified
	}

	// Some files, such as .a archives, begin with a header that passes
	// for plain text, but their trailer can be expected to be binary.
	tail := data[len(data)-bytesToExamine:]

	// The cut may have landed inside a multi-byte character: step forward
	// (at most 5 bytes) until the tail begins on a UTF-8 rune start.
	for skipped := 0; skipped < 5 && !utf8.RuneStart(tail[0]); skipped++ {
		tail = tail[1:]
	}

	tailBinary, tailMinified := notPlainTextAux(tail)
	// headBinary is known to be false here, so the tail alone decides.
	return tailBinary, headMinified || tailMinified
}

// notPlainTextAux inspects roughly the first bytesToExamine bytes of data,
// rune by rune, and reports whether they look binary and/or minified.
//
// The data counts as binary as soon as it contains invalid UTF-8 or a
// control character below ' ' other than '\n', '\r', '\t' and '\v';
// scanning stops at that point. It counts as minified once any scanned line
// reaches minifiedLine runes. Scanning also stops at the end of data.
func notPlainTextAux(data []byte) (isBinary, isMinified bool) {
	runesOnLine := 0
	for budget := bytesToExamine; budget > 0; {
		r, size := utf8.DecodeRune(data)
		if size == 0 {
			// nothing left to decode
			return false, isMinified
		}
		if size == 1 && r == utf8.RuneError {
			// not valid UTF-8
			return true, isMinified
		}

		switch r {
		case '\n':
			// a new line starts after this rune
			runesOnLine = 0
		case '\r', '\t', '\v':
			// control characters that are common in text
			runesOnLine++
		default:
			if r < ' ' {
				// any other control character is not expected in text
				return true, isMinified
			}
			runesOnLine++
		}
		if runesOnLine >= minifiedLine {
			isMinified = true
		}

		data = data[size:]
		budget -= size
	}
	return false, isMinified
}

// findMatches tries each entry of regexps against data, in order, and
// returns the FindIndex result of the first one that matches. It returns
// nil when none of them match.
func findMatches(data []byte) (loc []int) {
	for _, re := range regexps {
		if hit := re.FindIndex(data); hit != nil {
			return hit
		}
	}
	return nil
}

// escape appends a display-safe rendering of s to b, one rune at a time:
//
//   - a byte that is not valid UTF-8 becomes display.BadUTF8Char();
//   - a carriage return becomes display.CarriageReturn();
//   - a tab, or any rune unicode.IsPrint accepts, is copied unchanged;
//   - every other rune becomes display.UnprintableChar().
func escape(b *strings.Builder, s []byte) {
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRune(s[i:])
		i += size

		if r == utf8.RuneError && size == 1 {
			// size 1 tells a decoding failure apart from a genuine,
			// correctly encoded U+FFFD
			b.WriteString(display.BadUTF8Char().String())
			continue
		}
		if r == '\r' {
			b.WriteString(display.CarriageReturn().String())
			continue
		}
		if r == '\t' || unicode.IsPrint(r) {
			b.WriteRune(r)
			continue
		}
		b.WriteString(display.UnprintableChar().String())
	}
}