diff --git a/file.go b/file.go
new file mode 100644
index 0000000..4ef63cb
--- /dev/null
+++ b/file.go
@@ -0,0 +1,310 @@
+/*
+gg is a recursive grep written in Go, with some shortcuts for everyday use.
+*/
+package main
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// notPlainTextBehaviour selects how matches in a file that is not plain text
+// (binary or minified) are reported.
+type notPlainTextBehaviour int
+
+const (
+	// notPlainTextShort prints a single "... file ... matches." line.
+	notPlainTextShort notPlainTextBehaviour = iota
+	// notPlainTextFull prints every matching line, as for a plain text file.
+	notPlainTextFull
+	// notPlainTextSkip does not report the file at all.
+	notPlainTextSkip
+)
+
+// notPlainTextFlag is a command line flag value holding a
+// notPlainTextBehaviour. Its zero value is notPlainTextShort.
+type notPlainTextFlag struct {
+	b notPlainTextBehaviour
+}
+
+// String returns the current value in the form accepted by Set.
+func (nptf *notPlainTextFlag) String() string {
+	switch nptf.b {
+	case notPlainTextShort:
+		return "short"
+	case notPlainTextFull:
+		return "full"
+	case notPlainTextSkip:
+		return "skip"
+	}
+	return "???"
+}
+
+// Set parses s, which must be one of "short", "full" or "skip".
+func (nptf *notPlainTextFlag) Set(s string) error {
+	switch s {
+	case "short":
+		nptf.b = notPlainTextShort
+	case "full":
+		nptf.b = notPlainTextFull
+	case "skip":
+		nptf.b = notPlainTextSkip
+	default:
+		return errors.New("must be one of short|full|skip")
+	}
+	return nil
+}
+
+// Type lists the accepted values. pflag uses it as the value placeholder in
+// the flag's usage text.
+func (nptf *notPlainTextFlag) Type() string {
+	return "short|full|skip"
+}
+
+// file searches data, the contents of the file at path, one line at a time
+// and prints the first match on each matching line to standard output.
+//
+// Files that look binary or minified are handled as the binaryFile and
+// minifiedFile flags say: skipped, reported with a single summary line, or
+// printed in full like any other file.
+func file(path string, data []byte) {
+	// window is how many bytes of context are shown either side of a match.
+	// A match at least this long is not followed by any context at all.
+	const window = 128
+
+	var short string
+	switch {
+	case isBinary(data):
+		switch binaryFile.b {
+		case notPlainTextShort:
+			short = "Binary"
+		case notPlainTextSkip:
+			return
+		}
+
+	case isMinified(data):
+		switch minifiedFile.b {
+		case notPlainTextShort:
+			short = "Minified"
+		case notPlainTextSkip:
+			return
+		}
+	}
+
+	var (
+		lineNum       int
+		printedHeader bool
+		b, b2         strings.Builder
+	)
+
+	// split into lines
+	for len(data) > 0 {
+		eol := bytes.IndexByte(data, '\n')
+		lineNum++
+		var line []byte
+		if eol == -1 {
+			line = data
+			data = nil
+		} else {
+			line = data[:eol]
+			data = data[eol+1:]
+		}
+
+		loc := findMatches(line)
+		if loc == nil {
+			continue
+		}
+
+		switch {
+		case !printedHeader && short != "":
+			// Summary mode: one line for the whole file, then stop.
+			if printedFull {
+				fmt.Println("")
+				printedFull = false
+			}
+			fmt.Printf("%s file %s matches.\n", short, path)
+			return
+
+		case !printedHeader:
+			// A blank line sets this file apart from the matches printed
+			// in full for an earlier file.
+			if printedFull {
+				fmt.Println("")
+			}
+			printedFull = true
+			fmt.Println(display.Filename(path))
+			printedHeader = true
+		}
+
+		b.Reset()
+		fmt.Fprintf(&b, "%4d: ", display.LineNumber(lineNum))
+
+		// Text before the match.
+		if loc[0] < window {
+			escape(&b, line[0:loc[0]])
+		} else {
+			// Step forward to a rune boundary so no character is split.
+			start := loc[0] - window
+			for i := 0; i < 5; i++ {
+				if utf8.RuneStart(line[start]) {
+					break
+				}
+				start++
+			}
+
+			b.WriteString(display.TruncatedBytes(start).String())
+			b.WriteString(display.TruncatedMarker().String())
+			escape(&b, line[start:loc[0]])
+		}
+
+		if loc[1]-loc[0] < window {
+			b2.Reset()
+			escape(&b2, line[loc[0]:loc[1]])
+			b.WriteString(display.Match(b2.String()).String())
+
+			// Text after the match. The test is >= rather than >: a line
+			// ending exactly window bytes after the match has nothing to
+			// leave out, and line[loc[1]+window] would be out of range.
+			if loc[1]+window >= len(line) {
+				escape(&b, line[loc[1]:])
+			} else {
+				// Step back to a rune boundary.
+				end := loc[1] + window
+				for i := 0; i < 5; i++ {
+					if utf8.RuneStart(line[end]) {
+						break
+					}
+					end--
+				}
+				escape(&b, line[loc[1]:end])
+				b.WriteString(display.TruncatedBytes(len(line) - end).String())
+				b.WriteString(display.TruncatedMarker().String())
+			}
+
+		} else {
+			// Long match: print the match and leave out the rest of the
+			// line. A match that runs to the end of the line has no byte
+			// after it to inspect and nothing to leave out.
+			end := loc[1]
+			for i := 0; i < 5 && end < len(line); i++ {
+				if utf8.RuneStart(line[end]) {
+					break
+				}
+				end--
+			}
+
+			b2.Reset()
+			escape(&b2, line[loc[0]:end])
+			b.WriteString(display.Match(b2.String()).String())
+			if end < len(line) {
+				b.WriteString(display.TruncatedMarker().String())
+				b.WriteString(display.TruncatedBytes(len(line) - end).String())
+			}
+		}
+
+		b.WriteRune('\n')
+		fmt.Print(b.String())
+	}
+}
+
+// isBinary reports whether data looks like a binary file. It examines roughly
+// the first 4096 bytes and returns true as soon as it meets invalid UTF-8 or
+// a character below space other than CR, LF and tab.
+func isBinary(data []byte) bool {
+	bytesToExamine := 4096
+
+	for bytesToExamine > 0 {
+		r, s := utf8.DecodeRune(data)
+		switch {
+		case s == 0:
+			// end of string
+			return false
+		case s == 1 && r == utf8.RuneError:
+			// invalid UTF-8
+			return true
+		case r == '\r', r == '\n', r == '\t':
+			// valid control chars
+		case r < ' ':
+			// invalid control chars
+			return true
+		}
+		data = data[s:]
+		bytesToExamine -= s
+	}
+
+	return false
+}
+
+// isMinified reports whether data looks like minified text, that is whether
+// some line reaches 256 characters within roughly the first 4096 bytes of
+// data.
+func isMinified(data []byte) bool {
+	const longLine = 256
+	bytesToExamine := 4096
+	var lineLength int
+
+	for bytesToExamine > 0 {
+		r, s := utf8.DecodeRune(data)
+		switch {
+		case s == 0:
+			// end of string
+			return false
+		case r == '\n':
+			lineLength = 0
+		default:
+			lineLength++
+			if lineLength >= longLine {
+				return true
+			}
+		}
+		data = data[s:]
+		bytesToExamine -= s
+	}
+	return false
+}
+
+// findMatches returns the start and end offsets in data of the first match
+// found, trying every regexp before every byte string, or nil when nothing
+// matches.
+func findMatches(data []byte) (loc []int) {
+	for _, re := range regexps {
+		if m := re.FindIndex(data); m != nil {
+			return m
+		}
+	}
+	for _, s := range searchBytes {
+		if pos := bytes.Index(data, s); pos != -1 {
+			return []int{pos, pos + len(s)}
+		}
+	}
+	return nil
+}
+
+// escape writes s to b for display. Printable characters and tabs are copied
+// unchanged; invalid UTF-8 bytes, carriage returns and all other characters
+// are replaced by the corresponding marker from display.
+func escape(b *strings.Builder, s []byte) {
+	for len(s) > 0 {
+		r, size := utf8.DecodeRune(s)
+		s = s[size:]
+
+		switch {
+		case r == utf8.RuneError && size == 1:
+			b.WriteString(display.BadUTF8Char().String())
+
+		case r == '\r':
+			b.WriteString(display.CarriageReturn().String())
+
+		case r == '\t',
+			unicode.IsPrint(r):
+			b.WriteRune(r)
+
+		default:
+			b.WriteString(display.UnprintableChar().String())
+		}
+	}
+}
diff --git a/main.go b/main.go
index 48ce67d..5a93986 100644
--- a/main.go
+++ b/main.go
@@ -4,15 +4,10 @@ gg is a recursive grep written in Go, with some shortcuts for everyday use.
package main import ( - "bytes" "errors" - "fmt" "os" "path/filepath" "regexp" - "strings" - "unicode" - "unicode/utf8" "github.com/spf13/cobra" "golang.org/x/sys/unix" @@ -20,8 +15,6 @@ import ( // TODO: // - bold of escaped output doesn't work -// - binary file detection -// - long-line / minified-file detection // - ignore files by extension (or glob?) func main() { @@ -61,11 +54,13 @@ var ( searchBytes [][]byte searchPath []string ignoreList []string + binaryFile notPlainTextFlag + minifiedFile notPlainTextFlag ignoreMap map[string]struct{} ignoreCase bool noColour bool display *Display - matchedAny bool + printedFull bool ) func init() { @@ -74,6 +69,8 @@ func init() { rootCmd.Flags().StringSliceVarP(&ignoreList, "exclude", "x", []string{".git"}, "files/directories to exclude") rootCmd.Flags().BoolVarP(&ignoreCase, "ignore-case", "i", false, "make all searches case insensitive") rootCmd.Flags().BoolVarP(&noColour, "no-colour", "C", false, "disable colour output") + rootCmd.Flags().Var(&binaryFile, "binary", "what to do with binary files") + rootCmd.Flags().Var(&minifiedFile, "minified", "what to do with minified text files") } func run(c *cobra.Command, args []string) error { @@ -164,147 +161,12 @@ func search(path string) error { return err } defer f.Close() - fullData, err := unix.Mmap(int(f.Fd()), 0, int(st.Size()), unix.PROT_READ, unix.MAP_PRIVATE) + data, err := unix.Mmap(int(f.Fd()), 0, int(st.Size()), unix.PROT_READ, unix.MAP_PRIVATE) if err != nil { return err } - defer unix.Munmap(fullData) - - var printedHeader bool - - var ( - data = fullData - lineNum int - b, b2 strings.Builder - ) - for len(data) > 0 { - eol := bytes.IndexByte(data, '\n') - lineNum++ - var line []byte - if eol == -1 { - line = data - data = nil - } else { - line = data[:eol] - data = data[eol+1:] - } - - if len(line) == 0 { - continue - } - - loc := matches(line) - if loc == nil { - continue - } - - if !printedHeader { - printedHeader = true - if !matchedAny { - matchedAny = true 
- } else { - fmt.Println("") - } - fmt.Println(display.Filename(path)) - } - - b.Reset() - fmt.Fprintf(&b, "%4d: ", display.LineNumber(lineNum)) - - if loc[0] < 128 { - escape(&b, line[0:loc[0]]) - } else { - start := loc[0] - 128 - for i := 0; i < 5; i++ { - if utf8.RuneStart(line[start]) { - break - } - start++ - } - - b.WriteString(display.TruncatedBytes(start).String()) - b.WriteString(display.TruncatedMarker().String()) - escape(&b, line[start:loc[0]]) - } - - if loc[1]-loc[0] < 128 { - b2.Reset() - escape(&b2, line[loc[0]:loc[1]]) - b.WriteString(display.Match(b2.String()).String()) - - if loc[1]+128 > len(line) { - escape(&b, line[loc[1]:]) - } else { - end := loc[1] + 128 - for i := 0; i < 5; i++ { - if utf8.RuneStart(line[end]) { - break - } - end-- - } - escape(&b, line[loc[1]:end]) - b.WriteString(display.TruncatedBytes(len(line) - end).String()) - b.WriteString(display.TruncatedMarker().String()) - } - - } else { - end := loc[1] - for i := 0; i < 5; i++ { - if utf8.RuneStart(line[end]) { - break - } - end-- - } - - b2.Reset() - escape(&b2, line[loc[0]:end]) - b.WriteString(display.Match(b2.String()).String()) - b.WriteString(display.TruncatedMarker().String()) - b.WriteString(display.TruncatedBytes(len(line) - end).String()) - - } - - b.WriteRune('\n') - fmt.Print(b.String()) - } + defer unix.Munmap(data) + file(path, data) return nil } - -func matches(data []byte) (loc []int) { - for _, re := range regexps { - loc := re.FindIndex(data) - if loc != nil { - return loc - } - } - for _, s := range searchBytes { - pos := bytes.Index(data, s) - if pos != -1 { - return []int{pos, pos + len(s)} - } - } - return nil -} - -func escape(b *strings.Builder, s []byte) { - for len(s) > 0 { - r, size := utf8.DecodeRune(s) - s = s[size:] - - switch { - case r == utf8.RuneError && size == 1: - b.WriteString(display.BadUTF8Char().String()) - - case r == '\r': - b.WriteString(display.CarriageReturn().String()) - - case r == '\t', - unicode.IsPrint(r): - b.WriteRune(r) - - 
default: - b.WriteString(display.UnprintableChar().String()) - } - } -}