Compare commits
8 Commits
Author | SHA1 | Date |
---|---|---|
|
294a41b736 | |
|
f6cd64cf3b | |
|
eacffb4fe1 | |
|
5694cc5194 | |
|
67f81d2728 | |
|
d2cf57dcd9 | |
|
2f3af7fc8e | |
|
5790d3ab5f |
41
README.md
41
README.md
|
@ -1,3 +1,44 @@
|
||||||
# gg
|
# gg
|
||||||
|
|
||||||
Recursive grep written in Go, for everyday ease of use.
|
Recursive grep written in Go, for everyday ease of use.
|
||||||
|
|
||||||
|
`gg` is a recursive grep. Given a regexp (or fixed pattern), it will search for
|
||||||
|
the pattern recursively in the current working directory. It will print a
|
||||||
|
coloured header per file along with the matching line and pattern.
|
||||||
|
|
||||||
|
It is possible to scan specific files or directories, rather than the default
|
||||||
|
current working directory. To do this, simply specify the path(s) as arguments
|
||||||
|
following the pattern.
|
||||||
|
|
||||||
|
It is possible to scan for multiple patterns using the `-e` (or `-Q`) flag,
|
||||||
|
which can be repeated multiple times. `-e` specifies a regular expression and
|
||||||
|
`-Q` a fixed pattern. When using either flag, any non-flag arguments are treated
|
||||||
|
as paths to scan.
|
||||||
|
|
||||||
|
Search defaults to case-sensitive but the `-i` flag may be passed to make all
|
||||||
|
search terms case-insensitive. Alternatively, the `"(?i)"` construct may be added
|
||||||
|
to a regular expression to make that specific expression case-insensitive.
|
||||||
|
|
||||||
|
Files and directories can be excluded with the `-x` option. This supports bash-style
|
||||||
|
globs with `'*'`, `'?'`, `'[a-z]'`, `'{this,that}'`, or `'/**/'` to match zero or more
|
||||||
|
directories. By default, `.git` and vim swap files are ignored. Similarly, `-I`
|
||||||
|
filters files to include. Examples:
|
||||||
|
|
||||||
|
```
|
||||||
|
# ignore files/dirs with .js or .css suffix
|
||||||
|
gg -x '*.js' -x '*.css' pattern
|
||||||
|
|
||||||
|
# only match files with .go suffix (any subdir)
|
||||||
|
gg -I '*.go' pattern
|
||||||
|
|
||||||
|
# only match files whose parent dir is "stuff", but ignore "foo" subdir
|
||||||
|
gg -x ./foo -I 'stuff/*' pattern
|
||||||
|
|
||||||
|
# only match .js files with a directory "things" in the path, but ignore
|
||||||
|
# .min.js (e.g. will match "foo/things/bar/my.js")
|
||||||
|
gg -I 'things/**/*.js' -x '*.min.js' pattern
|
||||||
|
```
|
||||||
|
|
||||||
|
Symlinks named on the command line are followed, but by default symlinks are
|
||||||
|
not followed when recursing into directories. `-L` allows them to be
|
||||||
|
dereferenced.
|
||||||
|
|
115
file.go
115
file.go
|
@ -12,6 +12,20 @@ import (
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// if a matching line is longer than longLine runes then we will print
|
||||||
|
// a truncated form
|
||||||
|
longLine = 256
|
||||||
|
|
||||||
|
// if any line in the file is detected to be longer than this many runes
|
||||||
|
// we will count the file as being minified
|
||||||
|
minifiedLine = 1024
|
||||||
|
|
||||||
|
// number of bytes at the start / end of each file to examine for binary
|
||||||
|
// or minified file detection.
|
||||||
|
bytesToExamine = 8192
|
||||||
|
)
|
||||||
|
|
||||||
type notPlainTextBehaviour int
|
type notPlainTextBehaviour int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -54,10 +68,20 @@ func (nptf *notPlainTextFlag) Type() string {
|
||||||
return "short|full|skip" // ???
|
return "short|full|skip" // ???
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
// printedFull is set by file() if it prints a full file's matches
|
||||||
|
// (i.e. a header line, then the match lines). It is cleared if we
|
||||||
|
// just printed a "Binary file <foo> matches" line. It lets us have a
|
||||||
|
// nice one-line separator between full files, but show the binary
|
||||||
|
// matches compactly.
|
||||||
|
printedFull bool
|
||||||
|
)
|
||||||
|
|
||||||
func file(path string, data []byte) {
|
func file(path string, data []byte) {
|
||||||
var short string
|
var short string
|
||||||
|
isBinary, isMinified := notPlainText(data)
|
||||||
switch {
|
switch {
|
||||||
case isBinary(data):
|
case isBinary:
|
||||||
switch binaryFile.b {
|
switch binaryFile.b {
|
||||||
case notPlainTextShort:
|
case notPlainTextShort:
|
||||||
short = "Binary"
|
short = "Binary"
|
||||||
|
@ -65,7 +89,7 @@ func file(path string, data []byte) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
case isMinified(data):
|
case isMinified:
|
||||||
switch minifiedFile.b {
|
switch minifiedFile.b {
|
||||||
case notPlainTextShort:
|
case notPlainTextShort:
|
||||||
short = "Minified"
|
short = "Minified"
|
||||||
|
@ -194,55 +218,62 @@ func file(path string, data []byte) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func isBinary(data []byte) bool {
|
func notPlainText(data []byte) (isBinary, isMinified bool) {
|
||||||
bytesToExamine := 4096
|
// examine bytes at the start of the file for binary data
|
||||||
|
b, m := notPlainTextAux(data)
|
||||||
|
if b || len(data) < bytesToExamine*2 {
|
||||||
|
return b, m
|
||||||
|
}
|
||||||
|
|
||||||
for bytesToExamine > 0 {
|
// some files, like .a files, have a header which passes plaintext
|
||||||
|
// detection but we can expect the trailer to be binary
|
||||||
|
data = data[len(data)-bytesToExamine:]
|
||||||
|
for i := 0; i < 5; i++ { // attempt to align to UTF-8 char boundary
|
||||||
|
if utf8.RuneStart(data[0]) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
data = data[1:]
|
||||||
|
}
|
||||||
|
b2, m2 := notPlainTextAux(data)
|
||||||
|
return (b || b2), (m || m2)
|
||||||
|
}
|
||||||
|
|
||||||
|
func notPlainTextAux(data []byte) (isBinary, isMinified bool) {
|
||||||
|
n := bytesToExamine
|
||||||
|
var lineLength int
|
||||||
|
for n > 0 {
|
||||||
r, s := utf8.DecodeRune(data)
|
r, s := utf8.DecodeRune(data)
|
||||||
switch {
|
switch {
|
||||||
case s == 0:
|
case s == 0:
|
||||||
// end of string
|
// end of string
|
||||||
return false
|
return
|
||||||
|
|
||||||
case s == 1 && r == utf8.RuneError:
|
case s == 1 && r == utf8.RuneError:
|
||||||
// invalid UTF-8
|
// invalid UTF-8
|
||||||
return true
|
isBinary = true
|
||||||
case r == '\r', r == '\n', r == '\t':
|
return
|
||||||
// valid control chars
|
|
||||||
case r < ' ':
|
|
||||||
// invalid control chars
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
data = data[s:]
|
|
||||||
bytesToExamine -= s
|
|
||||||
}
|
|
||||||
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
const longLine = 256
|
|
||||||
|
|
||||||
func isMinified(data []byte) bool {
|
|
||||||
bytesToExamine := 4096
|
|
||||||
var lineLength int
|
|
||||||
|
|
||||||
for bytesToExamine > 0 {
|
|
||||||
r, s := utf8.DecodeRune(data)
|
|
||||||
switch {
|
|
||||||
case s == 0:
|
|
||||||
// end of string
|
|
||||||
return false
|
|
||||||
case r == '\n':
|
case r == '\n':
|
||||||
lineLength = 0
|
// newline
|
||||||
default:
|
lineLength = -1
|
||||||
lineLength++
|
|
||||||
if lineLength >= longLine {
|
case r == '\r', r == '\t', r == '\v':
|
||||||
return true
|
// valid control chars often present in text
|
||||||
}
|
|
||||||
|
case r < ' ':
|
||||||
|
// control chars not expected in plain text
|
||||||
|
isBinary = true
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
data = data[s:]
|
data = data[s:]
|
||||||
bytesToExamine -= s
|
n -= s
|
||||||
|
lineLength++
|
||||||
|
if lineLength >= minifiedLine {
|
||||||
|
isMinified = true
|
||||||
}
|
}
|
||||||
return false
|
}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func findMatches(data []byte) (loc []int) {
|
func findMatches(data []byte) (loc []int) {
|
||||||
|
@ -252,12 +283,6 @@ func findMatches(data []byte) (loc []int) {
|
||||||
return loc
|
return loc
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, s := range searchBytes {
|
|
||||||
pos := bytes.Index(data, s)
|
|
||||||
if pos != -1 {
|
|
||||||
return []int{pos, pos + len(s)}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
148
main.go
148
main.go
|
@ -9,6 +9,7 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/bmatcuk/doublestar/v4"
|
"github.com/bmatcuk/doublestar/v4"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
|
@ -16,7 +17,6 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO:
|
// TODO:
|
||||||
// - it would be better to make fixed patterns case insensitive too.
|
|
||||||
// - configurable defaults for exclude.
|
// - configurable defaults for exclude.
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
@ -41,46 +41,73 @@ which can be repeated multiple times. -e specifies a regular expression and
|
||||||
-Q a fixed pattern. When using either flag, any non-flag arguments are treated
|
-Q a fixed pattern. When using either flag, any non-flag arguments are treated
|
||||||
as paths to scan.
|
as paths to scan.
|
||||||
|
|
||||||
Search defaults to case-sensitive but the -i flag may be passed to make regular
|
Search defaults to case-sensitive but the -i flag may be passed to make all
|
||||||
expression searches case-insensitive. Alternatively, the "(?i)" construct may be
|
search terms case-insensitive. Alternatively, the "(?i)" construct may be added
|
||||||
added to a regular expression to make that specific expression case insensitive.
|
to a regular expression to make that specific expression case insensitive.
|
||||||
Fixed pattern matches are always case-sensitive.
|
|
||||||
|
|
||||||
Files and directories can be excluded with the -x option. This supports bash-style
|
Files and directories can be excluded with the -x option. This supports bash-style
|
||||||
globs with '*', '?', '[a-z]', '{this,that}', or '/**/' to match zero or more
|
globs with '*', '?', '[a-z]', '{this,that}', or '/**/' to match zero or more
|
||||||
directories. By default, .git and vim swap files are ignored.`,
|
directories. By default, .git and vim swap files are ignored. Similarly, -I
|
||||||
|
filters files to include. Examples:
|
||||||
|
|
||||||
|
# ignore files/dirs with .js or .css suffix
|
||||||
|
gg -x '*.js' -x '*.css' pattern
|
||||||
|
|
||||||
|
# only match files with .go suffix (any subdir)
|
||||||
|
gg -I '*.go' pattern
|
||||||
|
|
||||||
|
# only match files whose parent dir is "stuff", but ignore "foo" subdir
|
||||||
|
gg -x ./foo -I 'stuff/*' pattern
|
||||||
|
|
||||||
|
# only match .js files with a directory "things" in the path, but ignore
|
||||||
|
# .min.js (e.g. will match "foo/things/bar/my.js")
|
||||||
|
gg -I 'things/**/*.js' -x '*.min.js' pattern
|
||||||
|
|
||||||
|
Symlinks named on the command line are followed, but by default symlinks are
|
||||||
|
not followed when recursing into directories. -L allows them to be
|
||||||
|
dereferenced.`,
|
||||||
|
|
||||||
RunE: run,
|
RunE: run,
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
// flags
|
||||||
searchRegexp []string
|
searchRegexp []string
|
||||||
regexps []*regexp.Regexp
|
|
||||||
searchFixed []string
|
searchFixed []string
|
||||||
searchBytes [][]byte
|
|
||||||
searchPath []string
|
searchPath []string
|
||||||
excludeList []string
|
excludeList []string
|
||||||
binaryFile notPlainTextFlag
|
includeList []string
|
||||||
minifiedFile notPlainTextFlag
|
|
||||||
ignoreCase bool
|
ignoreCase bool
|
||||||
noColour bool
|
noColour bool
|
||||||
|
binaryFile notPlainTextFlag
|
||||||
|
minifiedFile notPlainTextFlag
|
||||||
|
followSymlinks bool
|
||||||
|
|
||||||
|
// computed from searchRegexp, searchFixed. Each regexp here will be
|
||||||
|
// matched against each line of each input file.
|
||||||
|
regexps []*regexp.Regexp
|
||||||
|
|
||||||
|
// formats output
|
||||||
display *Display
|
display *Display
|
||||||
printedFull bool
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
rootCmd.Flags().StringSliceVarP(&searchRegexp, "grep", "e", nil, "pattern to match (regular expression)")
|
rootCmd.Flags().StringSliceVarP(&searchRegexp, "grep", "e", nil, "pattern to match (regular expression)")
|
||||||
rootCmd.Flags().StringSliceVarP(&searchFixed, "fixed", "Q", nil, "pattern to match (fixed string)")
|
rootCmd.Flags().StringSliceVarP(&searchFixed, "fixed", "Q", nil, "pattern to match (fixed string)")
|
||||||
rootCmd.Flags().StringSliceVarP(&excludeList, "exclude", "x", []string{".git", ".*.swp"}, "files/directories to exclude")
|
rootCmd.Flags().StringSliceVarP(&excludeList, "exclude", "x", []string{".git", ".*.swp"}, "files/directories to exclude")
|
||||||
|
rootCmd.Flags().StringSliceVarP(&includeList, "include", "I", nil, "files/directories to include")
|
||||||
rootCmd.Flags().BoolVarP(&ignoreCase, "ignore-case", "i", false, "make all searches case insensitive")
|
rootCmd.Flags().BoolVarP(&ignoreCase, "ignore-case", "i", false, "make all searches case insensitive")
|
||||||
rootCmd.Flags().BoolVarP(&noColour, "no-colour", "C", false, "disable colour output")
|
rootCmd.Flags().BoolVarP(&noColour, "no-colour", "C", false, "disable colour output")
|
||||||
rootCmd.Flags().Var(&binaryFile, "binary", "what to do with binary files")
|
rootCmd.Flags().Var(&binaryFile, "binary", "what to do with binary files")
|
||||||
rootCmd.Flags().Var(&minifiedFile, "minified", "what to do with minified text files")
|
rootCmd.Flags().Var(&minifiedFile, "minified", "what to do with minified text files")
|
||||||
|
rootCmd.Flags().BoolVarP(&followSymlinks, "dereference", "L", false, "follow symlinks when recursing")
|
||||||
}
|
}
|
||||||
|
|
||||||
func run(c *cobra.Command, args []string) error {
|
func run(c *cobra.Command, args []string) error {
|
||||||
display = NewDisplay(noColour)
|
display = NewDisplay(noColour)
|
||||||
|
|
||||||
|
// if no -e or -Q flag is passed, then the first arg is taken to be
|
||||||
|
// the pattern to match
|
||||||
if len(searchRegexp) == 0 && len(searchFixed) == 0 {
|
if len(searchRegexp) == 0 && len(searchFixed) == 0 {
|
||||||
if len(args) == 0 {
|
if len(args) == 0 {
|
||||||
return errors.New("no pattern specified")
|
return errors.New("no pattern specified")
|
||||||
|
@ -89,17 +116,38 @@ func run(c *cobra.Command, args []string) error {
|
||||||
args = args[1:]
|
args = args[1:]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// remaining arguments are treated as search paths; an empty list is
|
||||||
|
// taken to mean the CWD
|
||||||
searchPath = args
|
searchPath = args
|
||||||
if len(searchPath) == 0 {
|
if len(searchPath) == 0 {
|
||||||
searchPath = append(searchPath, ".")
|
searchPath = append(searchPath, ".")
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, x := range excludeList {
|
// if we got past argument passing, then returned errors are runtime
|
||||||
|
// things (like file not found) that shouldn't trigger a usage message.
|
||||||
|
c.SilenceUsage = true
|
||||||
|
|
||||||
|
// for -x and -I, an undecorated pattern is treated as a suffix match
|
||||||
|
for i, x := range excludeList {
|
||||||
|
if !strings.HasPrefix(x, "**/") && !strings.HasPrefix(x, "./") {
|
||||||
|
x = "**/" + x
|
||||||
|
excludeList[i] = x
|
||||||
|
}
|
||||||
if !doublestar.ValidatePattern(x) {
|
if !doublestar.ValidatePattern(x) {
|
||||||
return fmt.Errorf("invalid exclude pattern %q", x)
|
return fmt.Errorf("invalid exclude pattern %q", x)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for i, x := range includeList {
|
||||||
|
if !strings.HasPrefix(x, "**/") && !strings.HasPrefix(x, "./") {
|
||||||
|
x = "**/" + x
|
||||||
|
includeList[i] = x
|
||||||
|
}
|
||||||
|
if !doublestar.ValidatePattern(x) {
|
||||||
|
return fmt.Errorf("invalid include pattern %q", x)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// compile regular expressions for matching
|
||||||
for _, r := range searchRegexp {
|
for _, r := range searchRegexp {
|
||||||
if ignoreCase {
|
if ignoreCase {
|
||||||
r = "(?i)" + r
|
r = "(?i)" + r
|
||||||
|
@ -110,14 +158,21 @@ func run(c *cobra.Command, args []string) error {
|
||||||
}
|
}
|
||||||
regexps = append(regexps, re)
|
regexps = append(regexps, re)
|
||||||
}
|
}
|
||||||
|
for _, r := range searchFixed {
|
||||||
for _, s := range searchFixed {
|
r = regexp.QuoteMeta(r)
|
||||||
searchBytes = append(searchBytes, []byte(s))
|
if ignoreCase {
|
||||||
|
r = "(?i)" + r
|
||||||
|
}
|
||||||
|
re, err := regexp.Compile(r)
|
||||||
|
if err != nil {
|
||||||
|
}
|
||||||
|
regexps = append(regexps, re)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// search over named paths
|
||||||
var errs []error
|
var errs []error
|
||||||
for _, path := range searchPath {
|
for _, path := range searchPath {
|
||||||
if err := search(path); err != nil {
|
if err := search(path, true); err != nil {
|
||||||
errs = append(errs, err)
|
errs = append(errs, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -132,26 +187,63 @@ func recurse(path string) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
var errs []error
|
var errs []error
|
||||||
NextFile:
|
|
||||||
for _, de := range d {
|
for _, de := range d {
|
||||||
name := de.Name()
|
fullPath := filepath.Join(path, de.Name())
|
||||||
fullPath := filepath.Join(path, name)
|
if !shouldSearch(fullPath, de.IsDir()) {
|
||||||
|
continue
|
||||||
for _, x := range excludeList {
|
|
||||||
if exclude, _ := doublestar.Match(x, fullPath); exclude {
|
|
||||||
continue NextFile
|
|
||||||
}
|
}
|
||||||
}
|
if err := search(fullPath, followSymlinks); err != nil {
|
||||||
|
|
||||||
if err := search(fullPath); err != nil {
|
|
||||||
errs = append(errs, err)
|
errs = append(errs, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return errors.Join(errs...)
|
return errors.Join(errs...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func search(path string) error {
|
// shouldSearch matches the full path of the file against the include and
|
||||||
st, err := os.Stat(path)
|
// exclude lists, returning true if we should consider the file/directory for
|
||||||
|
// searching and false if not.
|
||||||
|
func shouldSearch(fullPath string, isDir bool) bool {
|
||||||
|
// process the exclude list first
|
||||||
|
for _, x := range excludeList {
|
||||||
|
if exclude, _ := doublestar.Match(x, fullPath); exclude {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if the include list is empty, everything is included
|
||||||
|
if len(includeList) == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, x := range includeList {
|
||||||
|
match, _ := doublestar.Match(x, fullPath)
|
||||||
|
fmt.Printf("[DEBUG] x=%q fullPath=%q isDir=%t match=%t\n", x, fullPath, isDir, match)
|
||||||
|
|
||||||
|
// if it's a directory, and we have at least one recursive
|
||||||
|
// matcher, then search
|
||||||
|
if isDir && strings.HasPrefix(x, "**/") {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
if include, _ := doublestar.Match(x, fullPath); include {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func search(path string, deref bool) error {
|
||||||
|
var (
|
||||||
|
st os.FileInfo
|
||||||
|
err error
|
||||||
|
)
|
||||||
|
|
||||||
|
if deref {
|
||||||
|
st, err = os.Stat(path)
|
||||||
|
} else {
|
||||||
|
st, err = os.Lstat(path)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue