Compare commits

...

6 Commits
v0.1.1 ... main

Author SHA1 Message Date
Laurence Withers 294a41b736 Flesh out README 2023-07-07 11:50:00 +01:00
Laurence Withers f6cd64cf3b Add -I include option, and some usage examples 2023-07-07 11:49:54 +01:00
Laurence Withers eacffb4fe1 Add -L symlink follow option, default false 2023-07-07 11:22:06 +01:00
Laurence Withers 5694cc5194 Move printedFull closer to its use, document 2023-07-07 11:21:41 +01:00
Laurence Withers 67f81d2728 Silence usage message on runtime error 2023-07-07 11:10:31 +01:00
Laurence Withers d2cf57dcd9 Use regexp for case-insensitive literal matches
This commit switches back to using the regexp engine for
case-insensitive literal string matches.

This is slower, but at least case-insensitive matches for string
literals will function now. The code is a tiny bit shorter and simpler
too.

Given the aim of the tool is to be useful for ad-hoc searches,
efficiency isn't the concern but rather just getting the job done with
the minimum of fuss / unexpected behaviour.
2023-07-07 11:00:02 +01:00
3 changed files with 172 additions and 41 deletions

View File

@ -1,3 +1,44 @@
# gg
Recursive grep written in Go, for everyday ease of use.
Recursive grep written in Go, for everyday ease of use.
`gg` is a recursive grep. Given a regexp (or fixed pattern) it will search for
the pattern recursively in the current working directory. It will print a
coloured header per file along with the matching line and pattern.
It is possible to scan specific files or directories, rather than the default
current working directory. To do this, simply specify the path(s) as arguments
following the pattern.
It is possible to scan for multiple patterns using the `-e` (or `-Q`) argument,
which can be repeated multiple times. `-e` specifies a regular expression and
`-Q` a fixed pattern. When using either flag, any non-flag arguments are treated
as paths to scan.
Search defaults to case-sensitive but the `-i` flag may be passed to make all
search terms case-insensitive. Alternatively, the `"(?i)"` construct may be added
to a regular expression to make that specific expression case-insensitive.
Files and directories can be excluded with the `-x` option. This supports bash-style
globs with `'*'`, `'?'`, `'[a-z]'`, `'{this,that}'`, or `'/**/'` to match zero or more
directories. By default, `.git` and vim swap files are ignored. Similarly, `-I`
filters files to include. Examples:
```
# ignore files/dirs with .js or .css suffix
gg -x '*.js' -x '*.css' pattern
# only match files with .go suffix (any subdir)
gg -I '*.go' pattern
# only match files whose parent dir is "stuff", but ignore "foo" subdir
gg -x ./foo -I 'stuff/*' pattern
# only match .js files with a directory "things" in the path, but ignore
# .min.js (e.g. will match "foo/things/bar/my.js")
gg -I 'things/**/*.js' -x '*.min.js' pattern
```
Symlinks named on the command line are followed, but by default symlinks are
not followed when recursing into directories. `-L` allows them to be
dereferenced.

15
file.go
View File

@ -68,6 +68,15 @@ func (nptf *notPlainTextFlag) Type() string {
return "short|full|skip" // ???
}
// printedFull tracks what kind of output file() produced most recently.
// file() sets it after printing a full file's matches (a header line
// followed by the match lines) and clears it after printing only a
// "Binary file <foo> matches" line. That allows a one-line separator to be
// placed between full files while keeping binary matches compact.
var printedFull bool
func file(path string, data []byte) {
var short string
isBinary, isMinified := notPlainText(data)
@ -274,12 +283,6 @@ func findMatches(data []byte) (loc []int) {
return loc
}
}
for _, s := range searchBytes {
pos := bytes.Index(data, s)
if pos != -1 {
return []int{pos, pos + len(s)}
}
}
return nil
}

155
main.go
View File

@ -17,7 +17,6 @@ import (
)
// TODO:
// - it would be better to make fixed patterns case insensitive too.
// - configurable defaults for exclude.
func main() {
@ -42,46 +41,73 @@ which can be repeated multiple times. -e specifies a regular expression and
-Q a fixed pattern. When using either flag, any non-flag arguments are treated
as paths to scan.
Search defaults to case-sensitive but the -i flag may be passed to make regular
expression searches case-insensitive. Alternatively, the "(?i)" construct may be
added to a regular expression to make that specific expression case insensitive.
Fixed pattern matches are always case-sensitive.
Search defaults to case-sensitive but the -i flag may be passed to make all
search terms case-insensitive. Alternatively, the "(?i)" construct may be added
to a regular expression to make that specific expression case insensitive.
Files and directories can be excluded with the -x option. This supports bash-style
globs with '*', '?', '[a-z]', '{this,that}', or '/**/' to match zero or more
directories. By default, .git and vim swap files are ignored.`,
directories. By default, .git and vim swap files are ignored. Similarly, -I
filters files to include. Examples:
# ignore files/dirs with .js or .css suffix
gg -x '*.js' -x '*.css' pattern
# only match files with .go suffix (any subdir)
gg -I '*.go' pattern
# only match files whose parent dir is "stuff", but ignore "foo" subdir
gg -x ./foo -I 'stuff/*' pattern
# only match .js files with a directory "things" in the path, but ignore
# .min.js (e.g. will match "foo/things/bar/my.js")
gg -I 'things/**/*.js' -x '*.min.js' pattern
Symlinks named on the command line are followed, but by default symlinks are
not followed when recursing into directories. -L allows them to be
dereferenced.`,
RunE: run,
}
var (
searchRegexp []string
regexps []*regexp.Regexp
searchFixed []string
searchBytes [][]byte
searchPath []string
excludeList []string
binaryFile notPlainTextFlag
minifiedFile notPlainTextFlag
ignoreCase bool
noColour bool
display *Display
printedFull bool
// flags
searchRegexp []string
searchFixed []string
searchPath []string
excludeList []string
includeList []string
ignoreCase bool
noColour bool
binaryFile notPlainTextFlag
minifiedFile notPlainTextFlag
followSymlinks bool
// computed from searchRegexp, searchFixed. Each regexp here will be
// matched against each line of each input file.
regexps []*regexp.Regexp
// formats output
display *Display
)
// init registers every command-line flag on rootCmd, binding each one to
// the package-level variable that run() and the search code read.
func init() {
	flags := rootCmd.Flags()

	// what to search for
	flags.StringSliceVarP(&searchRegexp, "grep", "e", nil, "pattern to match (regular expression)")
	flags.StringSliceVarP(&searchFixed, "fixed", "Q", nil, "pattern to match (fixed string)")

	// which paths to consider
	flags.StringSliceVarP(&excludeList, "exclude", "x", []string{".git", ".*.swp"}, "files/directories to exclude")
	flags.StringSliceVarP(&includeList, "include", "I", nil, "files/directories to include")

	// matching and output behaviour
	flags.BoolVarP(&ignoreCase, "ignore-case", "i", false, "make all searches case insensitive")
	flags.BoolVarP(&noColour, "no-colour", "C", false, "disable colour output")

	// handling of files that are not plain text
	flags.Var(&binaryFile, "binary", "what to do with binary files")
	flags.Var(&minifiedFile, "minified", "what to do with minified text files")

	// directory traversal
	flags.BoolVarP(&followSymlinks, "dereference", "L", false, "follow symlinks when recursing")
}
func run(c *cobra.Command, args []string) error {
display = NewDisplay(noColour)
// if no -e or -Q flag is passed, then the first arg is taken to be
// the pattern to match
if len(searchRegexp) == 0 && len(searchFixed) == 0 {
if len(args) == 0 {
return errors.New("no pattern specified")
@ -90,11 +116,18 @@ func run(c *cobra.Command, args []string) error {
args = args[1:]
}
// remaining arguments are treated as search paths; an empty list is
// taken to mean the CWD
searchPath = args
if len(searchPath) == 0 {
searchPath = append(searchPath, ".")
}
// if we got past argument passing, then returned errors are runtime
// things (like file not found) that shouldn't trigger a usage message.
c.SilenceUsage = true
// for -x and -I, an undecorated pattern is treated as a suffix match
for i, x := range excludeList {
if !strings.HasPrefix(x, "**/") && !strings.HasPrefix(x, "./") {
x = "**/" + x
@ -104,7 +137,17 @@ func run(c *cobra.Command, args []string) error {
return fmt.Errorf("invalid exclude pattern %q", x)
}
}
for i, x := range includeList {
if !strings.HasPrefix(x, "**/") && !strings.HasPrefix(x, "./") {
x = "**/" + x
includeList[i] = x
}
if !doublestar.ValidatePattern(x) {
return fmt.Errorf("invalid include pattern %q", x)
}
}
// compile regular expressions for matching
for _, r := range searchRegexp {
if ignoreCase {
r = "(?i)" + r
@ -115,14 +158,21 @@ func run(c *cobra.Command, args []string) error {
}
regexps = append(regexps, re)
}
for _, s := range searchFixed {
searchBytes = append(searchBytes, []byte(s))
// Fixed patterns are matched through the regexp engine as well: the
// literal is quoted so that it matches verbatim, which lets -i apply to
// fixed patterns in the same way as to regular expressions.
for _, s := range searchFixed {
	r := regexp.QuoteMeta(s)
	if ignoreCase {
		r = "(?i)" + r
	}
	re, err := regexp.Compile(r)
	if err != nil {
		// Report the failure instead of silently appending a nil
		// *regexp.Regexp, which would panic on first use.
		return fmt.Errorf("invalid fixed pattern %q: %w", s, err)
	}
	regexps = append(regexps, re)
}
// search over named paths
var errs []error
for _, path := range searchPath {
if err := search(path); err != nil {
if err := search(path, true); err != nil {
errs = append(errs, err)
}
}
@ -137,26 +187,63 @@ func recurse(path string) error {
}
var errs []error
NextFile:
for _, de := range d {
name := de.Name()
fullPath := filepath.Join(path, name)
for _, x := range excludeList {
if exclude, _ := doublestar.Match(x, fullPath); exclude {
continue NextFile
}
fullPath := filepath.Join(path, de.Name())
if !shouldSearch(fullPath, de.IsDir()) {
continue
}
if err := search(fullPath); err != nil {
if err := search(fullPath, followSymlinks); err != nil {
errs = append(errs, err)
}
}
return errors.Join(errs...)
}
func search(path string) error {
st, err := os.Stat(path)
// shouldSearch matches the full path of a file or directory against the
// exclude and include lists. It returns true if the entry should be
// considered for searching and false if it should be skipped.
//
// fullPath is the path to test against the glob patterns; isDir reports
// whether that path is a directory.
//
// Rules, in order:
//   - any match in excludeList rejects the entry;
//   - an empty includeList accepts everything that was not excluded;
//   - a directory is accepted as soon as an include pattern starting with
//     "**/" is seen, since such a pattern may match entries below it;
//   - otherwise the entry is accepted only if an include pattern matches.
//
// Errors returned by doublestar.Match are treated as "no match".
func shouldSearch(fullPath string, isDir bool) bool {
	// process the exclude list first; exclusion always wins
	for _, x := range excludeList {
		if exclude, _ := doublestar.Match(x, fullPath); exclude {
			return false
		}
	}

	// if the include list is empty, everything is included
	if len(includeList) == 0 {
		return true
	}

	for _, x := range includeList {
		// if it's a directory, and we have at least one recursive
		// matcher, then search: the directory itself need not match
		// for something beneath it to do so
		if isDir && strings.HasPrefix(x, "**/") {
			return true
		}
		if include, _ := doublestar.Match(x, fullPath); include {
			return true
		}
	}
	return false
}
func search(path string, deref bool) error {
var (
st os.FileInfo
err error
)
if deref {
st, err = os.Stat(path)
} else {
st, err = os.Lstat(path)
}
if err != nil {
return err
}