gg/main.go

274 lines
7.2 KiB
Go
Raw Permalink Normal View History

2023-05-11 23:30:43 +01:00
/*
gg is a recursive grep written in Go, with some shortcuts for everyday use.
*/
package main
import (
"errors"
2023-05-13 12:30:39 +01:00
"fmt"
2023-05-11 23:30:43 +01:00
"os"
"path/filepath"
"regexp"
"strings"
2023-05-11 23:30:43 +01:00
2023-05-13 12:30:39 +01:00
"github.com/bmatcuk/doublestar/v4"
2023-05-11 23:30:43 +01:00
"github.com/spf13/cobra"
"golang.org/x/sys/unix"
)
// TODO:
// - configurable defaults for exclude.
// main executes the root command and terminates the process with exit
// status 1 when Execute reports an error. The error value itself is not
// printed here.
func main() {
	err := rootCmd.Execute()
	if err == nil {
		return
	}
	os.Exit(1)
}
var rootCmd = &cobra.Command{
Use: "gg pattern [path1 [path2 …]]",
Short: "gg is a recursive grep",
Long: `gg is a recursive grep. Given a regexp (or fixed pattern) it will search for
the pattern recursively in the current working directory. It will print a
coloured header per file along with the matching line and pattern.
It is possible to scan specific files or directories, rather than the default
current working directory. To do this, simply specify the path(s) as arguments
following the pattern.
It is possible to scan for multiple patterns using the -e (or -Q) argument,
which can be repeated multiple times. -e specifies a regular expression and
-Q a fixed pattern. When using either flag, any non-flag arguments are treated
as paths to scan.
Search defaults to case-sensitive but the -i flag may be passed to make all
search terms case-insensitive. Alternatively, the "(?i)" construct may be added
to a regular expression to make that specific expression case insensitive.
2023-05-13 12:30:39 +01:00
Files and directories can be excluded with the -x option. This supports bash-style
globs with '*', '?', '[a-z]', '{this,that}', or '/**/' to match zero or more
directories. By default, .git and vim swap files are ignored. Similarly, -I
filters files to include. Examples:
# ignore files/dirs with .js or .css suffix
gg -x '*.js' -x '*.css' pattern
# only match files with .go suffix (any subdir)
gg -I '*.go' pattern
# only match files whose parent dir is "stuff", but ignore "foo" subdir
gg -x ./foo -I 'stuff/*' pattern
# only match .js files with a directory "things" in the path, but ignore
# .min.js (e.g. will match "foo/things/bar/my.js")
gg -I 'things/**/*.js' -x '*.min.js' pattern
Symlinks named on the command line are followed, but by default symlinks are
not followed when recursing into directories. -L allows them to be
dereferenced.`,
2023-05-11 23:30:43 +01:00
RunE: run,
}
var (
// flags
searchRegexp []string
searchFixed []string
searchPath []string
excludeList []string
includeList []string
ignoreCase bool
noColour bool
binaryFile notPlainTextFlag
minifiedFile notPlainTextFlag
followSymlinks bool
// computed from searchRegexp, searchFixed. Each regexp here will be
// matched against each line of each input file.
regexps []*regexp.Regexp
// formats output
display *Display
2023-05-11 23:30:43 +01:00
)
func init() {
rootCmd.Flags().StringSliceVarP(&searchRegexp, "grep", "e", nil, "pattern to match (regular expression)")
rootCmd.Flags().StringSliceVarP(&searchFixed, "fixed", "Q", nil, "pattern to match (fixed string)")
2023-05-13 12:30:39 +01:00
rootCmd.Flags().StringSliceVarP(&excludeList, "exclude", "x", []string{".git", ".*.swp"}, "files/directories to exclude")
rootCmd.Flags().StringSliceVarP(&includeList, "include", "I", nil, "files/directories to include")
2023-05-13 10:22:58 +01:00
rootCmd.Flags().BoolVarP(&ignoreCase, "ignore-case", "i", false, "make all searches case insensitive")
rootCmd.Flags().BoolVarP(&noColour, "no-colour", "C", false, "disable colour output")
rootCmd.Flags().Var(&binaryFile, "binary", "what to do with binary files")
rootCmd.Flags().Var(&minifiedFile, "minified", "what to do with minified text files")
rootCmd.Flags().BoolVarP(&followSymlinks, "dereference", "L", false, "follow symlinks when recursing")
2023-05-11 23:30:43 +01:00
}
func run(c *cobra.Command, args []string) error {
display = NewDisplay(noColour)
// if no -e or -Q flag is passed, then the first arg is taken to be
// the pattern to match
2023-05-11 23:30:43 +01:00
if len(searchRegexp) == 0 && len(searchFixed) == 0 {
if len(args) == 0 {
return errors.New("no pattern specified")
}
searchRegexp = append(searchRegexp, args[0])
args = args[1:]
}
// remaining arguments are treated as search paths; an empty list is
// taken to mean the CWD
2023-05-11 23:30:43 +01:00
searchPath = args
if len(searchPath) == 0 {
searchPath = append(searchPath, ".")
}
// if we got past argument passing, then returned errors are runtime
// things (like file not found) that shouldn't trigger a usage message.
c.SilenceUsage = true
// for -x and -I, an undecorated pattern is treated as a suffix match
for i, x := range excludeList {
if !strings.HasPrefix(x, "**/") && !strings.HasPrefix(x, "./") {
x = "**/" + x
excludeList[i] = x
}
2023-05-13 12:30:39 +01:00
if !doublestar.ValidatePattern(x) {
return fmt.Errorf("invalid exclude pattern %q", x)
}
2023-05-11 23:30:43 +01:00
}
for i, x := range includeList {
if !strings.HasPrefix(x, "**/") && !strings.HasPrefix(x, "./") {
x = "**/" + x
includeList[i] = x
}
if !doublestar.ValidatePattern(x) {
return fmt.Errorf("invalid include pattern %q", x)
}
}
2023-05-11 23:30:43 +01:00
// compile regular expressions for matching
2023-05-11 23:30:43 +01:00
for _, r := range searchRegexp {
if ignoreCase {
r = "(?i)" + r
}
re, err := regexp.Compile(r)
if err != nil {
return err
}
regexps = append(regexps, re)
}
for _, r := range searchFixed {
r = regexp.QuoteMeta(r)
if ignoreCase {
r = "(?i)" + r
}
re, err := regexp.Compile(r)
if err != nil {
}
regexps = append(regexps, re)
2023-05-11 23:30:43 +01:00
}
// search over named paths
2023-05-11 23:30:43 +01:00
var errs []error
for _, path := range searchPath {
if err := search(path, true); err != nil {
2023-05-11 23:30:43 +01:00
errs = append(errs, err)
}
}
return errors.Join(errs...)
}
func recurse(path string) error {
d, err := os.ReadDir(path)
if err != nil {
return err
}
var errs []error
for _, de := range d {
fullPath := filepath.Join(path, de.Name())
if !shouldSearch(fullPath, de.IsDir()) {
continue
2023-05-11 23:30:43 +01:00
}
if err := search(fullPath, followSymlinks); err != nil {
2023-05-11 23:30:43 +01:00
errs = append(errs, err)
}
}
return errors.Join(errs...)
}
// shouldSearch matches the full path of the file against the include and
// exclude lists, returning true if we should consider the file/directory for
// searching and false if not.
//
// Exclusions win: a path matching any exclude glob is rejected. Otherwise an
// empty include list accepts everything. With a non-empty include list a
// path is accepted if it matches an include glob, and a directory is also
// accepted as soon as a recursive ("**/"-prefixed) include glob is reached,
// since files below it may still match.
//
// Errors from doublestar.Match are deliberately ignored: run checks every
// pattern with doublestar.ValidatePattern before any search starts.
func shouldSearch(fullPath string, isDir bool) bool {
	// process the exclude list first
	for _, x := range excludeList {
		if exclude, _ := doublestar.Match(x, fullPath); exclude {
			return false
		}
	}

	// if the include list is empty, everything is included
	if len(includeList) == 0 {
		return true
	}

	for _, x := range includeList {
		// if it's a directory, and we have at least one recursive
		// matcher, then search
		if isDir && strings.HasPrefix(x, "**/") {
			return true
		}
		if include, _ := doublestar.Match(x, fullPath); include {
			return true
		}
	}
	return false
}
func search(path string, deref bool) error {
var (
st os.FileInfo
err error
)
if deref {
st, err = os.Stat(path)
} else {
st, err = os.Lstat(path)
}
2023-05-11 23:30:43 +01:00
if err != nil {
return err
}
switch {
case st.IsDir():
return recurse(path)
case !st.Mode().IsRegular(),
st.Size() == 0:
return nil
}
f, err := os.Open(path)
if err != nil {
return err
}
defer f.Close()
data, err := unix.Mmap(int(f.Fd()), 0, int(st.Size()), unix.PROT_READ, unix.MAP_PRIVATE)
2023-05-11 23:30:43 +01:00
if err != nil {
return err
}
defer unix.Munmap(data)
2023-05-11 23:30:43 +01:00
file(path, data)
2023-05-11 23:30:43 +01:00
return nil
}