diff --git a/README.md b/README.md index 473ba0c..a2c66c6 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # HTTP resource pack server A common scenario is that you have a set of static resources that you want to -server up quickly via HTTP (for example: stylesheets, WASM). +serve up quickly via HTTP (for example: stylesheets, WASM). This package provides a `net/http`-compatible `http.Handler` to do so, with support for: @@ -20,4 +20,3 @@ The workflow is as follows: Only the minimal header processing necessary for correctness (Content-Length, etc.) is carried out by `htpack.Handler`; the handler can be combined with middleware for further processing (adding headers, `http.StripPrefix`, etc.). - diff --git a/cmd/htpacker/inspector.go b/cmd/htpacker/inspector.go new file mode 100644 index 0000000..c89130f --- /dev/null +++ b/cmd/htpacker/inspector.go @@ -0,0 +1,60 @@ +package main + +import ( + "fmt" + "os" + + "github.com/lwithers/htpack/internal/packed" +) + +// Inspect a packfile. +// TODO: verify etag; verify integrity of compressed data. +// TODO: skip Gzip/Brotli if not present; print ratio. 
+func Inspect(filename string) error {
+	f, err := os.Open(filename)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	// Load may hand back a header and a (possibly partial) directory
+	// together with an error, so print whatever was decoded before
+	// returning that error to the caller.
+	hdr, dir, err := packed.Load(f)
+	if hdr != nil {
+		fmt.Printf("Header: %#v\n", hdr)
+	}
+	if dir != nil {
+		fmt.Printf("%d files:\n", len(dir.Files))
+		for path, info := range dir.Files {
+			fmt.Printf(" • %s\n"+
+				" · Etag: %s\n"+
+				" · Content type: %s\n"+
+				" · Uncompressed: %s\n"+
+				" · Gzipped: %s\n"+
+				" · Brotli: %s\n",
+				path, info.Etag, info.ContentType,
+				printFileData(info.Uncompressed),
+				printFileData(info.Gzip),
+				printFileData(info.Brotli),
+			)
+		}
+	}
+	return err
+}
+
+// printFileData formats the size and offset of one stored representation of
+// a file. A nil data (representation not stored in the pack, or missing from
+// a directory that failed validation) is reported as "not present" instead of
+// being dereferenced.
+func printFileData(data *packed.FileData) string {
+	if data == nil {
+		return "not present"
+	}
+	return fmt.Sprintf("%s (offset %d)", printSize(data.Length), data.Offset)
+}
+
+// printSize renders a byte count using binary units (bytes, KiB, MiB, GiB).
+// Within each unit, values below 32 are shown with two decimal places and
+// larger values with one.
+func printSize(size uint64) string {
+	switch {
+	case size < 1<<10:
+		return fmt.Sprintf("%d bytes", size)
+	case size < 1<<15:
+		return fmt.Sprintf("%.2f KiB", float64(size)/(1<<10))
+	case size < 1<<20:
+		return fmt.Sprintf("%.1f KiB", float64(size)/(1<<10))
+	case size < 1<<25:
+		return fmt.Sprintf("%.2f MiB", float64(size)/(1<<20))
+	case size < 1<<30:
+		return fmt.Sprintf("%.1f MiB", float64(size)/(1<<20))
+	case size < 1<<35:
+		return fmt.Sprintf("%.2f GiB", float64(size)/(1<<30))
+	default:
+		return fmt.Sprintf("%.1f GiB", float64(size)/(1<<30))
+	}
+}
diff --git a/cmd/htpacker/packer.go b/cmd/htpacker/packer.go
index 5a6ae74..213f938 100644
--- a/cmd/htpacker/packer.go
+++ b/cmd/htpacker/packer.go
@@ -37,6 +37,7 @@ type packInfo struct {
 	offset, len uint64
 }
 
+// Pack a file.
func Pack(filesToPack FilesToPack, outputFilename string) error { finalFname, outputFile, err := writefile.New(outputFilename) if err != nil { @@ -47,8 +48,8 @@ func Pack(filesToPack FilesToPack, outputFilename string) error { // write initial header (will rewrite offset/length when known) hdr := &packed.Header{ - Magic: 123, - Version: 1, + Magic: packed.Magic, + Version: packed.VersionInitial, DirectoryOffset: 1, DirectoryLength: 1, } @@ -117,7 +118,6 @@ func packOne(packer *packWriter, fileToPack FileToPack) (info packed.File, err e } defer unix.Munmap(data) - // TODO: content-type, etag info.Etag = etag(data) info.ContentType = fileToPack.ContentType if info.ContentType == "" { @@ -283,6 +283,8 @@ func (pw *packWriter) CopyFrom(in *os.File) (uint64, error) { return 0, pw.err } + fmt.Fprintf(os.Stderr, "[DEBUG] in size=%d\n", fi.Size()) + var off int64 remain := fi.Size() for remain > 0 { @@ -294,8 +296,9 @@ func (pw *packWriter) CopyFrom(in *os.File) (uint64, error) { } amt, err = unix.Sendfile(int(pw.f.Fd()), int(in.Fd()), &off, amt) + fmt.Fprintf(os.Stderr, "[DEBUG] sendfile=%d [off now %d]\n", amt, off) remain -= int64(amt) - off += int64(amt) + //off += int64(amt) if err != nil { pw.err = err return uint64(off), pw.err diff --git a/cmd/htpacker/quick_pack.go b/cmd/htpacker/quick_pack.go index 7d33323..f851e15 100644 --- a/cmd/htpacker/quick_pack.go +++ b/cmd/htpacker/quick_pack.go @@ -9,13 +9,14 @@ import ( ) func main() { - if err := run(); err != nil { + //if err := dopack(); err != nil { + if err := Inspect("out.htpack"); err != nil { fmt.Fprintln(os.Stderr, err) os.Exit(1) } } -func run() error { +func dopack() error { raw, err := ioutil.ReadFile("in.yaml") if err != nil { return err diff --git a/internal/packed/load.go b/internal/packed/load.go new file mode 100644 index 0000000..10609e5 --- /dev/null +++ b/internal/packed/load.go @@ -0,0 +1,304 @@ +package packed + +import ( + fmt "fmt" + "os" + "path" + "strings" +) + +const ( + // Magic number to 
identify .htpack files.
+	Magic = 0xb6e61a4b415ed33b
+
+	// VersionInitial is the version number used by the initial packed
+	// format. Loading a file with a higher version number will cause an
+	// error to be returned.
+	VersionInitial = 1
+)
+
+// Load reads the header and then the directory of a ready-packed file. If
+// the header cannot be loaded, only the error is returned. If the directory
+// fails to load or validate, the header and whatever directory was obtained
+// are returned together with the error.
+func Load(f *os.File) (*Header, *Directory, error) {
+	hdr, hdrErr := loadHeader(f)
+	if hdrErr != nil {
+		return nil, nil, hdrErr
+	}
+
+	dir, dirErr := loadDirectory(f, hdr)
+
+	// Directory errors are built without header details; fill them in so
+	// the caller sees the magic and version that were read.
+	if loadErr, isLoadErr := dirErr.(*LoadError); isLoadErr {
+		loadErr.Magic, loadErr.Version = hdr.Magic, hdr.Version
+	}
+	return hdr, dir, dirErr // dir may be partial when dirErr is set
+}
+
+// loadHeader reads the first 36 bytes of the file and decodes them as a
+// Header, then checks the magic number and version number against Magic and
+// VersionInitial. Every failure is reported as a *LoadError.
+func loadHeader(f *os.File) (*Header, error) {
+	buf := make([]byte, 36)
+	_, readErr := f.ReadAt(buf, 0)
+	if readErr != nil {
+		return nil, &LoadError{Cause: IOError, Underlying: readErr}
+	}
+
+	hdr := new(Header)
+	decodeErr := hdr.Unmarshal(buf)
+	if decodeErr != nil {
+		return nil, &LoadError{Cause: HeaderUnmarshalError, Underlying: decodeErr}
+	}
+
+	// reject builds the error for a header that decoded but is unusable,
+	// recording the magic and version that were found.
+	reject := func(cause ErrorCause) (*Header, error) {
+		return nil, &LoadError{
+			Cause:   cause,
+			Magic:   hdr.Magic,
+			Version: hdr.Version,
+		}
+	}
+
+	if hdr.Magic != Magic {
+		return reject(MagicMismatch)
+	}
+	if hdr.Version < VersionInitial {
+		return reject(VersionTooOld)
+	}
+	if hdr.Version > VersionInitial {
+		return reject(VersionTooNew)
+	}
+	return hdr, nil
+}
+
+// loadDirectory reads the directory from a file. The directory is checked
+// for consistency (offsets, filenames) but not integrity (file data is not
+// read/checksummed).
+func loadDirectory(f *os.File, hdr *Header) (*Directory, error) {
+	fi, err := f.Stat()
+	if err != nil {
+		return nil, &LoadError{
+			Cause:      IOError,
+			Underlying: err,
+		}
+	}
+	fileSize := uint64(fi.Size())
+
+	// Compare without adding offset and length: both values come from the
+	// file, and their sum could wrap around uint64 and slip past the check.
+	if hdr.DirectoryOffset > fileSize ||
+		hdr.DirectoryLength > fileSize-hdr.DirectoryOffset {
+		return nil, &LoadError{
+			Cause: BadOffsetError,
+		}
+	}
+
+	raw := make([]byte, hdr.DirectoryLength)
+	if _, err := f.ReadAt(raw, int64(hdr.DirectoryOffset)); err != nil {
+		return nil, &LoadError{
+			Cause:      IOError,
+			Underlying: err,
+		}
+	}
+
+	dir := new(Directory)
+	if err := dir.Unmarshal(raw); err != nil {
+		return nil, &LoadError{
+			Cause:      DirectoryUnmarshalError,
+			Underlying: err,
+		}
+	}
+
+	// dir is returned even when validation fails, so that callers can
+	// still look at what was decoded.
+	return dir, checkDirectory(dir, fileSize)
+}
+
+// checkDirectory verifies the consistency of the htpack file (offsets,
+// filenames). It does not verify integrity (checksums). The first problem
+// found is returned as a *LoadError naming the offending path.
+func checkDirectory(dir *Directory, fileSize uint64) error {
+	// dir.Files is a map, so every path is already unique; only the form
+	// of each path needs checking.
+	for filename, info := range dir.Files {
+		var err error
+
+		// validate filename (absolute, canonical)
+		if !path.IsAbs(filename) {
+			err = fmt.Errorf("relative path %q", filename)
+		}
+		if path.Clean(filename) != filename {
+			err = fmt.Errorf("non-canonical path %q", filename)
+		}
+		if err != nil {
+			return &LoadError{
+				Cause:      InvalidPath,
+				Underlying: err,
+				Path:       filename,
+			}
+		}
+
+		// ensure uncompressed data is present
+		if info.Uncompressed == nil {
+			return &LoadError{
+				Cause: MissingUncompressed,
+				Path:  filename,
+			}
+		}
+
+		// validate offsets; checkOffset stores the first failure in err as
+		// a *LoadError that already names the path, so return it as-is
+		checkOffset(&err, filename, info.Uncompressed, fileSize)
+		checkOffset(&err, filename, info.Gzip, fileSize)
+		checkOffset(&err, filename, info.Brotli, fileSize)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// checkOffset verifies that data, if present, lies entirely within a file of
+// fileSize bytes. It does nothing if *perr is already set or data is nil;
+// otherwise a failure is stored in *perr as a BadOffsetError for filename.
+func checkOffset(perr *error, filename string, data *FileData, fileSize uint64) {
+	if *perr != nil || data == nil {
+		return
+	}
+	// Written without data.Offset+data.Length so that a corrupt entry
+	// cannot wrap around uint64 and appear to be in range.
+	if data.Offset > fileSize || data.Length > fileSize-data.Offset {
+		*perr = &LoadError{
+			Cause: BadOffsetError,
+			Path:  filename,
+		}
+	}
+}
+
+// LoadError reports a problem interpreting the header or directory of a
+// pack file.
+type LoadError struct {
+	// Cause of the error.
+	Cause ErrorCause
+
+	// Underlying may be set if there is some more information about the
+	// error (e.g. I/O error).
+	Underlying error
+
+	// Magic as read from the file.
+	Magic uint64
+
+	// Version as read from the file.
+	Version uint64
+
+	// Path of an individual file within the pack, if relevant.
+	Path string
+}
+
+// ErrorCause enumerates the possible reasons for failure.
+type ErrorCause int
+
+const (
+	// HeaderUnmarshalError means we could not unmarshal the protobuf object
+	// at the head of the file.
+	HeaderUnmarshalError ErrorCause = iota
+
+	// DirectoryUnmarshalError means we could not unmarshal the protobuf
+	// object holding the directory contents.
+	DirectoryUnmarshalError
+
+	// IOError indicates we could not read a protobuf object.
+	IOError
+
+	// MagicMismatch occurs if the recorded magic number does not match
+	// the well-known constant Magic.
+	MagicMismatch
+
+	// VersionTooNew indicates that the file has a version number ahead of
+	// what this package can parse.
+	VersionTooNew
+
+	// VersionTooOld indicates that the file has a version number older than
+	// this package can parse.
+	VersionTooOld
+
+	// BadOffsetError means that the header or directory has indicated a
+	// position that lies outside of the file.
+	BadOffsetError
+
+	// InvalidPath is returned for a duplicate or otherwise invalid path.
+	// Underlying is set to a free-form string error describing the path.
+	InvalidPath
+
+	// MissingUncompressed indicates that a file in the pack does not have
+	// an uncompressed version present, which is mandatory.
+	MissingUncompressed
+)
+
+// Desc returns a description of the error cause.
+func (le *LoadError) Desc() string {
+	switch le.Cause {
+	case HeaderUnmarshalError:
+		return "not a .htpack file (header not valid packed.Header)"
+	case DirectoryUnmarshalError:
+		return "file corrupt (directory not valid packed.Directory)"
+	case IOError:
+		return "error reading from file"
+	case MagicMismatch:
+		return "magic number does not match"
+	case VersionTooNew:
+		return "version too new"
+	case VersionTooOld:
+		return "version too old"
+	case BadOffsetError:
+		return "file corrupt/truncated (offset past end of file)"
+	case InvalidPath:
+		return "filename invalid"
+	case MissingUncompressed:
+		return "missing uncompressed version"
+	default:
+		return "unknown error"
+	}
+}
+
+// Error returns a concise description of the error: the text from Desc,
+// followed by the underlying error in square brackets when one is set, and
+// then the magic number, version or path where the cause makes them relevant.
+func (le *LoadError) Error() string {
+	var b strings.Builder
+
+	b.WriteString(le.Desc())
+
+	var magic, version, path bool
+	switch le.Cause {
+	case MagicMismatch:
+		magic = true
+	case VersionTooNew, VersionTooOld:
+		version = true
+	case BadOffsetError:
+		// the header-level offset check has no path to report
+		path = le.Path != ""
+	case InvalidPath, MissingUncompressed:
+		path = true
+	}
+
+	// Underlying is optional, so guard against nil rather than keying on
+	// the cause; this also surfaces the detail attached to directory
+	// unmarshal and invalid-path errors.
+	if le.Underlying != nil {
+		b.WriteString(" [")
+		b.WriteString(le.Underlying.Error())
+		b.WriteString("]")
+	}
+	if magic {
+		fmt.Fprintf(&b, " (found magic 0x%X, expected 0x%X)",
+			le.Magic, uint64(Magic))
+	}
+	if version {
+		fmt.Fprintf(&b, " (found version %d; oldest supported: "+
+			"%d, newest: %d)",
+			le.Version, VersionInitial, VersionInitial)
+	}
+	if path {
+		fmt.Fprintf(&b, " (path %q)", le.Path)
+	}
+
+	return b.String()
+}