Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Golang Make MIMETYPE File First File in Zip

Tags:

mime-types

zip

go

I am working on making some modifications to an epub and then rezipping it. I am currently doing this in Golang. The crux of the logic is housed in this repo here. However most of the issue resides in the following logic not setting/copying the mimetype properly:

package filehandler

import (
    "archive/zip"
    "context"
    "fmt"
    "io"
    "io/fs"
    "os"
    "path/filepath"
    "strings"

    "github.com/pjkaufman/go-go-gadgets/pkg/logger"
    "golang.org/x/sync/errgroup"
)

const (
    tempZip = "compress.zip"
    // have to use these or similar permissions to avoid permission denied errors in some cases
    folderPerms fs.FileMode = 0755
    numWorkers  int         = 5
)

// UnzipRunOperationAndRezip starts by deleting the destination directory if it exists,
// then it goes ahead an unzips the contents into the destination directory
// once that is done it runs the operation func on the destination folder
// lastly it rezips the folder back to compress.zip
func UnzipRunOperationAndRezip(src, dest string, operation func()) {
    var err error
    if FolderExists(dest) {
        err = os.RemoveAll(dest)

        if err != nil {
            logger.WriteError(fmt.Sprintf("failed to delete the destination directory %q: %s", dest, err))
        }
    }

    err = Unzip(src, dest)
    if err != nil {
        logger.WriteError(fmt.Sprintf("failed to unzip %q: %s", src, err))
    }

    operation()

    err = Rezip(dest, tempZip)
    if err != nil {
        logger.WriteError(fmt.Sprintf("failed to rezip content for source %q: %s", src, err))
    }

    err = os.RemoveAll(dest)
    if err != nil {
        logger.WriteError(fmt.Sprintf("failed to cleanup the destination directory %q: %s", dest, err))
    }

    MustRename(src, src+".original")
    MustRename(tempZip, src)
}

// Unzip is based on https://stackoverflow.com/a/24792688
func Unzip(src, dest string) error {
    r, err := zip.OpenReader(src)
    if err != nil {
        return err
    }
    defer func() {
        if err := r.Close(); err != nil {
            panic(err)
        }
    }()

    err = os.MkdirAll(dest, folderPerms)
    if err != nil {
        return err
    }

    var files = make(chan *zip.File, len(r.File))
    g, ctx := errgroup.WithContext(context.Background())
    for i := 0; i < numWorkers; i++ {
        g.Go(func() error {
            for {
                select {
                case file, ok := <-files:
                    if ok {
                        wErr := extractAndWriteFile(dest, file)

                        if wErr != nil {
                            return wErr
                        }
                    } else {
                        return nil
                    }
                case <-ctx.Done():
                    return ctx.Err()
                }
            }
        })
    }

    for _, f := range r.File {
        files <- f
    }

    close(files)

    return g.Wait()
}

func extractAndWriteFile(dest string, f *zip.File) error {
    rc, err := f.Open()
    if err != nil {
        return err
    }
    defer func() {
        if err := rc.Close(); err != nil {
            panic(err)
        }
    }()

    path := filepath.Join(dest, f.Name)

    // Check for ZipSlip (Directory traversal)
    if !strings.HasPrefix(path, filepath.Clean(dest)+string(os.PathSeparator)) {
        return fmt.Errorf("illegal file path: %s", path)
    }

    if f.FileInfo().IsDir() {
        err = os.MkdirAll(path, folderPerms)

        if err != nil {
            return err
        }
    } else {
        err = os.MkdirAll(filepath.Dir(path), folderPerms)
        if err != nil {
            return err
        }

        f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
        if err != nil {
            return err
        }
        defer func() {
            if err := f.Close(); err != nil {
                panic(err)
            }
        }()

        _, err = io.Copy(f, rc)
        if err != nil {
            return err
        }
    }

    return nil
}

// Rezip is based on https://stackoverflow.com/a/63233911
func Rezip(src, dest string) error {
    file, err := os.Create(dest)
    if err != nil {
        return err
    }

    defer file.Close()

    w := zip.NewWriter(file)
    defer w.Close()

    walker := func(path string, info os.FileInfo, err error) error {
        if err != nil {
            return err
        }

        // skip empty directories
        if info.IsDir() {
            return nil
        }

        file, err := os.Open(path)
        if err != nil {
            return err
        }
        defer file.Close()

        // need a zip relative path to avoid creating extra directories inside of the zip
        var zipRelativePath = strings.Replace(path, src+string(os.PathSeparator), "", 1)
        f, err := w.Create(zipRelativePath)
        if err != nil {
            return err
        }

        _, err = io.Copy(f, file)
        if err != nil {
            return err
        }

        return nil
    }
    err = filepath.Walk(src, walker)
    if err != nil {
        return err
    }

    return nil
}

The current problem that I am facing is that when I do this and then run the epubchecker validator on the file I am getting the following message:

ERROR(PKG-006): ./filename.epub(-1,-1): Mimetype file entry is missing or is not the first file in the archive.
Validating using EPUB version 2.0.1 rules.

Check finished with errors
Messages: 0 fatals / 1 error / 0 warnings / 0 infos

EPUBCheck completed

I am not really sure what I need to do to expressly set the mimetype when I create the zip. I did at one time try adding the following logic to the rezip logic just to test if it would properly set the mimetype, but I got an error saying the zip file header was corrupted:

// Rezip is based on https://stackoverflow.com/a/63233911
func Rezip(src, dest, mimetype string) error {
    file, err := os.Create(dest)
    if err != nil {
        return err
    }

    defer file.Close()

    var mtype []byte
    mtype = []byte("application/epub+zip")
    err = binary.Write(file, binary.LittleEndian, mtype)
    if err != nil {
        return err
    }
...
}

When setting the mimetype failed, I tried copying over the metainf data first and skipping that data in the walking logic, but got the same validation error as prior:

w := zip.NewWriter(file)
    defer w.Close()

    var mimetypePath = src + string(os.PathSeparator) + "META-INF/container.xml"
    err = writeToZip(w, src, mimetypePath)
    if err != nil {
        return err
    }

    walker := func(path string, info os.FileInfo, err error) error {
        if err != nil {
            return err
        }

        // skip empty directories
        if info.IsDir() {
            return nil
        }

        if mimetypePath == path {
            return nil
        }

        err = writeToZip(w, src, path)
        if err != nil {
            return err
        }

        return nil
    }
    err = filepath.Walk(src, walker)
    if err != nil {
        return err
    }

    return nil
}

func writeToZip(w *zip.Writer, src, path string) error {
    file, err := os.Open(path)
    if err != nil {
        return err
    }
    defer file.Close()

    // need a zip relative path to avoid creating extra directories inside of the zip
    var zipRelativePath = strings.Replace(path, src+string(os.PathSeparator), "", 1)
    f, err := w.Create(zipRelativePath)
    if err != nil {
        return err
    }

    _, err = io.Copy(f, file)
    if err != nil {
        return err
    }

    return nil
}

I did see the following two questions that are similar, but they do not answer my question:

  • Haskell zip package: How to add mimetype file as first file of the archive
    • Implies the issue may be with capitalization or with the container.xml being compressed
  • Make MIMETYPE file the first file in an EPUB ZIP file?
    • Shows some commands to run via the CLI, but does not really help understanding how to fix this in pure Golang

Any ideas as to what I am doing wrong when trying to set/copy the mimetype of the zip file when I create it? Thanks for the help!

like image 469
Peter Kaufman Avatar asked Feb 02 '26 07:02

Peter Kaufman


1 Answers

Ok, so I think I figured it out. I am no longer getting the error when I run the program. Essentially here is what I did to fix the issue:

// start zip file copy with uncompressed mimetype
var mimetypePath = src + string(os.PathSeparator) + "mimetype"
    err = copyMimetypeToZip(w, src, mimetypePath)
    if err != nil {
        return err
    }

    walker := func(path string, info os.FileInfo, err error) error {
        if err != nil {
            return err
        }

        // skip empty directories
        if info.IsDir() {
            return nil
        }

        // skip the mimetype when it comes to adding compressed files
        if mimetypePath == path {
            return nil
        }

        err = writeToZip(w, src, path)
        if err != nil {
            return err
        }

        return nil
    }
...
func copyMimetypeToZip(w *zip.Writer, src, path string) error {
    file, err := os.Open(path)
    if err != nil {
        return err
    }
    defer file.Close()

    // need a zip relative path to avoid creating extra directories inside of the zip
    var zipRelativePath = strings.Replace(path, src+string(os.PathSeparator), "", 1)
    f, err := w.CreateHeader(&zip.FileHeader{
        Name:   strings.ReplaceAll(zipRelativePath, string(os.PathSeparator), "/"),
        Method: zip.Store,
    })
    if err != nil {
        return err
    }

    _, err = io.Copy(f, file)
    if err != nil {
        return err
    }

    return nil
}

The issue seems to have been that I was assuming that the mimetype was stored in META-INF/container.xml. This is kind of true, but the error does not refer to this file and as such leaving it uncompressed does not help at all since a file called mimetype needs to be uncompressed at the root of the zip.

Note that I found this question on StackOverflow that explains how to create an uncompressed file in a zip file, but it did not initially solve my problem due to a misconception about which file to leave uncompressed.

like image 95
Peter Kaufman Avatar answered Feb 04 '26 02:02

Peter Kaufman



Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!