From 5163160bf3dd71c483f8e6e8b183796379efe847 Mon Sep 17 00:00:00 2001
From: mappu
Date: Sun, 11 Apr 2021 19:41:32 +1200
Subject: [PATCH] initial commit

---
Review note: crc32writer.go and main.go were revised during review, so the
blob ids on their "index" lines below are stale and must be regenerated.

 .gitignore     |   4 +
 README.md      |  21 +++
 crc32writer.go |  38 +++++
 main.go        | 439 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 502 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 crc32writer.go
 create mode 100644 main.go

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9a0302a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*.mkv
+loadtup-dl
+
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6991df6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,21 @@
+# loadtup-dl
+
+A tool to download subtitled videos from the website loadtup.com.
+
+It downloads videos using `youtube-dl`; parses and converts loadtup's custom subtitle format to srt; and remuxes them together using `mkvmerge`, including the CRC32 in the resulting filename.
+
+## Usage
+
+```
+Usage: loadtup-dl [options] [--] URL|- [URL...]
+
+Supported URLs take the form 'https://loadtup.com/abcdefghijk'. Use a hyphen to
+read equivalent loadtup.com HTML content from stdin.
+
+Options:
+  --youtube-dl PATH         Override path to youtube-dl
+  --mkvmerge PATH           Override path to mkvmerge
+  --output PATH             Override output filename
+                            (only valid for a single URL)
+  --delete-temporary=false  Preserve temporary files
+```
diff --git a/crc32writer.go b/crc32writer.go
new file mode 100644
index 0000000..5ef9c83
--- /dev/null
+++ b/crc32writer.go
@@ -0,0 +1,38 @@
+package main
+
+import (
+	"hash"
+	"hash/crc32"
+	"io"
+)
+
+// @ref https://stackoverflow.com/a/64419012
+
+// NewCRCwriter returns a CRCwriter that forwards every write to w while
+// accumulating a CRC-32 (built from the polynomial poly) of the bytes written.
+func NewCRCwriter(poly uint32, w io.Writer) *CRCwriter {
+	return &CRCwriter{
+		h: crc32.New(crc32.MakeTable(poly)),
+		w: w,
+	}
+}
+
+// CRCwriter is an io.Writer that checksums the data passing through it.
+type CRCwriter struct {
+	h hash.Hash32
+	w io.Writer
+}
+
+// Write passes p to the underlying writer and folds the bytes that writer
+// accepted into the running checksum.
+func (c *CRCwriter) Write(p []byte) (n int, err error) {
+	n, err = c.w.Write(p)
+	// Hash only p[:n] so that a short write cannot make the checksum cover
+	// bytes the underlying writer never accepted. hash.Hash's Write is
+	// documented to never return an error, so its result is not checked.
+	c.h.Write(p[:n])
+	return n, err
+}
+
+// Sum returns the CRC-32 of everything written so far.
+func (c *CRCwriter) Sum() uint32 { return c.h.Sum32() }
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..3c30099
--- /dev/null
+++ b/main.go
@@ -0,0 +1,439 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"flag"
+	"fmt"
+	"hash/crc32"
+	"io"
+	"io/ioutil"
+	"math"
+	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+)
+
+var (
+	// rxVideoProps captures, in order, the video ID, interval, secs, caps,
+	// scrsecs and scrcaps values assigned in the page's inline JavaScript.
+	rxVideoProps = regexp.MustCompile(`videoIdMain = "([^"]+)";var interval = (\d+);var secs = (.+?);var caps = (.+?);var scrsecs = (.+?);var scrcaps = (.+?);`)
+
+	// rxTitle captures the page title.
+	// NOTE(review): the tag names in this pattern were missing from the copy
+	// of the patch under review (its HTML-like text had been stripped);
+	// <title> is assumed from the surrounding comments and error message.
+	// Confirm against the original commit before applying.
+	rxTitle = regexp.MustCompile(`(?ms)<title>(.+?)</title>`)
+)
+
+// loadTupContent holds the properties scraped from a loadtup.com page.
+type loadTupContent struct {
+	Title    string    // page title, surrounding whitespace trimmed
+	VideoID  string    // ID appended to https://youtu.be/ to download the video
+	Interval float64   // value of the page's "interval" variable
+	Secs     []float64 // start time of each caption, in seconds
+	Caps     []string  // caption text; "" means show nothing from that time on
+	Scrsecs  []float64 // parsed from the page; only validated, not rendered
+	Scrcaps  []string  // parsed from the page; only validated, not rendered
+}
+
+// parse_json_floats decodes a JSON array of strings, each holding a decimal
+// number, into a slice of float64.
+func parse_json_floats(part []byte) ([]float64, error) {
+	var secsStrings []string
+	if err := json.Unmarshal(part, &secsStrings); err != nil {
+		return nil, err
+	}
+
+	ret := make([]float64, 0, len(secsStrings))
+	for _, sec := range secsStrings {
+		ff, err := strconv.ParseFloat(sec, 64)
+		if err != nil {
+			return nil, err
+		}
+		ret = append(ret, ff)
+	}
+	return ret, nil
+}
+
+// NewLoadTupContent extracts the video ID, caption timings, caption text and
+// page title from the HTML of a loadtup.com page. It returns an error when
+// any of them is missing or cannot be parsed.
+func NewLoadTupContent(content []byte) (*loadTupContent, error) {
+	page := string(content)
+
+	parts := rxVideoProps.FindStringSubmatch(page)
+	if parts == nil {
+		return nil, errors.New("Missing video properties in HTML page")
+	}
+	if len(parts) != 7 {
+		return nil, fmt.Errorf("expected len(parts)=7, got %d", len(parts))
+	}
+
+	var err error
+	ltc := loadTupContent{VideoID: parts[1]}
+
+	ltc.Interval, err = strconv.ParseFloat(parts[2], 64)
+	if err != nil {
+		return nil, fmt.Errorf("parsing interval: %w", err)
+	}
+
+	ltc.Secs, err = parse_json_floats([]byte(parts[3]))
+	if err != nil {
+		return nil, fmt.Errorf("parsing secs: %w", err)
+	}
+
+	err = json.Unmarshal([]byte(parts[4]), &ltc.Caps)
+	if err != nil {
+		return nil, fmt.Errorf("parsing caps: %w", err)
+	}
+
+	ltc.Scrsecs, err = parse_json_floats([]byte(parts[5]))
+	if err != nil {
+		return nil, fmt.Errorf("parsing scrsecs: %w", err)
+	}
+
+	err = json.Unmarshal([]byte(parts[6]), &ltc.Scrcaps)
+	if err != nil {
+		return nil, fmt.Errorf("parsing scrcaps: %w", err)
+	}
+
+	// Parse the page title
+
+	parts = rxTitle.FindStringSubmatch(page)
+	if parts == nil {
+		return nil, errors.New("Missing title in HTML page")
+	}
+	if len(parts) != 2 {
+		return nil, fmt.Errorf("expected len(parts)=2, got %d", len(parts))
+	}
+
+	ltc.Title = strings.TrimSpace(parts[1])
+
+	return &ltc, nil
+}
+
+// Validate returns an error when the scraped content falls outside what this
+// tool can convert; it returns nil when the content is usable.
+func (ltc *loadTupContent) Validate() error {
+	if len(ltc.Secs) != len(ltc.Caps) {
+		return fmt.Errorf("secs/caps length mismatch")
+	}
+
+	if len(ltc.Scrsecs) != len(ltc.Scrcaps) {
+		return fmt.Errorf("scrsecs/scrcaps length mismatch")
+	}
+
+	if !(len(ltc.Scrcaps) == 0 || ltc.Scrcaps[0] == "") {
+		return errors.New("unsupported use of scrcaps")
+	}
+
+	if len(ltc.VideoID) == 0 {
+		return errors.New("unexpected blank video ID")
+	}
+
+	if ltc.Interval != 100.0 {
+		return errors.New("unsupported non-100 duration field")
+	}
+
+	return nil
+}
+
+// secs_to_srt_time formats an offset in seconds as an SRT timestamp
+// (HH:MM:SS,mmm), rounded to the nearest millisecond. Negative offsets are
+// clamped to zero.
+func secs_to_srt_time(secs float64) string {
+	// Convert via milliseconds: time.Duration(secs) on its own would truncate
+	// the float to whole seconds and lose the fractional part.
+	dur := time.Duration(math.Round(secs*1000)) * time.Millisecond
+	if dur < 0 {
+		dur = 0
+	}
+
+	hh := int64(dur / time.Hour)
+	mm := int64(dur % time.Hour / time.Minute)
+	ss := int64(dur % time.Minute / time.Second)
+	ms := int64(dur % time.Second / time.Millisecond)
+
+	return fmt.Sprintf("%02d:%02d:%02d,%03d", hh, mm, ss, ms)
+}
+
+// WriteSRT writes the captions to w in SubRip (.srt) format. Each caption is
+// shown until the next one starts; blank captions produce no entry.
+func (ltc *loadTupContent) WriteSRT(w io.Writer) error {
+	/*
+
+		SRT file format (example from Wikipedia):
+
+		1
+		00:02:17,440 --> 00:02:20,375
+		Senator, we're making
+		our final approach into Coruscant.
+
+		2
+		00:02:20,476 --> 00:02:22,501
+		Very good, Lieutenant.
+	*/
+
+	// Guard the Secs[i] lookups below for callers that skipped Validate.
+	if len(ltc.Secs) != len(ltc.Caps) {
+		return errors.New("secs/caps length mismatch")
+	}
+
+	ctr := 1
+	for i, caption := range ltc.Caps {
+		if caption == "" {
+			// Don't show anything
+			continue
+		}
+
+		start := secs_to_srt_time(ltc.Secs[i])
+		var end string
+		if i < len(ltc.Caps)-1 {
+			end = secs_to_srt_time(ltc.Secs[i+1])
+		} else {
+			// The final subtitle. We don't know how long it should be displayed
+			// for since we don't know the entire video's duration
+			// FIXME supply
+			// Assume 3 seconds
+			end = secs_to_srt_time(ltc.Secs[i] + 3)
+		}
+
+		_, err := fmt.Fprintf(w, "%d\n%s --> %s\n%s\n\n",
+			ctr, start, end, caption)
+		if err != nil {
+			return err
+		}
+		// We emitted a message, increase the counter
+		ctr += 1
+	}
+
+	return nil
+}
+
+// config holds the command-line options.
+type config struct {
+	youtubeDl         string // path to the youtube-dl executable
+	mkvmerge          string // path to the mkvmerge executable
+	overrideOutput    string // output filename; "" derives one from title and CRC32
+	deleteTemporaries bool   // remove the temporary working directory afterwards
+}
+
+// fetchPage returns the HTML to scrape: everything on stdin when targetUrl is
+// "-", otherwise the body of an HTTP GET of targetUrl.
+func fetchPage(ctx context.Context, targetUrl string) ([]byte, error) {
+	if targetUrl == "-" {
+		// Read HTML page from stdin
+		return ioutil.ReadAll(os.Stdin)
+	}
+
+	// Download HTML page from URL
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, targetUrl, nil)
+	if err != nil {
+		return nil, err
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	// Deferred so the body is released even when reading it fails.
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("fetching %s: unexpected HTTP status %s", targetUrl, resp.Status)
+	}
+
+	return ioutil.ReadAll(resp.Body)
+}
+
+// fileCRC32 returns the IEEE CRC-32 of the contents of the file at path.
+func fileCRC32(path string) (uint32, error) {
+	fh, err := os.Open(path)
+	if err != nil {
+		return 0, err
+	}
+	defer fh.Close()
+
+	hw := NewCRCwriter(crc32.IEEE, ioutil.Discard)
+	if _, err := io.Copy(hw, fh); err != nil {
+		return 0, err
+	}
+	return hw.Sum(), nil
+}
+
+// moveFile moves src to dst. A plain rename fails when the two paths are on
+// different filesystems (for example a temporary directory on its own
+// mount), so on failure it falls back to copying src to dst and removing src.
+func moveFile(src, dst string) error {
+	if err := os.Rename(src, dst); err == nil {
+		return nil
+	}
+
+	in, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	out, err := os.Create(dst)
+	if err != nil {
+		in.Close()
+		return err
+	}
+
+	_, err = io.Copy(out, in)
+	in.Close() // close before removing src; an open file cannot be deleted on every platform
+	if cerr := out.Close(); err == nil {
+		err = cerr
+	}
+	if err != nil {
+		return err
+	}
+
+	return os.Remove(src)
+}
+
+// performDownload fetches the loadtup page at targetUrl (stdin for "-"),
+// downloads the referenced video with youtube-dl, writes the captions as an
+// SRT file, muxes the two with mkvmerge and moves the result to its final
+// filename.
+func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
+	content, err := fetchPage(ctx, targetUrl)
+	if err != nil {
+		return err
+	}
+
+	ltc, err := NewLoadTupContent(content)
+	if err != nil {
+		return err
+	}
+
+	err = ltc.Validate()
+	if err != nil {
+		return err
+	}
+
+	// Create temporary directory
+	tmpdir, err := ioutil.TempDir("", "loadtup-dl-")
+	if err != nil {
+		return err
+	}
+	if cfg.deleteTemporaries {
+		defer os.RemoveAll(tmpdir)
+	}
+
+	downloadedPath := filepath.Join(tmpdir, "downloaded.mkv")
+	subtitlePath := filepath.Join(tmpdir, "subtitles.srt")
+	muxedPath := filepath.Join(tmpdir, "muxed.mkv")
+
+	// Download the video. The mux step below expects youtube-dl to turn the
+	// extension-less -o value into downloaded.mkv.
+	ytdl := exec.CommandContext(ctx, cfg.youtubeDl, `-f`, `bestvideo+bestaudio`, "https://youtu.be/"+ltc.VideoID, `--merge-output-format`, `mkv`, "-o", filepath.Join(tmpdir, "downloaded"))
+	ytdl.Stdout = os.Stdout
+	ytdl.Stderr = os.Stderr
+	err = ytdl.Run()
+	if err != nil {
+		return err
+	}
+
+	// Determine video's total length
+
+	// Create the subtitle file (clamped to total length)
+
+	fh, err := os.OpenFile(subtitlePath, os.O_CREATE|os.O_WRONLY, 0600)
+	if err != nil {
+		return err
+	}
+
+	err = ltc.WriteSRT(fh)
+	// A failed Close can mean the subtitles never reached the disk.
+	if cerr := fh.Close(); err == nil {
+		err = cerr
+	}
+	if err != nil {
+		return err
+	}
+
+	// Mux the subtitles into the file
+
+	mkvm := exec.CommandContext(ctx, cfg.mkvmerge, `-o`, muxedPath, downloadedPath, subtitlePath)
+	mkvm.Stdout = os.Stdout
+	mkvm.Stderr = os.Stderr
+	err = mkvm.Run()
+	if err != nil {
+		return err
+	}
+
+	// Determine final filename
+	outputFile := cfg.overrideOutput
+	if outputFile == "" {
+		// Generate the CRC32 and put it into the filename
+		sum, err := fileCRC32(muxedPath)
+		if err != nil {
+			return err
+		}
+
+		// The title comes from the downloaded page; replace path separators
+		// so it cannot direct the output outside the current directory.
+		safeTitle := strings.NewReplacer("/", "_", `\`, "_").Replace(ltc.Title)
+		outputFile = fmt.Sprintf(`[Loadtup] %s [%08X].mkv`, safeTitle, sum)
+	}
+
+	return moveFile(muxedPath, outputFile)
+}
+
+// usage prints the command-line help to stderr and exits with status 1.
+func usage() {
+	fmt.Fprintln(os.Stderr, `Usage: loadtup-dl [options] [--] URL|- [URL...]
+
+Supported URLs take the form 'https://loadtup.com/abcdefghijk'. Use a hyphen to
+read equivalent loadtup.com HTML content from stdin.
+
+Options:
+  --youtube-dl PATH         Override path to youtube-dl
+  --mkvmerge PATH           Override path to mkvmerge
+  --output PATH             Override output filename
+                            (only valid for a single URL)
+  --delete-temporary=false  Preserve temporary files
+`)
+	os.Exit(1)
+}
+
+// main parses the command line and downloads each URL in turn, exiting
+// with status 1 on the first failure.
+func main() {
+
+	ctx := context.Background()
+
+	cfg := config{}
+
+	flag.StringVar(&cfg.youtubeDl, "youtube-dl", "youtube-dl", "")
+	flag.StringVar(&cfg.mkvmerge, "mkvmerge", "mkvmerge", "")
+	flag.StringVar(&cfg.overrideOutput, "output", "", "")
+	flag.BoolVar(&cfg.deleteTemporaries, "delete-temporary", true, "")
+	flag.Usage = usage
+	flag.Parse()
+
+	if len(flag.Args()) == 0 {
+		usage() // n.b. calls os.Exit(1)
+	}
+
+	if len(flag.Args()) > 1 && cfg.overrideOutput != "" {
+		fmt.Fprintln(os.Stderr, "Can't use --output when supplying multiple URLs")
+		os.Exit(1)
+	}
+
+	for _, targetUrl := range flag.Args() {
+		err := performDownload(ctx, &cfg, targetUrl)
+		if err != nil {
+			fmt.Fprintln(os.Stderr, err.Error())
+			os.Exit(1)
+		}
+	}
+
+}