initial commit

This commit is contained in:
mappu 2021-04-11 19:41:32 +12:00
commit 5163160bf3
4 changed files with 398 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
*.mkv
loadtup-dl

21
README.md Normal file
View File

@ -0,0 +1,21 @@
# loadtup-dl
A tool to download subtitled videos from the website loadtup.com.
It downloads the video using `youtube-dl`, converts loadtup's custom subtitle format to SRT, and muxes the two together using `mkvmerge`. The CRC32 of the resulting file is included in its filename.
## Usage
```
Usage: loadtup-dl [options] [--] URL|- [URL...]
Supported URLs take the form 'https://loadtup.com/abcdefghijk'. Use a hyphen to
read equivalent loadtup.com HTML content from stdin.
Options:
--youtube-dl PATH Override path to youtube-dl
--mkvmerge PATH Override path to mkvmerge
--output PATH Override output filename
(only valid for a single URL)
--delete-temporary=false Preserve temporary files
```

29
crc32writer.go Normal file
View File

@ -0,0 +1,29 @@
package main
import (
"hash"
"hash/crc32"
"io"
)
// @ref https://stackoverflow.com/a/64419012

// NewCRCwriter returns a CRCwriter that forwards every write to w while
// maintaining a running CRC-32 checksum, using the polynomial poly, of the
// bytes that w accepted.
func NewCRCwriter(poly uint32, w io.Writer) *CRCwriter {
	return &CRCwriter{
		h: crc32.New(crc32.MakeTable(poly)),
		w: w,
	}
}

// CRCwriter is an io.Writer that passes data through to an underlying writer
// and accumulates a CRC-32 checksum of the data written.
type CRCwriter struct {
	h hash.Hash32 // running checksum
	w io.Writer   // destination for the data
}

// Write writes p to the underlying writer and folds the bytes that were
// actually accepted into the checksum. It returns the underlying writer's
// byte count and error unchanged.
func (c *CRCwriter) Write(p []byte) (n int, err error) {
	n, err = c.w.Write(p)
	// Only hash what was really written: on a short write, hashing all of p
	// would make the checksum disagree with the data that reached c.w.
	if n > 0 {
		c.h.Write(p[:n]) // hash.Hash documents that Write never returns an error
	}
	return n, err
}

// Sum returns the CRC-32 checksum of all bytes written so far.
func (c *CRCwriter) Sum() uint32 { return c.h.Sum32() }

344
main.go Normal file
View File

@ -0,0 +1,344 @@
package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"hash/crc32"
"io"
"io/ioutil"
"net/http"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
)
// loadTupContent holds the values extracted from a loadtup HTML page by
// NewLoadTupContent.
type loadTupContent struct {
	// Title is the whitespace-trimmed page heading; VideoID is the value of
	// the page's videoIdMain variable.
	Title, VideoID string

	// Interval is the page's "interval" variable.
	Interval float64

	// Secs and Caps are parallel lists: Secs[i] is the time, in seconds, at
	// which caption Caps[i] starts. An empty caption means "show nothing".
	Secs []float64
	Caps []string

	// Scrsecs and Scrcaps are the page's "scrsecs" and "scrcaps" variables,
	// stored in the same parallel layout.
	Scrsecs []float64
	Scrcaps []string
}
// parse_json_floats decodes a JSON array of strings, each holding a decimal
// number, into a slice of float64 values in the same order. It returns an
// error if part is not a JSON array of strings or if any element is not a
// valid float.
func parse_json_floats(part []byte) ([]float64, error) {
	var raw []string
	if err := json.Unmarshal(part, &raw); err != nil {
		return nil, err
	}

	floats := make([]float64, 0, len(raw))
	for idx := range raw {
		value, err := strconv.ParseFloat(raw[idx], 64)
		if err != nil {
			return nil, err
		}
		floats = append(floats, value)
	}
	return floats, nil
}
// NewLoadTupContent extracts the video ID, interval, caption timing tables
// and title from the HTML of a loadtup page.
//
// The video properties are read from the inline script variables matched by
// the first pattern below; the title is taken from the page's <h2> heading
// and trimmed of surrounding whitespace. An error is returned if either
// pattern does not match or if any captured value fails to parse.
func NewLoadTupContent(content []byte) (*loadTupContent, error) {
	page := string(content)

	// Video properties embedded in the page's inline script.
	propsRx := regexp.MustCompile(`videoIdMain = "([^"]+)";var interval = (\d+);var secs = (.+?);var caps = (.+?);var scrsecs = (.+?);var scrcaps = (.+?);</script>`)
	props := propsRx.FindStringSubmatch(page)
	if props == nil {
		return nil, errors.New("Missing video properties in HTML page")
	}
	if got := len(props); got != 7 {
		return nil, fmt.Errorf("expected len(parts)=7, got %d", got)
	}

	interval, err := strconv.ParseFloat(props[2], 64)
	if err != nil {
		return nil, err
	}

	secs, err := parse_json_floats([]byte(props[3]))
	if err != nil {
		return nil, err
	}

	var caps []string
	if err := json.Unmarshal([]byte(props[4]), &caps); err != nil {
		return nil, err
	}

	scrsecs, err := parse_json_floats([]byte(props[5]))
	if err != nil {
		return nil, err
	}

	var scrcaps []string
	if err := json.Unmarshal([]byte(props[6]), &scrcaps); err != nil {
		return nil, err
	}

	// Page title.
	titleRx := regexp.MustCompile(`(?ms)<h2 style="margin:0 0 0 0;">(.+?)</h2>`)
	heading := titleRx.FindStringSubmatch(page)
	if heading == nil {
		return nil, errors.New("Missing title in HTML page")
	}
	if got := len(heading); got != 2 {
		return nil, fmt.Errorf("expected len(parts)=2, got %d", got)
	}

	return &loadTupContent{
		Title:    strings.TrimSpace(heading[1]),
		VideoID:  props[1],
		Interval: interval,
		Secs:     secs,
		Caps:     caps,
		Scrsecs:  scrsecs,
		Scrcaps:  scrcaps,
	}, nil
}
// Validate reports whether the parsed page content is in a shape this tool
// can convert. It returns the first problem found, or nil if there is none:
// the timing and caption lists must pair up, scrcaps must be absent or start
// with an empty entry, the video ID must be non-empty, and the interval must
// be exactly 100.
func (ltc *loadTupContent) Validate() error {
	switch {
	case len(ltc.Secs) != len(ltc.Caps):
		return fmt.Errorf("secs/caps length mismatch")

	case len(ltc.Scrsecs) != len(ltc.Scrcaps):
		return fmt.Errorf("scrsecs/scrcaps length mismatch")

	case len(ltc.Scrcaps) != 0 && ltc.Scrcaps[0] != "":
		return errors.New("unsupported use of strcaps")

	case len(ltc.VideoID) == 0:
		return errors.New("unexpected blank video ID")

	case ltc.Interval != 100.0:
		return errors.New("unsupported non-100 duration field")
	}
	return nil
}
// secs_to_srt_time formats a time offset given in (possibly fractional)
// seconds as an SRT timestamp of the form "HH:MM:SS,mmm", rounded to the
// nearest millisecond.
//
// Negative or NaN inputs are treated as zero, since SRT has no way to
// express them.
func secs_to_srt_time(secs float64) string {
	if !(secs > 0) { // also catches NaN
		secs = 0
	}

	// Scale to nanoseconds *before* converting to an integer Duration;
	// converting first would silently drop the fractional seconds.
	dur := time.Duration(secs * float64(time.Second)).Round(time.Millisecond)

	totalMs := dur.Milliseconds()
	hh := totalMs / 3600000
	mm := totalMs / 60000 % 60
	ss := totalMs / 1000 % 60
	ms := totalMs % 1000

	return fmt.Sprintf("%02d:%02d:%02d,%03d", hh, mm, ss, ms)
}
// WriteSRT writes the captions to w in SRT format.
//
// Caption i is shown from Secs[i] until Secs[i+1]; empty captions produce no
// entry and do not consume a sequence number. The final caption has no
// following timestamp, so it is shown for a fixed 3 seconds.
//
// It returns an error if there are fewer timestamps than captions, or the
// first error encountered while writing to w.
func (ltc *loadTupContent) WriteSRT(w io.Writer) error {
	/*
		SRT file format (example from Wikipedia):

		1
		00:02:17,440 --> 00:02:20,375
		Senator, we're making
		our final approach into Coruscant.

		2
		00:02:20,476 --> 00:02:22,501
		Very good, Lieutenant.
	*/

	// Guard the Secs[i] / Secs[i+1] lookups below so that a caller that
	// skipped Validate gets an error instead of an index-out-of-range panic.
	if len(ltc.Secs) < len(ltc.Caps) {
		return fmt.Errorf("cannot write SRT: %d captions but only %d timestamps", len(ltc.Caps), len(ltc.Secs))
	}

	ctr := 1
	last := len(ltc.Caps) - 1
	for i, caption := range ltc.Caps {
		if caption == "" {
			// Don't show anything
			continue
		}

		start := secs_to_srt_time(ltc.Secs[i])

		var end string
		if i < last {
			end = secs_to_srt_time(ltc.Secs[i+1])
		} else {
			// The final subtitle. We don't know how long it should be displayed
			// for since we don't know the entire video's duration
			// FIXME supply
			// Assume 3 seconds
			end = secs_to_srt_time(ltc.Secs[i] + 3)
		}

		if _, err := fmt.Fprintf(w, "%d\n%s --> %s\n%s\n\n", ctr, start, end, caption); err != nil {
			return err
		}

		// We emitted a message, increase the counter
		ctr++
	}

	return nil
}
// config carries the command-line settings shared by every download.
type config struct {
	// youtubeDl and mkvmerge are the executables to run (--youtube-dl and
	// --mkvmerge); overrideOutput, when non-empty, is used as the output
	// filename instead of the generated one (--output).
	youtubeDl, mkvmerge, overrideOutput string

	// deleteTemporaries controls whether the temporary working directory is
	// removed once a download finishes (--delete-temporary).
	deleteTemporaries bool
}
// performDownload processes a single loadtup page: it fetches and parses the
// page, downloads the referenced video with youtube-dl, writes the captions
// to an SRT file, muxes both with mkvmerge, and moves the result to its
// final filename.
//
// targetUrl is either the page URL or "-" to read the page HTML from stdin.
// Unless cfg.overrideOutput is set, the output is named
// "[Loadtup] <title> [<CRC32>].mkv" relative to the current directory.
// All intermediate files live in a temporary directory, which is removed on
// return when cfg.deleteTemporaries is true.
func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
	content, err := fetchLoadTupPage(ctx, targetUrl)
	if err != nil {
		return err
	}

	ltc, err := NewLoadTupContent(content)
	if err != nil {
		return err
	}

	err = ltc.Validate()
	if err != nil {
		return err
	}

	// Create temporary directory
	tmpdir, err := ioutil.TempDir("", "loadtup-dl-")
	if err != nil {
		return err
	}
	if cfg.deleteTemporaries {
		defer os.RemoveAll(tmpdir)
	}

	var (
		downloadedPath = filepath.Join(tmpdir, "downloaded.mkv")
		subtitlePath   = filepath.Join(tmpdir, "subtitles.srt")
		muxedPath      = filepath.Join(tmpdir, "muxed.mkv")
	)

	// Download the video. The output template has no extension;
	// downloadedPath is where the merged mkv is expected afterwards.
	ytdl := exec.CommandContext(ctx, cfg.youtubeDl, `-f`, `bestvideo+bestaudio`, "https://youtu.be/"+ltc.VideoID, `--merge-output-format`, `mkv`, "-o", filepath.Join(tmpdir, "downloaded"))
	ytdl.Stdout = os.Stdout
	ytdl.Stderr = os.Stderr
	if err := ytdl.Run(); err != nil {
		return fmt.Errorf("running %s: %w", cfg.youtubeDl, err)
	}

	// Determine video's total length

	// Create the subtitle file (clamped to total length)
	fh, err := os.OpenFile(subtitlePath, os.O_CREATE|os.O_WRONLY, 0600)
	if err != nil {
		return err
	}
	err = ltc.WriteSRT(fh)
	// A failed Close on a file we just wrote can mean lost data, so report
	// it unless WriteSRT already failed.
	if cerr := fh.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		return fmt.Errorf("writing subtitles: %w", err)
	}

	// Mux the subtitles into the file
	mkvm := exec.CommandContext(ctx, cfg.mkvmerge, `-o`, muxedPath, downloadedPath, subtitlePath)
	mkvm.Stdout = os.Stdout
	mkvm.Stderr = os.Stderr
	if err := mkvm.Run(); err != nil {
		return fmt.Errorf("running %s: %w", cfg.mkvmerge, err)
	}

	// Determine final filename
	outputFile := cfg.overrideOutput
	if outputFile == "" {
		// Generate the CRC32 and put it into the filename
		hw := NewCRCwriter(crc32.IEEE, ioutil.Discard)

		fhm, err := os.OpenFile(muxedPath, os.O_RDONLY, 0400)
		if err != nil {
			return err
		}
		_, err = io.Copy(hw, fhm)
		fhm.Close()
		if err != nil {
			return err
		}

		// The title comes from the downloaded page, so strip anything that
		// would turn it into a path or an awkward filename.
		outputFile = fmt.Sprintf(`[Loadtup] %s [%08X].mkv`, sanitizeFilenamePart(ltc.Title), hw.Sum())
	}

	if err := moveFile(muxedPath, outputFile); err != nil {
		return fmt.Errorf("moving result to %q: %w", outputFile, err)
	}

	// Done
	return nil
}

// fetchLoadTupPage returns the HTML of a loadtup page, read from stdin when
// targetUrl is "-" and fetched over HTTP (honouring ctx) otherwise.
func fetchLoadTupPage(ctx context.Context, targetUrl string) ([]byte, error) {
	if targetUrl == "-" {
		// Read HTML page from stdin
		return ioutil.ReadAll(os.Stdin)
	}

	// Download HTML page from URL
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, targetUrl, nil)
	if err != nil {
		return nil, err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close() // close error is irrelevant for a body we only read

	// Fail here with a clear message rather than later with a confusing
	// "missing video properties" error from parsing an error page.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return nil, fmt.Errorf("fetching %s: unexpected HTTP status %s", targetUrl, resp.Status)
	}

	return ioutil.ReadAll(resp.Body)
}

// sanitizeFilenamePart replaces path separators and control characters in s
// with underscores so that s can be embedded in a single filename.
func sanitizeFilenamePart(s string) string {
	return strings.Map(func(r rune) rune {
		if r == '/' || r == os.PathSeparator || r < 0x20 {
			return '_'
		}
		return r
	}, s)
}

// moveFile moves src to dst. It tries a rename first and falls back to
// copy-then-delete, because a rename cannot cross filesystems and the
// temporary directory is often on a different one than the destination.
func moveFile(src, dst string) error {
	if err := os.Rename(src, dst); err == nil {
		return nil
	}

	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, in); err != nil {
		out.Close()
		os.Remove(dst) // don't leave a truncated output file behind
		return err
	}
	if err := out.Close(); err != nil {
		os.Remove(dst)
		return err
	}

	return os.Remove(src)
}
func usage() {
fmt.Fprintln(os.Stderr, `Usage: loadtup-dl [options] [--] URL|- [URL...]
Supported URLs take the form 'https://loadtup.com/abcdefghijk'. Use a hyphen to
read equivalent loadtup.com HTML content from stdin.
Options:
--youtube-dl PATH Override path to youtube-dl
--mkvmerge PATH Override path to mkvmerge
--output PATH Override output filename
(only valid for a single URL)
--delete-temporary=false Preserve temporary files
`)
os.Exit(1)
}
// main parses the command-line flags and downloads each URL given as a
// positional argument, in order. It exits with status 1 on a usage error or
// as soon as any download fails.
func main() {
	var cfg config
	flag.StringVar(&cfg.youtubeDl, "youtube-dl", "youtube-dl", "")
	flag.StringVar(&cfg.mkvmerge, "mkvmerge", "mkvmerge", "")
	flag.StringVar(&cfg.overrideOutput, "output", "", "")
	flag.BoolVar(&cfg.deleteTemporaries, "delete-temporary", true, "")
	flag.Usage = usage
	flag.Parse()

	targets := flag.Args()
	switch {
	case len(targets) == 0:
		usage() // n.b. calls os.Exit(1)

	case len(targets) > 1 && cfg.overrideOutput != "":
		// A single output name cannot be shared by several downloads.
		fmt.Fprintln(os.Stderr, "Can't use --output when supplying multiple URLs")
		os.Exit(1)
	}

	ctx := context.Background()
	for idx := range targets {
		if err := performDownload(ctx, &cfg, targets[idx]); err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
	}
}