package main import ( "context" "encoding/json" "errors" "flag" "fmt" "hash/crc32" "io" "io/ioutil" "net/http" "os" "os/exec" "path/filepath" "regexp" "strconv" "strings" "time" ) type loadTupContent struct { Title string VideoID string Interval float64 Secs []float64 Caps []string Scrsecs []float64 Scrcaps []string } func parse_json_floats(part []byte) ([]float64, error) { secsStrings := make([]string, 0) err := json.Unmarshal(part, &secsStrings) if err != nil { return nil, err } ret := make([]float64, 0, len(secsStrings)) for _, sec := range secsStrings { ff, err := strconv.ParseFloat(sec, 64) if err != nil { return nil, err } ret = append(ret, ff) } return ret, nil } func NewLoadTupContent(content []byte) (*loadTupContent, error) { var err error rx := regexp.MustCompile(`videoIdMain = "([^"]+)";var interval = (\d+);var secs = (.+?);var caps = (.+?);var scrsecs = (.+?);var scrcaps = (.+?);`) parts := rx.FindStringSubmatch(string(content)) if parts == nil { return nil, errors.New("Missing video properties in HTML page") } if len(parts) != 7 { return nil, fmt.Errorf("expected len(parts)=7, got %d", len(parts)) } ltc := loadTupContent{} ltc.VideoID = parts[1] ltc.Interval, err = strconv.ParseFloat(parts[2], 64) if err != nil { return nil, err } ltc.Secs, err = parse_json_floats([]byte(parts[3])) if err != nil { return nil, err } err = json.Unmarshal([]byte(parts[4]), <c.Caps) if err != nil { return nil, err } ltc.Scrsecs, err = parse_json_floats([]byte(parts[5])) if err != nil { return nil, err } err = json.Unmarshal([]byte(parts[6]), <c.Scrcaps) if err != nil { return nil, err } // Parse the page title rx = regexp.MustCompile(`(?ms)

(.+?)

`) parts = rx.FindStringSubmatch(string(content)) if parts == nil { return nil, errors.New("Missing title in HTML page") } if len(parts) != 2 { return nil, fmt.Errorf("expected len(parts)=2, got %d", len(parts)) } ltc.Title = strings.TrimSpace(parts[1]) return <c, nil } func (ltc *loadTupContent) Validate() error { if len(ltc.Secs) != len(ltc.Caps) { return fmt.Errorf("secs/caps length mismatch") } if len(ltc.Scrsecs) != len(ltc.Scrcaps) { return fmt.Errorf("scrsecs/scrcaps length mismatch") } if !(len(ltc.Scrcaps) == 0 || ltc.Scrcaps[0] == "") { return errors.New("unsupported use of strcaps") } if len(ltc.VideoID) == 0 { return errors.New("unexpected blank video ID") } if ltc.Interval != 100.0 { return errors.New("unsupported non-100 duration field") } return nil } func secs_to_srt_time(secs float64) string { dur := time.Duration(secs) * time.Second hh := int64(dur.Hours()) mm := int64(dur.Minutes()) - (hh * 60) ss := int64(dur.Seconds()) - (hh * 3600) - (mm * 60) ms := int64(dur.Milliseconds()) - (hh * 3600000) - (mm * 60000) - (ss * 1000) return fmt.Sprintf("%02d:%02d:%02d,%03d", hh, mm, ss, ms) } func (ltc *loadTupContent) WriteSRT(w io.Writer) error { /* SRT file format (example from Wikipedia): 1 00:02:17,440 --> 00:02:20,375 Senator, we're making our final approach into Coruscant. 2 00:02:20,476 --> 00:02:22,501 Very good, Lieutenant. */ ctr := 1 for i := 0; i < len(ltc.Caps); i += 1 { if ltc.Caps[i] == "" { // Don't show anything continue } start := secs_to_srt_time(ltc.Secs[i]) var end string if i < len(ltc.Caps)-1 { end = secs_to_srt_time(ltc.Secs[i+1]) } else { // The final subtitle. We don't know how long it should be displayed // for since we don't know the entire video's duration // FIXME supply // Assume 3 seconds end = secs_to_srt_time(ltc.Secs[i] + 3) } fmt.Fprintf(w, "%d\n%s --> %s\n%s\n\n", ctr, start, end, ltc.Caps[i]) // We emitted a message, increase the counter ctr += 1 } return nil } type config struct { youtubeDl string mkvmerge string overrideOutput string deleteTemporaries bool } func performDownload(ctx context.Context, cfg *config, targetUrl string) error { // var content []byte var err error if targetUrl == "-" { // Read HTML page from stdin content, err = ioutil.ReadAll(os.Stdin) if err != nil { return err } } else { // Download HTML page from URL resp, err := http.Get(targetUrl) if err != nil { return err } content, err = ioutil.ReadAll(resp.Body) if err != nil { return err } _ = resp.Body.Close() // swallow error } ltc, err := NewLoadTupContent(content) if err != nil { return err } err = ltc.Validate() if err != nil { return err } // Create temporary directory tmpdir, err := ioutil.TempDir("", "loadtup-dl-") if err != nil { return err } if cfg.deleteTemporaries { defer os.RemoveAll(tmpdir) } // Download the video ytdl := exec.CommandContext(ctx, cfg.youtubeDl, `-f`, `bestvideo+bestaudio`, "https://youtu.be/"+ltc.VideoID, `--merge-output-format`, `mkv`, "-o", filepath.Join(tmpdir, "downloaded")) ytdl.Stdout = os.Stdout ytdl.Stderr = os.Stderr err = ytdl.Run() if err != nil { return err } // Determine video's total length // Create the subtitle file (clamped to total length) fh, err := os.OpenFile(filepath.Join(tmpdir, "subtitles.srt"), os.O_CREATE|os.O_WRONLY, 0600) if err != nil { return err } err = ltc.WriteSRT(fh) fh.Close() if err != nil { return err } // Mux the subtitles into the file mkvm := exec.CommandContext(ctx, cfg.mkvmerge, `-o`, filepath.Join(tmpdir, "muxed.mkv"), filepath.Join(tmpdir, "downloaded.mkv"), filepath.Join(tmpdir, "subtitles.srt")) mkvm.Stdout = os.Stdout mkvm.Stderr = os.Stderr err = mkvm.Run() if err != nil { return err } // Determine final filename outputFile := cfg.overrideOutput if outputFile == "" { // Generate the CRC32 and put it into the filename hw := NewCRCwriter(crc32.IEEE, ioutil.Discard) fhm, err := os.OpenFile(filepath.Join(tmpdir, "muxed.mkv"), os.O_RDONLY, 0400) if err != nil { return err } _, err = io.Copy(hw, fhm) fhm.Close() if err != nil { return err } outputFile = fmt.Sprintf(`[Loadtup] %s [%08X].mkv`, ltc.Title, hw.Sum()) } err = os.Rename(filepath.Join(tmpdir, "muxed.mkv"), outputFile) if err != nil { return err } // Done return nil } func usage() { fmt.Fprintln(os.Stderr, `Usage: loadtup-dl [options] [--] URL|- [URL...] Supported URLs take the form 'https://loadtup.com/abcdefghijk'. Use a hyphen to read equivalent loadtup.com HTML content from stdin. Options: --youtube-dl PATH Override path to youtube-dl --mkvmerge PATH Override path to mkvmerge --output PATH Override output filename (only valid for a single URL) --delete-temporary=false Preserve temporary files `) os.Exit(1) } func main() { ctx := context.Background() cfg := config{} flag.StringVar(&cfg.youtubeDl, "youtube-dl", "youtube-dl", "") flag.StringVar(&cfg.mkvmerge, "mkvmerge", "mkvmerge", "") flag.StringVar(&cfg.overrideOutput, "output", "", "") flag.BoolVar(&cfg.deleteTemporaries, "delete-temporary", true, "") flag.Usage = usage flag.Parse() if len(flag.Args()) == 0 { usage() // n.b. calls os.Exit(1) } if len(flag.Args()) > 1 && cfg.overrideOutput != "" { fmt.Fprintln(os.Stderr, "Can't use --output when supplying multiple URLs") os.Exit(1) } for _, targetUrl := range flag.Args() { err := performDownload(ctx, &cfg, targetUrl) if err != nil { fmt.Fprintln(os.Stderr, err.Error()) os.Exit(1) } } }