Compare commits

...

13 Commits

6 changed files with 309 additions and 173 deletions

View File

@ -4,6 +4,15 @@ A tool to download subtitled videos from the website loadtup.com.
It downloads videos using `youtube-dl`; parses and converts loadtup's custom subtitle format to srt; and remuxes them together using `mkvmerge`, including the CRC32 in the resulting filename.
## Installation
```
git clone https://git.ivysaur.me/code.ivysaur.me/loadtup-dl.git
cd loadtup-dl
go build
sudo cp ./loadtup-dl /usr/local/bin/loadtup-dl
```
## Usage
```
@ -15,7 +24,22 @@ read equivalent loadtup.com HTML content from stdin.
Options:
--youtube-dl PATH Override path to youtube-dl
--mkvmerge PATH Override path to mkvmerge
--mediainfo PATH Override path to mediainfo
--output PATH Override output filename
(only valid for a single URL)
--delete-temporary=false Preserve temporary files
--loglevel 0|1|2 Set verbosity (0=silent, 1=normal, 2=verbose)
```
## Changelog
v1.1.0 (2020-04-12)
- Feature: Support translator notes (`scrcaps`)
- Feature: Set stream language and title for generated mkv file
- Enhancement: Add custom logging levels
- Fix invalid characters appearing in generated filenames
- Fix misdetection of translator note usage
- Fix duration of final subtitle entry (adds dependency on `mediainfo`)
v1.0.0 (2020-04-11)
- Initial public release

BIN
doc/example.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module code.ivysaur.me/loadtup-dl
go 1.15

233
main.go
View File

@ -2,8 +2,6 @@ package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"hash/crc32"
@ -13,186 +11,36 @@ import (
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"time"
)
// loadTupContent holds the values that NewLoadTupContent extracts from a
// loadtup.com HTML page.
type loadTupContent struct {
// Title is the whitespace-trimmed text of the page's <h2> heading.
Title string
// VideoID is the value of the page's `videoIdMain` variable; it is appended
// to "https://youtu.be/" to form the download URL.
VideoID string
// Interval is the page's `interval` variable. Validate only accepts 100.
Interval float64
// Secs are the caption timestamps (in seconds) from the page's `secs`
// variable; Secs[i] is the start time of Caps[i].
Secs []float64
// Caps are the caption texts from the page's `caps` variable. Empty
// entries are not rendered.
Caps []string
// Scrsecs are the timestamps from the page's `scrsecs` variable, parallel
// to Scrcaps.
Scrsecs []float64
// Scrcaps are the texts from the page's `scrcaps` variable (described as
// translator notes in the project changelog).
Scrcaps []string
}
// parse_json_floats decodes a JSON array of strings (for example
// `["0", "1.5"]`) and converts every element to a float64.
//
// It returns an error if part is not a JSON array of strings, or if any
// element cannot be parsed as a 64-bit float.
func parse_json_floats(part []byte) ([]float64, error) {
	var raw []string
	if err := json.Unmarshal(part, &raw); err != nil {
		return nil, err
	}

	// The result always has exactly one slot per decoded string.
	values := make([]float64, len(raw))
	for idx := range raw {
		parsed, err := strconv.ParseFloat(raw[idx], 64)
		if err != nil {
			return nil, err
		}
		values[idx] = parsed
	}
	return values, nil
}
// NewLoadTupContent scrapes the video properties and the page title out of
// the supplied loadtup.com HTML.
//
// The properties are read from the inline script that assigns videoIdMain,
// interval, secs, caps, scrsecs and scrcaps; the title is read from the
// page's <h2> heading. An error is returned if either part cannot be found
// or if any of the values fails to parse.
func NewLoadTupContent(content []byte) (*loadTupContent, error) {
	page := string(content)

	propsRx := regexp.MustCompile(`videoIdMain = "([^"]+)";var interval = (\d+);var secs = (.+?);var caps = (.+?);var scrsecs = (.+?);var scrcaps = (.+?);</script>`)
	props := propsRx.FindStringSubmatch(page)
	switch {
	case props == nil:
		return nil, errors.New("Missing video properties in HTML page")
	case len(props) != 7:
		return nil, fmt.Errorf("expected len(parts)=7, got %d", len(props))
	}

	// Decode each captured property in the order it appears on the page.
	interval, err := strconv.ParseFloat(props[2], 64)
	if err != nil {
		return nil, err
	}

	secs, err := parse_json_floats([]byte(props[3]))
	if err != nil {
		return nil, err
	}

	var caps []string
	if err := json.Unmarshal([]byte(props[4]), &caps); err != nil {
		return nil, err
	}

	scrsecs, err := parse_json_floats([]byte(props[5]))
	if err != nil {
		return nil, err
	}

	var scrcaps []string
	if err := json.Unmarshal([]byte(props[6]), &scrcaps); err != nil {
		return nil, err
	}

	// Parse the page title
	titleRx := regexp.MustCompile(`(?ms)<h2 style="margin:0 0 0 0;">(.+?)</h2>`)
	heading := titleRx.FindStringSubmatch(page)
	switch {
	case heading == nil:
		return nil, errors.New("Missing title in HTML page")
	case len(heading) != 2:
		return nil, fmt.Errorf("expected len(parts)=2, got %d", len(heading))
	}

	return &loadTupContent{
		Title:    strings.TrimSpace(heading[1]),
		VideoID:  props[1],
		Interval: interval,
		Secs:     secs,
		Caps:     caps,
		Scrsecs:  scrsecs,
		Scrcaps:  scrcaps,
	}, nil
}
// Validate reports whether the scraped content can be converted.
//
// It returns an error when the timestamp and text slices differ in length,
// when the page uses scrcaps (not supported by the SRT writer), when the
// video ID is blank, or when the interval is anything other than 100.
func (ltc *loadTupContent) Validate() error {
	if len(ltc.Secs) != len(ltc.Caps) {
		return fmt.Errorf("secs/caps length mismatch")
	}

	if len(ltc.Scrsecs) != len(ltc.Scrcaps) {
		return fmt.Errorf("scrsecs/scrcaps length mismatch")
	}

	// Any non-empty entry means scrcaps are in use. Checking only the first
	// entry misses pages whose list starts with an empty placeholder.
	for _, note := range ltc.Scrcaps {
		if note != "" {
			return errors.New("unsupported use of scrcaps")
		}
	}

	if len(ltc.VideoID) == 0 {
		return errors.New("unexpected blank video ID")
	}

	if ltc.Interval != 100.0 {
		return errors.New("unsupported non-100 duration field")
	}

	return nil
}
// secs_to_srt_time formats a time offset given in seconds as an SRT
// timestamp of the form HH:MM:SS,mmm.
//
// The offset is rounded to the nearest millisecond. Negative offsets cannot
// be represented in SRT and are clamped to zero.
func secs_to_srt_time(secs float64) string {
	if secs < 0 {
		secs = 0
	}

	// Scale to milliseconds *before* converting to an integer Duration;
	// converting first would discard the fractional seconds. Adding 0.5
	// turns the truncating conversion into round-to-nearest.
	dur := time.Duration(secs*1000+0.5) * time.Millisecond

	hh := int64(dur / time.Hour)
	mm := int64(dur/time.Minute) % 60
	ss := int64(dur/time.Second) % 60
	ms := int64(dur/time.Millisecond) % 1000

	return fmt.Sprintf("%02d:%02d:%02d,%03d", hh, mm, ss, ms)
}
// WriteSRT streams the captions to w in SRT format.
//
// Each non-empty caption is shown from its own timestamp until the next
// timestamp; empty captions are skipped without consuming a sequence number.
// The final caption has no following timestamp, so it is shown for 3 seconds.
//
// It returns an error if there are fewer timestamps than captions, or if
// writing to w fails.
func (ltc *loadTupContent) WriteSRT(w io.Writer) error {

	/*
		SRT file format (example from Wikipedia):

		1
		00:02:17,440 --> 00:02:20,375
		Senator, we're making
		our final approach into Coruscant.

		2
		00:02:20,476 --> 00:02:22,501
		Very good, Lieutenant.
	*/

	// Every caption needs its own start time; fail cleanly instead of
	// panicking on an out-of-range index.
	if len(ltc.Secs) < len(ltc.Caps) {
		return fmt.Errorf("secs/caps length mismatch: %d timestamps for %d captions", len(ltc.Secs), len(ltc.Caps))
	}

	ctr := 1
	for i, text := range ltc.Caps {
		if text == "" {
			// Don't show anything
			continue
		}

		var endSecs float64
		if i < len(ltc.Caps)-1 {
			endSecs = ltc.Secs[i+1]
		} else {
			// The final subtitle. We don't know how long it should be
			// displayed for since we don't know the entire video's
			// duration, so assume 3 seconds.
			endSecs = ltc.Secs[i] + 3
		}

		_, err := fmt.Fprintf(w, "%d\n%s --> %s\n%s\n\n",
			ctr, secs_to_srt_time(ltc.Secs[i]), secs_to_srt_time(endSecs), text)
		if err != nil {
			return err
		}

		// We emitted a message, increase the counter
		ctr++
	}

	return nil
}
// Verbosity thresholds compared against config.loglevel with >=, so a higher
// level also enables everything a lower level prints. The usage text
// documents the levels as 0=silent, 1=normal, 2=verbose.
const (
// LogLevelInfo enables the normal progress messages.
LogLevelInfo int = 1
// LogLevelVerbose additionally enables verbose output, such as forwarding
// the output of the external tools.
LogLevelVerbose int = 2
)
// config carries the settings that main fills in from command-line flags and
// passes to performDownload.
type config struct {
// youtubeDl is the youtube-dl executable to run (flag --youtube-dl).
youtubeDl string
// mkvmerge is the mkvmerge executable to run (flag --mkvmerge).
mkvmerge string
// mediainfo is the mediainfo executable to run (flag --mediainfo).
mediainfo string
// overrideOutput is the output filename override (flag --output); the flag
// defaults to the empty string.
overrideOutput string
// subsOnly is not referenced in the visible part of this file.
// NOTE(review): confirm its meaning against the code that reads it.
subsOnly bool
// deleteTemporaries is set by --delete-temporary (default true); the usage
// text documents --delete-temporary=false as preserving temporary files.
deleteTemporaries bool
// loglevel is the verbosity (flag --loglevel, default 1); it is compared
// against LogLevelInfo and LogLevelVerbose.
loglevel int
}
func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
//
if cfg.loglevel >= LogLevelInfo {
fmt.Printf("Starting download for '%s'...\n", targetUrl)
}
//
var content []byte
var err error
if targetUrl == "-" {
@ -235,14 +83,32 @@ func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
// Download the video
ytdl := exec.CommandContext(ctx, cfg.youtubeDl, `-f`, `bestvideo+bestaudio`, "https://youtu.be/"+ltc.VideoID, `--merge-output-format`, `mkv`, "-o", filepath.Join(tmpdir, "downloaded"))
ytdl.Stdout = os.Stdout
ytdl.Stderr = os.Stderr
if cfg.loglevel >= LogLevelVerbose {
ytdl.Stdout = os.Stdout
ytdl.Stderr = os.Stderr
}
err = ytdl.Run()
if err != nil {
return err
}
// Determine video's total length
minfo := exec.CommandContext(ctx, cfg.mediainfo, `--Inform=General;%Duration%`, filepath.Join(tmpdir, "downloaded.mkv"))
if cfg.loglevel >= LogLevelVerbose {
minfo.Stderr = os.Stderr
}
ret, err := minfo.Output()
if err != nil {
return err
}
msecsDuration, err := strconv.ParseInt(strings.TrimSpace(string(ret)), 10, 64)
if err != nil {
return err
}
if cfg.loglevel >= LogLevelVerbose {
fmt.Printf("Video duration is %d ms\n", msecsDuration)
}
// Create the subtitle file (clamped to total length)
@ -251,7 +117,7 @@ func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
return err
}
err = ltc.WriteSRT(fh)
err = ltc.WriteSubtitle(fh, float64(msecsDuration)/1000)
fh.Close()
if err != nil {
return err
@ -259,9 +125,15 @@ func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
// Mux the subtitles into the file
mkvm := exec.CommandContext(ctx, cfg.mkvmerge, `-o`, filepath.Join(tmpdir, "muxed.mkv"), filepath.Join(tmpdir, "downloaded.mkv"), filepath.Join(tmpdir, "subtitles.srt"))
mkvm.Stdout = os.Stdout
mkvm.Stderr = os.Stderr
mkvm := exec.CommandContext(ctx, cfg.mkvmerge,
`--title`, ltc.Title,
`-o`, filepath.Join(tmpdir, "muxed.mkv"),
`--language`, `0:jpn`, `--language`, `1:jpn`, filepath.Join(tmpdir, "downloaded.mkv"),
`--language`, `0:eng`, `--default-track`, `0`, filepath.Join(tmpdir, "subtitles.srt"))
if cfg.loglevel >= LogLevelVerbose {
mkvm.Stdout = os.Stdout
mkvm.Stderr = os.Stderr
}
err = mkvm.Run()
if err != nil {
return err
@ -282,7 +154,14 @@ func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
return err
}
outputFile = fmt.Sprintf(`[Loadtup] %s [%08X].mkv`, ltc.Title, hw.Sum())
var invalidChars *strings.Replacer
if runtime.GOOS == "windows" { // compile-time constant comparison will be elided
invalidChars = strings.NewReplacer(`"`, `_`, `*`, `_`, `<`, `_`, `>`, `_`, `?`, `_`, `\`, `_`, `|`, `_`, `/`, `_`, `:`, `_`)
} else {
invalidChars = strings.NewReplacer(`/`, `_`)
}
outputFile = fmt.Sprintf(`[Loadtup] %s [%08X].mkv`, invalidChars.Replace(ltc.Title), hw.Sum())
}
err = os.Rename(filepath.Join(tmpdir, "muxed.mkv"), outputFile)
@ -292,6 +171,10 @@ func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
// Done
if cfg.loglevel >= LogLevelInfo {
fmt.Printf("Download complete for '%s'\n", outputFile)
}
return nil
}
@ -304,9 +187,11 @@ read equivalent loadtup.com HTML content from stdin.
Options:
--youtube-dl PATH Override path to youtube-dl
--mkvmerge PATH Override path to mkvmerge
--mediainfo PATH Override path to mediainfo
--output PATH Override output filename
(only valid for a single URL)
--delete-temporary=false Preserve temporary files
--loglevel 0|1|2 Set verbosity (0=silent, 1=normal, 2=verbose)
`)
os.Exit(1)
}
@ -319,8 +204,10 @@ func main() {
flag.StringVar(&cfg.youtubeDl, "youtube-dl", "youtube-dl", "")
flag.StringVar(&cfg.mkvmerge, "mkvmerge", "mkvmerge", "")
flag.StringVar(&cfg.mediainfo, "mediainfo", "mediainfo", "")
flag.StringVar(&cfg.overrideOutput, "output", "", "")
flag.BoolVar(&cfg.deleteTemporaries, "delete-temporary", true, "")
flag.IntVar(&cfg.loglevel, "loglevel", 1, "")
flag.Usage = usage
flag.Parse()

115
scrape.go Normal file
View File

@ -0,0 +1,115 @@
package main
import (
"encoding/json"
"errors"
"fmt"
"regexp"
"strconv"
"strings"
)
// loadTupContent holds the values that NewLoadTupContent extracts from a
// loadtup.com HTML page.
type loadTupContent struct {
// Title is the whitespace-trimmed text of the page's <h2> heading.
Title string
// VideoID is the value of the page's `videoIdMain` variable.
VideoID string
// Interval is the page's `interval` variable. Validate only accepts 100.
Interval float64
// Secs holds the values of the page's `secs` variable, parsed as floats.
// Validate requires it to have the same length as Caps.
Secs []float64
// Caps holds the strings of the page's `caps` variable.
Caps []string
// Scrsecs holds the values of the page's `scrsecs` variable, parsed as
// floats. Validate requires it to have the same length as Scrcaps.
Scrsecs []float64
// Scrcaps holds the strings of the page's `scrcaps` variable.
Scrcaps []string
}
// parse_json_floats converts a JSON array whose elements are numeric strings
// (such as `["0", "2.75"]`) into a slice of float64 values.
//
// An error is returned when part is not a JSON array of strings or when an
// element is not a valid 64-bit float.
func parse_json_floats(part []byte) ([]float64, error) {
	var encoded []string
	if err := json.Unmarshal(part, &encoded); err != nil {
		return nil, err
	}

	floats := make([]float64, len(encoded))
	for pos, text := range encoded {
		value, err := strconv.ParseFloat(text, 64)
		if err != nil {
			return nil, err
		}
		floats[pos] = value
	}

	return floats, nil
}
// Patterns used by NewLoadTupContent, compiled once at package initialisation
// instead of on every call.
var (
	// videoPropsRx captures, in order: the video ID, the interval, and the
	// raw JSON text of secs, caps, scrsecs and scrcaps from the inline script.
	videoPropsRx = regexp.MustCompile(`videoIdMain = "([^"]+)";var interval = (\d+);var secs = (.+?);var caps = (.+?);var scrsecs = (.+?);var scrcaps = (.+?);</script>`)

	// pageTitleRx captures the contents of the page's <h2> heading.
	pageTitleRx = regexp.MustCompile(`(?ms)<h2 style="margin:0 0 0 0;">(.+?)</h2>`)
)

// NewLoadTupContent scrapes the video properties and the page title out of
// the supplied loadtup.com HTML.
//
// It returns an error if the inline script with the video properties or the
// <h2> title cannot be found, or if any property fails to parse. Parse errors
// wrap the underlying error and name the property that failed.
func NewLoadTupContent(content []byte) (*loadTupContent, error) {
	page := string(content)

	// FindStringSubmatch returns either nil or one element per capture group
	// plus the whole match, so a non-nil result needs no length check.
	props := videoPropsRx.FindStringSubmatch(page)
	if props == nil {
		return nil, errors.New("Missing video properties in HTML page")
	}

	var (
		ltc loadTupContent
		err error
	)

	ltc.VideoID = props[1]

	if ltc.Interval, err = strconv.ParseFloat(props[2], 64); err != nil {
		return nil, fmt.Errorf("parsing interval: %w", err)
	}

	if ltc.Secs, err = parse_json_floats([]byte(props[3])); err != nil {
		return nil, fmt.Errorf("parsing secs: %w", err)
	}

	if err = json.Unmarshal([]byte(props[4]), &ltc.Caps); err != nil {
		return nil, fmt.Errorf("parsing caps: %w", err)
	}

	if ltc.Scrsecs, err = parse_json_floats([]byte(props[5])); err != nil {
		return nil, fmt.Errorf("parsing scrsecs: %w", err)
	}

	if err = json.Unmarshal([]byte(props[6]), &ltc.Scrcaps); err != nil {
		return nil, fmt.Errorf("parsing scrcaps: %w", err)
	}

	// Parse the page title
	heading := pageTitleRx.FindStringSubmatch(page)
	if heading == nil {
		return nil, errors.New("Missing title in HTML page")
	}
	ltc.Title = strings.TrimSpace(heading[1])

	return &ltc, nil
}
// Validate checks that the scraped content is internally consistent and
// uses only supported values.
//
// The checks run in a fixed order and the first failure is returned:
// secs/caps lengths must match, scrsecs/scrcaps lengths must match, the video
// ID must be non-empty, and the interval must be exactly 100.
func (ltc *loadTupContent) Validate() error {
	switch {
	case len(ltc.Secs) != len(ltc.Caps):
		return fmt.Errorf("secs/caps length mismatch")

	case len(ltc.Scrsecs) != len(ltc.Scrcaps):
		return fmt.Errorf("scrsecs/scrcaps length mismatch")

	case ltc.VideoID == "":
		return errors.New("unexpected blank video ID")

	case ltc.Interval != 100.0:
		return errors.New("unsupported non-100 duration field")
	}

	return nil
}

107
writesubs.go Normal file
View File

@ -0,0 +1,107 @@
package main
import (
"fmt"
"io"
"sort"
"time"
)
// secs_to_ass_time formats a time offset given in seconds as a timestamp of
// the form HH:MM:SS.mmm for use in ASS dialogue lines.
//
// The offset is rounded to the nearest millisecond. Negative offsets are not
// meaningful as subtitle timestamps and are clamped to zero.
func secs_to_ass_time(secs float64) string {
	if secs < 0 {
		secs = 0
	}

	// Scale to milliseconds *before* converting to an integer Duration;
	// converting first would discard the fractional seconds. Adding 0.5
	// turns the truncating conversion into round-to-nearest.
	dur := time.Duration(secs*1000+0.5) * time.Millisecond

	hh := int64(dur / time.Hour)
	mm := int64(dur/time.Minute) % 60
	ss := int64(dur/time.Second) % 60
	ms := int64(dur/time.Millisecond) % 1000

	return fmt.Sprintf("%02d:%02d:%02d.%03d", hh, mm, ss, ms)
}
// WriteSubtitle streams the video subtitles to the supplied writer in ASS format.
//
// Entries from Caps are written on layer 0 with the "Default" style, and
// entries from Scrcaps on layer 1 with the top-aligned "TLNote" style. Empty
// entries are skipped. Each entry is displayed until the next timestamp of
// the same kind; the final entry of each kind is displayed until
// totalVideoDurationSecs. All entries are emitted ordered by start time.
//
// It returns an error if either kind has fewer timestamps than texts, or if
// writing to w fails.
func (ltc *loadTupContent) WriteSubtitle(w io.Writer, totalVideoDurationSecs float64) error {

	// Every text needs its own start time; fail cleanly instead of panicking
	// on an out-of-range index.
	if len(ltc.Secs) < len(ltc.Caps) {
		return fmt.Errorf("secs/caps length mismatch: %d timestamps for %d captions", len(ltc.Secs), len(ltc.Caps))
	}
	if len(ltc.Scrsecs) < len(ltc.Scrcaps) {
		return fmt.Errorf("scrsecs/scrcaps length mismatch: %d timestamps for %d captions", len(ltc.Scrsecs), len(ltc.Scrcaps))
	}

	_, err := io.WriteString(w, `[Script Info]
; Script generated by loadtup-dl
ScriptType: v4.00+
Collisions: Normal
Timer: 100,0000
WrapStyle: 3
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default, Arial,16,&H00FFFFFF,&H00FFFFFF,&H00000008,&H80000008,-1,0,0,0,100,100,0.00,0.00,1,1.00,2.00,2,10,10,10,0
Style: TLNote, Arial,10,&H00FFFFFF,&H00FFFFFF,&H00000008,&H80000008,-1,0,0,0,100,100,0.00,0.00,1,1.00,2.00,8,10,10,10,0
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
`)
	if err != nil {
		return err
	}

	type entry struct {
		startTime float64
		entry     string
	}
	entries := make([]entry, 0, len(ltc.Caps)+len(ltc.Scrcaps))

	// collect appends one Dialogue line per non-empty text. Emptiness is
	// judged on the same slice the text is taken from.
	collect := func(layer int, style string, secs []float64, texts []string) {
		for i, text := range texts {
			if text == "" {
				// Don't show anything
				continue
			}

			// The final subtitle is displayed for the entire remaining
			// video duration; every other one ends where the next starts.
			end := totalVideoDurationSecs
			if i < len(texts)-1 {
				end = secs[i+1]
			}

			// Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
			entries = append(entries, entry{
				startTime: secs[i],
				entry: fmt.Sprintf("Dialogue: %d,%s,%s,%s,,0000,0000,0000,,%s\n",
					layer, secs_to_ass_time(secs[i]), secs_to_ass_time(end), style, text),
			})
		}
	}

	collect(0, "Default", ltc.Secs, ltc.Caps)

	// Repeat for scrcaps, using top positioning and a different layer
	collect(1, "TLNote", ltc.Scrsecs, ltc.Scrcaps)

	// Sort all the entries by their start time, to mingle TL note entries
	// properly with the other subtitles
	sort.SliceStable(entries, func(i, j int) bool {
		return entries[i].startTime < entries[j].startTime
	})

	// Emit all to writer
	for _, e := range entries {
		if _, err := io.WriteString(w, e.entry); err != nil {
			return err
		}
	}

	return nil
}