Compare commits

..

No commits in common. "master" and "v1.0.0" have entirely different histories.

6 changed files with 173 additions and 309 deletions

View File

@ -4,15 +4,6 @@ A tool to download subtitled videos from the website loadtup.com.
It downloads videos using `youtube-dl`; parses and converts loadtup's custom subtitle format to srt; and remuxes them together using `mkvmerge`, including the CRC32 in the resulting filename.
## Installation
```
git clone https://git.ivysaur.me/code.ivysaur.me/loadtup-dl.git
cd loadtup-dl
go build
sudo cp ./loadtup-dl /usr/local/bin/loadtup-dl
```
## Usage
```
@ -24,22 +15,7 @@ read equivalent loadtup.com HTML content from stdin.
Options:
--youtube-dl PATH Override path to youtube-dl
--mkvmerge PATH Override path to mkvmerge
--mediainfo PATH Override path to mediainfo
--output PATH Override output filename
(only valid for a single URL)
--delete-temporary=false Preserve temporary files
--loglevel 0|1|2 Set verbosity (0=silent, 1=normal, 2=verbose)
```
## Changelog
v1.1.0 (2020-04-12)
- Feature: Support translator notes (`scrcaps`)
- Feature: Set stream language and title for generated mkv file
- Enhancement: Add custom logging levels
- Fix invalid characters appearing in generated filenames
- Fix misdetection of translator note usage
- Fix duration of final subtitle entry (adds dependency on `mediainfo`)
v1.0.0 (2020-04-11)
- Initial public release

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

3
go.mod
View File

@ -1,3 +0,0 @@
module code.ivysaur.me/loadtup-dl
go 1.15

225
main.go
View File

@ -2,6 +2,8 @@ package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"hash/crc32"
@ -11,36 +13,186 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime"
"regexp"
"strconv"
"strings"
"time"
)
// Verbosity thresholds. Callers compare the configured loglevel against these
// with `>=`, so a higher configured level also enables the lower ones.
const (
// LogLevelInfo is the level at which normal progress messages are printed.
LogLevelInfo int = 1
// LogLevelVerbose is the level at which subprocess output and extra
// diagnostic messages are shown as well.
LogLevelVerbose int = 2
)
// loadTupContent holds the video properties scraped from a loadtup.com HTML
// page by NewLoadTupContent.
type loadTupContent struct {
// Title is the whitespace-trimmed content of the page's <h2> heading.
Title string
// VideoID is the value of the page's `videoIdMain` script variable.
VideoID string
// Interval is the page's `interval` script variable. Validate only accepts 100.
Interval float64
// Secs and Caps are parallel lists parsed from the page's `secs` and `caps`
// script variables: Caps[i] is the caption that starts at Secs[i] seconds.
// An empty caption means nothing is shown.
Secs []float64
Caps []string
// Scrsecs and Scrcaps are parallel lists parsed from the page's `scrsecs`
// and `scrcaps` script variables. Validate rejects content whose first
// Scrcaps entry is non-empty.
Scrsecs []float64
Scrcaps []string
}
// parse_json_floats decodes a JSON array of numeric strings (for example
// `["1.5","2"]`) into a slice of float64 values.
//
// It returns an error if part is not a JSON array of strings, or if any
// element cannot be parsed as a 64-bit float.
func parse_json_floats(part []byte) ([]float64, error) {
	var rawValues []string
	if err := json.Unmarshal(part, &rawValues); err != nil {
		return nil, err
	}

	floats := make([]float64, len(rawValues))
	for idx := range rawValues {
		parsed, err := strconv.ParseFloat(rawValues[idx], 64)
		if err != nil {
			return nil, err
		}
		floats[idx] = parsed
	}
	return floats, nil
}
// NewLoadTupContent scrapes the video properties and the page title out of a
// loadtup.com HTML page.
//
// The video ID, interval, and the secs/caps/scrsecs/scrcaps arrays are read
// from the inline script that assigns them; the title is taken from the
// page's <h2> heading with surrounding whitespace trimmed. An error is
// returned if either pattern is not found or if any captured value fails to
// parse. The result is not validated; call Validate for that.
func NewLoadTupContent(content []byte) (*loadTupContent, error) {
	page := string(content)

	// Locate the inline script holding the video properties.
	propsRx := regexp.MustCompile(`videoIdMain = "([^"]+)";var interval = (\d+);var secs = (.+?);var caps = (.+?);var scrsecs = (.+?);var scrcaps = (.+?);</script>`)
	props := propsRx.FindStringSubmatch(page)
	switch {
	case props == nil:
		return nil, errors.New("Missing video properties in HTML page")
	case len(props) != 7:
		return nil, fmt.Errorf("expected len(parts)=7, got %d", len(props))
	}

	interval, err := strconv.ParseFloat(props[2], 64)
	if err != nil {
		return nil, err
	}

	secs, err := parse_json_floats([]byte(props[3]))
	if err != nil {
		return nil, err
	}

	var caps []string
	if err := json.Unmarshal([]byte(props[4]), &caps); err != nil {
		return nil, err
	}

	scrsecs, err := parse_json_floats([]byte(props[5]))
	if err != nil {
		return nil, err
	}

	var scrcaps []string
	if err := json.Unmarshal([]byte(props[6]), &scrcaps); err != nil {
		return nil, err
	}

	// Locate the page title.
	titleRx := regexp.MustCompile(`(?ms)<h2 style="margin:0 0 0 0;">(.+?)</h2>`)
	heading := titleRx.FindStringSubmatch(page)
	switch {
	case heading == nil:
		return nil, errors.New("Missing title in HTML page")
	case len(heading) != 2:
		return nil, fmt.Errorf("expected len(parts)=2, got %d", len(heading))
	}

	return &loadTupContent{
		Title:    strings.TrimSpace(heading[1]),
		VideoID:  props[1],
		Interval: interval,
		Secs:     secs,
		Caps:     caps,
		Scrsecs:  scrsecs,
		Scrcaps:  scrcaps,
	}, nil
}
// Validate checks that the scraped content is in a shape this program
// supports and returns a descriptive error for the first problem found:
//
//   - Secs and Caps must have the same length, as must Scrsecs and Scrcaps.
//   - Scrcaps must be empty, or its first entry must be the empty string.
//   - VideoID must not be blank.
//   - Interval must be exactly 100.
//
// It returns nil when every check passes.
func (ltc *loadTupContent) Validate() error {
	if len(ltc.Secs) != len(ltc.Caps) {
		return fmt.Errorf("secs/caps length mismatch")
	}

	if len(ltc.Scrsecs) != len(ltc.Scrcaps) {
		return fmt.Errorf("scrsecs/scrcaps length mismatch")
	}

	// Only the first scrcaps entry is inspected to decide whether the page
	// uses scrcaps at all.
	if len(ltc.Scrcaps) > 0 && ltc.Scrcaps[0] != "" {
		return errors.New("unsupported use of scrcaps")
	}

	if len(ltc.VideoID) == 0 {
		return errors.New("unexpected blank video ID")
	}

	if ltc.Interval != 100.0 {
		return errors.New("unsupported non-100 duration field")
	}

	return nil
}
// secs_to_srt_time formats an offset in seconds as an SRT timestamp of the
// form HH:MM:SS,mmm. The offset is rounded to the nearest millisecond.
func secs_to_srt_time(secs float64) string {
	// Work in whole milliseconds. Converting the float directly with
	// time.Duration(secs) would truncate it to an integer before scaling to
	// seconds, discarding the fractional part so the millisecond field would
	// always be 000.
	totalMs := int64(secs*1000 + 0.5)
	if secs < 0 {
		totalMs = int64(secs*1000 - 0.5) // round half away from zero
	}

	hh := totalMs / 3600000
	mm := (totalMs / 60000) % 60
	ss := (totalMs / 1000) % 60
	ms := totalMs % 1000

	return fmt.Sprintf("%02d:%02d:%02d,%03d", hh, mm, ss, ms)
}
// WriteSRT streams the captions to w in SubRip (SRT) format.
//
// Each non-empty caption is shown from its own start time until the start
// time of the next entry in Secs. Empty captions produce no SRT entry (they
// only terminate the previous one). The final caption is given a fixed
// three-second duration.
//
// Secs and Caps are expected to have equal length (see Validate). The first
// error returned by w is returned to the caller.
func (ltc *loadTupContent) WriteSRT(w io.Writer) error {

	/*
		SRT file format (example from Wikipedia):

		1
		00:02:17,440 --> 00:02:20,375
		Senator, we're making
		our final approach into Coruscant.

		2
		00:02:20,476 --> 00:02:22,501
		Very good, Lieutenant.
	*/

	// SRT entries are numbered consecutively from 1; skipped (empty)
	// captions must not consume a number.
	ctr := 1

	for i, caption := range ltc.Caps {
		if caption == "" {
			// Don't show anything
			continue
		}

		start := secs_to_srt_time(ltc.Secs[i])

		var end string
		if i < len(ltc.Caps)-1 {
			end = secs_to_srt_time(ltc.Secs[i+1])
		} else {
			// The final subtitle. We don't know how long it should be
			// displayed for since we don't know the entire video's duration.
			// Assume 3 seconds.
			end = secs_to_srt_time(ltc.Secs[i] + 3)
		}

		if _, err := fmt.Fprintf(w, "%d\n%s --> %s\n%s\n\n", ctr, start, end, caption); err != nil {
			return err
		}

		// We emitted a message, increase the counter
		ctr++
	}

	return nil
}
// config holds the settings passed to performDownload. The fields noted as
// flags are populated from the command line in main.
type config struct {
// youtubeDl is the youtube-dl executable to run (--youtube-dl flag).
youtubeDl string
// mkvmerge is the mkvmerge executable to run (--mkvmerge flag).
mkvmerge string
// mediainfo is the mediainfo executable to run (--mediainfo flag).
mediainfo string
// overrideOutput is the --output flag; its default is the empty string.
overrideOutput string
// NOTE(review): no reader or writer of subsOnly is visible in this view —
// confirm whether it is still used.
subsOnly bool
// deleteTemporaries is the --delete-temporary flag (default true).
deleteTemporaries bool
// loglevel is the --loglevel flag (default 1); it is compared against
// LogLevelInfo and LogLevelVerbose.
loglevel int
}
func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
//
if cfg.loglevel >= LogLevelInfo {
fmt.Printf("Starting download for '%s'...\n", targetUrl)
}
//
var content []byte
var err error
if targetUrl == "-" {
@ -83,32 +235,14 @@ func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
// Download the video
ytdl := exec.CommandContext(ctx, cfg.youtubeDl, `-f`, `bestvideo+bestaudio`, "https://youtu.be/"+ltc.VideoID, `--merge-output-format`, `mkv`, "-o", filepath.Join(tmpdir, "downloaded"))
if cfg.loglevel >= LogLevelVerbose {
ytdl.Stdout = os.Stdout
ytdl.Stderr = os.Stderr
}
err = ytdl.Run()
if err != nil {
return err
}
// Determine video's total length
minfo := exec.CommandContext(ctx, cfg.mediainfo, `--Inform=General;%Duration%`, filepath.Join(tmpdir, "downloaded.mkv"))
if cfg.loglevel >= LogLevelVerbose {
minfo.Stderr = os.Stderr
}
ret, err := minfo.Output()
if err != nil {
return err
}
msecsDuration, err := strconv.ParseInt(strings.TrimSpace(string(ret)), 10, 64)
if err != nil {
return err
}
if cfg.loglevel >= LogLevelVerbose {
fmt.Printf("Video duration is %d ms\n", msecsDuration)
}
// Create the subtitle file (clamped to total length)
@ -117,7 +251,7 @@ func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
return err
}
err = ltc.WriteSubtitle(fh, float64(msecsDuration)/1000)
err = ltc.WriteSRT(fh)
fh.Close()
if err != nil {
return err
@ -125,15 +259,9 @@ func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
// Mux the subtitles into the file
mkvm := exec.CommandContext(ctx, cfg.mkvmerge,
`--title`, ltc.Title,
`-o`, filepath.Join(tmpdir, "muxed.mkv"),
`--language`, `0:jpn`, `--language`, `1:jpn`, filepath.Join(tmpdir, "downloaded.mkv"),
`--language`, `0:eng`, `--default-track`, `0`, filepath.Join(tmpdir, "subtitles.srt"))
if cfg.loglevel >= LogLevelVerbose {
mkvm := exec.CommandContext(ctx, cfg.mkvmerge, `-o`, filepath.Join(tmpdir, "muxed.mkv"), filepath.Join(tmpdir, "downloaded.mkv"), filepath.Join(tmpdir, "subtitles.srt"))
mkvm.Stdout = os.Stdout
mkvm.Stderr = os.Stderr
}
err = mkvm.Run()
if err != nil {
return err
@ -154,14 +282,7 @@ func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
return err
}
var invalidChars *strings.Replacer
if runtime.GOOS == "windows" { // compile-time constant comparison will be elided
invalidChars = strings.NewReplacer(`"`, `_`, `*`, `_`, `<`, `_`, `>`, `_`, `?`, `_`, `\`, `_`, `|`, `_`, `/`, `_`, `:`, `_`)
} else {
invalidChars = strings.NewReplacer(`/`, `_`)
}
outputFile = fmt.Sprintf(`[Loadtup] %s [%08X].mkv`, invalidChars.Replace(ltc.Title), hw.Sum())
outputFile = fmt.Sprintf(`[Loadtup] %s [%08X].mkv`, ltc.Title, hw.Sum())
}
err = os.Rename(filepath.Join(tmpdir, "muxed.mkv"), outputFile)
@ -171,10 +292,6 @@ func performDownload(ctx context.Context, cfg *config, targetUrl string) error {
// Done
if cfg.loglevel >= LogLevelInfo {
fmt.Printf("Download complete for '%s'\n", outputFile)
}
return nil
}
@ -187,11 +304,9 @@ read equivalent loadtup.com HTML content from stdin.
Options:
--youtube-dl PATH Override path to youtube-dl
--mkvmerge PATH Override path to mkvmerge
--mediainfo PATH Override path to mediainfo
--output PATH Override output filename
(only valid for a single URL)
--delete-temporary=false Preserve temporary files
--loglevel 0|1|2 Set verbosity (0=silent, 1=normal, 2=verbose)
`)
os.Exit(1)
}
@ -204,10 +319,8 @@ func main() {
flag.StringVar(&cfg.youtubeDl, "youtube-dl", "youtube-dl", "")
flag.StringVar(&cfg.mkvmerge, "mkvmerge", "mkvmerge", "")
flag.StringVar(&cfg.mediainfo, "mediainfo", "mediainfo", "")
flag.StringVar(&cfg.overrideOutput, "output", "", "")
flag.BoolVar(&cfg.deleteTemporaries, "delete-temporary", true, "")
flag.IntVar(&cfg.loglevel, "loglevel", 1, "")
flag.Usage = usage
flag.Parse()

115
scrape.go
View File

@ -1,115 +0,0 @@
package main
import (
"encoding/json"
"errors"
"fmt"
"regexp"
"strconv"
"strings"
)
// loadTupContent holds the video properties scraped from a loadtup.com HTML
// page by NewLoadTupContent.
type loadTupContent struct {
// Title is the whitespace-trimmed content of the page's <h2> heading.
Title string
// VideoID is the value of the page's `videoIdMain` script variable.
VideoID string
// Interval is the page's `interval` script variable. Validate only accepts 100.
Interval float64
// Secs and Caps are parallel lists parsed from the page's `secs` and `caps`
// script variables; Validate requires them to have equal length.
Secs []float64
Caps []string
// Scrsecs and Scrcaps are parallel lists parsed from the page's `scrsecs`
// and `scrcaps` script variables; Validate requires them to have equal
// length.
Scrsecs []float64
Scrcaps []string
}
// parse_json_floats converts a JSON array whose elements are numbers encoded
// as strings into a []float64.
//
// An error is returned when the input is not a JSON array of strings or when
// an element is not a valid 64-bit float literal.
func parse_json_floats(part []byte) ([]float64, error) {
	var encoded []string
	if err := json.Unmarshal(part, &encoded); err != nil {
		return nil, err
	}

	values := make([]float64, 0, len(encoded))
	for _, text := range encoded {
		value, err := strconv.ParseFloat(text, 64)
		if err != nil {
			return nil, err
		}
		values = append(values, value)
	}
	return values, nil
}
// NewLoadTupContent scrapes the video properties and the page title out of a
// loadtup.com HTML page.
//
// The video ID, interval, and the secs/caps/scrsecs/scrcaps arrays are read
// from the inline script that assigns them; the title is taken from the
// page's <h2> heading with surrounding whitespace trimmed. An error is
// returned if either pattern is not found or if any captured value fails to
// parse. The result is not validated; call Validate for that.
func NewLoadTupContent(content []byte) (*loadTupContent, error) {
	html := string(content)

	// Step 1: the inline script holding the video properties.
	scriptRx := regexp.MustCompile(`videoIdMain = "([^"]+)";var interval = (\d+);var secs = (.+?);var caps = (.+?);var scrsecs = (.+?);var scrcaps = (.+?);</script>`)
	fields := scriptRx.FindStringSubmatch(html)
	if fields == nil {
		return nil, errors.New("Missing video properties in HTML page")
	} else if len(fields) != 7 {
		return nil, fmt.Errorf("expected len(parts)=7, got %d", len(fields))
	}

	result := &loadTupContent{VideoID: fields[1]}

	var err error
	if result.Interval, err = strconv.ParseFloat(fields[2], 64); err != nil {
		return nil, err
	}
	if result.Secs, err = parse_json_floats([]byte(fields[3])); err != nil {
		return nil, err
	}
	if err = json.Unmarshal([]byte(fields[4]), &result.Caps); err != nil {
		return nil, err
	}
	if result.Scrsecs, err = parse_json_floats([]byte(fields[5])); err != nil {
		return nil, err
	}
	if err = json.Unmarshal([]byte(fields[6]), &result.Scrcaps); err != nil {
		return nil, err
	}

	// Step 2: the page title.
	headingRx := regexp.MustCompile(`(?ms)<h2 style="margin:0 0 0 0;">(.+?)</h2>`)
	heading := headingRx.FindStringSubmatch(html)
	if heading == nil {
		return nil, errors.New("Missing title in HTML page")
	} else if len(heading) != 2 {
		return nil, fmt.Errorf("expected len(parts)=2, got %d", len(heading))
	}
	result.Title = strings.TrimSpace(heading[1])

	return result, nil
}
// Validate checks that the scraped content is internally consistent and
// supported, returning an error describing the first failed check:
// Secs/Caps and Scrsecs/Scrcaps must pair up in length, VideoID must be
// non-blank, and Interval must be exactly 100. It returns nil otherwise.
func (ltc *loadTupContent) Validate() error {
	switch {
	case len(ltc.Secs) != len(ltc.Caps):
		return fmt.Errorf("secs/caps length mismatch")

	case len(ltc.Scrsecs) != len(ltc.Scrcaps):
		return fmt.Errorf("scrsecs/scrcaps length mismatch")

	case ltc.VideoID == "":
		return errors.New("unexpected blank video ID")

	case ltc.Interval != 100.0:
		return errors.New("unsupported non-100 duration field")
	}

	return nil
}

View File

@ -1,107 +0,0 @@
package main
import (
"fmt"
"io"
"sort"
"time"
)
// secs_to_ass_time formats an offset in seconds as a timestamp of the form
// HH:MM:SS.mmm for use in ASS Dialogue lines. The offset is rounded to the
// nearest millisecond.
func secs_to_ass_time(secs float64) string {
	// Work in whole milliseconds. Converting the float directly with
	// time.Duration(secs) would truncate it to an integer before scaling to
	// seconds, discarding the fractional part so the millisecond field would
	// always be 000.
	totalMs := int64(secs*1000 + 0.5)
	if secs < 0 {
		totalMs = int64(secs*1000 - 0.5) // round half away from zero
	}

	hh := totalMs / 3600000
	mm := (totalMs / 60000) % 60
	ss := (totalMs / 1000) % 60
	ms := totalMs % 1000

	return fmt.Sprintf("%02d:%02d:%02d.%03d", hh, mm, ss, ms)
}
// WriteSubtitle streams the video subtitles to the supplied writer in ASS format.
//
// A fixed script header defining the "Default" and "TLNote" styles is written
// first. Each non-empty entry of Caps then becomes a layer-0 "Default"
// dialogue line and each non-empty entry of Scrcaps a layer-1 "TLNote"
// dialogue line. An entry is shown from its own start time until the start
// time of the next entry in the same list; the last entry of each list is
// shown until totalVideoDurationSecs. All dialogue lines are emitted in
// ascending start-time order.
//
// Secs/Caps and Scrsecs/Scrcaps are expected to have matching lengths (see
// Validate). The first error returned by w is returned to the caller.
func (ltc *loadTupContent) WriteSubtitle(w io.Writer, totalVideoDurationSecs float64) error {

	if _, err := w.Write([]byte(`[Script Info]
; Script generated by loadtup-dl
ScriptType: v4.00+
Collisions: Normal
Timer: 100,0000
WrapStyle: 3
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default, Arial,16,&H00FFFFFF,&H00FFFFFF,&H00000008,&H80000008,-1,0,0,0,100,100,0.00,0.00,1,1.00,2.00,2,10,10,10,0
Style: TLNote, Arial,10,&H00FFFFFF,&H00FFFFFF,&H00000008,&H80000008,-1,0,0,0,100,100,0.00,0.00,1,1.00,2.00,8,10,10,10,0
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
`)); err != nil {
		return err
	}

	type entry struct {
		startTime float64
		entry     string
	}
	entries := make([]entry, 0, len(ltc.Caps)+len(ltc.Scrcaps))

	for i := 0; i < len(ltc.Caps); i += 1 {
		if ltc.Caps[i] == "" {
			// Don't show anything
			continue
		}

		start := secs_to_ass_time(ltc.Secs[i])

		var end string
		if i < len(ltc.Caps)-1 {
			end = secs_to_ass_time(ltc.Secs[i+1])
		} else {
			// The final subtitle. Loadtup displays these for the entire
			// remaining video duration
			end = secs_to_ass_time(totalVideoDurationSecs)
		}

		// Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
		entries = append(entries, entry{
			startTime: ltc.Secs[i],
			entry:     fmt.Sprintf("Dialogue: 0,%s,%s,Default,,0000,0000,0000,,%s\n", start, end, ltc.Caps[i]),
		})
	}

	// Repeat for scrcaps, using top positioning and a different layer
	for i := 0; i < len(ltc.Scrcaps); i += 1 {
		// Test the scrcaps entry itself. Checking Caps[i] here would skip or
		// keep the wrong notes, and would index out of range whenever there
		// are more scrcaps than caps.
		if ltc.Scrcaps[i] == "" {
			// Don't show anything
			continue
		}

		start := secs_to_ass_time(ltc.Scrsecs[i])

		var end string
		if i < len(ltc.Scrcaps)-1 {
			end = secs_to_ass_time(ltc.Scrsecs[i+1])
		} else {
			// The final subtitle. Loadtup displays these for the entire
			// remaining video duration
			end = secs_to_ass_time(totalVideoDurationSecs)
		}

		// Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
		entries = append(entries, entry{
			startTime: ltc.Scrsecs[i],
			entry:     fmt.Sprintf("Dialogue: 1,%s,%s,TLNote,,0000,0000,0000,,%s\n", start, end, ltc.Scrcaps[i]),
		})
	}

	// Sort all the entries by their start time, to mingle TL note entries
	// properly with the other subtitles. The sort is stable, so entries with
	// equal start times keep their insertion order (captions before notes).
	sort.SliceStable(entries, func(i, j int) bool {
		return entries[i].startTime < entries[j].startTime
	})

	// Emit all to writer
	for _, e := range entries {
		if _, err := w.Write([]byte(e.entry)); err != nil {
			return err
		}
	}

	return nil
}