loadtup-dl/scrape.go

116 lines
2.4 KiB
Go

package main
import (
"encoding/json"
"errors"
"fmt"
"regexp"
"strconv"
"strings"
)
// loadTupContent holds the values NewLoadTupContent extracts from a page's
// HTML: the heading text plus the variables assigned in the page's inline
// script.
type loadTupContent struct {
	// Title is the whitespace-trimmed text of the page's <h2> heading.
	Title string
	// VideoID is the quoted value assigned to the script's videoIdMain
	// variable. Validate rejects an empty value.
	VideoID string
	// Interval is the numeric value of the script's "interval" variable.
	// Validate only accepts 100.
	// NOTE(review): the unit of this value is not visible here - confirm.
	Interval float64
	// Secs holds the numbers decoded from the script's "secs" array.
	// Validate requires it to have the same length as Caps.
	Secs []float64
	// Caps holds the strings decoded from the script's "caps" array.
	Caps []string
	// Scrsecs holds the numbers decoded from the script's "scrsecs" array.
	// Validate requires it to have the same length as Scrcaps.
	Scrsecs []float64
	// Scrcaps holds the strings decoded from the script's "scrcaps" array.
	Scrcaps []string
}
// parse_json_floats decodes part as a JSON array of strings and converts each
// string to a float64 with strconv.ParseFloat.
//
// It returns a non-nil (possibly empty) slice on success. On failure it
// returns a nil slice and an error that wraps the underlying encoding/json or
// strconv error, so errors.Is / errors.As still see the original cause. A
// conversion failure also reports the zero-based index of the offending
// element.
func parse_json_floats(part []byte) ([]float64, error) {
	// The numbers arrive as JSON strings (e.g. ["0.5","12"]), so decode to
	// strings first; bare JSON numbers are rejected by Unmarshal here.
	var raw []string
	if err := json.Unmarshal(part, &raw); err != nil {
		return nil, fmt.Errorf("decoding JSON string array: %w", err)
	}

	floats := make([]float64, 0, len(raw))
	for i, s := range raw {
		f, err := strconv.ParseFloat(s, 64)
		if err != nil {
			return nil, fmt.Errorf("element %d: %w", i, err)
		}
		floats = append(floats, f)
	}
	return floats, nil
}
// Patterns used by NewLoadTupContent. They are compiled once at package
// initialisation rather than on every call.
var (
	// ltcVideoPropsRx captures, in order: videoIdMain, interval, secs, caps,
	// scrsecs and scrcaps from the page's inline script.
	ltcVideoPropsRx = regexp.MustCompile(`videoIdMain = "([^"]+)";var interval = (\d+);var secs = (.+?);var caps = (.+?);var scrsecs = (.+?);var scrcaps = (.+?);</script>`)
	// ltcTitleRx captures the body of the page's <h2> heading; the s flag
	// lets the heading span several lines.
	ltcTitleRx = regexp.MustCompile(`(?ms)<h2 style="margin:0 0 0 0;">(.+?)</h2>`)
)

// NewLoadTupContent extracts the video properties and the title from the raw
// HTML of a page.
//
// content is the page body; it is not modified and the result does not alias
// it. The script variables are located with a fixed pattern, so they must
// appear in the order videoIdMain, interval, secs, caps, scrsecs, scrcaps.
//
// It returns an error when the script variables or the <h2> title cannot be
// found, or when any captured value fails to parse. Parse errors wrap the
// underlying cause and name the field that failed. The result is not checked
// for consistency here; see Validate.
func NewLoadTupContent(content []byte) (*loadTupContent, error) {
	// A non-nil submatch slice always has one entry per capture group plus
	// the whole match, so no length check is needed before indexing.
	props := ltcVideoPropsRx.FindSubmatch(content)
	if props == nil {
		return nil, errors.New("Missing video properties in HTML page")
	}

	var (
		ltc loadTupContent
		err error
	)
	ltc.VideoID = string(props[1])
	if ltc.Interval, err = strconv.ParseFloat(string(props[2]), 64); err != nil {
		return nil, fmt.Errorf("parsing interval: %w", err)
	}
	if ltc.Secs, err = parse_json_floats(props[3]); err != nil {
		return nil, fmt.Errorf("parsing secs: %w", err)
	}
	if err = json.Unmarshal(props[4], &ltc.Caps); err != nil {
		return nil, fmt.Errorf("parsing caps: %w", err)
	}
	if ltc.Scrsecs, err = parse_json_floats(props[5]); err != nil {
		return nil, fmt.Errorf("parsing scrsecs: %w", err)
	}
	if err = json.Unmarshal(props[6], &ltc.Scrcaps); err != nil {
		return nil, fmt.Errorf("parsing scrcaps: %w", err)
	}

	// Parse the page title.
	title := ltcTitleRx.FindSubmatch(content)
	if title == nil {
		return nil, errors.New("Missing title in HTML page")
	}
	ltc.Title = strings.TrimSpace(string(title[1]))

	return &ltc, nil
}
// Validate reports whether the scraped content is internally consistent and
// in a supported form. The checks run in a fixed order and the first failure
// is returned:
//
//  1. Secs and Caps must have the same length.
//  2. Scrsecs and Scrcaps must have the same length.
//  3. VideoID must not be empty.
//  4. Interval must be exactly 100.
//
// It returns nil when every check passes.
func (ltc *loadTupContent) Validate() error {
	switch {
	case len(ltc.Caps) != len(ltc.Secs):
		return fmt.Errorf("secs/caps length mismatch")
	case len(ltc.Scrcaps) != len(ltc.Scrsecs):
		return fmt.Errorf("scrsecs/scrcaps length mismatch")
	case ltc.VideoID == "":
		return errors.New("unexpected blank video ID")
	case ltc.Interval != 100.0:
		return errors.New("unsupported non-100 duration field")
	}
	return nil
}