247 lines
6.3 KiB
Go
247 lines
6.3 KiB
Go
|
package youtube
|
||
|
|
||
|
import (
|
||
|
"encoding/json"
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"strconv"
|
||
|
"strings"
|
||
|
|
||
|
"github.com/rylio/ytdl"
|
||
|
|
||
|
"github.com/iawia002/annie/config"
|
||
|
"github.com/iawia002/annie/downloader"
|
||
|
"github.com/iawia002/annie/extractors"
|
||
|
"github.com/iawia002/annie/request"
|
||
|
"github.com/iawia002/annie/utils"
|
||
|
)
|
||
|
|
||
|
// streamFormat is one entry of the "formats" / "adaptiveFormats" arrays found
// in the player response JSON.
type streamFormat struct {
	// Itag is the numeric format identifier; it is used as the stream key and
	// to look up the matching ytdl format.
	Itag int `json:"itag"`
	// URL is the "url" value as present in the JSON.
	URL string `json:"url"`
	// MimeType looks like `video/webm; codecs="vp8.0, vorbis"`.
	MimeType string `json:"mimeType"`
	// ContentLength is kept as a string, as decoded from the JSON; it is
	// parsed into an integer only when the download URL is built.
	ContentLength string `json:"contentLength"`
	// QualityLabel is prepended to the MIME type in the stream's quality
	// description when it is non-empty.
	QualityLabel string `json:"qualityLabel"`
	// AudioQuality is the "audioQuality" value as present in the JSON.
	AudioQuality string `json:"audioQuality"`
}
|
||
|
|
||
|
type playerResponseType struct {
|
||
|
StreamingData struct {
|
||
|
Formats []streamFormat `json:"formats"`
|
||
|
AdaptiveFormats []streamFormat `json:"adaptiveFormats"`
|
||
|
} `json:"streamingData"`
|
||
|
VideoDetails struct {
|
||
|
Title string `json:"title"`
|
||
|
} `json:"videoDetails"`
|
||
|
}
|
||
|
|
||
|
// youtubeData models the `ytplayer.config` JSON object embedded in the watch
// page. Args.PlayerResponse is itself a JSON document encoded as a string and
// is unmarshalled a second time into playerResponseType.
type youtubeData struct {
	Args struct {
		PlayerResponse string `json:"player_response"`
	} `json:"args"`
}
|
||
|
|
||
|
// referer is passed as the second argument to request.Get for every YouTube
// page fetched in this file (presumably sent as the HTTP Referer header;
// confirm against request.Get).
const referer = "https://www.youtube.com"
|
||
|
|
||
|
// Extract is the main function for extracting data
|
||
|
func Extract(uri string) ([]downloader.Data, error) {
|
||
|
var err error
|
||
|
if !config.Playlist {
|
||
|
return []downloader.Data{youtubeDownload(uri)}, nil
|
||
|
}
|
||
|
listIDs := utils.MatchOneOf(uri, `(list|p)=([^/&]+)`)
|
||
|
if listIDs == nil || len(listIDs) < 3 {
|
||
|
return nil, extractors.ErrURLParseFailed
|
||
|
}
|
||
|
listID := listIDs[2]
|
||
|
if len(listID) == 0 {
|
||
|
return nil, errors.New("can't get list ID from URL")
|
||
|
}
|
||
|
|
||
|
html, err := request.Get("https://www.youtube.com/playlist?list="+listID, referer, nil)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
// "videoId":"OQxX8zgyzuM","thumbnail"
|
||
|
videoIDs := utils.MatchAll(html, `"videoId":"([^,]+?)","thumbnail"`)
|
||
|
needDownloadItems := utils.NeedDownloadList(len(videoIDs))
|
||
|
extractedData := make([]downloader.Data, len(needDownloadItems))
|
||
|
wgp := utils.NewWaitGroupPool(config.ThreadNumber)
|
||
|
dataIndex := 0
|
||
|
for index, videoID := range videoIDs {
|
||
|
if !utils.ItemInSlice(index+1, needDownloadItems) || len(videoID) < 2 {
|
||
|
continue
|
||
|
}
|
||
|
u := fmt.Sprintf(
|
||
|
"https://www.youtube.com/watch?v=%s&list=%s", videoID[1], listID,
|
||
|
)
|
||
|
wgp.Add()
|
||
|
go func(index int, u string, extractedData []downloader.Data) {
|
||
|
defer wgp.Done()
|
||
|
extractedData[index] = youtubeDownload(u)
|
||
|
}(dataIndex, u, extractedData)
|
||
|
dataIndex++
|
||
|
}
|
||
|
wgp.Wait()
|
||
|
return extractedData, nil
|
||
|
}
|
||
|
|
||
|
// youtubeDownload download function for single url
|
||
|
func youtubeDownload(uri string) downloader.Data {
|
||
|
vid := utils.MatchOneOf(
|
||
|
uri,
|
||
|
`watch\?v=([^/&]+)`,
|
||
|
`youtu\.be/([^?/]+)`,
|
||
|
`embed/([^/?]+)`,
|
||
|
`v/([^/?]+)`,
|
||
|
)
|
||
|
if vid == nil || len(vid) < 2 {
|
||
|
return downloader.EmptyData(uri, errors.New("can't find vid"))
|
||
|
}
|
||
|
|
||
|
videoURL := fmt.Sprintf(
|
||
|
"https://www.youtube.com/watch?v=%s",
|
||
|
vid[1],
|
||
|
)
|
||
|
|
||
|
videoInfo, err := ytdl.GetVideoInfo(uri)
|
||
|
if err != nil {
|
||
|
return downloader.EmptyData(uri, err)
|
||
|
}
|
||
|
|
||
|
html, err := request.Get(videoURL, referer, nil)
|
||
|
if err != nil {
|
||
|
return downloader.EmptyData(uri, err)
|
||
|
}
|
||
|
ytplayer := utils.MatchOneOf(html, `;ytplayer\.config\s*=\s*({.+?});`)
|
||
|
if ytplayer == nil || len(ytplayer) < 2 {
|
||
|
if strings.Contains(html, "LOGIN_REQUIRED") ||
|
||
|
strings.Contains(html, "Sign in to confirm your age") {
|
||
|
return downloader.EmptyData(uri, extractors.ErrLoginRequired)
|
||
|
}
|
||
|
return downloader.EmptyData(uri, extractors.ErrURLParseFailed)
|
||
|
}
|
||
|
|
||
|
var data youtubeData
|
||
|
if err = json.Unmarshal([]byte(ytplayer[1]), &data); err != nil {
|
||
|
return downloader.EmptyData(uri, err)
|
||
|
}
|
||
|
var playerResponse playerResponseType
|
||
|
if err = json.Unmarshal([]byte(data.Args.PlayerResponse), &playerResponse); err != nil {
|
||
|
return downloader.EmptyData(uri, err)
|
||
|
}
|
||
|
title := playerResponse.VideoDetails.Title
|
||
|
|
||
|
streams, err := extractVideoURLS(playerResponse, videoInfo)
|
||
|
if err != nil {
|
||
|
return downloader.EmptyData(uri, err)
|
||
|
}
|
||
|
|
||
|
return downloader.Data{
|
||
|
Site: "YouTube youtube.com",
|
||
|
Title: title,
|
||
|
Type: "video",
|
||
|
Streams: streams,
|
||
|
URL: uri,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func getStreamExt(streamType string) string {
|
||
|
// video/webm; codecs="vp8.0, vorbis" --> webm
|
||
|
exts := utils.MatchOneOf(streamType, `(\w+)/(\w+);`)
|
||
|
if exts == nil || len(exts) < 3 {
|
||
|
return ""
|
||
|
}
|
||
|
return exts[2]
|
||
|
}
|
||
|
|
||
|
func getRealURL(videoFormat streamFormat, videoInfo *ytdl.VideoInfo, ext string) (*downloader.URL, error) {
|
||
|
ytdlFormat := new(ytdl.Format)
|
||
|
for _, f := range videoInfo.Formats {
|
||
|
if f.Itag.Number == videoFormat.Itag {
|
||
|
ytdlFormat = f
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if ytdlFormat == nil {
|
||
|
return nil, fmt.Errorf("unable to get info for itag %d", videoFormat.Itag)
|
||
|
}
|
||
|
|
||
|
realURL, err := videoInfo.GetDownloadURL(ytdlFormat)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
size, _ := strconv.ParseInt(videoFormat.ContentLength, 10, 64)
|
||
|
return &downloader.URL{
|
||
|
URL: realURL.String(),
|
||
|
Size: size,
|
||
|
Ext: ext,
|
||
|
}, nil
|
||
|
}
|
||
|
|
||
|
func genStream(videoFormat streamFormat, videoInfo *ytdl.VideoInfo) (*downloader.Stream, error) {
|
||
|
streamType := videoFormat.MimeType
|
||
|
ext := getStreamExt(streamType)
|
||
|
if ext == "" {
|
||
|
return nil, fmt.Errorf("unable to get file extension of MimeType %s", streamType)
|
||
|
}
|
||
|
|
||
|
video, err := getRealURL(videoFormat, videoInfo, ext)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
var quality string
|
||
|
if videoFormat.QualityLabel != "" {
|
||
|
quality = fmt.Sprintf("%s %s", videoFormat.QualityLabel, streamType)
|
||
|
} else {
|
||
|
quality = streamType
|
||
|
}
|
||
|
|
||
|
return &downloader.Stream{
|
||
|
URLs: []downloader.URL{*video},
|
||
|
Quality: quality,
|
||
|
}, nil
|
||
|
}
|
||
|
|
||
|
func extractVideoURLS(data playerResponseType, videoInfo *ytdl.VideoInfo) (map[string]downloader.Stream, error) {
|
||
|
streams := make(map[string]downloader.Stream, len(data.StreamingData.Formats)+len(data.StreamingData.AdaptiveFormats))
|
||
|
for _, f := range data.StreamingData.Formats {
|
||
|
stream, err := genStream(f, videoInfo)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
streams[strconv.Itoa(f.Itag)] = *stream
|
||
|
}
|
||
|
|
||
|
// Unlike `url_encoded_fmt_stream_map`, all videos in `adaptive_fmts` have no sound,
|
||
|
// we need download video and audio both and then merge them.
|
||
|
|
||
|
// get audio file for videos in AdaptiveFormats
|
||
|
var audio downloader.URL
|
||
|
for _, f := range data.StreamingData.AdaptiveFormats {
|
||
|
if strings.HasPrefix(f.MimeType, "audio/mp4") {
|
||
|
audioURL, err := getRealURL(f, videoInfo, "m4a")
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
audio = *audioURL
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for _, f := range data.StreamingData.AdaptiveFormats {
|
||
|
stream, err := genStream(f, videoInfo)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
stream.URLs = append(stream.URLs, audio)
|
||
|
|
||
|
streams[strconv.Itoa(f.Itag)] = *stream
|
||
|
}
|
||
|
|
||
|
return streams, nil
|
||
|
}
|