package ytdl import ( "bytes" "encoding/json" "encoding/xml" "fmt" "io" "net/url" "regexp" "strconv" "strings" "time" "github.com/rs/zerolog/log" ) const youtubeBaseURL = "https://www.youtube.com/watch" const youtubeEmbeddedBaseURL = "https://www.youtube.com/embed/" const youtubeVideoEURL = "https://youtube.googleapis.com/v/" const youtubeVideoInfoURL = "https://www.youtube.com/get_video_info" const youtubeDateFormat = "2006-01-02" // VideoInfo contains the info a youtube video type VideoInfo struct { ID string // The video ID Title string // The video title Description string // The video description DatePublished time.Time // The date the video was published Formats FormatList // Formats the video is available in Keywords []string // List of keywords associated with the video Uploader string // Author of the video Song string Artist string Album string Writers string Duration time.Duration // Duration of the video htmlPlayerFile string } // GetVideoInfo fetches info from a url string, url object, or a url string func GetVideoInfo(value interface{}) (*VideoInfo, error) { switch t := value.(type) { case *url.URL: return GetVideoInfoFromURL(t) case string: if strings.HasPrefix(t, "https://") { uri, err := url.ParseRequestURI(t) if err != nil { return nil, err } return GetVideoInfo(uri) } return GetVideoInfoFromID(t) default: return nil, fmt.Errorf("Identifier type must be a string, *url.URL, or []byte") } } // GetVideoInfoFromURL fetches video info from a youtube url func GetVideoInfoFromURL(u *url.URL) (*VideoInfo, error) { videoID := extractVideoID(u) if len(videoID) == 0 { return nil, fmt.Errorf("Invalid youtube url, no video id") } return GetVideoInfoFromID(videoID) } // GetVideoInfoFromShortURL fetches video info from a short youtube url func extractVideoID(u *url.URL) string { switch u.Host { case "www.youtube.com", "youtube.com", "m.youtube.com": if u.Path == "/watch" { return u.Query().Get("v") } if strings.HasPrefix(u.Path, "/embed/") { return u.Path[7:] } case "youtu.be": if len(u.Path) > 1 { return u.Path[1:] } } return "" } // GetVideoInfoFromID fetches video info from a youtube video id func GetVideoInfoFromID(id string) (*VideoInfo, error) { body, err := httpGetAndCheckResponseReadBody(youtubeBaseURL + "?v=" + id) if err != nil { return nil, err } return getVideoInfoFromHTML(id, body) } // GetDownloadURL gets the download url for a format func (info *VideoInfo) GetDownloadURL(format *Format) (*url.URL, error) { return getDownloadURL(format, info.htmlPlayerFile) } // GetThumbnailURL returns a url for the thumbnail image // with the given quality func (info *VideoInfo) GetThumbnailURL(quality ThumbnailQuality) *url.URL { u, _ := url.Parse(fmt.Sprintf("http://img.youtube.com/vi/%s/%s.jpg", info.ID, quality)) return u } // Download is a convenience method to download a format to an io.Writer func (info *VideoInfo) Download(format *Format, dest io.Writer) error { u, err := info.GetDownloadURL(format) if err != nil { return err } resp, err := httpGetAndCheckResponse(u.String()) if err != nil { return err } defer resp.Body.Close() _, err = io.Copy(dest, resp.Body) return err } var ( regexpPlayerConfig = regexp.MustCompile("ytplayer.config = (.*?);ytplayer.load") regexpInitialData = regexp.MustCompile(`\["ytInitialData"\] = (.+);`) regexpInitialPlayerResponse = regexp.MustCompile(`\["ytInitialPlayerResponse"\] = (.+);`) ) func getVideoInfoFromHTML(id string, html []byte) (*VideoInfo, error) { info := &VideoInfo{} if matches := regexpInitialData.FindSubmatch(html); len(matches) > 0 { data := initialData{} if err := json.Unmarshal(matches[1], &data); err != nil { return nil, err } contents := data.Contents.TwoColumnWatchNextResults.Results.Results.Contents if len(contents) >= 2 { infoRenderer := contents[1].VideoSecondaryInfoRenderer info.Description = infoRenderer.Description.String() rows := infoRenderer.MetadataRowContainer.MetadataRowContainerRenderer.Rows info.Artist = rows.Get("Artist") info.Album = rows.Get("Album") info.Song = rows.Get("Song") info.Writers = rows.Get("Writers") } } info.ID = id var jsonConfig playerConfig // match json in javascript if matches := regexpPlayerConfig.FindSubmatch(html); len(matches) > 1 { err := json.Unmarshal(matches[1], &jsonConfig) if err != nil { return nil, err } } else { log.Debug().Msg("Unable to extract json from default url, trying embedded url") info, err := getVideoInfoFromEmbedded(id) if err != nil { return nil, err } query := url.Values{ "video_id": []string{id}, "eurl": []string{youtubeVideoEURL + id}, } if sts, ok := info["sts"].(float64); ok { query.Add("sts", strconv.Itoa(int(sts))) } body, err := httpGetAndCheckResponseReadBody(youtubeVideoInfoURL + "?" + query.Encode()) if err != nil { return nil, fmt.Errorf("Unable to read video info: %w", err) } query, err = url.ParseQuery(string(body)) if err != nil { return nil, fmt.Errorf("Unable to parse video info data: %w", err) } for k, v := range query { switch k { case "errorcode": jsonConfig.Args.Errorcode = v[0] case "reason": jsonConfig.Args.Reason = v[0] case "status": jsonConfig.Args.Status = v[0] case "player_response": jsonConfig.Args.PlayerResponse = v[0] case "url_encoded_fmt_stream_map": jsonConfig.Args.URLEncodedFmtStreamMap = v[0] case "adaptive_fmts": jsonConfig.Args.AdaptiveFmts = v[0] case "dashmpd": jsonConfig.Args.Dashmpd = v[0] default: // log.Debug().Msgf("unknown query param: %v", k) } } } inf := jsonConfig.Args if inf.Status == "fail" { return nil, fmt.Errorf("Error %s:%s", inf.Errorcode, inf.Reason) } var formats FormatList formats.parseFormats(strings.NewReader(inf.URLEncodedFmtStreamMap)) formats.parseFormats(strings.NewReader(inf.AdaptiveFmts)) if inf.PlayerResponse != "" { response := &playerResponse{} if err := json.Unmarshal([]byte(inf.PlayerResponse), &response); err != nil { return nil, fmt.Errorf("Couldn't parse player response: %w", err) } if response.PlayabilityStatus.Status != "OK" { return nil, fmt.Errorf("Unavailable because: %s", response.PlayabilityStatus.Reason) } formats.add(response.StreamingData.Formats) formats.add(response.StreamingData.AdaptiveFormats) if seconds := response.VideoDetails.LengthSeconds; seconds != "" { val, err := strconv.Atoi(seconds) if err == nil { info.Duration = time.Duration(val) * time.Second } } if date, err := time.Parse(youtubeDateFormat, response.Microformat.Renderer.PublishDate); err == nil { info.DatePublished = date } else { log.Debug().Msgf("Unable to parse date published %v", err) } info.Title = response.VideoDetails.Title info.Uploader = response.VideoDetails.Author } else { log.Debug().Msg("Unable to extract player response JSON") } info.htmlPlayerFile = jsonConfig.Assets.JS if len(formats) == 0 { log.Debug().Msgf("No formats found") } if dashManifestURL := inf.Dashmpd; dashManifestURL != "" { tokens, err := getSigTokens(info.htmlPlayerFile) if err != nil { return nil, fmt.Errorf("Unable to extract signature tokens: %w", err) } regex := regexp.MustCompile("\\/s\\/([a-fA-F0-9\\.]+)") regexSub := regexp.MustCompile("([a-fA-F0-9\\.]+)") dashManifestURL = regex.ReplaceAllStringFunc(dashManifestURL, func(str string) string { return "/signature/" + decipherTokens(tokens, regexSub.FindString(str)) }) dashFormats, err := getDashManifest(dashManifestURL) if err != nil { return nil, fmt.Errorf("Unable to extract dash manifest: %w", err) } for _, dashFormat := range dashFormats { added := false for j, format := range formats { if dashFormat.Itag == format.Itag { formats[j] = dashFormat added = true break } } if !added { formats = append(formats, dashFormat) } } } info.Formats = formats return info, nil } func getVideoInfoFromEmbedded(id string) (map[string]interface{}, error) { var jsonConfig map[string]interface{} html, err := httpGetAndCheckResponseReadBody(youtubeEmbeddedBaseURL + id) if err != nil { return nil, fmt.Errorf("Embedded url request returned %w", err) } // re = regexp.MustCompile("\"sts\"\\s*:\\s*(\\d+)") re := regexp.MustCompile("yt.setConfig\\({'PLAYER_CONFIG': (.*?)}\\);") matches := re.FindSubmatch(html) if len(matches) < 2 { return nil, fmt.Errorf("Error extracting sts from embedded url response") } dec := json.NewDecoder(bytes.NewBuffer(matches[1])) err = dec.Decode(&jsonConfig) if err != nil { return nil, fmt.Errorf("Unable to extract json from embedded url: %w", err) } return jsonConfig, nil } func getDashManifest(urlString string) (formats []*Format, err error) { resp, err := httpGetAndCheckResponse(urlString) if err != nil { return nil, err } defer resp.Body.Close() dec := xml.NewDecoder(resp.Body) var token xml.Token for ; err == nil; token, err = dec.Token() { if el, ok := token.(xml.StartElement); ok && el.Name.Local == "Representation" { var rep representation err = dec.DecodeElement(&rep, &el) if err != nil { break } if itag := getItag(rep.Itag); itag != nil { format := &Format{ Itag: *itag, url: rep.URL, } if rep.Height != 0 { format.Itag.Resolution = strconv.Itoa(rep.Height) + "p" } formats = append(formats, format) } else { log.Debug().Msgf("No metadata found for itag: %v, skipping...", rep.Itag) } } } if err != io.EOF { return nil, err } return formats, nil }