aboutsummaryrefslogtreecommitdiff
path: root/yt_music_scraper/main.py
blob: 3e8a917c1d7e843a3bc8a506c0da9379094ff352 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from yt_dlp import YoutubeDL
import urllib.parse
from ytmusicapi import YTMusic
import re
import json
import argparse
import sys
import os
import logging
# Root-logger setup: messages go to stdout, and the threshold is taken from
# the LOG_LEVEL environment variable (case-insensitive, defaulting to INFO).
LOGLEVEL = os.getenv('LOG_LEVEL', 'INFO').upper()
logging.basicConfig(stream=sys.stdout)
logger = logging.getLogger()
logger.setLevel(LOGLEVEL)
logger.debug("Starting...")

def album_info(data):
    """Log the playlist name and playlist ID found in *data*.

    Each value is logged at INFO level as JSON; a key that is absent is
    reported as the string "N/A".

    Returns the value stored under 'playlist_id', or "N/A" when the key
    is missing.
    """
    placeholder = "N/A"
    playlist_name = data.get('playlist', placeholder)
    playlist_id = data.get('playlist_id', placeholder)

    for value in (playlist_name, playlist_id):
        logger.info(json.dumps(value, indent=4))
    return playlist_id

def vid_info(data):
    """Log the title, album and artists found in *data*.

    Each value is logged at INFO level as JSON, in that order; a key that
    is absent is reported as the string "N/A".

    Returns the value stored under 'album', or "N/A" when the key is
    missing.
    """
    placeholder = "N/A"
    for field in ('title', 'album', 'artists'):
        logger.info(json.dumps(data.get(field, placeholder), indent=4))
    return data.get('album', placeholder)

# Options handed to YoutubeDL: pick the best audio stream and have FFmpeg
# extract it as FLAC.
ydl_opts = {
    'format': 'bestaudio/best',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'flac',
        'preferredquality': 'best',
    }],
    'logger': logger,
    # The output template belongs under 'outtmpl' in the Python API;
    # 'output' is only the name of the command-line flag (-o/--output) and
    # is not read by YoutubeDL when passed as an option key.
    'outtmpl': '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s',
}



if __name__ == "__main__":
    # Script entry point: read the metadata of every entry behind the given
    # URL (nothing is downloaded), collect the distinct album names, and look
    # up a YouTube Music playlist ID for each album.
    parser = argparse.ArgumentParser(prog="YT DLP Music downloader", description="Download all albums corresponding to songs in a playlist")
    parser.add_argument("playlist")

    args = parser.parse_args()

    logger.debug("Create YT object...")
    yt = YoutubeDL(ydl_opts)
    data = yt.extract_info(args.playlist, download=False)
    if data:
        # A URL that resolves to a single video has no "entries" key; treat
        # the info dict itself as the only entry instead of raising KeyError.
        entries = data["entries"] if "entries" in data else [data]

        albums = set()
        for datum in entries:
            if not datum:
                # Entries that could not be extracted may be None.
                continue
            if logger.isEnabledFor(logging.DEBUG):
                # Raw info dicts can hold values json cannot serialise;
                # sanitize_info() makes them dumpable. Guarded so the dump is
                # only built when it will actually be emitted.
                logger.debug(json.dumps(yt.sanitize_info(datum), indent=4))
            # Call vid_info once: it logs as a side effect, so a second call
            # would duplicate every log line.
            album = vid_info(datum)
            logger.info(f"ALBUM NAME: {album}")
            # "N/A" is vid_info's placeholder for a missing album, not a name
            # worth searching for.
            if album and album != "N/A":
                albums.add(album)

        logger.info(f"albums: {albums}")
        ytmusic = YTMusic()
        album_ids = []
        # Sorted only to make the lookup order deterministic between runs.
        for album in sorted(albums):
            results = ytmusic.search(album)
            for item in results:
                # Keep the first album-type hit that carries a playlist ID.
                if item.get('resultType') == 'album' and item.get('playlistId'):
                    album_ids.append(item['playlistId'])
                    break
        logger.info(f"Album IDs: {album_ids}")