aboutsummaryrefslogtreecommitdiff
path: root/yt_music_scraper/main.py
blob: 849c6ef819792af01b68473d0cca539472973af5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from yt_dlp import YoutubeDL
import urllib.parse
from ytmusicapi import YTMusic
import re
import concurrent.futures
import json
import argparse
import sys
import os
import logging
# Root-logger configuration: records are written to stdout and the threshold
# is taken from the LOG_LEVEL environment variable (case-insensitive,
# defaulting to INFO).
LOGLEVEL = os.getenv('LOG_LEVEL', 'INFO').upper()
logging.basicConfig(stream=sys.stdout)
logger = logging.getLogger()
logger.setLevel(LOGLEVEL)
logger.debug("Starting...")

def album_info(data):
    """Log an entry's playlist name and playlist ID, then return the ID.

    Both values are logged at INFO level as JSON; a key that is absent from
    *data* is reported as "N/A".

    Args:
        data: Dict that may contain the keys 'playlist' and 'playlist_id'.

    Returns:
        The value stored under 'playlist_id', or "N/A" when that key is
        missing.
    """
    placeholder = "N/A"
    playlist_name = data.get('playlist', placeholder)
    playlist_id = data.get('playlist_id', placeholder)
    for value in (playlist_name, playlist_id):
        logger.info(json.dumps(value, indent=4))
    return playlist_id



def vid_info(data):
    """Build the album search string for one playlist entry.

    The entry's title and album are logged at INFO level as JSON ("N/A" when
    the key is absent).

    Args:
        data: Dict describing one entry. The keys read are 'title', 'album'
            and 'artists' (an iterable of strings).

    Returns:
        "<album> <artist1>,<artist2>,..." when the entry has a non-empty
        album (the artist part is empty when no artists are known),
        otherwise None.
    """
    logger.info(json.dumps(data.get('title', "N/A"), indent=4))
    logger.info(json.dumps(data.get('album', "N/A"), indent=4))

    album = data.get('album')
    if not album:
        # Nothing to search for without an album name.
        return None

    # 'artists' may be missing or present-but-None; str.join(None) would
    # raise TypeError, so both cases fall back to an empty artist list.
    artists = ','.join(data.get('artists') or [])
    return album + " " + artists

# Options for the YoutubeDL instance that download_album() creates.
ydl_opts2 = {
    'format': 'bestaudio/best',
    # Post-processor chain: audio extraction with FLAC as the preferred
    # codec, followed by the metadata and thumbnail-embedding steps.
    'postprocessors': [
        {
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'flac',
            'preferredquality': 'best',
        },
        {'key': 'FFmpegMetadata'},
        {'key': 'EmbedThumbnail'},
    ],
    'writethumbnail': True,
    'embedthumbnail': True,
    'add_metadata': True,
    # Route yt-dlp's messages through the module-level logger.
    'logger': logger,
    # One directory per playlist, files named "<index> - <title>.<ext>".
    'outtmpl': '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s',
}

# Options for the YoutubeDL instance the script entry point uses to read the
# input playlist. Unlike ydl_opts2 this one also sets 'ignoreerrors'.
ydl_opts = {
    'format': 'bestaudio/best',
    # Post-processor chain: audio extraction with FLAC as the preferred
    # codec, followed by the metadata and thumbnail-embedding steps.
    'postprocessors': [
        {
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'flac',
            'preferredquality': 'best',
        },
        {'key': 'FFmpegMetadata'},
        {'key': 'EmbedThumbnail'},
    ],
    'writethumbnail': True,
    'embedthumbnail': True,
    'ignoreerrors': True,
    'add_metadata': True,
    # Route yt-dlp's messages through the module-level logger.
    'logger': logger,
    # One directory per playlist, files named "<index> - <title>.<ext>".
    'outtmpl': '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s',
}


def download_album(album_id):
    """Download one album with yt-dlp, using the ``ydl_opts2`` options.

    Failures are logged and swallowed so that one bad album does not abort
    the remaining downloads.

    Args:
        album_id: Album/playlist identifier, passed unchanged to
            ``YoutubeDL.download``.

    Returns:
        Whatever ``YoutubeDL.download`` returns on success, or an empty dict
        when the download raised an exception.
    """
    # The context manager closes the YoutubeDL instance once the download
    # has finished, whether it succeeded or not.
    with YoutubeDL(ydl_opts2) as yt:
        try:
            return yt.download(album_id)
        except Exception:
            # Catch Exception rather than everything so that Ctrl-C and
            # SystemExit still stop the process; logger.exception keeps the
            # traceback so the reason for the failure is not lost.
            logger.exception("Could not process %s", album_id)
            return {}


def _collect_album_queries(entries):
    """Return the set of search strings ``vid_info`` yields for *entries*."""
    queries = set()
    for entry in entries:
        if not entry:
            # Empty/None entries carry no metadata to inspect.
            continue
        if logger.isEnabledFor(logging.DEBUG):
            # Serialising a whole entry is only worth it when it is shown.
            logger.debug(json.dumps(entry, indent=4))
        query = vid_info(entry)
        if query:
            queries.add(query)
            logger.info(f"ALBUM NAME: {query}")
        else:
            logger.info(f"Skipping {entry}")
    return queries


def _resolve_album_ids(ytmusic, queries):
    """Return the unique playlist IDs of the first album hit for each query."""
    # A dict is used as an insertion-ordered set: different search strings
    # can resolve to the same album, and downloading it twice in parallel
    # would have two workers writing into the same output directory.
    album_ids = {}
    for query in sorted(queries):
        for item in ytmusic.search(query):
            if item.get('resultType') != 'album':
                continue
            playlist_id = item.get('playlistId')
            if playlist_id:
                album_ids[playlist_id] = None
            else:
                logger.info(f"Album result for {query!r} has no playlist ID")
            # Only the first album hit of each search is considered.
            break
        else:
            logger.info(f"No album found for {query!r}")
    return list(album_ids)


def _main():
    """Download every album that the songs of the given playlist belong to.

    Reads the playlist URL/ID from the command line, derives one search
    string per entry via ``vid_info``, looks each one up with ``YTMusic`` and
    downloads the resulting albums in parallel worker processes.
    """
    parser = argparse.ArgumentParser(
        prog="YT DLP Music downloader",
        description="Download all albums corresponding to songs in a playlist",
    )
    parser.add_argument("playlist")
    args = parser.parse_args()

    logger.debug("Create YT object...")
    with YoutubeDL(ydl_opts) as yt:
        data = yt.extract_info(args.playlist, download=False)
    if not data:
        logger.error(f"Could not extract any information from {args.playlist}")
        return

    entries = data.get("entries")
    if entries is None:
        # No 'entries' key: treat the result itself as the only entry
        # (presumably a single-video URL; confirm against yt-dlp's
        # info-dict documentation).
        entries = [data]
    albums = _collect_album_queries(entries)
    logger.info(f"albums: {albums}")

    album_ids = _resolve_album_ids(YTMusic(), albums)
    logger.info(f"Album IDs: {album_ids}")

    with concurrent.futures.ProcessPoolExecutor() as executor:
        # Consume the result iterator so an exception raised while running
        # a download surfaces here instead of being dropped.
        for _ in executor.map(download_album, album_ids):
            pass
    logger.info("DONE")


if __name__ == "__main__":
    _main()