aboutsummaryrefslogtreecommitdiff
path: root/yt_music_scraper/main.py
blob: 913e16a83fa50e938b705d3098bb1958ca55a532 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from yt_dlp import YoutubeDL
import urllib.parse
from ytmusicapi import YTMusic
import re
import concurrent.futures
import json
import argparse
import sys
import os
import logging
# Send log records to stdout through the root logger's default handler.
logging.basicConfig(stream=sys.stdout)
# Root logger; the yt-dlp option dicts in this file pass it as their 'logger'.
logger = logging.getLogger()
# Second destination: records are also written to download.log (relative
# path, so it lands in the current working directory).
fh = logging.FileHandler('download.log')
# Verbosity is taken from the LOG_LEVEL environment variable, defaulting to
# INFO; upper-cased so a lower-case value such as "debug" is accepted.
LOGLEVEL = os.environ.get('LOG_LEVEL', 'INFO').upper()
logger.setLevel(LOGLEVEL)
logger.addHandler(fh)
# Only visible when LOG_LEVEL is DEBUG (or lower).
logger.debug("Starting...")

def album_info(data):
    """Log the playlist name and playlist ID of *data* and return the ID.

    Both values are logged at INFO level as JSON; a key that is absent is
    logged as "N/A".

    Returns:
        ``data['playlist_id']``, or the string "N/A" when the key is absent.
    """
    playlist_name = data.get('playlist', "N/A")
    playlist_id = data.get('playlist_id', "N/A")
    logger.info(json.dumps(playlist_name, indent=4))
    logger.info(json.dumps(playlist_id, indent=4))
    return playlist_id



def vid_info(data):
    """Log a track's title and album and build its album search query.

    Args:
        data: Track info mapping. The keys ``title``, ``album`` and
            ``artists`` are read; all of them are optional.

    Returns:
        ``"<album> <first artist>"`` when the track has a non-empty album
        (the artist part is an empty string when no artist is listed),
        otherwise ``None``.
    """
    logger.info(json.dumps(data.get('title', "N/A"), indent=4))
    logger.info(json.dumps(data.get('album', "N/A"), indent=4))

    album = data.get('album')
    if not album:
        return None

    # 'artists' can be absent, None or an empty list; indexing it
    # unconditionally would raise, so fall back to an empty artist name.
    artists = data.get('artists') or [""]
    first_artist = artists[0] or ""
    return album + " " + first_artist

# yt-dlp options used by download_album() to fetch a whole album.
# Unlike ydl_opts, 'ignoreerrors' is not set here.
ydl_opts2 = {
    'format': 'bestaudio/best',
    # Post-processing chain: convert the audio to FLAC, then write metadata
    # tags, then embed the thumbnail into the file.
    'postprocessors': [
        {
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'flac',
            'preferredquality': 'best',
        },
        {'key': 'FFmpegMetadata'},
        {'key': 'EmbedThumbnail'},
    ],
    'writethumbnail': True,
    # NOTE(review): 'embedthumbnail' and 'add_metadata' look like CLI-style
    # names that duplicate the postprocessor entries above -- confirm that
    # YoutubeDL actually reads them.
    'embedthumbnail': True,
    'add_metadata': True,
    'logger': logger,
    # One directory per playlist, files named "<index> - <title>.<ext>".
    'outtmpl': '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s',
}

# yt-dlp options used by the script entry point to read the input playlist's
# metadata (extract_info is called there with download=False).
ydl_opts = {
    'format': 'bestaudio/best',
    # Post-processing chain: convert the audio to FLAC, then write metadata
    # tags, then embed the thumbnail into the file.
    'postprocessors': [
        {
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'flac',
            'preferredquality': 'best',
        },
        {'key': 'FFmpegMetadata'},
        {'key': 'EmbedThumbnail'},
    ],
    'writethumbnail': True,
    # NOTE(review): 'embedthumbnail' and 'add_metadata' look like CLI-style
    # names that duplicate the postprocessor entries above -- confirm that
    # YoutubeDL actually reads them.
    'embedthumbnail': True,
    # Keep going when an entry fails; the entry-point loop skips falsy
    # entries accordingly.
    'ignoreerrors': True,
    'add_metadata': True,
    'logger': logger,
    # One directory per playlist, files named "<index> - <title>.<ext>".
    'outtmpl': '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s',
}


def download_album(album_id):
    """Download one album with yt-dlp, configured by ``ydl_opts2``.

    Failures are logged with their traceback and reported through the
    return value instead of propagating, so one broken album does not
    abort a batch of downloads.

    Args:
        album_id: Playlist ID (or URL) of the album, handed to yt-dlp as is.

    Returns:
        Whatever ``YoutubeDL.download`` returns on success, or ``{}`` when
        setting up or running the download raised.
    """
    try:
        # The context manager closes the YoutubeDL instance even when the
        # download fails part-way through.
        with YoutubeDL(ydl_opts2) as yt:
            # download() takes a list of URLs; pass one explicitly rather
            # than relying on a bare string being accepted.
            return yt.download([album_id])
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must still be able to stop the process.
        logger.exception(f"Could not process {album_id}")
        return {}


def _collect_album_queries(entries):
    """Return the set of album search strings produced by ``vid_info``.

    Falsy entries (presumably items yt-dlp could not extract while
    'ignoreerrors' is set) and entries without album metadata are skipped.
    """
    queries = set()
    # Serialising a full info dict is expensive and can hit values that are
    # not JSON types, so only do it when the output is actually wanted.
    debug_enabled = logger.isEnabledFor(logging.DEBUG)
    for datum in entries:
        if not datum:
            continue
        if debug_enabled:
            # default=str: this dump is for humans only, so stringifying
            # unknown objects is acceptable here.
            logger.debug(json.dumps(datum, indent=4, default=str))
        info = vid_info(datum)
        if info:
            queries.add(info)
            logger.info(f"ALBUM NAME: {info}")
        else:
            logger.info(f"Skipping {datum}")
    return queries


def _resolve_album_ids(queries):
    """Search YouTube Music for each query and return album playlist IDs.

    For every query, the first search result of type ``album`` that carries
    a ``playlistId`` is used. IDs are de-duplicated (first occurrence wins)
    so that two queries resolving to the same album do not trigger two
    downloads into the same directory.
    """
    ytmusic = YTMusic()
    album_ids = {}  # dict used as an insertion-ordered set
    # Sorted only to make the lookup order (and the logs) reproducible.
    for query in sorted(queries):
        for item in ytmusic.search(query):
            playlist_id = item.get('playlistId')
            if item.get('resultType') == 'album' and playlist_id:
                album_ids[playlist_id] = None
                break
    return list(album_ids)


def main():
    """Download the album of every track in the playlist given on the CLI.

    Steps: read the playlist (or, with ``--single``, one track) metadata
    without downloading, derive one search query per album, resolve the
    queries to album playlist IDs, then download the albums in a process
    pool.
    """
    parser = argparse.ArgumentParser(prog="YT DLP Music downloader", description="Download all albums corresponding to songs in a playlist")
    parser.add_argument("playlist")
    parser.add_argument("--single", action="store_true", default=False)

    args = parser.parse_args()

    logger.debug("Create YT object...")
    albums = set()
    with YoutubeDL(ydl_opts) as yt:
        data = yt.extract_info(args.playlist, download=False)
        if data:
            # A URL that resolves to a single track has no "entries"; treat
            # it like --single instead of failing with a KeyError.
            entries = None if args.single else data.get("entries")
            if entries is None:
                entries = [data]
            albums = _collect_album_queries(entries)

    if data:
        logger.info(f"albums: {albums}")
        album_ids = _resolve_album_ids(albums)
        logger.info(f"Album IDs: {album_ids}")
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # Drain the iterator so every download has finished (and any
            # worker error has surfaced) before the pool is closed.
            list(executor.map(download_album, album_ids))
    logger.info("DONE")


if __name__ == "__main__":
    main()