# Spaces: Runtime error
# def get_lyrics_url_from_website():
#     # https://www.disneyclips.com/lyrics/
import asyncio
from typing import List, TypedDict, Tuple, Optional
from urllib.parse import urljoin

import aiohttp
from bs4 import BeautifulSoup
class Lyric(TypedDict):
    """Typed dictionary describing one lyric entry: a name and its text."""

    # Label of the entry (presumably the song title -- confirm against callers).
    name: str
    # Body of the lyric as a single string.
    text: str
class Movie(TypedDict):
    """Typed dictionary holding a movie title and a list of ``Lyric`` entries."""

    # Title of the movie.
    title: str
    # Lyric entries attached to this movie.
    lyrics: List[Lyric]
# Index page that is downloaded to discover the movie links; it is also the
# base address that scraped hrefs are attached to. Note the trailing slash.
URL = "https://www.disneyclips.com/lyrics/"
async def get_lyrics_urls_from_movie_url(url: str, session: aiohttp.ClientSession) -> Optional[List[Tuple[str, str]]]:
    """Collect the links found in the songs table of one movie page.

    Args:
        url: Address of the movie page to download.
        session: Open aiohttp session used to perform the request.

    Returns:
        A list of ``(link text, absolute URL)`` tuples, one for every anchor
        carrying an ``href`` inside the ``<table class="songs">`` element, or
        ``None`` when the page contains no such table.
    """
    async with session.get(url) as response:
        html = await response.text()

    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', {'class': 'songs'})
    if table is None:
        return None

    # URL already ends with "/", so gluing the href on with another "/" produced
    # ".../lyrics//<href>". urljoin resolves relative hrefs against the base and
    # leaves absolute hrefs untouched. Anchors without an href are skipped
    # instead of being turned into ".../None".
    return [
        (link.text, urljoin(URL, link['href']))
        for link in table.find_all('a', href=True)
    ]
async def get_lyric_from_lyric_url(url: str, name: str, session: aiohttp.ClientSession) -> Lyric:
    """Download one lyric page and package its text with the given name.

    Args:
        url: Address of the lyric page to download.
        name: Name stored in the returned entry.
        session: Open aiohttp session used to perform the request.

    Returns:
        A ``Lyric`` whose ``name`` is *name* and whose ``text`` is the
        concatenated text of every ``<p>`` found under the ``class="main"``
        div nested in the ``id="cnt"`` div. ``text`` is empty when either
        container is missing from the page.
    """
    async with session.get(url) as response:
        html = await response.text()

    soup = BeautifulSoup(html, 'html.parser')
    # Look the containers up one at a time so that a page with an unexpected
    # layout yields an empty text instead of an AttributeError on None.
    content = soup.find('div', {'id': 'cnt'})
    main_div = content.find('div', {'class': 'main'}) if content is not None else None
    paragraphs = main_div.find_all('p') if main_div is not None else []

    # The signature promises a Lyric; previously only the bare text was
    # returned and `name` was never used.
    return Lyric(name=name, text="".join(p.text for p in paragraphs))
async def get_movie_names_and_urls(session: aiohttp.ClientSession) -> List[Tuple[str, str]]:
    """Read the index page at ``URL`` and list the links it contains.

    Args:
        session: Open aiohttp session used to perform the request.

    Returns:
        A list of ``(link text, absolute URL)`` tuples, one for every anchor
        carrying an ``href`` under the ``class="main"`` div nested in the
        ``id="cnt"`` div. The list is empty when either container is missing.
    """
    async with session.get(URL) as response:
        html = await response.text()

    soup = BeautifulSoup(html, 'html.parser')
    # Resolve the containers step by step: chaining .find() on a missing
    # element would raise AttributeError on None.
    content = soup.find('div', {'id': 'cnt'})
    main_div = content.find('div', {'class': 'main'}) if content is not None else None
    if main_div is None:
        return []

    # URL already ends with "/", so f"{URL}/{href}" produced a double slash.
    # urljoin resolves relative hrefs against the index page and keeps absolute
    # hrefs as they are; anchors without an href are skipped.
    return [
        (link.text, urljoin(URL, link['href']))
        for link in main_div.find_all('a', href=True)
    ]
async def main():
    """Fetch the movie index, then fetch every movie page concurrently.

    Returns:
        A list with one item per movie, in the order the movies were listed.
        Each item is whatever ``get_lyrics_urls_from_movie_url`` returned for
        that movie's URL.
    """
    async with aiohttp.ClientSession() as session:
        names_and_urls = await get_movie_names_and_urls(session)
        # get_lyrics_urls_from_movie_url takes (url, session). Passing the movie
        # name as an extra first argument raised TypeError for every task.
        # gather() schedules the coroutines itself, so no explicit create_task
        # is needed.
        return await asyncio.gather(
            *(get_lyrics_urls_from_movie_url(url, session) for _name, url in names_and_urls)
        )
# asyncio.run() creates a fresh event loop, runs main() to completion and closes
# the loop afterwards. Calling asyncio.get_event_loop() with no running loop is
# deprecated and left the loop unclosed.
# NOTE(review): this still runs on import, as before; consider guarding it with
# `if __name__ == "__main__":` if the module is ever imported.
asyncio.run(main())