Example of how to listen to an article from Heise.
MP3 from Text with OpenAI
Create a .env file with an API key for OpenAI:
OPENAI_API_KEY=mySecretKey
According to the OpenAI documentation, this is sample code to generate speech from text:
"""Generate speech.mp3 next to this script from a fixed sample sentence."""
from pathlib import Path

from openai import OpenAI

# OpenAI() takes the API key from the OPENAI_API_KEY environment variable.
# NOTE: a .env file is not read automatically; export the variable in the
# shell (or load the file yourself) before running this sample.
client = OpenAI()
speech_file_path = Path(__file__).parent / "speech.mp3"

# Write the audio through with_streaming_response: calling stream_to_file()
# on the plain create() result is deprecated in the openai 1.x client.
with client.audio.speech.with_streaming_response.create(
    model="tts-1",
    voice="alloy",
    input="Today is a wonderful day to build something people love!",
) as response:
    response.stream_to_file(speech_file_path)
To execute this sample I have to install openai first:
pip install openai
To play the MP3 file I have to install ffmpeg (which provides the ffplay command) first:
sudo apt install ffmpeg
Create mp3 and play it:
# run sample code
python sample.py
# play sound file
ffplay speech.mp3
Play MP3 with Python
Install pygame:
pip install pygame
"""Play speech.mp3 (located next to this script) with pygame."""
from pathlib import Path

import pygame


def play_mp3(file_path):
    """Play an audio file and block until playback has finished.

    Args:
        file_path: Location of the file to play (str or pathlib.Path).
    """
    pygame.mixer.init()
    try:
        # NOTE(review): str() is used in case the installed pygame version
        # does not accept pathlib.Path objects here.
        pygame.mixer.music.load(str(file_path))
        pygame.mixer.music.play()

        # Keep the program running while the music plays. One Clock is
        # reused for every poll instead of building a new one per iteration;
        # tick(10) limits the loop to about ten polls per second.
        clock = pygame.time.Clock()
        while pygame.mixer.music.get_busy():
            clock.tick(10)
    finally:
        # Release the audio device even if loading or playback failed.
        pygame.mixer.quit()


if __name__ == "__main__":
    # Usage: only runs when executed as a script, so play_mp3 can also be
    # imported from other modules without starting playback.
    play_mp3(Path(__file__).parent / "speech.mp3")
python playmp3.py
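Read Heise Article
Install the additional packages used by this script (a Chrome browser must be installed as well):
pip install python-dotenv selenium beautifulsoup4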
"""Scrape a heise.de news article, turn it into speech with OpenAI and play it."""
from pathlib import Path

import pygame
import selenium.webdriver as webdriver
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from openai import OpenAI
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

# The OpenAI speech endpoint documents a limit of 4096 characters per request.
TTS_MAX_INPUT_CHARS = 4096

# The generated audio is written next to this script and played from there.
SPEECH_FILE_PATH = Path(__file__).parent / "speech.mp3"

ARTICLE_URL = (
    "https://www.heise.de/news/"
    "Streit-ueber-Kosten-Meta-kappt-Leitungen-zur-Telekom-9953162.html"
)


def scrape_website(website):
    """Load a page in headless Chrome and return its HTML.

    Args:
        website: URL of the page to load.

    Returns:
        The page source reported by the browser after the page was loaded.
    """
    print("Launching chrome browser...")
    options = Options()
    # Run Chrome without a visible window. The former `options.headless = True`
    # property was deprecated and later removed in Selenium 4, where assigning
    # it no longer enables headless mode; the command-line switch is the
    # supported way.
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(service=Service(), options=options)
    try:
        driver.get(website)
        print("Page loaded...")
        return driver.page_source
    finally:
        # Always close the browser, even when loading the page failed.
        driver.quit()


def split_dom_content(dom_content, max_length=6000):
    """Cut a string into consecutive pieces of at most ``max_length`` characters.

    Args:
        dom_content: The text to split.
        max_length: Maximum length of each piece; must be at least 1.

    Returns:
        A list of slices in original order; empty for empty input.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")
    return [
        dom_content[start : start + max_length]
        for start in range(0, len(dom_content), max_length)
    ]


def scrape_heise_website(website):
    """Return the body text of a heise.de news article.

    The text is taken from the <p> elements inside the <div> with class
    'article-content'. Author abbreviations (<span> elements with class
    'redakteurskuerzel') are removed first.

    Args:
        website: URL of the article.

    Returns:
        The paragraph texts joined by newlines, or an empty string when the
        page contains no 'article-content' container.
    """
    html = scrape_website(website)
    soup = BeautifulSoup(html, "html.parser")

    # The headline and lead were looked up here before but never used; those
    # lookups raised AttributeError on pages without the elements, so they
    # have been dropped.
    article_div = soup.find("div", {"class": "article-content"})
    if article_div is None:
        return ""

    paragraphs = article_div.find_all("p")
    for para in paragraphs:
        for span in para.find_all("span", {"class": "redakteurskuerzel"}):
            span.decompose()  # removes the tag from the tree entirely

    return "\n".join(para.get_text().strip() for para in paragraphs)


def _split_for_tts(text, max_length=TTS_MAX_INPUT_CHARS):
    """Split text into chunks of at most ``max_length`` characters.

    Chunks are cut at paragraph (newline) boundaries where possible so that
    sentences are not interrupted between two speech requests.
    """
    chunks = []
    current = ""
    for paragraph in text.split("\n"):
        if not paragraph.strip():
            continue
        # A single paragraph longer than the limit has to be cut hard.
        for piece in split_dom_content(paragraph, max_length):
            candidate = f"{current}\n{piece}" if current else piece
            if len(candidate) <= max_length:
                current = candidate
            else:
                chunks.append(current)
                current = piece
    if current:
        chunks.append(current)
    return [chunk for chunk in chunks if chunk.strip()]


def article_to_mp3(article_content):
    """Convert text to speech with OpenAI and store it as speech.mp3.

    The text is sent in several requests when it exceeds the per-request
    input limit of the speech endpoint. The file is written only after all
    requests have succeeded.

    Args:
        article_content: The text to read aloud.

    Raises:
        ValueError: If ``article_content`` contains no text.
    """
    chunks = _split_for_tts(article_content)
    if not chunks:
        raise ValueError("article_content is empty; nothing to convert to speech")

    client = OpenAI()
    audio_parts = []
    for chunk in chunks:
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=chunk,
        )
        audio_parts.append(response.read())

    # NOTE(review): the MP3 segments are simply written back to back, which
    # common players accept; verify playback with a long article.
    SPEECH_FILE_PATH.write_bytes(b"".join(audio_parts))


def play_mp3():
    """Play speech.mp3 and block until playback has finished."""
    pygame.mixer.init()
    # NOTE(review): str() is used in case the installed pygame version does
    # not accept pathlib.Path objects here.
    pygame.mixer.music.load(str(SPEECH_FILE_PATH))
    pygame.mixer.music.play()

    # Keep the program running while the music plays; one Clock is reused
    # for every poll instead of creating a new one per iteration.
    clock = pygame.time.Clock()
    while pygame.mixer.music.get_busy():
        clock.tick(10)


def main():
    """Scrape the configured article, synthesize it and play the result."""
    # Load the .env file so that OPENAI_API_KEY is available to OpenAI().
    load_dotenv()
    article_content = scrape_heise_website(ARTICLE_URL)
    article_to_mp3(article_content)
    play_mp3()


if __name__ == "__main__":
    main()