An example of how to listen to an article from Heise.
MP3 from Text with OpenAI
Create a .env file with an API key for OpenAI:
OPENAI_API_KEY=mySecretKey
According to the OpenAI documentation, this is sample code to generate speech from text:
# Minimal text-to-speech sample: synthesize one sentence and store the audio
# as speech.mp3 in the same directory as this script.
from pathlib import Path

from openai import OpenAI

# No API key is passed explicitly to the client here.
client = OpenAI()
speech_file_path = Path(__file__).parent / "speech.mp3"

response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Today is a wonderful day to build something people love!",
)
# Write the returned audio to the target file.
response.stream_to_file(speech_file_path)
To execute this sample I have to install openai first:
pip install openai
To play the mp3-file I have to install ffmpeg first:
sudo apt install ffmpeg
Create mp3 and play it:
# run sample code
python sample.py

# play soundfile
ffplay speech.mp3
Play MP3 with Python
Install pygame:
pip install pygame
from pathlib import Path

import pygame


def play_mp3(file_path):
    """Play the MP3 at *file_path* and block until playback has finished.

    Args:
        file_path: Location of the MP3 file handed to ``pygame.mixer.music.load``.
    """
    pygame.mixer.init()
    pygame.mixer.music.load(file_path)
    pygame.mixer.music.play()

    # Keep the program running while the music plays. A single Clock is
    # reused so the loop does not allocate a new one on every iteration.
    clock = pygame.time.Clock()
    while pygame.mixer.music.get_busy():
        clock.tick(10)


# Usage
speech_file_path = Path(__file__).parent / "speech.mp3"
play_mp3(speech_file_path)
python playmp3.py
Read Heise Article
from dotenv import load_dotenv
from pathlib import Path
from openai import OpenAI
import selenium.webdriver as webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pygame
def scrape_website(website: str) -> str:
    """Load *website* in a headless Chrome browser and return its HTML.

    Args:
        website: URL passed to ``driver.get``.

    Returns:
        The value of ``driver.page_source`` after the page has been loaded.
    """
    print("Launching chrome browser...")
    service = Service()
    options = Options()
    # Enable headless mode so the browser stays invisible. The command-line
    # switch is used because the ``Options.headless`` setter was deprecated
    # and later removed in Selenium 4; on those versions the old assignment
    # has no effect and a visible browser window opens.
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(website)
        print("Page loaded...")
        return driver.page_source
    finally:
        # Always shut the browser down, even when loading the page failed.
        driver.quit()
def split_dom_content(dom_content, max_length: int = 6000):
    """Split *dom_content* into consecutive slices of at most *max_length* items.

    Args:
        dom_content: Sliceable sequence with a length, e.g. a string.
        max_length: Maximum size of each slice; must be at least 1.

    Returns:
        A list of slices in original order. The last slice may be shorter,
        and empty input yields an empty list.

    Raises:
        ValueError: If *max_length* is smaller than 1. A negative value would
            otherwise produce an empty result and silently drop the content.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")
    return [
        dom_content[start : start + max_length]
        for start in range(0, len(dom_content), max_length)
    ]
def scrape_heise_website(website: str) -> str:
    """Fetch a Heise article page and return its body text.

    Only the paragraphs inside the ``article-content`` container are returned;
    the headline and lead are not part of the result. They were previously
    looked up but never used, and the lookup raised ``AttributeError`` on
    pages without those elements, so it has been removed.

    Args:
        website: URL of the article, passed on to ``scrape_website``.

    Returns:
        The stripped text of every ``<p>`` in the article container, joined
        with newlines. An empty string if the container is not found.
    """
    html = scrape_website(website)

    # Use BeautifulSoup to parse the HTML.
    soup = BeautifulSoup(html, 'html.parser')

    # The article body is looked up as a <div> with the class 'article-content'.
    article_div = soup.find('div', {'class': 'article-content'})
    if article_div is None:
        return ""
    paragraphs = article_div.find_all('p')

    # Remove the 'redakteurskuerzel' spans (editor initials, going by the
    # class name) so they do not end up in the text.
    for para in paragraphs:
        for span in para.find_all('span', {'class': 'redakteurskuerzel'}):
            span.decompose()  # removes the tag from the tree entirely

    return "\n".join(para.get_text().strip() for para in paragraphs)
# Maximum number of characters the speech endpoint accepts in one ``input``
# (per the OpenAI API reference for "Create speech").
_TTS_MAX_CHARS = 4096


def _split_for_tts(text, limit=_TTS_MAX_CHARS):
    """Split *text* into pieces of at most *limit* characters, cutting at whitespace where possible."""
    remaining = text.strip()
    pieces = []
    while len(remaining) > limit:
        # Prefer the last newline or space that still fits into this piece.
        cut = max(
            remaining.rfind("\n", 0, limit + 1),
            remaining.rfind(" ", 0, limit + 1),
        )
        if cut <= 0:
            # No usable whitespace: fall back to a hard cut at the limit.
            cut = limit
        pieces.append(remaining[:cut].rstrip())
        remaining = remaining[cut:].lstrip()
    if remaining:
        pieces.append(remaining)
    return pieces


def article_to_mp3(article_content):
    """Convert *article_content* to speech and save it as speech.mp3.

    The file is written next to this script. Text longer than the request
    limit is sent in several requests and the returned audio is written back
    to back into one file; text within the limit still needs one request.

    Args:
        article_content: The text to be read aloud.

    Raises:
        ValueError: If *article_content* is empty or only whitespace, so the
            problem is reported before any API call is made.
    """
    chunks = _split_for_tts(article_content or "")
    if not chunks:
        raise ValueError("article_content is empty; nothing to convert to speech")

    client = OpenAI()
    speech_file_path = Path(__file__).parent / "speech.mp3"

    # Collect all audio first so the file is only touched once every request
    # has succeeded.
    audio_parts = []
    for chunk in chunks:
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=chunk,
        )
        audio_parts.append(response.read())

    # NOTE(review): the MP3 segments are simply concatenated; confirm that the
    # player handles the joined stream when more than one segment is produced.
    speech_file_path.write_bytes(b"".join(audio_parts))
def play_mp3():
    """Play speech.mp3 from this script's directory and block until it ends."""
    speech_file_path = Path(__file__).parent / "speech.mp3"

    # Remember whether this call is the one that starts the mixer, so that a
    # mixer set up by the caller is left running afterwards.
    owns_mixer = pygame.mixer.get_init() is None
    pygame.mixer.init()
    try:
        pygame.mixer.music.load(speech_file_path)
        pygame.mixer.music.play()

        # Keep the program running while the music plays. A single Clock is
        # reused instead of creating a new one on every iteration.
        clock = pygame.time.Clock()
        while pygame.mixer.music.get_busy():
            clock.tick(10)
    finally:
        # Shut the mixer down again if it was started here, so it is not left
        # initialized after playback or after an error.
        if owns_mixer:
            pygame.mixer.quit()
# Load the .env file (it holds OPENAI_API_KEY, see the setup notes above).
load_dotenv()

# Scrape the article text, turn it into speech.mp3, then play that file.
article_content = scrape_heise_website(
    "https://www.heise.de/news/Streit-ueber-Kosten-Meta-kappt-Leitungen-zur-Telekom-9953162.html"
)
article_to_mp3(article_content)
play_mp3()
