Categories
Development

OpenAI: Text to Speech

An example of how to listen to an article from Heise.

MP3 from Text with OpenAI

Create a .env file with an API key for OpenAI:

OPENAI_API_KEY=mySecretKey

According to the OpenAI documentation, this is sample code to generate speech from text:

from pathlib import Path
from openai import OpenAI
# The OpenAI client picks up the API key from the OPENAI_API_KEY
# environment variable.
client = OpenAI()

# Write the generated audio next to this script.
speech_file_path = Path(__file__).parent / "speech.mp3"

# Use the streaming-response variant: calling stream_to_file() on the plain
# (non-streaming) response is deprecated in the openai SDK because it buffers
# the whole body before writing it out.
with client.audio.speech.with_streaming_response.create(
    model="tts-1",
    voice="alloy",
    input="Today is a wonderful day to build something people love!",
) as response:
    response.stream_to_file(speech_file_path)

To execute this sample I have to install openai first:

pip install openai

To play the mp3-file I have to install ffmpeg first:

sudo apt install ffmpeg

Create mp3 and play it:

# Run the sample code; it writes speech.mp3 into the directory of sample.py
python sample.py
# Play the generated sound file with ffplay (installed as part of ffmpeg)
ffplay speech.mp3

Play MP3 with Python

Install pygame:

pip install pygame
from pathlib import Path
import pygame

def play_mp3(file_path):
    """Play the audio file at *file_path* and block until playback ends.

    Args:
        file_path: Location of the MP3 file handed to
            ``pygame.mixer.music.load``.
    """
    pygame.mixer.init()
    pygame.mixer.music.load(file_path)
    pygame.mixer.music.play()

    # One clock for the whole wait loop instead of a new Clock per iteration.
    clock = pygame.time.Clock()

    # Keep the program running while the music plays; tick(10) limits the
    # polling to roughly ten checks per second.
    while pygame.mixer.music.get_busy():
        clock.tick(10)

# Usage: play the speech.mp3 that sits in the same directory as this script.
speech_file_path = Path(__file__).with_name("speech.mp3")
play_mp3(speech_file_path)
Run the script: python playmp3.py

Read Heise Article

from dotenv import load_dotenv
from pathlib import Path
from openai import OpenAI
import selenium.webdriver as webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pygame

def scrape_website(website):
    """Load *website* in headless Chrome and return the page's HTML source.

    Args:
        website: URL passed to ``driver.get``.

    Returns:
        The ``driver.page_source`` string read after the page was loaded.

    The browser is always shut down with ``driver.quit()``, even if loading
    the page raises.
    """
    print("Launching chrome browser...")
    service = Service()
    options = Options()
    # Enable headless mode so the browser window stays invisible. The old
    # ``options.headless = True`` setter was deprecated and later removed in
    # Selenium 4, where assigning it no longer has any effect; the
    # command-line flag is the supported way.
    # NOTE(review): "--headless=new" needs a reasonably recent Chrome; use
    # plain "--headless" for very old browser versions.
    options.add_argument("--headless=new")

    driver = webdriver.Chrome(service=service, options=options)

    try:
        driver.get(website)
        print("Page loaded...")
        return driver.page_source
    finally:
        driver.quit()

def split_dom_content(dom_content, max_length=6000):
    """Cut *dom_content* into consecutive slices of at most *max_length* items.

    Args:
        dom_content: Sliceable sequence (for example a string) to split.
        max_length: Upper bound for the length of each slice.

    Returns:
        A list of slices in their original order; the last one may be
        shorter. Empty input yields an empty list.
    """
    chunks = []
    for start in range(0, len(dom_content), max_length):
        end = start + max_length
        chunks.append(dom_content[start:end])
    return chunks

def scrape_heise_website(website):
    """Fetch a Heise article page and return the text of its body paragraphs.

    Args:
        website: URL of the article; it is loaded through ``scrape_website``.

    Returns:
        The stripped text of every ``<p>`` inside the ``article-content``
        ``<div>``, joined with newlines. An empty string is returned when
        that ``<div>`` is not present. The header title and lead are not
        part of the result.
    """
    html = scrape_website(website)

    # Use BeautifulSoup to parse the HTML
    soup = BeautifulSoup(html, 'html.parser')

    # The actual article content is usually found in a <div> tag with the
    # class 'article-content'
    article_div = soup.find('div', {'class': 'article-content'})
    paragraphs = article_div.find_all('p') if article_div else []

    # Remove the 'redakteurskuerzel' spans so they are not read aloud
    for para in paragraphs:
        for span in para.find_all('span', {'class': 'redakteurskuerzel'}):
            span.decompose()  # Removes the tag completely from the tree

    return "\n".join(para.get_text().strip() for para in paragraphs)


def article_to_mp3(article_content):
    """Convert *article_content* to speech and save it as ``speech.mp3``.

    The file is written into the directory of this script.

    Args:
        article_content: Text sent to the OpenAI speech endpoint.
    """
    client = OpenAI()
    speech_file_path = Path(__file__).parent / "speech.mp3"

    # NOTE(review): the speech endpoint limits the input length (4096
    # characters according to the OpenAI API reference - confirm); longer
    # articles would have to be split before being sent.
    # The streaming-response variant is used because stream_to_file() on the
    # plain response is deprecated in the openai SDK.
    with client.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice="alloy",
        input=article_content,
    ) as response:
        response.stream_to_file(speech_file_path)


def play_mp3():
    """Play ``speech.mp3`` from this script's directory and wait until done."""
    speech_file_path = Path(__file__).parent / "speech.mp3"

    pygame.mixer.init()
    pygame.mixer.music.load(speech_file_path)
    pygame.mixer.music.play()

    # One clock for the whole wait loop instead of a new Clock per iteration.
    clock = pygame.time.Clock()

    # Keep the program running while the music plays
    while pygame.mixer.music.get_busy():
        clock.tick(10)


# Load the .env file (it holds OPENAI_API_KEY)
load_dotenv()

article_content = scrape_heise_website("https://www.heise.de/news/Streit-ueber-Kosten-Meta-kappt-Leitungen-zur-Telekom-9953162.html")
article_to_mp3(article_content)
play_mp3()

Leave a Reply

Your email address will not be published. Required fields are marked *