Categories
Development

OpenAI: Text to Speech

Example how to listen to an article from Heise.

MP3 from Text with OpenAI

Create an .env file with an API Key for OpenAI:

OPENAI_API_KEY=mySecretKey

According to OpenAPI documentation this is a sample code to generate Speech from Text:

from pathlib import Path
from openai import OpenAI
client = OpenAI()

speech_file_path = Path(__file__).parent / "speech.mp3"
response = client.audio.speech.create(
  model="tts-1",
  voice="alloy",
  input="Today is a wonderful day to build something people love!"
)

response.stream_to_file(speech_file_path)

To execute this sample I have to install openai first:

pip install openai

To play the mp3-file I have to install ffmpeg first:

sudo apt install ffmpeg

Create mp3 and play it:

# run sample code
python sample.py
# play soundfile
ffplay speech.mp3

Play MP3 with Python

Install pygame:

pip install pygame
from pathlib import Path
import pygame

def play_mp3(file_path):
    pygame.mixer.init()
    pygame.mixer.music.load(file_path)
    pygame.mixer.music.play()

    # Keep the program running while the music plays
    while pygame.mixer.music.get_busy():
        pygame.time.Clock().tick(10)

# Usage
speech_file_path = Path(__file__).parent / "speech.mp3"
play_mp3(speech_file_path)
python playmp3.py

Read Heise Article

from dotenv import load_dotenv
from pathlib import Path
from openai import OpenAI
import selenium.webdriver as webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pygame

def scrape_website(website):
    print("Launching chrome browser...")
    service = Service()
    options = Options()
    options.headless = True  # Headless-Modus aktivieren, um den Browser unsichtbar zu machen

    driver = webdriver.Chrome(service=service, options=options)

    try:
        driver.get(website)
        print("Page loaded...")
        html = driver.page_source
        return html
    finally:
        driver.quit()


def split_dom_content(dom_content, max_length=6000):
    return [
        dom_content[i : i + max_length] for i in range(0, len(dom_content), max_length)
    ]


def scrape_heise_website(website):
    html  = scrape_website(website)

    # BeautifulSoup zum Parsen des HTML-Codes verwenden
    soup = BeautifulSoup(html, 'html.parser')

    # Artikel-Header und -Inhalt extrahieren
    # Der Header ist oft in einem <h1>-Tag zu finden
    header_title = soup.find('h1', {'class': 'a-article-header__title'}).get_text().strip()
    header_lead  = soup.find('p',  {'class': 'a-article-header__lead'}).get_text().strip()

    # Der eigentliche Artikelinhalt befindet sich oft in einem <div>-Tag mit der Klasse 'article-content'
    article_div = soup.find('div', {'class': 'article-content'})
    paragraphs = article_div.find_all('p') if article_div else []
    # 'redakteurskuerzel' entfernen
    for para in paragraphs:
        spans_to_remove = para.find_all('span', {'class': 'redakteurskuerzel'})
        for span in spans_to_remove:
            span.decompose()  # Entfernt den Tag vollständig aus dem Baum

    article_content = "\n".join([para.get_text().strip() for para in paragraphs])

    return article_content
    # Header und Artikelinhalt ausgeben
    #result = "Header Title:" + header_title + "\nHeader Lead:" + header_lead + "\nContent:" + article_content
    #return result

def article_to_mp3(article_content):
    client = OpenAI()

    speech_file_path = Path(__file__).parent / "speech.mp3"
    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=article_content
    )

    response.stream_to_file(speech_file_path)

def play_mp3():
    speech_file_path = Path(__file__).parent / "speech.mp3"
    pygame.mixer.init()
    pygame.mixer.music.load(speech_file_path)
    pygame.mixer.music.play()

    # Keep the program running while the music plays
    while pygame.mixer.music.get_busy():
        pygame.time.Clock().tick(10)


# .env-Datei laden#
load_dotenv()
article_content = scrape_heise_website("https://www.heise.de/news/Streit-ueber-Kosten-Meta-kappt-Leitungen-zur-Telekom-9953162.html")
article_to_mp3(article_content)
play_mp3()