Browser text to MP3 using Python code (optimized)

!pip install edge-tts pydub


import asyncio
import math
import os
import re
import tempfile

import edge_tts
import requests
from bs4 import BeautifulSoup
from pydub import AudioSegment


# Input

# Page to read, and the phrases that delimit the passage to narrate.
url = "https://prabodhadevotee.blogspot.com/2025/01/test.html"
start_phrase = "ఆఖరీ మరణము"
end_phrase = "ప్రబోధానంద యోగీశ్వర్లు"
voice = "te-IN-MohanNeural"  # edge-tts voice name
chunk_size = 4000  # characters per chunk
output_path = "/content/final_output.mp3"


# Step 1: Extract the specific blog content

# Bound the request so a stalled server cannot hang the run, and fail fast on
# HTTP errors instead of searching an error page for the phrases.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
full_text = soup.get_text()

# Non-greedy match: from the first occurrence of the start phrase to the
# nearest following end phrase. DOTALL lets the passage span multiple lines.
pattern = re.compile(
    f"{re.escape(start_phrase)}(.*?){re.escape(end_phrase)}", re.DOTALL
)
match = pattern.search(full_text)
if match is None:
    raise ValueError("Text not found between given start and end phrases.")

# group(0) is the whole match (start phrase + body + end phrase); collapse
# every run of whitespace into a single space.
selected_text = re.sub(r"\s+", " ", match.group(0)).strip()


# Step 2: Split text into safe-sized chunks

def split_text(text, max_len):
    """Split *text* on whitespace into chunks of at most *max_len* characters.

    Words are never broken. Consecutive words are packed into a chunk,
    joined by single spaces, for as long as the chunk stays within
    *max_len*.

    Args:
        text: The text to split. Any run of whitespace separates words.
        max_len: Maximum number of characters per chunk.

    Returns:
        A list of non-empty chunk strings in original order. The list is
        empty when *text* contains no words. A single word longer than
        *max_len* is kept intact as its own (oversized) chunk.
    """
    chunks = []
    current_words = []
    current_len = 0

    for word in text.split():
        # A separating space is only needed once the chunk has a word in it;
        # counting it for an empty chunk would flush an empty string.
        needed = len(word) + (1 if current_words else 0)
        if current_words and current_len + needed > max_len:
            chunks.append(" ".join(current_words))
            current_words = [word]
            current_len = len(word)
        else:
            current_words.append(word)
            current_len += needed

    if current_words:
        chunks.append(" ".join(current_words))
    return chunks


# Break the extracted passage into pieces of at most `chunk_size` characters.
chunks = split_text(text=selected_text, max_len=chunk_size)


# Step 3: Convert each chunk to audio

async def generate_chunk_audio(chunks, voice):
    """Synthesize each text chunk with edge-tts and load it as audio.

    Chunks are processed one after another, in order.

    Args:
        chunks: Iterable of text strings, in playback order.
        voice: edge-tts voice name, e.g. ``"te-IN-MohanNeural"``.

    Returns:
        A list of ``AudioSegment`` objects, one per non-blank chunk, in the
        same order as *chunks*.
    """
    audio_segments = []

    # The per-chunk MP3s are only an intermediate step, so keep them in a
    # throwaway directory that is removed on exit (even if synthesis fails)
    # rather than leaving them behind in a hard-coded location.
    with tempfile.TemporaryDirectory() as tmp_dir:
        for i, chunk in enumerate(chunks):
            if not chunk.strip():
                # Blank text has nothing to synthesize.
                continue

            chunk_path = os.path.join(tmp_dir, f"chunk_{i}.mp3")
            communicate = edge_tts.Communicate(chunk, voice)
            await communicate.save(chunk_path)

            # The segment is loaded before the temporary directory is
            # cleaned up, so the file is no longer needed afterwards.
            audio_segments.append(AudioSegment.from_file(chunk_path))

    return audio_segments


# Step 4: Combine all chunks into one MP3

async def process():
    """Synthesize all chunks and write the merged audio as one MP3.

    Reads the module-level ``chunks``, ``voice`` and ``output_path``.

    Raises:
        ValueError: If no audio segments were produced, so there is nothing
            to export.
    """
    audio_segments = await generate_chunk_audio(chunks, voice)
    if not audio_segments:
        raise ValueError("No audio segments were generated; nothing to export.")

    # Concatenate in order, starting from the first segment.
    final_audio = sum(audio_segments[1:], audio_segments[0])

    # export() hands back the output file object; close it explicitly so the
    # handle is released right away instead of whenever it is collected.
    final_audio.export(output_path, format="mp3").close()


# Run the pipeline. A bare top-level `await` is only accepted by runtimes that
# support it (IPython/Jupyter/Colab-style cells); in a plain .py script this
# line would have to become `asyncio.run(process())`.
await process()


# Report where the merged file was written.
print(f"✅ Final MP3 saved at: {output_path}")

Popular posts from this blog

SAP CPI: Camel expressions in SAP CPI (CM, router, filter, and Groovy script format)

pss book: గురు ప్రార్థనామంజరి . completed 21st july 2024

pss book : శ్రీకృష్ణుడు దేవుడా, భగవంతుడా completed , second review needed. 26th April 2024