# Browser text to MP3 using Python code (optimized)
!pip install edge-tts pydub
import requests
from bs4 import BeautifulSoup
import re
import asyncio
import edge_tts
from pydub import AudioSegment
import os
import math
# --- Configuration ---------------------------------------------------------
# Page to read, and the phrases that delimit the passage to narrate.
url = "https://prabodhadevotee.blogspot.com/2025/01/test.html"
start_phrase, end_phrase = "ఆఖరీ మరణము", "ప్రబోధానంద యోగీశ్వర్లు"

# Voice name handed to edge_tts.Communicate.
voice = "te-IN-MohanNeural"

# Character budget per chunk, passed to split_text as max_len.
chunk_size = 4000

# Where the combined MP3 is exported.
output_path = "/content/final_output.mp3"
# Step 1: Extract the specific blog content
# Bound the wait and surface HTTP errors, so a hung connection or an error
# page does not silently turn into "text not found" further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
full_text = soup.get_text()

# Non-greedy match: from the first occurrence of start_phrase up to the
# nearest end_phrase after it. DOTALL lets the passage span several lines.
pattern = re.compile(
    f"{re.escape(start_phrase)}(.*?){re.escape(end_phrase)}", re.DOTALL
)
match = pattern.search(full_text)
if match is None:
    raise Exception("Text not found between given start and end phrases.")

# group(0) is the whole match: start phrase + body + end phrase.
selected_text = match.group(0)
# Collapse every run of whitespace (including newlines) into a single space.
selected_text = re.sub(r"\s+", " ", selected_text).strip()
# Step 2: Split text into safe-sized chunks
def split_text(text: str, max_len: int) -> list:
    """Split *text* on whitespace into chunks of at most *max_len* characters.

    Words are never broken apart: a single word longer than ``max_len`` is
    emitted as its own (over-long) chunk. Words inside a chunk are joined
    with a single space.

    Args:
        text: The text to split.
        max_len: Maximum number of characters per chunk.

    Returns:
        A list of non-empty chunk strings, in original order. Empty or
        whitespace-only input yields an empty list.
    """
    chunks = []
    current_words = []
    current_len = 0
    for word in text.split():
        # A separating space is only needed when the chunk already has content.
        needed = len(word) + (1 if current_words else 0)
        if current_words and current_len + needed > max_len:
            chunks.append(" ".join(current_words))
            current_words = [word]
            current_len = len(word)
        else:
            # Also taken for the first word of a chunk, whatever its length,
            # so an empty chunk is never flushed.
            current_words.append(word)
            current_len += needed
    if current_words:
        chunks.append(" ".join(current_words))
    return chunks
# Break the extracted passage into word-aligned pieces sized by chunk_size.
chunks = split_text(text=selected_text, max_len=chunk_size)
# Step 3: Convert each chunk to audio
async def generate_chunk_audio(chunks, voice):
    """Synthesize each text chunk with edge-tts and load the resulting audio.

    Chunks are processed one at a time, in order. Each one is saved to
    ``/content/chunk_<i>.mp3``, loaded with ``AudioSegment.from_file``, and
    the intermediate file is then removed.

    Args:
        chunks: Iterable of text pieces to synthesize.
        voice: Voice name forwarded to ``edge_tts.Communicate``.

    Returns:
        List of loaded audio segments in chunk order. Blank chunks are
        skipped, so the list may be shorter than ``chunks``.
    """
    audio_segments = []
    for i, chunk in enumerate(chunks):
        # Nothing to speak: skip rather than hand empty text to the TTS call.
        if not chunk.strip():
            continue
        chunk_path = f"/content/chunk_{i}.mp3"
        try:
            communicate = edge_tts.Communicate(chunk, voice)
            await communicate.save(chunk_path)
            audio_segments.append(AudioSegment.from_file(chunk_path))
        finally:
            # Whether loading succeeded or the step failed, the intermediate
            # file is no longer needed; do not leave it behind.
            if os.path.exists(chunk_path):
                os.remove(chunk_path)
    return audio_segments
# Step 4: Combine all chunks into one MP3
async def process():
    """Synthesize every chunk, join the audio in order, and export it as MP3.

    Reads the module-level ``chunks``, ``voice`` and ``output_path``.
    """
    segments = await generate_chunk_audio(chunks, voice)
    # Start from the first segment and append the remaining ones in order.
    combined = segments[0]
    for segment in segments[1:]:
        combined = combined + segment
    combined.export(output_path, format="mp3")
# Run the pipeline. A bare top-level ``await`` is not valid in a plain Python
# script; this relies on an environment that already runs code inside an event
# loop (presumably the notebook implied by the ``!pip`` line — TODO confirm).
await process()
# Only reached if process() completed without raising.
print(f"✅ Final MP3 saved at: {output_path}")