Browser text to MP3, using Python code.
!pip install edge-tts #part1 worked , read web url content and mp3
# Part 2: working version, without code optimization.
import requests
from bs4 import BeautifulSoup
import re
import asyncio
import edge_tts
# Blog post whose text will be read aloud.
url = "https://prabodhadevotee.blogspot.com/2025/01/test.html"
# Telugu phrases marking where the section to convert begins and ends.
start_phrase = "ఆఖరీ మరణము"
end_phrase = "ప్రబోధానంద యోగీశ్వర్లు"
# Step 1: Fetch and parse the content.
# The timeout keeps the cell from hanging forever on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail here on a 4xx/5xx reply instead of parsing an error page and later
# reporting a misleading "text range not found".
response.raise_for_status()
# Parse the raw response bytes with the built-in HTML parser.
soup = BeautifulSoup(response.content, "html.parser")
# Flatten the parsed document to plain text for the phrase search below.
full_text = soup.get_text()
# Step 2: Extract the desired section.
# Both phrases are escaped so they match literally; DOTALL lets the lazy
# wildcard span line breaks, so the match runs from the first start phrase
# to the nearest end phrase that follows it.
pattern = re.compile(
    f"{re.escape(start_phrase)}(.*?){re.escape(end_phrase)}", re.DOTALL
)
match = pattern.search(full_text)
if match is None:
    # ValueError is still an Exception, so existing broad handlers keep working.
    raise ValueError("Text range not found between given start and end phrases.")
# The whole match already spans the start phrase through the end phrase.
selected_text = match.group(0)
# Collapse every run of whitespace (including line breaks) to a single space.
selected_text = re.sub(r"\s+", " ", selected_text).strip()
# Step 3: Use Edge-TTS for Telugu Male voice (Microsoft Mohan)
# This voice name is passed to save_tts, which hands it to edge_tts.Communicate.
voice = "te-IN-MohanNeural" # Telugu, Male voice
# Output path: file the synthesized audio is saved to.
# NOTE(review): /content looks like a Google Colab directory — confirm and
# adjust when running elsewhere.
output_path = "/content/output.mp3"
# Step 4: Async helper that synthesizes the speech and writes it to disk.
async def save_tts(text, voice, output_path):
    """Synthesize *text* with the given edge-tts *voice* and save the audio.

    Args:
        text: The text to convert to speech.
        voice: edge-tts voice name, e.g. "te-IN-MohanNeural".
        output_path: File path the audio is saved to.
    """
    await edge_tts.Communicate(text, voice).save(output_path)
# Run async function
# NOTE: a bare top-level `await` only works where the host already runs an
# event loop for the cell (e.g. IPython/Jupyter/Colab autoawait). In a plain
# .py script this line is a SyntaxError; there, call
# asyncio.run(save_tts(selected_text, voice, output_path)) instead.
await save_tts(selected_text, voice, output_path)
print(f"✅ Audio saved to: {output_path}")
Part 1: Code Logic Explanation
🔹 1. requests + BeautifulSoup
We download the webpage using requests.get(url) and parse it with BeautifulSoup to extract the entire text content of the blog.
🔹 2. Regex Search
We define a start phrase and an end phrase, then use Python's re module to extract only the portion of text between those two Telugu phrases.
This ensures we only convert a specific part of the blog to speech.
🔹 3. edge-tts
We use the Microsoft Edge TTS API via the edge-tts library. It supports natural-sounding Indian Telugu voices like te-IN-MohanNeural.