Version 5 and version 6 code: browser to MP3

 


async def generate_chunk_audio(chunks, voice):
    """Turn each text chunk into audio, one sentence at a time.

    A chunk is cut at every full stop; blank pieces are discarded and the
    full stop is put back before the sentence is sent to edge-tts. Each
    sentence is saved as ``chunk_<i>_<j>.mp3`` inside ``temp_dir`` (a name
    this function reads from the enclosing scope), loaded back and appended
    to the chunk's audio, followed by one second of silence.

    A sentence that raises is reported with ``print`` and skipped, so one
    bad sentence does not abort the run.

    Args:
        chunks: Sequence of text chunks.
        voice: Voice name handed to ``edge_tts.Communicate``.

    Returns:
        A list with one audio segment per chunk, in input order.
    """
    rendered = []

    for chunk_no, text in enumerate(chunks):
        print(f"🎤 Processing chunk {chunk_no+1}/{len(chunks)} ({len(text)} chars)")
        combined = AudioSegment.empty()

        # 🔸 Cut the chunk at full stops and drop pieces that are blank
        pieces = [piece for piece in map(str.strip, text.split('.')) if piece]

        for sent_no, piece in enumerate(pieces):
            spoken = piece + '.'  # restore the full stop removed by split()
            mp3_path = os.path.join(temp_dir, f"chunk_{chunk_no}_{sent_no}.mp3")

            try:
                await edge_tts.Communicate(spoken, voice).save(mp3_path)
                combined += AudioSegment.from_file(mp3_path)
                # ⏸ One second of silence after every sentence
                combined += AudioSegment.silent(duration=1000)
            except Exception as err:
                print(f"⚠️ Failed sentence {sent_no} in chunk {chunk_no}: {err}")

        rendered.append(combined)

    return rendered



----------

# ✅ Install required packages

!pip install edge-tts pydub --quiet

!apt install ffmpeg --quiet


import requests

from bs4 import BeautifulSoup

import re

import asyncio

import edge_tts

from pydub import AudioSegment

import os

import shutil


# 📌 Config
# Page whose text is fetched and read aloud.
url = "https://prabodhadevotee.blogspot.com/2025/04/pss-visitor-recording.html"
# The extracted passage starts at this phrase ...
start_phrase = "శ్లోకం 11"
# ... and ends at the nearest following occurrence of this phrase (both inclusive).
end_phrase = "గాలికి త్రోయబడదు"
voice = "te-IN-ShrutiNeural"  # Telugu Female
# Maximum chunk length, in characters, passed to split_text().
chunk_size = 3000
# Where the merged MP3 is written.
output_path = "/content/smallBook_output_female_final.mp3"
# Scratch folder for the per-sentence MP3 files; process() removes it when done.
temp_dir = "/content/chunks"
os.makedirs(temp_dir, exist_ok=True)


# 🔍 Step 1: Extract blog text between start and end phrases
# A timeout keeps the cell from hanging on an unresponsive server, and
# raise_for_status() stops an HTTP error page from being parsed as the post.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
full_text = soup.get_text()


def _phrase_regex(phrase):
    """Return a regex for *phrase* in which any whitespace run matches any whitespace run."""
    # The page text may hold a newline or non-breaking space where the
    # configured phrase has a plain space; matching on \s+ tolerates that.
    return r"\s+".join(re.escape(part) for part in phrase.split())


pattern = re.compile(
    f"{_phrase_regex(start_phrase)}(.*?){_phrase_regex(end_phrase)}", re.DOTALL
)
match = pattern.search(full_text)
if match is None:
    raise ValueError("Text not found between start and end phrases.")

# group(0) spans start phrase, body and end phrase; collapse whitespace runs
# so the text-to-speech input is a single clean line.
selected_text = re.sub(r'\s+', ' ', match.group(0)).strip()


# 🔠 Step 2: Split text into manageable chunks

def split_text(text, max_len):
    """Split *text* into whitespace-delimited chunks of at most *max_len* characters.

    Words are never cut. A single word longer than ``max_len`` therefore
    becomes a chunk of its own that exceeds the limit.

    Args:
        text: Text to split; any run of whitespace separates words.
        max_len: Maximum chunk length in characters.

    Returns:
        A list of non-empty chunks whose words are joined by single spaces.
        Empty or whitespace-only input yields an empty list.
    """
    chunks = []
    current = ""
    for word in text.split():
        if not current:
            # The first word of a chunk is always accepted. Counting a
            # separator here used to emit an empty chunk whenever the
            # word alone reached max_len.
            current = word
        elif len(current) + 1 + len(word) <= max_len:
            current = f"{current} {word}"
        else:
            chunks.append(current)
            current = word
    if current:
        chunks.append(current)
    return chunks


# Cut the extracted passage into pieces no longer than chunk_size characters.
chunks = split_text(selected_text, max_len=chunk_size)
print(f"🔍 Total Chunks: {len(chunks)}")


# 🎤 Step 3: Generate MP3 from each chunk and insert 1s silence after each sentence (excluding last)

async def generate_chunk_audio(chunks, voice):
    """Synthesize every chunk sentence by sentence, pausing between sentences.

    A chunk is split wherever a full stop is followed by whitespace. Each
    non-blank sentence is saved by edge-tts as ``chunk_<i>_<j>.mp3`` inside
    ``temp_dir`` (read from the enclosing scope), loaded back and appended
    to the chunk's audio. One second of silence follows every sentence
    except the one in the last split position of the chunk.

    A sentence that raises is reported with ``print`` and skipped.

    Args:
        chunks: Sequence of text chunks.
        voice: Voice name handed to ``edge_tts.Communicate``.

    Returns:
        A list with one audio segment per chunk, in input order.
    """
    sentence_boundary = re.compile(r'(?<=\.)\s+')  # whitespace right after a "."
    results = []

    for chunk_idx, chunk_text in enumerate(chunks):
        print(f"🎤 Generating chunk {chunk_idx+1}/{len(chunks)} ({len(chunk_text)} chars)")
        parts = sentence_boundary.split(chunk_text)
        final_idx = len(parts) - 1
        merged = AudioSegment.empty()

        for part_idx, part in enumerate(parts):
            if not part.strip():
                continue  # nothing speakable in this piece

            mp3_path = os.path.join(temp_dir, f"chunk_{chunk_idx}_{part_idx}.mp3")
            try:
                await edge_tts.Communicate(part, voice).save(mp3_path)
                merged += AudioSegment.from_file(mp3_path)
                if part_idx != final_idx:  # 👈 no trailing silence after the last piece
                    merged += AudioSegment.silent(duration=1000)
            except Exception as err:
                print(f"⚠️ Chunk {chunk_idx}, Sentence {part_idx} failed: {err}")

        results.append(merged)

    return results


# 🎧 Step 4: Combine all chunks and clean up

async def process():
    """Synthesize all chunks, merge them into one MP3 and remove the scratch folder.

    Reads the module-level ``chunks``, ``voice``, ``output_path`` and
    ``temp_dir``. The merged audio is exported to ``output_path`` and its
    duration in minutes is printed. If nothing was synthesized, a message is
    printed and no file is written.

    ``temp_dir`` is created on entry and removed on exit, including on early
    return and when an exception propagates, so the function can be called
    again without re-running the configuration cell.
    """
    # A previous call deletes the folder during cleanup; the per-sentence
    # MP3 files are written there, so make sure it exists again.
    os.makedirs(temp_dir, exist_ok=True)
    try:
        audio_segments = await generate_chunk_audio(chunks, voice)
        if not audio_segments:
            print("❌ No audio segments generated.")
            return

        final_audio = sum(audio_segments[1:], audio_segments[0])
        # len() of the merged audio is its length in milliseconds (see the
        # duration line below); zero means every sentence failed, so there
        # is nothing worth exporting.
        if len(final_audio) == 0:
            print("❌ No audio segments generated.")
            return

        final_audio.export(output_path, format="mp3")
        print(f"✅ Final MP3 saved at: {output_path}")
        print(f"📏 Final Duration: {round(len(final_audio) / 60000, 2)} minutes")
    finally:
        # 🧹 Clean up. ignore_errors keeps a cleanup problem from hiding
        # the exception that is already propagating.
        shutil.rmtree(temp_dir, ignore_errors=True)


# 🚀 Run
# Top-level await is accepted in IPython/Jupyter cells (this notebook also
# uses `!` shell commands); a plain .py script would need
# asyncio.run(process()) instead.
await process()


Popular posts from this blog

SAP CPI: Camel expressions in SAP CPI, CM, router, filter and Groovy script format

pss book: గురు ప్రార్థనామంజరి . completed 21st july 2024

pss book : శ్రీకృష్ణుడు దేవుడా, భగవంతుడా completed , second review needed. 26th April 2024