# Tested: text to MP3 (Neeguda Tatva Bhodini) --- next: MP3 to MP4 for YouTube upload
# Install necessary libraries
# Code optimized.
# Tested, working well, including speed; no additional prefix added. 28th April '25: worked for speed 3 also.
# Tested speeds 5 and 3 with male and female voices.
# Notebook shell commands (IPython "!" syntax): install the TTS and audio
# libraries that are imported below.
!pip install edge-tts pydub --quiet
# ffmpeg binary -- presumably what pydub uses to decode/encode MP3; confirm.
!apt install ffmpeg --quiet
# Also download the packages into a folder for later offline installation
# (assumes Google Drive is mounted at /content/drive -- TODO confirm).
!pip download edge-tts pydub -d /content/drive/MyDrive/Lib/
import os
import asyncio
import edge_tts
from pydub import AudioSegment
# Constants
INPUT_FILE = "/content/input.txt" # UTF-8 text file to convert; place it in the /content folder.
# Voice handed to edge_tts.Communicate. The original notes mention two Telugu
# voices, te-IN-MohanNeural and te-IN-ShrutiNeural (presumably male and female
# respectively -- confirm against the edge-tts voice list).
# Alternative: VOICE = "te-IN-MohanNeural"
VOICE = "te-IN-ShrutiNeural" # currently selected voice
CHUNK_SIZE = 4000  # chunk size (in characters) passed to chunk_text()
TEMP_DIR = "/content/temp_chunks"  # per-chunk MP3 files are written here and removed by cleanup_temp()
FINAL_MP3 = "/content/output.mp3"  # the combined audio is exported to this path
# Speed mapping: 1 (slow) to 7 (fast).
# Earlier version of the mapping, kept for reference (level 2 was "-25%" and
# unknown levels fell back to "0%"):
#   def map_speed_to_rate(value):
#       mapping = {1: "-75%", 2: "-25%", 3: "0%", 4: "+25%", 5: "+50%", 6: "+75%", 7: "+100%"}
#       return mapping.get(value, "0%")
# NOTE(review): this module-level `value` is not read by any code visible in
# this file -- the speed actually used is `speed_input` inside main(). The
# original note here read "looks starts from 0"; its meaning is unclear, confirm.
value = 2 ;
def map_speed_to_rate(value):
    """Translate a speed level into a speech-rate percentage string.

    Levels run from 1 (slowest, "-75%") to 7 (fastest, "+100%"); level 3 is
    normal speed and maps to "0%".

    Args:
        value: Integer speed level.

    Returns:
        The rate string for the level, or "0%" (normal speed) when the level
        is not one of 1-7.
    """
    mapping = {1: "-75%", 2: "-50%", 3: "0%", 4: "+25%", 5: "+50%", 6: "+75%", 7: "+100%"}
    # Fall back to normal speed rather than "": generate_audio() forwards any
    # value other than "0%" as the `rate` argument, and an empty string is not
    # a valid edge-tts rate, so every chunk would fail to generate.
    return mapping.get(value, "0%")
# Read input text and clean it
def read_text(file_path):
    """Load a UTF-8 text file and flatten it into a single clean line.

    Every run of whitespace (spaces, tabs, newlines) is collapsed into one
    space and leading/trailing whitespace is dropped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The normalised text, or "" if anything goes wrong while reading
        (the error is printed, not raised).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            raw = handle.read()
        words = raw.split()
        return " ".join(words)
    except Exception as err:
        print(f"❌ Error reading file: {err}")
        return ""
# Break large text into chunks without breaking words
def chunk_text(text, chunk_size):
    """Split *text* into pieces of at most *chunk_size* characters.

    Each piece ends at the last space found before the size limit, so words
    stay intact; a single word longer than *chunk_size* is cut at the limit.
    Surrounding whitespace is removed from the input and from every piece, so
    no empty or blank chunk is ever produced.

    Args:
        text: The text to split. Empty, blank or None input yields [].
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        List of non-empty chunk strings, in order.

    Raises:
        ValueError: If *chunk_size* is not positive (the loop could otherwise
            never make progress).
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")
    chunks = []
    # Stripping up front guarantees the remaining text never starts with a
    # space, so the split index below is never 0 and no chunk is empty.
    remaining = (text or "").strip()
    while remaining:
        if len(remaining) <= chunk_size:
            chunks.append(remaining)
            break
        split_idx = remaining.rfind(' ', 0, chunk_size)
        if split_idx == -1:
            # No space inside the window: hard-cut the over-long word.
            split_idx = chunk_size
        chunks.append(remaining[:split_idx].rstrip())
        remaining = remaining[split_idx:].lstrip()
    return chunks
# Generate audio per chunk
async def generate_audio(chunks, rate):
    """Synthesise one MP3 per text chunk and load each as an AudioSegment.

    Files are written to TEMP_DIR as chunk_<index>.mp3 using VOICE. When
    *rate* is "0%" no rate argument is passed to edge-tts; any other value is
    forwarded as-is.

    Args:
        chunks: Sequence of text pieces to synthesise, in order.
        rate: Rate string as produced by map_speed_to_rate().

    Returns:
        List of AudioSegment objects for the chunks that succeeded. A chunk
        that fails is reported on stdout and skipped, so the list may be
        shorter than *chunks*.
    """
    os.makedirs(TEMP_DIR, exist_ok=True)
    total = len(chunks)
    audio_parts = []
    for idx, chunk in enumerate(chunks):
        mp3_file = os.path.join(TEMP_DIR, f"chunk_{idx}.mp3")
        print(f"🎙️ Generating Chunk {idx+1}/{total} at Speed: {rate}")
        try:
            options = {"text": chunk, "voice": VOICE}
            if rate != "0%":
                # Only override the speaking rate when it differs from normal.
                options["rate"] = rate
            communicate = edge_tts.Communicate(**options)
            await communicate.save(mp3_file)
            audio_parts.append(AudioSegment.from_file(mp3_file))
        except Exception as err:
            print(f"⚠️ Error generating chunk {idx}: {err}")
    return audio_parts
# Combine all audio chunks
def combine_segments(segments, output_file):
    """Concatenate the audio segments in order and export the result as MP3.

    Args:
        segments: List of audio segments supporting `+` and `.export()`.
        output_file: Destination path of the combined MP3.

    Returns:
        None. With no segments, a message is printed and nothing is written.
    """
    if segments:
        final_audio = segments[0]
        for part in segments[1:]:
            final_audio = final_audio + part
        final_audio.export(output_file, format="mp3")
        print(f"✅ Final audio saved at: {output_file}")
    else:
        print("❌ No audio segments to combine.")
# Cleanup temporary files
def cleanup_temp():
    """Delete every file inside TEMP_DIR, then remove the directory itself.

    If TEMP_DIR does not exist (for example when this is called a second time
    in the same session) there is nothing to clean and the function returns
    quietly instead of raising FileNotFoundError.
    """
    try:
        names = os.listdir(TEMP_DIR)
    except FileNotFoundError:
        return
    for name in names:
        os.remove(os.path.join(TEMP_DIR, name))
    os.rmdir(TEMP_DIR)
    print("🧹 Temp folder cleaned.")
# Main
async def main():
    """Run the text-to-MP3 pipeline from INPUT_FILE to FINAL_MP3.

    Reads and normalises the input text, splits it into chunks of CHUNK_SIZE,
    synthesises each chunk at the configured speed, joins the audio and
    removes the temporary chunk files. Prints a message and stops early when
    the input yields no text.
    """
    text = read_text(INPUT_FILE)
    if text:
        chunks = chunk_text(text, CHUNK_SIZE)
        # 1=slow, 3=normal, 7=fast
        speed_input = 4
        rate = map_speed_to_rate(speed_input)
        segments = await generate_audio(chunks, rate)
        combine_segments(segments, FINAL_MP3)
        cleanup_temp()
    else:
        print("❌ No text to process.")
# Run the pipeline. Top-level `await` relies on the notebook (IPython) event
# loop; a plain Python script would need asyncio.run(main()) instead.
await main()
# Code above copied on 17th Feb 2026.
# New optimized code below; tested with Vajra Vakyalu part 2.
# =========================================
# STEP 1: Install FFmpeg (FAST, REQUIRED)
# =========================================
# Notebook shell command (IPython "!" syntax). The ffmpeg binary installed
# here is the one invoked through subprocess in STEP 4.
!apt-get install ffmpeg -y --quiet
# =========================================
# STEP 2: CONFIGURATION
# =========================================
BASE_PATH = "/content"
# The title shown on the video doubles as the base name of the media files.
# NOTE(review): the name appears to contain partly mis-encoded Telugu
# characters; it is left untouched because the paths must match the real file
# name on disk -- confirm before correcting.
TITLE_TEXT = "CLOUD_దయ్à°¯ాà°²_Bhutala_1_100_Updated23rdsep2024_audio"
AUDIO_FILE = f"{BASE_PATH}/{TITLE_TEXT}.mp3"  # Input audio
OUTPUT_VIDEO = f"{BASE_PATH}/{TITLE_TEXT}.mp4"  # Output video
# Frame size in pixels and title font size.
WIDTH = 640
HEIGHT = 360
FONT_SIZE = 48
# =========================================
# STEP 3: CREATE TITLE IMAGE
# =========================================
from PIL import Image, ImageDraw, ImageFont
def create_title_image(text, width, height, font_size):
    """Render *text* as centred white lines on a black image and save it.

    The text is word-wrapped to the image width minus an 80-pixel margin and
    the block of lines is centred horizontally and vertically. A single word
    wider than the available width is placed on its own line (it is not
    broken, so it may extend past the image edges).

    Args:
        text: Title to draw.
        width: Image width in pixels.
        height: Image height in pixels.
        font_size: Point size for the TrueType font; also drives line spacing.

    Returns:
        Path of the saved image, always "/tmp/title.jpg".
    """
    img = Image.new("RGB", (width, height), (0, 0, 0))
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype(
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            font_size
        )
    except (OSError, ImportError):
        # OSError: font file missing or unreadable. ImportError: presumably
        # raised when Pillow was built without FreeType -- kept so the
        # original best-effort fallback still applies in that case.
        # NOTE(review): DejaVu Sans presumably lacks glyphs for scripts such
        # as Telugu; confirm that non-Latin titles render as intended.
        font = ImageFont.load_default()

    def text_width(value):
        # Right edge of the bounding box when drawn at the origin.
        return draw.textbbox((0, 0), value, font=font)[2]

    max_width = width - 80
    lines, line = [], ""
    for w in text.split():
        candidate = f"{line} {w}" if line else w
        if text_width(candidate) <= max_width:
            line = candidate
        else:
            # Only flush a non-empty line: when the very first word is too
            # wide there is nothing to flush, and emitting "" here would add
            # a blank line that pushes the title off vertical centre.
            if line:
                lines.append(line)
            line = w
    if line:
        lines.append(line)

    line_height = font_size + 12
    total_height = len(lines) * line_height
    y = (height - total_height) // 2
    for ln in lines:
        # Measure exactly what is drawn (no trailing space) so the line is
        # truly centred.
        x = (width - text_width(ln)) // 2
        draw.text((x, y), ln, fill=(255, 255, 255), font=font)
        y += line_height

    img_path = "/tmp/title.jpg"
    img.save(img_path)
    # To bypass generation when a ready-made image exists, return its path
    # instead, e.g. "/content/akshay.png".
    return img_path
# Render the title card once; its path is later passed to ffmpeg as the
# still-image input of the video.
print("🖼️ Creating title image...")
IMAGE_FILE = create_title_image(TITLE_TEXT, WIDTH, HEIGHT, FONT_SIZE)
# =========================================
# STEP 4: CREATE VIDEO (ULTRA FAST)
# =========================================
import subprocess
print("⚡ Creating video (optimized for long audio)...")
# ffmpeg command as an argument list (run without a shell). Argument order
# matters: options before each "-i" apply to that input, the remaining options
# apply to the output file.
cmd = [
"ffmpeg",
"-y",  # overwrite the output file without asking
"-loop", "1",  # repeat the single title image as the video stream
"-i", IMAGE_FILE,
"-i", AUDIO_FILE,
"-c:v", "libx264",  # encode video with x264
"-preset", "ultrafast",  # fastest encoder preset
"-tune", "stillimage",  # encoder tuning for static content
"-crf", "28",  # quality setting (see ffmpeg/x264 docs for the scale)
"-pix_fmt", "yuv420p",  # pixel format -- presumably chosen for player compatibility; confirm
"-c:a", "copy", # 🚀 KEY: audio NOT re-encoded
"-shortest",  # stop when the shortest input ends (the audio; the image loops)
OUTPUT_VIDEO
]
# check=True raises subprocess.CalledProcessError if ffmpeg exits non-zero,
# so the success messages below are only printed when the video was written.
subprocess.run(cmd, check=True)
print("✅ DONE!")
print(f"📽️ Video created: {OUTPUT_VIDEO}")