# Python code for MP3 text-to-speech synthesis and OCR text extraction.
import gc
gc.collect()
!pip install google-cloud-texttospeech pydub
import json
from google.cloud import texttospeech
from pydub import AudioSegment
import os
# Paths to the files
credentials_path = '/content/credentials.json'
text_file_path = '/content/text_input.txt'
output_file_path = '/content/output.mp3'
# Verify the file existence
if os.path.exists(credentials_path):
print("Credentials file found.")
else:
print("Credentials file not found.")
if os.path.exists(text_file_path):
print("Text file found.")
else:
print("Text file not found.")
# Load your Google Cloud project credentials
with open(credentials_path) as f:
credentials = json.load(f)
# Configure the Text-to-Speech client
client = texttospeech.TextToSpeechClient.from_service_account_json(credentials_path)
# Load Telugu text from the file
with open(text_file_path, 'r', encoding='utf-8') as file:
text = file.read().strip()
# Function to split text into chunks based on UTF-8 byte size (the TTS API
# limits each synthesis request to roughly 5000 bytes of input).
def split_text(text: str, max_bytes: int = 5000) -> list:
    """Split *text* into consecutive chunks of at most *max_bytes* UTF-8 bytes.

    Characters are never split across chunks. Returns [] for empty input.
    Fix: never emit an empty chunk — the original appended an empty leading
    chunk whenever the very first character alone exceeded *max_bytes*.
    """
    chunks = []
    current_chunk = ""
    current_chunk_bytes = 0
    for char in text:
        char_bytes = len(char.encode('utf-8'))
        # Start a new chunk when adding this character would overflow the
        # byte budget — but only if the current chunk is non-empty.
        if current_chunk and current_chunk_bytes + char_bytes > max_bytes:
            chunks.append(current_chunk)
            current_chunk = char
            current_chunk_bytes = char_bytes
        else:
            current_chunk += char
            current_chunk_bytes += char_bytes
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
# Configure voice and speed.
# Voice catalogue: https://cloud.google.com/text-to-speech/docs/voices
# te-IN-Standard-A is the female Telugu voice, te-IN-Standard-B the male one.
voice = texttospeech.VoiceSelectionParams(
    language_code='te-IN',  # Telugu (India)
    name='te-IN-Standard-A',
    # Fixed: the named voice te-IN-Standard-A is female, but the gender hint
    # said MALE. The explicit `name` takes precedence in the API, yet the
    # mismatched hint was misleading; align it with the requested voice.
    ssml_gender=texttospeech.SsmlVoiceGender.FEMALE
)
audio_config = texttospeech.AudioConfig(
    audio_encoding=texttospeech.AudioEncoding.MP3,
    # 1.0 is normal speed; valid values range 0.25–4.0. 1.25 plays the
    # synthesized speech slightly faster than normal.
    speaking_rate=1.25
)
# Synthesize one chunk of text with the configured voice and audio settings.
def synthesize_speech(text_chunk):
    """Return the raw MP3 bytes the TTS API produces for *text_chunk*."""
    request_kwargs = {
        'input': texttospeech.SynthesisInput(text=text_chunk),
        'voice': voice,
        'audio_config': audio_config,
    }
    return client.synthesize_speech(**request_kwargs).audio_content
# Persist raw audio bytes to a temporary file on disk.
def save_temp_audio(audio_content, temp_file_path):
    """Write *audio_content* (bytes) to *temp_file_path* in binary mode."""
    with open(temp_file_path, 'wb') as destination:
        destination.write(audio_content)
# Process the text chunk-by-chunk, synthesize each chunk to a temporary MP3,
# stitch the pieces together in order, and write the final output file.
# Fix: temporary files are now removed even if synthesis or export fails
# part-way (previously they leaked on any exception).
text_chunks = split_text(text)
temp_files = []
try:
    for i, chunk in enumerate(text_chunks):
        temp_file_path = f'/content/temp_chunk_{i}.mp3'
        temp_files.append(temp_file_path)
        audio_content = synthesize_speech(chunk)
        save_temp_audio(audio_content, temp_file_path)

    # Combine the per-chunk audio files in chunk order.
    combined = AudioSegment.empty()
    for temp_file in temp_files:
        combined += AudioSegment.from_mp3(temp_file)

    # Export the combined audio to the final output file.
    combined.export(output_file_path, format='mp3')
    print(f'Audio content written to file "{output_file_path}"')
finally:
    # Clean up temporary files (tolerating any that were never written).
    for temp_file in temp_files:
        if os.path.exists(temp_file):
            os.remove(temp_file)
# NOTE(review): this entire section (through the cleanup loop below) is a
# near-verbatim duplicate of the script above — it re-installs the packages,
# re-reads the same inputs, and overwrites /content/output.mp3, differing
# only in the voice gender hint and speaking rate. Consider keeping one copy.
!pip install google-cloud-texttospeech pydub
import json
from google.cloud import texttospeech
from pydub import AudioSegment
import os
# Paths to the input/output files (Colab filesystem).
credentials_path = '/content/credentials.json'
text_file_path = '/content/text_input.txt'
output_file_path = '/content/output.mp3'
# Verify the file existence. NOTE(review): these checks only print a message;
# the script continues and will crash later if a file is missing.
if os.path.exists(credentials_path):
    print("Credentials file found.")
else:
    print("Credentials file not found.")
if os.path.exists(text_file_path):
    print("Text file found.")
else:
    print("Text file not found.")
# Load your Google Cloud project credentials.
# NOTE(review): `credentials` is never used afterwards — the client below
# reads the service-account file directly.
with open(credentials_path) as f:
    credentials = json.load(f)
# Configure the Text-to-Speech client from the service-account JSON file.
client = texttospeech.TextToSpeechClient.from_service_account_json(credentials_path)
# Load the Telugu input text from the file (UTF-8).
with open(text_file_path, 'r', encoding='utf-8') as file:
    text = file.read().strip()
# Function to split text into chunks based on UTF-8 byte size (the TTS API
# limits each synthesis request to roughly 5000 bytes of input).
def split_text(text: str, max_bytes: int = 5000) -> list:
    """Split *text* into consecutive chunks of at most *max_bytes* UTF-8 bytes.

    Characters are never split across chunks. Returns [] for empty input.
    Fix: never emit an empty chunk — the original appended an empty leading
    chunk whenever the very first character alone exceeded *max_bytes*.
    """
    chunks = []
    current_chunk = ""
    current_chunk_bytes = 0
    for char in text:
        char_bytes = len(char.encode('utf-8'))
        # Start a new chunk when adding this character would overflow the
        # byte budget — but only if the current chunk is non-empty.
        if current_chunk and current_chunk_bytes + char_bytes > max_bytes:
            chunks.append(current_chunk)
            current_chunk = char
            current_chunk_bytes = char_bytes
        else:
            current_chunk += char
            current_chunk_bytes += char_bytes
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
# Voice selection: Telugu (India), female standard voice.
voice = texttospeech.VoiceSelectionParams(
    ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    name='te-IN-Standard-A',
    language_code='te-IN',
)
# MP3 output encoding at the default (1.0) speaking rate.
audio_config = texttospeech.AudioConfig(
    speaking_rate=1.0,
    audio_encoding=texttospeech.AudioEncoding.MP3,
)
# Synthesize one text chunk into MP3 audio via the global TTS client.
def synthesize_speech(text_chunk):
    """Return the MP3 bytes the TTS API produces for *text_chunk*."""
    payload = texttospeech.SynthesisInput(text=text_chunk)
    api_response = client.synthesize_speech(
        input=payload, voice=voice, audio_config=audio_config
    )
    return api_response.audio_content
# Write raw audio bytes out to a temporary file.
def save_temp_audio(audio_content, temp_file_path):
    """Store *audio_content* (bytes) at *temp_file_path*."""
    handle = open(temp_file_path, 'wb')
    try:
        handle.write(audio_content)
    finally:
        handle.close()
# Chunk the input text, synthesize each chunk to its own temp MP3 file,
# concatenate them in order, export the result, then delete the temp files.
text_chunks = split_text(text)
temp_files = []
for i, chunk in enumerate(text_chunks):
    temp_file_path = f'/content/temp_chunk_{i}.mp3'
    temp_files.append(temp_file_path)
    save_temp_audio(synthesize_speech(chunk), temp_file_path)
# Combine the audio files chunk-by-chunk.
combined = AudioSegment.empty()
for temp_file in temp_files:
    combined += AudioSegment.from_mp3(temp_file)
# Export the combined audio to the final output file.
combined.export(output_file_path, format='mp3')
print(f'Audio content written to file "{output_file_path}"')
# Clean up temporary files.
for temp_file in temp_files:
    os.remove(temp_file)
# --- OCR section: extract Telugu text from a PDF with Google Cloud Vision ---
import gc
# Free memory left over from the TTS runs above.
gc.collect()
# IPython/Colab magics: report installed package versions and the available
# poppler-utils version (poppler is required by pdf2image).
!pip show google-cloud-vision pdf2image
!apt-cache policy poppler-utils
# Import required libraries
import io
import os
from google.cloud import vision
from google.cloud.vision_v1 import types
from pdf2image import convert_from_path
from google.oauth2 import service_account
# Paths to the files (Colab filesystem).
credentials_path = '/content/credentials_doc.json'
pdf_file_path = '/content/Anthima-Daiva-Granthamulo-Vajra-Vaakyamulu_300_348.pdf'
output_text_file_path = '/content/CLOUD_Anthima-Daiva-Granthamulo-Vajra-Vaakyamulu_300_348.txt'
# Verify the file existence. NOTE(review): these checks only print a message;
# the script continues and will crash later if a file is missing.
if os.path.exists(credentials_path):
    print("Credentials file found.")
else:
    print("Credentials file not found. Please check the path.")
if os.path.exists(pdf_file_path):
    print("PDF file found.")
else:
    print("PDF file not found. Please check the path.")
# Authenticate with service-account credentials loaded from the JSON file.
credentials = service_account.Credentials.from_service_account_file(credentials_path)
client = vision.ImageAnnotatorClient(credentials=credentials)
# Convert each PDF page to a PIL image; 300 dpi for good OCR quality.
images = convert_from_path(pdf_file_path, dpi=300)
# Function to perform OCR on a single page image.
def perform_ocr(image):
    """Run Google Cloud Vision document OCR on a PIL *image* and return text.

    Fix: the Vision API reports failures in ``response.error`` instead of
    raising; the original ignored this and would silently return an empty
    annotation. Now raises RuntimeError on an API error.
    Also renamed locals so the vision request no longer shadows the PIL image.
    """
    # Serialize the PIL image to PNG bytes in memory.
    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    vision_image = types.Image(content=buffer.getvalue())
    response = client.document_text_detection(image=vision_image)
    if response.error.message:
        raise RuntimeError(f"Vision API error: {response.error.message}")
    return response.full_text_annotation.text
# OCR every page image and append its text, with a page header, to the output.
with open(output_text_file_path, 'w', encoding='utf-8') as output_file:
    for page_number, page_image in enumerate(images, start=1):
        # OCR first, then write — a failed page leaves no dangling header.
        page_text = perform_ocr(page_image)
        output_file.write(f"Page {page_number}\n" + page_text + "\n\n")
print(f'OCR text written to file "{output_text_file_path}"')