Python language translation: translate Telugu text to English text.
Enable the Google language translation service (Cloud Translation API); it gives better results.
Format the file paragraph-wise with the help of Notepad++: a CR LF (\r\n) where each paragraph ends.
Alternative: generate it in Blogspot with its language translation and copy the content; that looks easier than the script below.
--
!pip show google-cloud-translate
import os
from google.oauth2 import service_account
from google.cloud import translate_v2 as translate
# Paths to the files
credentials_path = '/content/credentials_doc.json'
input_text_file_path = '/content/Pravakthalu-Yevaru_telugu_18thsep24.txt'
output_text_file_path = '/content/Pravakthalu-Yevaru_English_18thsep24.txt'

# Verify the file existence. Every required file is checked and reported
# before failing, so all missing paths show up in a single run.
required_files = (
    ("Credentials file", credentials_path),
    ("Input text file", input_text_file_path),
)
missing_files = []
for file_label, file_path in required_files:
    if os.path.exists(file_path):
        print(f"{file_label} found.")
    else:
        print(f"{file_label} not found. Please check the path.")
        missing_files.append(file_path)

# Stop here instead of running on: loading the credentials and reading the
# input text both need these files to exist.
if missing_files:
    raise FileNotFoundError(
        "Required file(s) not found: " + ", ".join(missing_files)
    )

# Authenticate using the service-account key file
credentials = service_account.Credentials.from_service_account_file(credentials_path)

# Initialize the translation client with those credentials
translate_client = translate.Client(credentials=credentials)
# Function to translate text from Telugu to English
def translate_text(text, target_language='en', source_language=None):
    """Translate ``text`` using the module-level ``translate_client``.

    Args:
        text: Plain text to translate.
        target_language: Language code to translate into (default ``'en'``).
        source_language: Optional language code of ``text`` (for example
            ``'te'`` for Telugu). When left as ``None`` no source language
            is sent with the request.

    Returns:
        The ``'translatedText'`` entry of the client's result.
    """
    # Ask for plain-text handling explicitly. Per the Cloud Translation v2
    # documentation the format defaults to HTML, which is not what the
    # contents of a .txt file are.
    result = translate_client.translate(
        text,
        target_language=target_language,
        source_language=source_language,
        format_='text',
    )
    return result['translatedText']
# Helper: break one over-long paragraph into pieces that fit the size limit
def _split_oversized(paragraph, max_chunk_size):
    """Return ``paragraph`` cut into pieces of at most ``max_chunk_size`` chars.

    Each cut is made at the last whitespace character that still keeps the
    piece within the limit; if there is none, the text is cut exactly at
    ``max_chunk_size`` characters.
    """
    pieces = []
    remaining = paragraph
    while len(remaining) > max_chunk_size:
        # Search backwards from the limit for a whitespace position to cut at.
        cut = next(
            (i for i in range(max_chunk_size, 0, -1) if remaining[i].isspace()),
            max_chunk_size,
        )
        pieces.append(remaining[:cut].rstrip())
        remaining = remaining[cut:].lstrip()
    if remaining:
        pieces.append(remaining)
    return pieces


# Function to split text into chunks
def split_text(text, max_chunk_size=5000):
    """Group the paragraphs of ``text`` into chunks of bounded length.

    Paragraphs are the parts of ``text`` separated by a blank line
    (``"\\n\\n"``). Consecutive paragraphs are packed into one chunk, joined
    by ``"\\n\\n"``, for as long as the chunk (separators included) stays
    within ``max_chunk_size`` characters. A single paragraph longer than the
    limit is cut into several chunks of its own.

    Args:
        text: The full text to split.
        max_chunk_size: Maximum number of characters per chunk.

    Returns:
        A list of non-empty chunk strings; an empty list if ``text`` holds
        nothing but whitespace.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1.
    """
    if max_chunk_size < 1:
        raise ValueError("max_chunk_size must be a positive integer")

    separator = "\n\n"
    chunks = []
    pending = []      # paragraphs collected for the chunk being built
    pending_len = 0   # length of separator.join(pending)

    for paragraph in text.split(separator):
        paragraph = paragraph.strip()
        if not paragraph:
            continue

        if len(paragraph) > max_chunk_size:
            # Close the chunk in progress, then emit the pieces of the long
            # paragraph as chunks of their own so that no artificial
            # paragraph break is inserted between them.
            if pending:
                chunks.append(separator.join(pending))
                pending, pending_len = [], 0
            chunks.extend(_split_oversized(paragraph, max_chunk_size))
            continue

        # Count the separator too, so a joined chunk never exceeds the limit.
        added = len(paragraph) + (len(separator) if pending else 0)
        if pending and pending_len + added > max_chunk_size:
            chunks.append(separator.join(pending))
            pending, pending_len = [], 0
            added = len(paragraph)
        pending.append(paragraph)
        pending_len += added

    # Add the last chunk if it's not empty
    if pending:
        chunks.append(separator.join(pending))
    return chunks
# Read the Telugu text file. 'utf-8-sig' decodes ordinary UTF-8 and also
# drops a leading byte-order mark if the editor saved one, so the mark does
# not end up at the start of the first chunk.
with open(input_text_file_path, 'r', encoding='utf-8-sig') as input_file:
    telugu_text = input_file.read()

# Split the text into smaller chunks
chunks = split_text(telugu_text)

# Translate each chunk and save the translated text to the output file
with open(output_text_file_path, 'w', encoding='utf-8') as output_file:
    for chunk in chunks:
        # An empty chunk has nothing to translate; skip the request
        if not chunk:
            continue
        # Translate the chunk
        translated_chunk = translate_text(chunk)
        # Write the translated chunk, followed by a blank line
        output_file.write(translated_chunk)
        output_file.write("\n\n")

print(f'Translated text written to file "{output_text_file_path}"')
---