pss work: replace \r\n or \n only inside paragraphs of a file, so that each paragraph becomes a single line (this worked). Colab code below.

 (?<![.,!?])\r?\n\r?\n   matches a paragraph break that is NOT preceded by a full stop, comma, exclamation mark or question mark; paragraphs ending without any of these will be highlighted.

To find paragraphs that do not end with a full stop (and so need to be merged with the next paragraph), search for: (?<!\.)\r?\n\r?\n





import os


def process_paragraphs(input_file, output_file):
    """Rewrite a text file so that every paragraph sits on a single line.

    Paragraphs are the chunks of text separated by a blank line. Inside each
    paragraph the line breaks are replaced by single spaces; the paragraphs
    themselves stay separated by one blank line in the output.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the UTF-8 text file to write (overwritten if it
            already exists).

    Returns:
        None. The result is written to ``output_file`` and a confirmation
        message is printed.
    """
    import re  # local import so the function does not rely on file-level imports

    # Text mode with the default newline handling turns "\r\n" into "\n",
    # so only "\n" has to be considered from here on.
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # A separator line that looks blank may still contain spaces or tabs;
    # treat it as a paragraph break too instead of gluing paragraphs together.
    paragraphs = re.split(r'\n[ \t]*\n', content)

    processed_paragraphs = []
    for paragraph in paragraphs:
        # Trim every line before joining so that trailing/leading whitespace
        # on wrapped lines does not end up as double spaces in the result.
        stripped_lines = (line.strip() for line in paragraph.splitlines())
        cleaned_paragraph = ' '.join(line for line in stripped_lines if line)
        processed_paragraphs.append(cleaned_paragraph)

    # Joining paragraphs back with double newlines to maintain separation
    processed_text = '\n\n'.join(processed_paragraphs)

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(processed_text)

    print(f"Processed file saved as: {output_file}")

# Source text file and the destination for the cleaned-up copy.
input_file_path = "/content/CLOUD_సామెతల జ్ఞానము_25March2025.txt"  # adjust to the file being processed
output_file_path = "/content/CLOUD_సామెతల జ్ఞానము_25March2025_output.txt"

# Collapse every paragraph of the source file onto one line and save the result.
process_paragraphs(input_file=input_file_path, output_file=output_file_path)

Popular posts from this blog

SAP CPI: Camel expressions in SAP CPI (CM, router, filter and Groovy script format)

pss book: గురు ప్రార్థనామంజరి . completed 21st july 2024

pss book : శ్రీకృష్ణుడు దేవుడా, భగవంతుడా completed , second review needed. 26th April 2024