# test
from collections import OrderedDict
def remove_duplicate_lines(filename, encoding=None):
    """Remove duplicate lines from a file in place, keeping first occurrences.

    The file is read completely, every line whose text has already been seen
    is dropped, and the remaining lines are written back to the same path in
    their original order.

    Lines are compared by their text *without* the trailing newline, so a
    final line that lacks a newline is still recognised as a duplicate of an
    earlier, newline-terminated line with the same text.

    Args:
        filename (str): The name of the file to process.
        encoding (str, optional): Text encoding used for both reading and
            writing. Defaults to None, which lets ``open()`` pick the
            platform default, exactly as before this parameter existed.

    Raises:
        OSError: If the file cannot be opened for reading or writing
            (for example ``FileNotFoundError`` when it does not exist).
    """
    # Maps line text (terminator stripped) -> the first raw line carrying it.
    # An OrderedDict keeps insertion order, which is the original file order.
    first_seen = OrderedDict()

    with open(filename, 'r', encoding=encoding) as file:
        for line in file:
            # Text mode hands every line terminator back as '\n', so removing
            # it is enough to compare a terminated line with an unterminated
            # final line. setdefault stores only the first occurrence.
            first_seen.setdefault(line.rstrip('\n'), line)

    # Overwrite the original file with the deduplicated content. The raw
    # lines are written unchanged, so a unique unterminated final line stays
    # unterminated.
    with open(filename, 'w', encoding=encoding) as file:
        file.writelines(first_seen.values())
# Example usage (the paths below are written for Google Colab).
# If the target file lives on Google Drive, mount the drive first by
# uncommenting the next two lines.
# from google.colab import drive
# drive.mount('/content/drive')
# Replace '/content/your_file.txt' with the path of the file to deduplicate.
# NOTE: remove_duplicate_lines overwrites the file in place, so keep a copy
# if the original content matters.
filename = '/content/your_file.txt' # Path to your file in Colab content folder
remove_duplicate_lines(filename)
# Only reached when the call above did not raise (e.g. the file exists).
print(f"Duplicate lines removed from {filename}")