import java.io.*;
import java.text.SimpleDateFormat;
import java.util.*;
/**
 * Copies a text file to a timestamped output file while repeating a chosen range
 * of lines until the output reaches a target size.
 *
 * <p>The output consists of the lines before the repeat range, one or more whole
 * copies of the repeat range, and the lines after the repeat range.
 */
public class FileCopyWithRepeatUntilSize {

    /**
     * Reads the configured input file, expands the configured line range and writes
     * the result to a timestamped file in the output directory. Errors are reported
     * on standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String inputDir = "C:\\Deviprasad\\input\\";
        String outputDir = "C:\\Deviprasad\\output\\";
        String fileName = "SALES_ORDER_multiple linesFile1.xml"; // Input file name
        int startRow = 83; // Start repeating from this line number (1-based)
        int endRow = 108; // End repeating at this line number (inclusive)
        int targetSizeKB = 3; // Desired output size in KB
        // Note: the repeat block is always written in full, so the output may exceed
        // the target size by up to one repeat block; no manual trimming is needed.
        String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date());
        int dotIndex = fileName.lastIndexOf('.');
        String baseName = (dotIndex == -1) ? fileName : fileName.substring(0, dotIndex);
        String extension = (dotIndex == -1) ? "" : fileName.substring(dotIndex);
        String outputFileName = baseName + "_" + timeStamp + extension;
        File inputFile = new File(inputDir + fileName);
        File outputFile = new File(outputDir + outputFileName);
        try {
            List<String> allLines = new ArrayList<>();
            try (BufferedReader reader = new BufferedReader(new FileReader(inputFile))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    allLines.add(line);
                }
            }
            // Validate before opening the writer so that bad settings do not leave
            // an empty output file behind.
            validateRange(startRow, endRow, allLines.size());
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) {
                writeWithRepeat(allLines, startRow, endRow, targetSizeKB * 1024L, writer);
            }
            System.out.println("File created: " + outputFile.getAbsolutePath());
        } catch (IOException | IllegalArgumentException e) {
            System.err.println("Error: " + e.getMessage());
        }
    }

    /**
     * Writes the prefix lines, at least one copy of the repeat range, further whole
     * copies of the repeat range until the projected output size reaches
     * {@code targetBytes}, and finally the suffix lines.
     *
     * <p>The size is tracked in memory instead of polling the output file's length,
     * because data held in a writer's buffer is not yet reflected in the file size.
     *
     * @param allLines    every line of the input, without line terminators
     * @param startRow    first line of the repeat range (1-based, inclusive)
     * @param endRow      last line of the repeat range (1-based, inclusive)
     * @param targetBytes desired minimum size of the complete output in bytes
     * @param out         destination for the generated text
     * @throws IllegalArgumentException if the row range is not within {@code allLines}
     * @throws IOException              if writing to {@code out} fails
     */
    static void writeWithRepeat(List<String> allLines, int startRow, int endRow,
            long targetBytes, Writer out) throws IOException {
        validateRange(startRow, endRow, allLines.size());

        // Extract segments
        List<String> prefixLines = allLines.subList(0, startRow - 1); // lines before startRow
        List<String> repeatLines = allLines.subList(startRow - 1, endRow); // lines to repeat
        List<String> suffixLines = allLines.subList(endRow, allLines.size()); // lines after endRow

        // The range holds at least one line and every line carries a separator,
        // so repeatSize is always positive and the loop below terminates.
        long repeatSize = estimateLinesSize(repeatLines);
        long limit = targetBytes - estimateLinesSize(suffixLines);

        writeLines(out, prefixLines);
        writeLines(out, repeatLines);
        long written = estimateLinesSize(prefixLines) + repeatSize;

        // Only whole blocks are appended so the repeated section is never cut off
        // half-way through.
        while (written < limit) {
            writeLines(out, repeatLines);
            written += repeatSize;
        }

        writeLines(out, suffixLines);
    }

    // Rejects a repeat range that does not lie inside a file of lineCount lines.
    private static void validateRange(int startRow, int endRow, int lineCount) {
        if (startRow < 1 || endRow > lineCount || startRow > endRow) {
            throw new IllegalArgumentException("Invalid startRow or endRow.");
        }
    }

    // Writes each line followed by the platform line separator.
    private static void writeLines(Writer out, List<String> lines) throws IOException {
        String separator = System.lineSeparator();
        for (String line : lines) {
            out.write(line);
            out.write(separator);
        }
    }

    // Helper to compute the size of lines in bytes, including one line separator
    // per line, using the platform default charset (the same one FileWriter uses).
    private static long estimateLinesSize(List<String> lines) {
        int separatorBytes = System.lineSeparator().getBytes().length;
        long size = 0;
        for (String line : lines) {
            size += line.getBytes().length + separatorBytes;
        }
        return size;
    }
}
---------------------

import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Copies a text file to a timestamped output file, removing from every line the
 * words that both start and end with the letter t (either case).
 */
public class FileCopyWithRegex {

    // Regular expression to match words starting and ending with 't' or 'T'.
    // Compiled once here instead of once per line.
    private static final java.util.regex.Pattern T_WORD =
            java.util.regex.Pattern.compile("\\b[tT]\\w*[tT]\\b");

    // A run of two or more whitespace characters.
    private static final java.util.regex.Pattern WHITESPACE_RUN =
            java.util.regex.Pattern.compile("\\s{2,}");

    /**
     * Processes the configured input file and writes the cleaned copy to a
     * timestamped file in the output directory. Errors are reported on standard
     * error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String inputDir = "C:\\Deviprasad\\input\\";
        String outputDir = "C:\\Deviprasad\\output\\";
        String fileName = "example.txt"; // input file
        String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date());
        int dotIndex = fileName.lastIndexOf('.');
        String baseName = (dotIndex == -1) ? fileName : fileName.substring(0, dotIndex);
        String extension = (dotIndex == -1) ? "" : fileName.substring(dotIndex);
        String outputFileName = baseName + "_" + timeStamp + extension;
        File inputFile = new File(inputDir + fileName);
        File outputFile = new File(outputDir + outputFileName);
        try {
            copyWithoutTWords(inputFile, outputFile);
            // Reported only after the writer has been flushed and closed, so a
            // failure while closing is not announced as a success.
            System.out.println("File processed and saved as: " + outputFile.getAbsolutePath());
        } catch (IOException e) {
            System.err.println("Error: " + e.getMessage());
        }
    }

    /**
     * Removes every word that starts and ends with 't' or 'T' from the line, then
     * collapses each run of two or more whitespace characters into a single space
     * and trims the result.
     *
     * @param line one line of text without its line terminator
     * @return the cleaned line
     */
    static String removeTWords(String line) {
        String withoutWords = T_WORD.matcher(line).replaceAll("");
        return WHITESPACE_RUN.matcher(withoutWords).replaceAll(" ").trim();
    }

    // Streams inputFile to outputFile line by line, cleaning each line on the way.
    private static void copyWithoutTWords(File inputFile, File outputFile) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(inputFile));
                BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                writer.write(removeTWords(line));
                writer.newLine();
            }
        }
    }
}
--------
// Excerpt: copies inputFile to outputFile line by line, removing matching words.
// inputFile and outputFile are not declared in this excerpt.
// Pattern: word boundary, a 't' or 'T', any number of word characters, another
// 't' or 'T', word boundary.
String regex = "\\b[tT]\\w*[tT]\\b";
// Both streams are declared in the try header, so they are closed automatically
// when the block exits.
try (BufferedReader reader = new BufferedReader(new FileReader(inputFile));
BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) {
String line;
// readLine() returns null once the end of the input is reached.
while ((line = reader.readLine()) != null) {
// Remove the matching words, collapse every run of two or more whitespace
// characters into one space, then strip leading and trailing whitespace.
String updatedLine = line.replaceAll(regex, "").replaceAll("\\s{2,}", " ").trim();
writer.write(updatedLine);
writer.newLine();
}
} catch (IOException e) {
// Report the failure message on standard error.
System.err.println("Error: " + e.getMessage());
}
🔍 Line-by-line Explanation:
1. String regex = "\\b[tT]\\w*[tT]\\b";
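Defines a regular expression that matches words starting and ending with 't' or 'T'. Because the first and last letters are matched separately, a word must be at least two characters long; a lone "t" is not matched.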
Breakdown:
\\b → Word boundary (start/end of a word)
[tT] → The word must start with lowercase or uppercase t
\\w* → Zero or more word characters (a-zA-Z0-9_)
[tT] → The word must end with t or T
\\b → Word boundary
2. try (BufferedReader reader = new BufferedReader(new FileReader(inputFile));
Opens the input file for reading using BufferedReader, which efficiently reads text line-by-line.
3. BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) {
Opens the output file for writing, using BufferedWriter for efficiency.
4. while ((line = reader.readLine()) != null) {
Reads the file line-by-line until the end (null indicates EOF).
5. line.replaceAll(regex, "")
Replaces all words that match the regex (start & end with t/T) with an empty string (removes them).
6. .replaceAll("\\s{2,}", " ")
Cleans up extra spaces left behind by removed words (and also collapses any other run of whitespace already present in the line):
\\s{2,} matches 2 or more whitespace characters.
Replaces them with a single space " ".
7. .trim();
Removes leading and trailing spaces from the updated line.
8. writer.write(updatedLine);
Writes the cleaned-up line to the output file.
9. writer.newLine();
Writes the platform's line separator after each line so the output keeps one line per input line.
📌 Summary:
This code reads each line from the input file, removes any words starting and ending with t or T, cleans up extra spaces, and writes the cleaned line to the output file.
Let me know if you want to modify the regex to only remove lowercase t words, or exclude certain words from deletion!