Skip to content

Commit

Permalink
Merge pull request #7144 from sylvestre/comm2
Browse files Browse the repository at this point in the history
comm: implement the ordering check
  • Loading branch information
cakebaker authored Jan 18, 2025
2 parents 05c4ce0 + b50a012 commit 64dad0c
Show file tree
Hide file tree
Showing 3 changed files with 291 additions and 24 deletions.
2 changes: 1 addition & 1 deletion src/uu/comm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ path = "src/comm.rs"

[dependencies]
clap = { workspace = true }
uucore = { workspace = true }
uucore = { workspace = true, features = ["fs"] }

[[bin]]
name = "comm"
Expand Down
160 changes: 155 additions & 5 deletions src/uu/comm/src/comm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

// spell-checker:ignore (ToDO) delim mkdelim
// spell-checker:ignore (ToDO) delim mkdelim pairable

use std::cmp::Ordering;
use std::fs::{metadata, File};
use std::io::{self, stdin, BufRead, BufReader, Stdin};
use std::io::{self, stdin, BufRead, BufReader, Read, Stdin};
use uucore::error::{FromIo, UResult, USimpleError};
use uucore::fs::paths_refer_to_same_file;
use uucore::line_ending::LineEnding;
use uucore::{format_usage, help_about, help_usage};

Expand All @@ -27,6 +28,30 @@ mod options {
pub const FILE_2: &str = "FILE2";
pub const TOTAL: &str = "total";
pub const ZERO_TERMINATED: &str = "zero-terminated";
pub const CHECK_ORDER: &str = "check-order";
pub const NO_CHECK_ORDER: &str = "nocheck-order";
}

/// Identifies one of the two inputs being compared.
///
/// The value is rendered into the "file N is not in sorted order" diagnostic.
#[derive(Debug, Clone, Copy)]
enum FileNumber {
    /// The first input.
    One,
    /// The second input.
    Two,
}

impl FileNumber {
    /// Returns the number of this input as it appears in diagnostics.
    fn as_str(&self) -> &'static str {
        let label: &'static str = match *self {
            Self::Two => "2",
            Self::One => "1",
        };
        label
    }
}

/// Per-input state used to detect lines that arrive out of sorted order.
///
/// Each call to `verify_order` compares the new line against `last_line`
/// and then stores the new line for the next comparison.
struct OrderChecker {
/// Copy of the line most recently passed to `verify_order`; empty until
/// the first line has been seen.
last_line: Vec<u8>,
/// Which input this checker belongs to; used in the diagnostic message.
file_num: FileNumber,
/// When true, `verify_order` returns `false` for an out-of-order line;
/// when false it always returns `true`.
check_order: bool,
/// Set once the "not in sorted order" diagnostic has been printed, so the
/// message is emitted at most once per checker.
has_error: bool,
}

enum Input {
Expand Down Expand Up @@ -60,7 +85,74 @@ impl LineReader {
}
}

fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) {
impl OrderChecker {
    /// Creates a checker for `file_num` with no previous line recorded and no
    /// error reported yet.
    fn new(file_num: FileNumber, check_order: bool) -> Self {
        Self {
            has_error: false,
            check_order,
            file_num,
            last_line: Vec::new(),
        }
    }

    /// Compares `current_line` with the previously seen line (bytewise,
    /// lexicographically) and remembers it for the next call.
    ///
    /// The first out-of-order line prints a diagnostic to stderr and sets
    /// `has_error`; later violations are silent.
    ///
    /// Returns `true` when the line is in order or when `check_order` is off,
    /// and `false` only for an out-of-order line while `check_order` is on.
    fn verify_order(&mut self, current_line: &[u8]) -> bool {
        // An empty `last_line` (nothing seen yet) is <= every slice, so the
        // very first line is always accepted without a special case.
        let sorted = self.last_line.as_slice() <= current_line;

        if !(sorted || self.has_error) {
            eprintln!(
                "comm: file {} is not in sorted order",
                self.file_num.as_str()
            );
            self.has_error = true;
        }

        // Refresh the remembered line in place.
        self.last_line.clear();
        self.last_line.extend_from_slice(current_line);

        sorted || !self.check_order
    }
}

/// Reports whether two regular files have byte-for-byte identical contents.
///
/// The file sizes are compared first; the contents are only read when the
/// sizes match.
///
/// Anything that is not a regular file (FIFO, character device, directory,
/// ...) is reported as *not* identical without being opened. Such inputs
/// report a length of 0, and reading them here could consume data that the
/// caller still needs to read from the same pipe or device.
///
/// # Errors
///
/// Returns the underlying I/O error if metadata cannot be queried or a file
/// cannot be opened or read.
pub fn are_files_identical(path1: &str, path2: &str) -> io::Result<bool> {
    /// Reads until `buf` is full or end of input, returning the byte count.
    ///
    /// A single `read` call may legitimately return fewer bytes than
    /// requested, so comparing the counts of two independent `read` calls
    /// would misreport identical files as different.
    fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
        let mut filled = 0;
        while filled < buf.len() {
            match reader.read(&mut buf[filled..]) {
                Ok(0) => break,
                Ok(n) => filled += n,
                Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
                Err(e) => return Err(e),
            }
        }
        Ok(filled)
    }

    let metadata1 = std::fs::metadata(path1)?;
    let metadata2 = std::fs::metadata(path2)?;

    // Never read from pipes/devices here: doing so would steal their data.
    if !(metadata1.is_file() && metadata2.is_file()) {
        return Ok(false);
    }

    // Cheap rejection before touching the contents.
    if metadata1.len() != metadata2.len() {
        return Ok(false);
    }

    let mut file1 = File::open(path1)?;
    let mut file2 = File::open(path2)?;

    let mut buffer1 = [0u8; 8192];
    let mut buffer2 = [0u8; 8192];

    loop {
        let bytes1 = read_full(&mut file1, &mut buffer1)?;
        let bytes2 = read_full(&mut file2, &mut buffer2)?;

        // Slices of different lengths compare unequal, which also covers a
        // file that changed size after the metadata check.
        if buffer1[..bytes1] != buffer2[..bytes2] {
            return Ok(false);
        }

        // A partially filled chunk means both inputs reached end of file.
        if bytes1 < buffer1.len() {
            return Ok(true);
        }
    }
}

fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) -> UResult<()> {
let width_col_1 = usize::from(!opts.get_flag(options::COLUMN_1));
let width_col_2 = usize::from(!opts.get_flag(options::COLUMN_2));

Expand All @@ -76,6 +168,26 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
let mut total_col_2 = 0;
let mut total_col_3 = 0;

let check_order = opts.get_flag(options::CHECK_ORDER);
let no_check_order = opts.get_flag(options::NO_CHECK_ORDER);

// Determine if we should perform order checking
let should_check_order = !no_check_order
&& (check_order
|| if let (Some(file1), Some(file2)) = (
opts.get_one::<String>(options::FILE_1),
opts.get_one::<String>(options::FILE_2),
) {
!(paths_refer_to_same_file(file1, file2, true)
|| are_files_identical(file1, file2).unwrap_or(false))
} else {
true
});

let mut checker1 = OrderChecker::new(FileNumber::One, check_order);
let mut checker2 = OrderChecker::new(FileNumber::Two, check_order);
let mut input_error = false;

while na.is_ok() || nb.is_ok() {
let ord = match (na.is_ok(), nb.is_ok()) {
(false, true) => Ordering::Greater,
Expand All @@ -91,6 +203,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)

match ord {
Ordering::Less => {
if should_check_order && !checker1.verify_order(ra) {
break;
}
if !opts.get_flag(options::COLUMN_1) {
print!("{}", String::from_utf8_lossy(ra));
}
Expand All @@ -99,6 +214,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
total_col_1 += 1;
}
Ordering::Greater => {
if should_check_order && !checker2.verify_order(rb) {
break;
}
if !opts.get_flag(options::COLUMN_2) {
print!("{delim_col_2}{}", String::from_utf8_lossy(rb));
}
Expand All @@ -107,6 +225,10 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
total_col_2 += 1;
}
Ordering::Equal => {
if should_check_order && (!checker1.verify_order(ra) || !checker2.verify_order(rb))
{
break;
}
if !opts.get_flag(options::COLUMN_3) {
print!("{delim_col_3}{}", String::from_utf8_lossy(ra));
}
Expand All @@ -117,12 +239,27 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
total_col_3 += 1;
}
}

// Track if we've seen any order errors
if (checker1.has_error || checker2.has_error) && !input_error && !check_order {
input_error = true;
}
}

if opts.get_flag(options::TOTAL) {
let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED));
print!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total{line_ending}");
}

if should_check_order && (checker1.has_error || checker2.has_error) {
// Print the input error message once at the end
if input_error {
eprintln!("comm: input is not in sorted order");
}
Err(USimpleError::new(1, ""))
} else {
Ok(())
}
}

fn open_file(name: &str, line_ending: LineEnding) -> io::Result<LineReader> {
Expand Down Expand Up @@ -170,8 +307,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
"" => "\0",
delim => delim,
};
comm(&mut f1, &mut f2, delim, &matches);
Ok(())

comm(&mut f1, &mut f2, delim, &matches)
}

pub fn uu_app() -> Command {
Expand Down Expand Up @@ -233,4 +370,17 @@ pub fn uu_app() -> Command {
.help("output a summary")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::CHECK_ORDER)
.long(options::CHECK_ORDER)
.help("check that the input is correctly sorted, even if all input lines are pairable")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::NO_CHECK_ORDER)
.long(options::NO_CHECK_ORDER)
.help("do not check that the input is correctly sorted")
.action(ArgAction::SetTrue)
.conflicts_with(options::CHECK_ORDER),
)
}
Loading

0 comments on commit 64dad0c

Please sign in to comment.