From fce7a838e9c8e89cb71f55356cb4ecbc836df5cb Mon Sep 17 00:00:00 2001 From: Jiahao Yuan Date: Fri, 11 Oct 2024 21:10:06 +0800 Subject: [PATCH] Fix stream did not contain valid UTF-8 (#8120) ## Summary Related issues: #8009 #7549 Although `PYTHONIOENCODING=utf-8` forces Python to use UTF-8 for `stdout`/`stderr`, it can't prevent code like `sys.stdout.buffer.write()` or `subprocess.call(["cl.exe", ...])` from bypassing the encoder. This PR uses lossy UTF-8 conversion to avoid decoding errors. ## Alternative Using the `bstr` crate might be better since it can preserve the original information. Or we should follow the Windows convention, unset `PYTHONIOENCODING` and decode with the system default encoding. ## Test Plan Running locally with a non-ASCII character in `UV_CACHE_DIR` works fine, but I have no unit test plan. Testing locale problems is hard :( --- crates/uv-build-frontend/src/lib.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/crates/uv-build-frontend/src/lib.rs b/crates/uv-build-frontend/src/lib.rs index b59edbb2d..d64510922 100644 --- a/crates/uv-build-frontend/src/lib.rs +++ b/crates/uv-build-frontend/src/lib.rs @@ -921,13 +921,15 @@ impl PythonRunner { ) -> Result { /// Read lines from a reader and store them in a buffer. async fn read_from( - mut reader: tokio::io::Lines>, + mut reader: tokio::io::Split>, mut printer: Printer, buffer: &mut Vec, ) -> io::Result<()> { loop { - match reader.next_line().await? { - Some(line) => { + match reader.next_segment().await? 
{ + Some(line_buf) => { + let line_buf = line_buf.strip_suffix(b"\r").unwrap_or(&line_buf); + let line = String::from_utf8_lossy(line_buf).into(); let _ = write!(printer, "{line}"); buffer.push(line); } @@ -945,7 +947,7 @@ impl PythonRunner { .env("PATH", modified_path) .env("VIRTUAL_ENV", venv.root()) .env("CLICOLOR_FORCE", "1") - .env("PYTHONIOENCODING", "utf-8") + .env("PYTHONIOENCODING", "utf-8:backslashreplace") .stdout(std::process::Stdio::piped()) .stderr(std::process::Stdio::piped()) .spawn() @@ -956,8 +958,8 @@ impl PythonRunner { let mut stderr_buf = Vec::with_capacity(1024); // Create separate readers for `stdout` and `stderr`. - let stdout_reader = tokio::io::BufReader::new(child.stdout.take().unwrap()).lines(); - let stderr_reader = tokio::io::BufReader::new(child.stderr.take().unwrap()).lines(); + let stdout_reader = tokio::io::BufReader::new(child.stdout.take().unwrap()).split(b'\n'); + let stderr_reader = tokio::io::BufReader::new(child.stderr.take().unwrap()).split(b'\n'); // Asynchronously read from the in-memory pipes. let printer = Printer::from(self.level);