perf: read only first 128B from the file when searching for shebang (#1040)

This commit is contained in:
WieeRd 2024-08-16 20:23:35 +09:00 committed by GitHub
parent 804f873711
commit 4cf6f0b930
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 16 additions and 9 deletions

View File

@ -2,7 +2,7 @@ use std::{
borrow::Cow,
fmt,
fs::File,
io::{self, BufRead, BufReader, Read},
io::{self, Read},
path::{Path, PathBuf},
str::FromStr,
};

View File

@ -381,16 +381,23 @@ impl LanguageType {
/// assert_eq!(rust, Some(LanguageType::Rust));
/// ```
pub fn from_shebang<P: AsRef<Path>>(entry: P) -> Option<Self> {
let file = match File::open(entry) {
Ok(file) => file,
_ => return None,
};
// Read at most `READ_LIMIT` bytes from the given file.
// A typical shebang line is shorter than 32 characters,
// e.g. `#!/bin/bash` (11B) or `#!/usr/bin/env python3` (22B).
// It is *very* unlikely that the file contains a valid shebang
// if there is no newline character within the first 128B.
const READ_LIMIT: usize = 128;
let mut buf = BufReader::new(file);
let mut line = String::new();
let _ = buf.read_line(&mut line);
let mut file = File::open(entry).ok()?;
let mut buf = [0; READ_LIMIT];
let mut words = line.split_whitespace();
let len = file.read(&mut buf).ok()?;
let buf = &buf[..len];
let first_line = buf.split(|b| *b == b'\n').next()?;
let first_line = std::str::from_utf8(first_line).ok()?;
let mut words = first_line.split_whitespace();
match words.next() {
{# First match against any shebang paths, and then check if the
language matches any found in the environment shebang path. #}