mirror of https://github.com/XAMPPRocky/tokei
Add parallel count heuristic
This commit is contained in:
parent 2a3f735260
commit a8afa67057
@@ -6,4 +6,5 @@ src/test
 *.rustfmt
 .DS_Store
 .tokeirc
+results.csv
 node_modules

@@ -39,12 +39,11 @@ fi
 cargo build --release

 if [ $FULL = true ]; then
-    hyperfine -w 5 "target/release/tokei $input" \
+    hyperfine -w 10 --export-csv './results.csv' "target/release/tokei $input" \
        "tokei $input" \
        "scc $input" \
-       "loc $input" \
-       "cloc $input"
+       "loc $input" # \ "cloc $input"
 else
-    hyperfine -w 10 -m 50 "target/release/tokei $input" \
+    hyperfine -w 5 "target/release/tokei $input" \
        "tokei $input"
 fi

@@ -16,6 +16,7 @@ use crate::{
 use encoding_rs_io::DecodeReaderBytesBuilder;
 use grep_searcher::LineIter;
+use rayon::prelude::*;

 use self::LanguageType::*;

@@ -54,16 +55,66 @@ impl LanguageType {
         text: A,
         config: &Config,
     ) -> Stats {
-        let lines = LineIter::new(b'\n', text.as_ref());
+        let text = text.as_ref();
+        let lines = LineIter::new(b'\n', text);
         let mut stats = Stats::new(path);
+        let syntax = SyntaxCounter::new(self);

         if self.is_blank() {
             let count = lines.count();
             stats.lines = count;
             stats.code = count;
             stats
+        } else if let Some(end) = syntax
+            .shared
+            .important_syntax
+            .earliest_find(text)
+            .and_then(|m| {
+                // Get the position of the last line before the important
+                // syntax.
+                text[..=m.start()]
+                    .into_iter()
+                    .rev()
+                    .position(|&c| c == b'\n')
+                    .filter(|&p| p != 0)
+                    .map(|p| m.start() - p)
+            })
+        {
+            let (skippable_text, rest) = text.split_at(end + 1);
+            let lines = LineIter::new(b'\n', skippable_text);
+            let is_fortran = syntax.shared.is_fortran;
+            let comments = syntax.shared.line_comments;
+
+            let (mut stats, (code, comments, blanks)) = rayon::join(
+                move || self.parse_lines(config, LineIter::new(b'\n', rest), stats, syntax),
+                move || {
+                    lines
+                        .par_bridge()
+                        .map(|line| {
+                            // FORTRAN has a rule where it only counts as a comment if it's the
+                            // first character in the column, so removing starting whitespace
+                            // could cause a miscount.
+                            let line = if is_fortran { line } else { line.trim() };
+                            trace!("{}", String::from_utf8_lossy(line));
+
+                            if line.trim().is_empty() {
+                                (0, 0, 1)
+                            } else if comments.iter().any(|c| line.starts_with(c.as_bytes())) {
+                                (0, 1, 0)
+                            } else {
+                                (1, 0, 0)
+                            }
+                        })
+                        .reduce(|| (0, 0, 0), |a, b| (a.0 + b.0, a.1 + b.1, a.2 + b.2))
+                },
+            );
+
+            stats.code += code;
+            stats.comments += comments;
+            stats.blanks += blanks;
+            stats
         } else {
-            self.parse_lines(config, lines, stats)
+            self.parse_lines(config, lines, stats, syntax)
         }
     }

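This hunk is the heuristic itself: text that appears before the first occurrence of the language's "important syntax" (roughly: block-comment openers, doc quotes, and string delimiters) cannot be affected by multi-line state, so each of those lines can be classified as blank, line comment, or code on its own. `rayon::join` counts that prefix in parallel while the stateful `parse_lines` pass handles the remainder. Below is a minimal, self-contained sketch of the same idea using plain `&str` lines instead of tokei's `LineIter`; the names `count_prefix_in_parallel` and `LINE_COMMENTS` are hypothetical, not part of tokei.

```rust
// Sketch only: assumes `rayon` as a dependency; names are hypothetical.
use rayon::prelude::*;

const LINE_COMMENTS: &[&str] = &["//"];

/// Classifies every line of `skippable` as (code, comments, blanks) in parallel.
fn count_prefix_in_parallel(skippable: &str) -> (usize, usize, usize) {
    skippable
        .par_lines()
        .map(|line| {
            let line = line.trim();
            if line.is_empty() {
                (0, 0, 1)
            } else if LINE_COMMENTS.iter().any(|&c| line.starts_with(c)) {
                (0, 1, 0)
            } else {
                (1, 0, 0)
            }
        })
        .reduce(|| (0, 0, 0), |a, b| (a.0 + b.0, a.1 + b.1, a.2 + b.2))
}

fn main() {
    let text = "// a comment\n\nlet x = 1;\nlet s = \"/* not code */\";\n";

    // Pretend the stateful parser is only needed from the first `"` onwards:
    // everything before the line containing it goes through the fast path.
    let split = text
        .find('"')
        .map_or(text.len(), |i| text[..i].rfind('\n').map_or(0, |p| p + 1));
    let (skippable, rest) = text.split_at(split);

    // `rayon::join` runs the parallel per-line count and the (stubbed-out)
    // stateful pass concurrently, mirroring the structure of the hunk above.
    let ((code, comments, blanks), rest_lines) =
        rayon::join(|| count_prefix_in_parallel(skippable), || rest.lines().count());

    println!(
        "prefix: {} code, {} comments, {} blanks; {} lines left for the stateful pass",
        code, comments, blanks, rest_lines
    );
}
```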
@@ -73,9 +124,8 @@ impl LanguageType {
         config: &Config,
         lines: impl IntoIterator<Item = &'a [u8]>,
         mut stats: Stats,
+        mut syntax: SyntaxCounter,
     ) -> Stats {
-        let mut syntax = SyntaxCounter::new(self);
-
         for line in lines {
             // FORTRAN has a rule where it only counts as a comment if it's the
             // first character in the column, so removing starting whitespace

@@ -161,12 +211,7 @@ impl LanguageType {
                 || (
                     // If we're currently in a comment or we just ended
                     // with one.
-                    syntax
-                        .shared
-                        .any_comments
-                        .earliest_find(line)
-                        .map_or(false, |e| e.start() == 0)
-                        && syntax.quote.is_none()
+                    syntax.shared.any_comments.is_match(line) && syntax.quote.is_none()
                 )
                 || ((
                     // If we're currently in a doc string or we just ended

@@ -1,6 +1,6 @@
 use std::sync::Arc;

-use aho_corasick::AhoCorasick;
+use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
 use dashmap::DashMap;
 use log::Level::Trace;

@@ -29,8 +29,8 @@ pub(crate) struct SyntaxCounter {
 pub(crate) struct SharedMatchers {
     pub allows_nested: bool,
     pub doc_quotes: &'static [(&'static str, &'static str)],
-    pub important_syntax: AhoCorasick,
-    pub any_comments: AhoCorasick,
+    pub important_syntax: AhoCorasick<u16>,
+    pub any_comments: AhoCorasick<u16>,
     pub is_fortran: bool,
     pub line_comments: &'static [&'static str],
     pub multi_line_comments: &'static [(&'static str, &'static str)],

@@ -52,12 +52,22 @@ impl SharedMatchers {
     }

     pub fn init(language: LanguageType) -> Self {
+        fn init_corasick(pattern: &[&'static str], anchored: bool) -> AhoCorasick<u16> {
+            let mut builder = AhoCorasickBuilder::new();
+            builder
+                .anchored(anchored)
+                .byte_classes(false)
+                .dfa(true)
+                .prefilter(true);
+            builder.build_with_size(pattern).unwrap()
+        }
+
         Self {
             allows_nested: language.allows_nested(),
             doc_quotes: language.doc_quotes(),
             is_fortran: language.is_fortran(),
-            important_syntax: AhoCorasick::new_auto_configured(language.important_syntax()),
-            any_comments: AhoCorasick::new_auto_configured(language.start_any_comments()),
+            important_syntax: init_corasick(language.important_syntax(), false),
+            any_comments: init_corasick(language.start_any_comments(), true),
             line_comments: language.line_comments(),
             multi_line_comments: language.multi_line_comments(),
             nested_comments: language.nested_comments(),

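The two automata are now built explicitly: `important_syntax` stays unanchored (a marker anywhere in the text matters), while `any_comments` is anchored, so it only reports matches that begin at the start of the haystack. That is what lets the earlier hunk replace `earliest_find(line).map_or(false, |e| e.start() == 0)` with a plain `is_match(line)`. A small sketch of that behaviour with the aho-corasick 0.7-era builder API used by this commit; the pattern list and line are made up for illustration.

```rust
// Sketch only: assumes the `aho-corasick` 0.7 crate as a dependency.
use aho_corasick::AhoCorasickBuilder;

fn main() {
    let comment_starts = ["//", "/*"];

    // Unanchored: finds a comment opener anywhere in the line.
    let unanchored = AhoCorasickBuilder::new().build(&comment_starts);
    // Anchored: only matches if the line *begins* with a comment opener.
    let anchored = AhoCorasickBuilder::new()
        .anchored(true)
        .build(&comment_starts);

    let line = "let x = 1; // trailing comment";
    assert!(unanchored.is_match(line));
    assert!(!anchored.is_match(line));

    // For this line, the anchored `is_match` agrees with the old
    // "earliest match starts at offset 0" check that it replaces.
    assert_eq!(
        anchored.is_match(line),
        unanchored
            .earliest_find(line)
            .map_or(false, |m| m.start() == 0)
    );
}
```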
@@ -20,6 +20,7 @@ pub fn get_all_files<A: AsRef<Path>>(
     languages: &mut BTreeMap<LanguageType, Language>,
     config: &Config,
 ) {
+    let languages = parking_lot::Mutex::new(languages);
     let (tx, rx) = crossbeam_channel::unbounded();

     let mut paths = paths.iter();

@@ -88,31 +89,22 @@ pub fn get_all_files<A: AsRef<Path>>(
     let types: Option<&[LanguageType]> = config.types.as_ref().map(|v| &**v);

-    let iter = rx
-        .into_iter()
+    rx.into_iter()
         .par_bridge()
         .filter_map(|e| LanguageType::from_path(e.path(), &config).map(|l| (e, l)))
-        .filter(|(_, l)| types.map(|t| t.contains(l)).unwrap_or(true))
-        .map(|(entry, language)| {
-            language
-                .parse(entry.into_path(), &config)
-                .map(|stats| (language, Some(stats)))
-                .unwrap_or_else(|(e, path)| {
-                    error!("Error reading {}:\n{}", path.display(), e);
-                    (language, None)
-                })
-        })
+        .filter(|(_, l)| types.map_or(true, |t| t.contains(l)))
+        .for_each(|(entry, language)| {
+            let result = language.parse(entry.into_path(), &config);
+            let mut lock = languages.lock();
+            let entry = lock.entry(language).or_insert_with(Language::new);
+            match result {
+                Ok(stats) => entry.add_stat(stats),
+                Err((error, path)) => {
+                    entry.mark_inaccurate();
+                    error!("Error reading {}:\n{}", path.display(), error);
+                }
+            }
+        });
-        .collect::<Vec<_>>();
-
-    for (language_type, stats) in iter {
-        let entry = languages.entry(language_type).or_insert_with(Language::new);
-
-        if let Some(stats) = stats {
-            entry.add_stat(stats);
-        } else {
-            entry.mark_inaccurate();
-        }
-    }
 }

 pub(crate) fn get_extension(path: &Path) -> Option<String> {

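Rather than collecting `(language, stats)` pairs into a `Vec` and folding them into the map afterwards, each rayon worker now locks the shared `BTreeMap` just long enough to record its result. Below is a self-contained sketch of that aggregation pattern, assuming `rayon`, `crossbeam-channel`, and `parking_lot` as dependencies; the paths and the per-extension counter are hypothetical stand-ins for tokei's walker and `Language` types.

```rust
// Sketch only: a Mutex-guarded map updated from a `par_bridge` pipeline.
use std::collections::BTreeMap;

use rayon::prelude::*;

fn main() {
    // Stand-in for the walker threads that send discovered paths over a channel.
    let (tx, rx) = crossbeam_channel::unbounded();
    for path in ["a.rs", "b.rs", "c.py", "d.rs"] {
        tx.send(path.to_string()).unwrap();
    }
    drop(tx); // close the channel so the receiving iterator terminates

    let mut counts: BTreeMap<String, usize> = BTreeMap::new();
    {
        // Wrap a mutable borrow of the map, mirroring
        // `parking_lot::Mutex::new(languages)` in the diff.
        let counts = parking_lot::Mutex::new(&mut counts);

        rx.into_iter()
            .par_bridge()
            .filter_map(|path| {
                // Stand-in for `LanguageType::from_path`.
                path.rsplit('.').next().map(str::to_owned)
            })
            .for_each(|language| {
                // Each worker takes the lock only long enough to merge its
                // result, instead of collecting into a Vec and folding later.
                let mut lock = counts.lock();
                *lock.entry(language).or_insert(0) += 1;
            });
    }

    println!("{:?}", counts); // {"py": 1, "rs": 3}
}
```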