Verbatim strings (#535)

* Added CSharp example

Using example from #330, for testing purposes

* Updated Rust test case

The Rust test case now contains a verbatim string that the current
version of Tokei fails to count correctly.

* Added C++ verbatim string to example

C++ syntax makes this incredibly difficult to parse using the current
infrastructure: raw strings are written `R"PREFIX(    )PREFIX"`, where PREFIX can be almost anything.

* Updated F# Test to have verbatim string

* Add support for "verbatim_quotes"

These are quotes that ignore the `\"` escape rule. In the case of Python, as
mentioned in #330, Tokei currently already produces the correct outcome.

* Add support for verbatim_quotes in syntax

Keeps track of the current state and handles escapes correctly when
inside a verbatim_quote.

* Added `verbatim_quotes` to CONTRIBUTING.md
This commit is contained in:
Nick Hackman 2020-05-28 01:55:14 -04:00 committed by GitHub
parent b7388e2e5d
commit 96b47ab404
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 139 additions and 42 deletions

View File

@ -65,6 +65,24 @@ let x = /* There is a reason
10;
```
The `verbatim_quotes` property expects an array of `[start, end]` string pairs, as some languages
have multiple syntaxes for defining verbatim strings. A verbatim string
in the context of Tokei is a string literal in which `\` is not treated as an escape character, so a `\"` sequence ends the string instead of escaping the quote. For example, [`CSharp`](https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/strings/#regular-and-verbatim-string-literals):
```json
"CSharp": {
"verbatim_quotes": [
[
"@\\\"",
"\\\""
]
]
```
```csharp
const string BasePath = @"C:\";
```
Some languages have a single, standard filename with no extension
like `Makefile` or `Dockerfile`. These can be defined with the
`filenames` property:
@ -123,11 +141,13 @@ The comment should use the syntax of the language you're testing.
A good example of a test file is [`tests/data/rust.rs`].
```rust
// 39 lines 32 code 2 comments 5 blanks
// 41 lines 33 code 3 comments 5 blanks
/* /**/ */
fn main() {
let start = "/*";
let start = r##"/*\"
\"##;
// comment
loop {
if x.len() >= 2 && x[0] == '*' && x[1] == '/' { // found the */
break;

View File

@ -46,6 +46,7 @@ fn generate_languages(out_dir: &OsStr) -> Result<(), Box<dyn error::Error>> {
}
sort_prop!("quotes");
sort_prop!("verbatim_quotes");
sort_prop!("multi_line");
}

View File

@ -213,6 +213,7 @@
"line_comment": ["//"],
"multi_line_comments": [["/*", "*/"]],
"quotes": [["\\\"", "\\\""]],
"verbatim_quotes": [["R\\\"(", ")\\\""]],
"extensions": ["cc", "cpp", "cxx", "c++", "pcc", "tpp"]
},
"CppHeader": {
@ -234,6 +235,7 @@
"line_comment": ["//"],
"multi_line_comments": [["/*", "*/"]],
"quotes": [["\\\"", "\\\""]],
"verbatim_quotes": [["@\\\"", "\\\""]],
"extensions": ["cs", "csx"]
},
"CShell": {
@ -404,6 +406,7 @@
"line_comment": ["//"],
"multi_line_comments": [["(*", "*)"]],
"quotes": [["\\\"", "\\\""]],
"verbatim_quotes": [["@\\\"", "\\\""]],
"extensions": ["fs", "fsi", "fsx", "fsscript"]
},
"Futhark": {
@ -943,7 +946,8 @@
"multi_line_comments": [["/*", "*/"]],
"nested": true,
"extensions": ["rs"],
"quotes": [["\\\"", "\\\""], ["r#\\\"", "\\\"#"], ["#\\\"", "\\\"#"]]
"quotes": [["\\\"", "\\\""], ["#\\\"", "\\\"#"]],
"verbatim_quotes": [["r##\\\"", "\\\"##"], ["r#\\\"", "\\\"#"]]
},
"ReStructuredText": {
"blank": true,

View File

@ -169,6 +169,28 @@ impl LanguageType {
}
}
/// Returns the verbatim quotes of a language as `(start, end)` delimiter
/// pairs. Languages that define no `verbatim_quotes` yield an empty slice.
/// ```
/// use tokei::LanguageType;
/// let lang = LanguageType::CSharp;
/// assert_eq!(lang.verbatim_quotes(), &[("@\"", "\"")]);
/// ```
pub fn verbatim_quotes(self) -> &'static [(&'static str, &'static str)] {
match self {
{{#each languages}}
{{#if this.verbatim_quotes}}
{{~@key}} =>
&[
{{~#each this.verbatim_quotes}}
( {{~#each this}}"{{this}}",{{~/each}} ),
{{~/each}}
],
{{~/if}}
{{~/each}}
// Fallback for every language without a `verbatim_quotes` entry.
_ => &[],
}
}
/// Returns the doc quotes of a language.
/// ```
/// use tokei::LanguageType;

View File

@ -23,6 +23,7 @@ pub(crate) struct SyntaxCounter {
pub(crate) quote: Option<&'static str>,
pub(crate) quote_is_doc_quote: bool,
pub(crate) stack: Vec<&'static str>,
pub(crate) quote_is_verbatim: bool,
}
#[derive(Clone, Debug)]
@ -36,6 +37,7 @@ pub(crate) struct SharedMatchers {
pub multi_line_comments: &'static [(&'static str, &'static str)],
pub nested_comments: &'static [(&'static str, &'static str)],
pub string_literals: &'static [(&'static str, &'static str)],
pub verbatim_string_literals: &'static [(&'static str, &'static str)],
}
impl SharedMatchers {
@ -72,6 +74,7 @@ impl SharedMatchers {
multi_line_comments: language.multi_line_comments(),
nested_comments: language.nested_comments(),
string_literals: language.quotes(),
verbatim_string_literals: language.verbatim_quotes(),
}
}
}
@ -81,6 +84,7 @@ impl SyntaxCounter {
Self {
shared: SharedMatchers::new(language),
quote_is_doc_quote: false,
quote_is_verbatim: false,
stack: Vec::with_capacity(1),
quote: None,
}
@ -119,10 +123,24 @@ impl SyntaxCounter {
{
trace!("Start Doc {:?}", start);
self.quote = Some(end);
self.quote_is_verbatim = false;
self.quote_is_doc_quote = true;
return Some(start.len());
}
if let Some((start, end)) = self
.shared
.verbatim_string_literals
.iter()
.find(|(s, _)| window.starts_with(s.as_bytes()))
{
trace!("Start verbatim {:?}", start);
self.quote = Some(end);
self.quote_is_verbatim = true;
self.quote_is_doc_quote = false;
return Some(start.len());
}
if let Some((start, end)) = self
.shared
.string_literals
@ -131,6 +149,7 @@ impl SyntaxCounter {
{
trace!("Start {:?}", start);
self.quote = Some(end);
self.quote_is_verbatim = false;
self.quote_is_doc_quote = false;
return Some(start.len());
}
@ -192,9 +211,9 @@ impl SyntaxCounter {
let quote = self.quote.take().unwrap();
trace!("End {:?}", quote);
Some(quote.len())
} else if window.starts_with(br"\") {
} else if window.starts_with(br"\") && !self.quote_is_verbatim {
// Tell the state machine to skip the next character because it
// has been escaped.
// has been escaped if the string isn't a verbatim string.
Some(2)
} else {
None

View File

@ -1,45 +1,46 @@
/* 45 lines 37 code 2 comments 6 blanks */
/* 46 lines 37 code 3 comments 6 blanks */
#include <stdio.h>
// bubble_sort_function
void bubble_sort (int a[10], int n) {
int t;
int j = n;
int s = 1;
while (s > 0) {
s = 0;
int i = 1;
while (i < j) {
if (a[i] < a[i - 1]) {
t = a[i];
a[i] = a[i - 1];
a[i - 1] = t;
s = 1;
}
i++;
}
j--;
void bubble_sort(int a[10], int n) {
int t;
int j = n;
int s = 1;
while (s > 0) {
s = 0;
int i = 1;
while (i < j) {
if (a[i] < a[i - 1]) {
t = a[i];
a[i] = a[i - 1];
a[i - 1] = t;
s = 1;
}
i++;
}
j--;
}
}
void main() {
int a[] = {4, 65, 2, -31, 0, 99, 2, 83, 782, 1};
int n = 10;
int i = 0;
int main() {
int a[] = {4, 65, 2, -31, 0, 99, 2, 83, 782, 1};
int n = 10;
int i = 0;
printf("Before sorting:\n\n");
while (i < n) {
printf("%d ", a[i]);
i++;
}
printf(R"(Before sorting:\n\n" )");
// Single line comment
while (i < n) {
printf("%d ", a[i]);
i++;
}
bubble_sort(a, n);
bubble_sort(a, n);
printf("\n\nAfter sorting:\n\n");
i = 0;
while (i < n) {
printf("%d ", a[i]);
i++;
}
printf("\n\nAfter sorting:\n\n");
i = 0;
while (i < n) {
printf("%d ", a[i]);
i++;
}
}

26
tests/data/csharp.cs Normal file
View File

@ -0,0 +1,26 @@
// 26 lines 14 code 7 comments 5 blanks
namespace Ns
{
/*
multi-line comment
*/
public class Cls
{
private const string BasePath = @"a:\";
[Fact]
public void MyTest()
{
// Arrange.
Foo();
// Act.
Bar();
// Assert.
Baz();
}
}
}

View File

@ -1,4 +1,4 @@
(* 13 lines 5 code 4 comments 4 blanks *)
(* 15 lines 6 code 5 comments 4 blanks *)
// Comment
@ -11,3 +11,5 @@ let bar = "(*
Code
*)"
let baz = @"a:\"
// Comment

View File

@ -1,8 +1,10 @@
// 39 lines 32 code 2 comments 5 blanks
// 41 lines 33 code 3 comments 5 blanks
/* /**/ */
fn main() {
let start = "/*";
let start = r##"/*##\"
\"##;
// comment
loop {
if x.len() >= 2 && x[0] == '*' && x[1] == '/' { // found the */
break;