From 8657a392ff497b02b4268dc832b4ede33a11b4c8 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Mon, 12 Feb 2024 16:09:13 +0100 Subject: [PATCH] Docstring formatting: Preserve tab indentation when using `indent-style=tabs` (#9915) --- .../test/fixtures/ruff/.editorconfig | 4 + .../docstring_tab_indentation.options.json | 10 + .../ruff/docstring_tab_indentation.py | 72 +++ .../src/string/docstring.rs | 421 ++++++++++++++---- .../format@docstring_tab_indentation.py.snap | 270 +++++++++++ 5 files changed, 678 insertions(+), 99 deletions(-) create mode 100644 crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_tab_indentation.options.json create mode 100644 crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_tab_indentation.py create mode 100644 crates/ruff_python_formatter/tests/snapshots/format@docstring_tab_indentation.py.snap diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/.editorconfig b/crates/ruff_python_formatter/resources/test/fixtures/ruff/.editorconfig index 9d774cc7f6..762b7f0d53 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/.editorconfig +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/.editorconfig @@ -4,4 +4,8 @@ ij_formatter_enabled = false ["range_formatting/*.py"] generated_code = true +ij_formatter_enabled = false + +[docstring_tab_indentation.py] +generated_code = true ij_formatter_enabled = false \ No newline at end of file diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_tab_indentation.options.json b/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_tab_indentation.options.json new file mode 100644 index 0000000000..977706abb5 --- /dev/null +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_tab_indentation.options.json @@ -0,0 +1,10 @@ +[ + { + "indent_style": "tab", + "indent_width": 4 + }, + { + "indent_style": "tab", + "indent_width": 8 + } +] diff --git 
a/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_tab_indentation.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_tab_indentation.py new file mode 100644 index 0000000000..f8ad4560d4 --- /dev/null +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_tab_indentation.py @@ -0,0 +1,72 @@ +# Tests the behavior of the formatter when it comes to tabs inside docstrings +# when using `indent_style="tab` + +# The example below uses tabs exclusively. The formatter should preserve the tab indentation +# of `arg1`. +def tab_argument(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with 2 tabs in front + """ + +# The `arg1` is intended with spaces. The formatter should not change the spaces to a tab +# because it must assume that the spaces are used for alignment and not indentation. +def space_argument(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + +def under_indented(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front +arg2: Not properly indented + """ + +def under_indented_tabs(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front +arg2: Not properly indented + """ + +def spaces_tabs_over_indent(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + +# The docstring itself is indented with spaces but the argument is indented by a tab. +# Keep the tab indentation of the argument, convert th docstring indent to tabs. +def space_indented_docstring_containing_tabs(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg + """ + + +# The docstring uses tabs, spaces, tabs indentation. +# Fallback to use space indentation +def mixed_indentation(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + + +# The example shows an ascii art. 
The formatter should not change the spaces +# to tabs because it breaks the ASCII art when inspecting the docstring with `inspect.cleandoc(ascii_art.__doc__)` +# when using an indent width other than 8. +def ascii_art(): + r""" + Look at this beautiful tree. + + a + / \ + b c + / \ + d e + """ + + diff --git a/crates/ruff_python_formatter/src/string/docstring.rs b/crates/ruff_python_formatter/src/string/docstring.rs index b09324a10f..b06ba04b5a 100644 --- a/crates/ruff_python_formatter/src/string/docstring.rs +++ b/crates/ruff_python_formatter/src/string/docstring.rs @@ -2,11 +2,13 @@ // "reStructuredText." #![allow(clippy::doc_markdown)] +use std::cmp::Ordering; use std::{borrow::Cow, collections::VecDeque}; +use itertools::Itertools; + use ruff_formatter::printer::SourceMapGeneration; use ruff_python_parser::ParseError; - use {once_cell::sync::Lazy, regex::Regex}; use { ruff_formatter::{write, FormatOptions, IndentStyle, LineWidth, Printed}, @@ -80,9 +82,7 @@ use super::{NormalizedString, QuoteChar}; /// ``` /// /// Tabs are counted by padding them to the next multiple of 8 according to -/// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs). When -/// we see indentation that contains a tab or any other none ascii-space whitespace we rewrite the -/// string. +/// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs). /// /// Additionally, if any line in the docstring has less indentation than the docstring /// (effectively a negative indentation wrt. to the current level), we pad all lines to the @@ -104,6 +104,10 @@ use super::{NormalizedString, QuoteChar}; /// line c /// """ /// ``` +/// The indentation is rewritten to all-spaces when using [`IndentStyle::Space`]. +/// The formatter preserves tab-indentations when using [`IndentStyle::Tab`], but doesn't convert +/// `indent-width * spaces` to tabs because doing so could break ASCII art and other docstrings +/// that use spaces for alignment. 
pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> { let docstring = &normalized.text; @@ -176,19 +180,19 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form // align it with the docstring statement. Conversely, if all lines are over-indented, we strip // the extra indentation. We call this stripped indentation since it's relative to the block // indent printer-made indentation. - let stripped_indentation_length = lines + let stripped_indentation = lines .clone() // We don't want to count whitespace-only lines as miss-indented .filter(|line| !line.trim().is_empty()) - .map(indentation_length) - .min() + .map(Indentation::from_str) + .min_by_key(|indentation| indentation.width()) .unwrap_or_default(); DocstringLinePrinter { f, action_queue: VecDeque::new(), offset, - stripped_indentation_length, + stripped_indentation, already_normalized, quote_char: normalized.quotes.quote_char, code_example: CodeExample::default(), @@ -240,9 +244,9 @@ struct DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { /// printed. offset: TextSize, - /// Indentation alignment (in columns) based on the least indented line in the + /// Indentation alignment based on the least indented line in the /// docstring. - stripped_indentation_length: usize, + stripped_indentation: Indentation, /// Whether the docstring is overall already considered normalized. When it /// is, the formatter can take a fast path. @@ -345,7 +349,7 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { }; // This looks suspicious, but it's consistent with the whitespace // normalization that will occur anyway. 
- let indent = " ".repeat(min_indent); + let indent = " ".repeat(min_indent.width()); for docline in formatted_lines { self.print_one( &docline.map(|line| std::format!("{indent}{line}")), @@ -355,7 +359,7 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { CodeExampleKind::Markdown(fenced) => { // This looks suspicious, but it's consistent with the whitespace // normalization that will occur anyway. - let indent = " ".repeat(fenced.opening_fence_indent); + let indent = " ".repeat(fenced.opening_fence_indent.width()); for docline in formatted_lines { self.print_one( &docline.map(|line| std::format!("{indent}{line}")), @@ -387,12 +391,58 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { }; } - let tab_or_non_ascii_space = trim_end - .chars() - .take_while(|c| c.is_whitespace()) - .any(|c| c != ' '); + let indent_offset = match self.f.options().indent_style() { + // Normalize all indent to spaces. + IndentStyle::Space => { + let tab_or_non_ascii_space = trim_end + .chars() + .take_while(|c| c.is_whitespace()) + .any(|c| c != ' '); - if tab_or_non_ascii_space { + if tab_or_non_ascii_space { + None + } else { + // It's guaranteed that the `indent` is all spaces because `tab_or_non_ascii_space` is + // `false` (indent contains neither tabs nor non-space whitespace). + let stripped_indentation_len = self.stripped_indentation.text_len(); + + // Take the string with the trailing whitespace removed, then also + // skip the leading whitespace. 
+ Some(stripped_indentation_len) + } + } + IndentStyle::Tab => { + let line_indent = Indentation::from_str(trim_end); + + let non_ascii_whitespace = trim_end + .chars() + .take_while(|c| c.is_whitespace()) + .any(|c| !matches!(c, ' ' | '\t')); + + let trimmed = line_indent.trim_start(self.stripped_indentation); + + // Preserve tabs that are used for indentation, but only if all of the following hold: + // * the indent isn't an arbitrary mix of tabs and spaces + // * the `stripped_indentation` is a prefix of the line's indent + // * the trimmed indent isn't spaces followed by tabs because that would result in a + // mixed tab, spaces, tab indentation, resulting in instabilities. + let preserve_indent = !non_ascii_whitespace + && trimmed.is_some_and(|trimmed| !trimmed.is_spaces_tabs()); + preserve_indent.then_some(self.stripped_indentation.text_len()) + } + }; + + if let Some(indent_offset) = indent_offset { + // Take the string with the trailing whitespace removed, then also + // skip the leading whitespace. + if self.already_normalized { + let trimmed_line_range = + TextRange::at(line.offset, trim_end.text_len()).add_start(indent_offset); + source_text_slice(trimmed_line_range).fmt(self.f)?; + } else { + text(&trim_end[indent_offset.to_usize()..]).fmt(self.f)?; + } + } else { // We strip the indentation that is shared with the docstring // statement, unless a line was indented less than the docstring // statement, in which case we strip only this much indentation to @@ -400,24 +450,11 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { // overindented, in which case we strip the additional whitespace // (see example in [`format_docstring`] doc comment). We then // prepend the in-docstring indentation to the string. 
- let indent_len = indentation_length(trim_end) - self.stripped_indentation_length; + let indent_len = + Indentation::from_str(trim_end).width() - self.stripped_indentation.width(); let in_docstring_indent = " ".repeat(indent_len) + trim_end.trim_start(); text(&in_docstring_indent).fmt(self.f)?; - } else { - // It's guaranteed that the `indent` is all spaces because `tab_or_non_ascii_space` is - // `false` (indent contains neither tabs nor non-space whitespace). - - // Take the string with the trailing whitespace removed, then also - // skip the leading whitespace. - let trimmed_line_range = TextRange::at(line.offset, trim_end.text_len()) - .add_start(TextSize::try_from(self.stripped_indentation_length).unwrap()); - if self.already_normalized { - source_text_slice(trimmed_line_range).fmt(self.f)?; - } else { - // All indents are ascii spaces, so the slicing is correct. - text(&trim_end[self.stripped_indentation_length..]).fmt(self.f)?; - } - } + }; // We handled the case that the closing quotes are on their own line // above (the last line is empty except for whitespace). If they are on @@ -898,8 +935,7 @@ struct CodeExampleRst<'src> { /// The lines that have been seen so far that make up the block. lines: Vec<CodeExampleLine<'src>>, - /// The indent of the line "opening" this block measured via - /// `indentation_length` (in columns). + /// The indent of the line "opening" this block in columns. /// /// It can either be the indent of a line ending with `::` (for a literal /// block) or the indent of a line starting with `.. ` (a directive). @@ -907,9 +943,9 @@ struct CodeExampleRst<'src> { /// The content body of a block needs to be indented more than the line /// opening the block, so we use this indentation to look for indentation /// that is "more than" it. - opening_indent: usize, + opening_indent: Indentation, - /// The minimum indent of the block measured via `indentation_length`. + /// The minimum indent of the block in columns. 
/// /// This is `None` until the first such line is seen. If no such line is /// found, then we consider it an invalid block and bail out of trying to @@ -926,7 +962,7 @@ struct CodeExampleRst<'src> { /// When the code snippet has been extracted, it is re-built before being /// reformatted. The minimum indent is stripped from each line when it is /// re-built. - min_indent: Option<usize>, + min_indent: Option<Indentation>, /// Whether this is a directive block or not. When not a directive, this is /// a literal block. The main difference between them is that they start @@ -975,7 +1011,7 @@ impl<'src> CodeExampleRst<'src> { } Some(CodeExampleRst { lines: vec![], - opening_indent: indentation_length(opening_indent), + opening_indent: Indentation::from_str(opening_indent), min_indent: None, is_directive: false, }) @@ -1013,7 +1049,7 @@ impl<'src> CodeExampleRst<'src> { } Some(CodeExampleRst { lines: vec![], - opening_indent: indentation_length(original.line), + opening_indent: Indentation::from_str(original.line), min_indent: None, is_directive: true, }) @@ -1033,7 +1069,7 @@ impl<'src> CodeExampleRst<'src> { line.code = if line.original.line.trim().is_empty() { "" } else { - indentation_trim(min_indent, line.original.line) + min_indent.trim_start_str(line.original.line) }; } &self.lines @@ -1070,7 +1106,9 @@ impl<'src> CodeExampleRst<'src> { // an empty line followed by an unindented non-empty line. 
if let Some(next) = original.next { let (next_indent, next_rest) = indent_with_suffix(next); - if !next_rest.is_empty() && indentation_length(next_indent) <= self.opening_indent { + if !next_rest.is_empty() + && Indentation::from_str(next_indent) <= self.opening_indent + { self.push_format_action(queue); return None; } @@ -1082,7 +1120,7 @@ impl<'src> CodeExampleRst<'src> { queue.push_back(CodeExampleAddAction::Kept); return Some(self); } - let indent_len = indentation_length(indent); + let indent_len = Indentation::from_str(indent); if indent_len <= self.opening_indent { // If we find an unindented non-empty line at the same (or less) // indentation of the opening line at this point, then we know it @@ -1144,7 +1182,7 @@ impl<'src> CodeExampleRst<'src> { queue.push_back(CodeExampleAddAction::Print { original }); return Some(self); } - let min_indent = indentation_length(indent); + let min_indent = Indentation::from_str(indent); // At this point, we found a non-empty line. The only thing we require // is that its indentation is strictly greater than the indentation of // the line containing the `::`. Otherwise, we treat this as an invalid @@ -1218,12 +1256,11 @@ struct CodeExampleMarkdown<'src> { /// The lines that have been seen so far that make up the block. lines: Vec<CodeExampleLine<'src>>, - /// The indent of the line "opening" fence of this block measured via - /// `indentation_length` (in columns). + /// The indent of the line "opening" fence of this block in columns. /// /// This indentation is trimmed from the indentation of every line in the /// body of the code block, - opening_fence_indent: usize, + opening_fence_indent: Indentation, /// The kind of fence, backticks or tildes, used for this block. 
We need to /// keep track of which kind was used to open the block in order to look @@ -1292,7 +1329,7 @@ impl<'src> CodeExampleMarkdown<'src> { }; Some(CodeExampleMarkdown { lines: vec![], - opening_fence_indent: indentation_length(opening_fence_indent), + opening_fence_indent: Indentation::from_str(opening_fence_indent), fence_kind, fence_len, }) @@ -1325,7 +1362,7 @@ impl<'src> CodeExampleMarkdown<'src> { // its indent normalized. And, at the time of writing, a subsequent // formatting run undoes this indentation, thus violating idempotency. if !original.line.trim_whitespace().is_empty() - && indentation_length(original.line) < self.opening_fence_indent + && Indentation::from_str(original.line) < self.opening_fence_indent { queue.push_back(self.into_reset_action()); queue.push_back(CodeExampleAddAction::Print { original }); @@ -1371,7 +1408,7 @@ impl<'src> CodeExampleMarkdown<'src> { // Unlike reStructuredText blocks, for Markdown fenced code blocks, the // indentation that we want to strip from each line is known when the // block is opened. So we can strip it as we collect lines. - let code = indentation_trim(self.opening_fence_indent, original.line); + let code = self.opening_fence_indent.trim_start_str(original.line); self.lines.push(CodeExampleLine { original, code }); } @@ -1486,7 +1523,6 @@ enum CodeExampleAddAction<'src> { /// results in that code example becoming invalid. In this case, /// we don't want to treat it as a code example, but instead write /// back the lines to the docstring unchanged. - #[allow(dead_code)] // FIXME: remove when reStructuredText support is added Reset { /// The lines of code that we collected but should be printed back to /// the docstring as-is and not formatted. @@ -1537,53 +1573,241 @@ fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool || trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1 } -/// Returns the indentation's visual width in columns/spaces. 
-/// -/// For docstring indentation, black counts spaces as 1 and tabs by increasing the indentation up -/// to the next multiple of 8. This is effectively a port of -/// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs), -/// which black [calls with the default tab width of 8](https://github.com/psf/black/blob/c36e468794f9256d5e922c399240d49782ba04f1/src/black/strings.py#L61). -fn indentation_length(line: &str) -> usize { - let mut indentation = 0usize; - for char in line.chars() { - if char == '\t' { - // Pad to the next multiple of tab_width - indentation += 8 - (indentation.rem_euclid(8)); - } else if char.is_whitespace() { - indentation += char.len_utf8(); - } else { - break; - } - } - indentation +#[derive(Copy, Clone, Debug)] +enum Indentation { + /// Space only indentation or an empty indentation. + /// + /// The value is the number of spaces. + Spaces(usize), + + /// Tabs only indentation. + Tabs(usize), + + /// Indentation that uses tabs followed by spaces. + /// Also known as smart tabs where tabs are used for indents, and spaces for alignment. + TabSpaces { tabs: usize, spaces: usize }, + + /// Indentation that uses spaces followed by tabs. + SpacesTabs { spaces: usize, tabs: usize }, + + /// Mixed indentation of tabs and spaces. + Mixed { + /// The visual width of the indentation in columns. + width: usize, + + /// The length of the indentation in bytes + len: TextSize, + }, } -/// Trims at most `indent_len` indentation from the beginning of `line`. -/// -/// This treats indentation in precisely the same way as `indentation_length`. -/// As such, it is expected that `indent_len` is computed from -/// `indentation_length`. This is useful when one needs to trim some minimum -/// level of indentation from a code snippet collected from a docstring before -/// attempting to reformat it. 
-fn indentation_trim(indent_len: usize, line: &str) -> &str { - let mut seen_indent_len = 0; - let mut trimmed = line; - for char in line.chars() { - if seen_indent_len >= indent_len { - return trimmed; +impl Indentation { + const TAB_INDENT_WIDTH: usize = 8; + + fn from_str(s: &str) -> Self { + let mut iter = s.chars().peekable(); + + let spaces = iter.peeking_take_while(|c| *c == ' ').count(); + let tabs = iter.peeking_take_while(|c| *c == '\t').count(); + + if tabs == 0 { + // No indent, or spaces only indent + return Self::Spaces(spaces); } - if char == '\t' { - // Pad to the next multiple of tab_width - seen_indent_len += 8 - (seen_indent_len.rem_euclid(8)); - trimmed = &trimmed[1..]; - } else if char.is_whitespace() { - seen_indent_len += char.len_utf8(); - trimmed = &trimmed[char.len_utf8()..]; - } else { - break; + + let align_spaces = iter.peeking_take_while(|c| *c == ' ').count(); + + if spaces == 0 { + if align_spaces == 0 { + return Self::Tabs(tabs); + } + + // At this point it's either a smart tab (tabs followed by spaces) or a wild mix of tabs and spaces. + if iter.peek().copied() != Some('\t') { + return Self::TabSpaces { + tabs, + spaces: align_spaces, + }; + } + } else if align_spaces == 0 { + return Self::SpacesTabs { spaces, tabs }; + } + + // Sequence of spaces.. tabs, spaces, tabs... + let mut width = spaces + tabs * Self::TAB_INDENT_WIDTH + align_spaces; + // SAFETY: Safe because Ruff doesn't support files larger than 4GB. + let mut len = TextSize::try_from(spaces + tabs + align_spaces).unwrap(); + + for char in iter { + if char == '\t' { + // Pad to the next multiple of tab_width + width += Self::TAB_INDENT_WIDTH - (width.rem_euclid(Self::TAB_INDENT_WIDTH)); + len += '\t'.text_len(); + } else if char.is_whitespace() { + width += char.len_utf8(); + len += char.text_len(); + } else { + break; + } + } + + // Mixed tabs and spaces + Self::Mixed { width, len } + } + + /// Returns the indentation's visual width in columns/spaces. 
+ /// + /// For docstring indentation, black counts spaces as 1 and tabs by increasing the indentation up + /// to the next multiple of 8. This is effectively a port of + /// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs), + /// which black [calls with the default tab width of 8](https://github.com/psf/black/blob/c36e468794f9256d5e922c399240d49782ba04f1/src/black/strings.py#L61). + const fn width(self) -> usize { + match self { + Self::Spaces(count) => count, + Self::Tabs(count) => count * Self::TAB_INDENT_WIDTH, + Self::TabSpaces { tabs, spaces } => tabs * Self::TAB_INDENT_WIDTH + spaces, + Self::SpacesTabs { spaces, tabs } => { + let mut indent = spaces; + indent += Self::TAB_INDENT_WIDTH - indent.rem_euclid(Self::TAB_INDENT_WIDTH); + indent + (tabs - 1) * Self::TAB_INDENT_WIDTH + } + Self::Mixed { width, .. } => width, } } - trimmed + + /// Returns the length of the indentation in bytes. + /// + /// # Panics + /// If the indentation is longer than 4GB. + fn text_len(self) -> TextSize { + let len = match self { + Self::Spaces(count) => count, + Self::Tabs(count) => count, + Self::TabSpaces { tabs, spaces } => tabs + spaces, + Self::SpacesTabs { spaces, tabs } => spaces + tabs, + Self::Mixed { len, .. } => return len, + }; + + TextSize::try_from(len).unwrap() + } + + /// Removes the indentation `rhs` from the start of `self`. + /// + /// Returns `None` if `rhs` is not a prefix of `self`, or if either `self` or `rhs` uses mixed indentation. + fn trim_start(self, rhs: Self) -> Option<Self> { + let (left_tabs, left_spaces) = match self { + Self::Spaces(spaces) => (0usize, spaces), + Self::Tabs(tabs) => (tabs, 0usize), + Self::TabSpaces { tabs, spaces } => (tabs, spaces), + // Handle spaces here because it is the only indent where the spaces come before the tabs. 
+ Self::SpacesTabs { + spaces: left_spaces, + tabs: left_tabs, + } => { + return match rhs { + Self::Spaces(right_spaces) => { + left_spaces.checked_sub(right_spaces).map(|spaces| { + if spaces == 0 { + Self::Tabs(left_tabs) + } else { + Self::SpacesTabs { + tabs: left_tabs, + spaces, + } + } + }) + } + Self::SpacesTabs { + spaces: right_spaces, + tabs: right_tabs, + } => left_spaces.checked_sub(right_spaces).and_then(|spaces| { + let tabs = left_tabs.checked_sub(right_tabs)?; + + Some(if spaces == 0 { + if tabs == 0 { + Self::Spaces(0) + } else { + Self::Tabs(tabs) + } + } else { + Self::SpacesTabs { spaces, tabs } + }) + }), + + _ => None, + } + } + Self::Mixed { .. } => return None, + }; + + let (right_tabs, right_spaces) = match rhs { + Self::Spaces(spaces) => (0usize, spaces), + Self::Tabs(tabs) => (tabs, 0usize), + Self::TabSpaces { tabs, spaces } => (tabs, spaces), + Self::SpacesTabs { .. } | Self::Mixed { .. } => return None, + }; + + let tabs = left_tabs.checked_sub(right_tabs)?; + let spaces = left_spaces.checked_sub(right_spaces)?; + + Some(if tabs == 0 { + Self::Spaces(spaces) + } else if spaces == 0 { + Self::Tabs(tabs) + } else { + Self::TabSpaces { tabs, spaces } + }) + } + + /// Trims at most `indent_len` indentation from the beginning of `line`. + /// + /// This is useful when one needs to trim some minimum + /// level of indentation from a code snippet collected from a docstring before + /// attempting to reformat it. 
+ fn trim_start_str(self, line: &str) -> &str { + let mut seen_indent_len = 0; + let mut trimmed = line; + let indent_len = self.width(); + + for char in line.chars() { + if seen_indent_len >= indent_len { + return trimmed; + } + if char == '\t' { + // Pad to the next multiple of tab_width + seen_indent_len += + Self::TAB_INDENT_WIDTH - (seen_indent_len.rem_euclid(Self::TAB_INDENT_WIDTH)); + trimmed = &trimmed[1..]; + } else if char.is_whitespace() { + seen_indent_len += char.len_utf8(); + trimmed = &trimmed[char.len_utf8()..]; + } else { + break; + } + } + trimmed + } + + const fn is_spaces_tabs(self) -> bool { + matches!(self, Self::SpacesTabs { .. }) + } +} + +impl PartialOrd for Indentation { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.width().cmp(&other.width())) + } +} + +impl PartialEq for Indentation { + fn eq(&self, other: &Self) -> bool { + self.width() == other.width() + } +} + +impl Default for Indentation { + fn default() -> Self { + Self::Spaces(0) + } } /// Returns the indentation of the given line and everything following it. 
@@ -1613,14 +1837,13 @@ fn is_rst_option(line: &str) -> bool { #[cfg(test)] mod tests { - - use super::indentation_length; + use crate::string::docstring::Indentation; #[test] fn test_indentation_like_black() { - assert_eq!(indentation_length("\t \t \t"), 24); - assert_eq!(indentation_length("\t        \t"), 24); - assert_eq!(indentation_length("\t\t\t"), 24); - assert_eq!(indentation_length("    "), 4); + assert_eq!(Indentation::from_str("\t \t \t").width(), 24); + assert_eq!(Indentation::from_str("\t        \t").width(), 24); + assert_eq!(Indentation::from_str("\t\t\t").width(), 24); + assert_eq!(Indentation::from_str("    ").width(), 4); } } diff --git a/crates/ruff_python_formatter/tests/snapshots/format@docstring_tab_indentation.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@docstring_tab_indentation.py.snap new file mode 100644 index 0000000000..01089b0c96 --- /dev/null +++ b/crates/ruff_python_formatter/tests/snapshots/format@docstring_tab_indentation.py.snap @@ -0,0 +1,270 @@ +--- +source: crates/ruff_python_formatter/tests/fixtures.rs +input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_tab_indentation.py +--- +## Input +```python +# Tests the behavior of the formatter when it comes to tabs inside docstrings +# when using `indent_style="tab` + +# The example below uses tabs exclusively. The formatter should preserve the tab indentation +# of `arg1`. +def tab_argument(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with 2 tabs in front + """ + +# The `arg1` is intended with spaces. The formatter should not change the spaces to a tab +# because it must assume that the spaces are used for alignment and not indentation. 
+def space_argument(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + +def under_indented(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front +arg2: Not properly indented + """ + +def under_indented_tabs(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front +arg2: Not properly indented + """ + +def spaces_tabs_over_indent(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + +# The docstring itself is indented with spaces but the argument is indented by a tab. +# Keep the tab indentation of the argument, convert th docstring indent to tabs. +def space_indented_docstring_containing_tabs(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg + """ + + +# The docstring uses tabs, spaces, tabs indentation. +# Fallback to use space indentation +def mixed_indentation(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + + +# The example shows an ascii art. The formatter should not change the spaces +# to tabs because it breaks the ASCII art when inspecting the docstring with `inspect.cleandoc(ascii_art.__doc__)` +# when using an indent width other than 8. +def ascii_art(): + r""" + Look at this beautiful tree. + + a + / \ + b c + / \ + d e + """ + + +``` + +## Outputs +### Output 1 +``` +indent-style = tab +line-width = 88 +indent-width = 4 +quote-style = Double +line-ending = LineFeed +magic-trailing-comma = Respect +docstring-code = Disabled +docstring-code-line-width = "dynamic" +preview = Disabled +target_version = Py38 +source_type = Python +``` + +```python +# Tests the behavior of the formatter when it comes to tabs inside docstrings +# when using `indent_style="tab` + +# The example below uses tabs exclusively. The formatter should preserve the tab indentation +# of `arg1`. 
+def tab_argument(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with 2 tabs in front + """ + + +# The `arg1` is intended with spaces. The formatter should not change the spaces to a tab +# because it must assume that the spaces are used for alignment and not indentation. +def space_argument(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + + +def under_indented(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + arg2: Not properly indented + """ + + +def under_indented_tabs(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + arg2: Not properly indented + """ + + +def spaces_tabs_over_indent(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + + +# The docstring itself is indented with spaces but the argument is indented by a tab. +# Keep the tab indentation of the argument, convert th docstring indent to tabs. +def space_indented_docstring_containing_tabs(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg + """ + + +# The docstring uses tabs, spaces, tabs indentation. +# Fallback to use space indentation +def mixed_indentation(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + + +# The example shows an ascii art. The formatter should not change the spaces +# to tabs because it breaks the ASCII art when inspecting the docstring with `inspect.cleandoc(ascii_art.__doc__)` +# when using an indent width other than 8. +def ascii_art(): + r""" + Look at this beautiful tree. 
+ + a + / \ + b c + / \ + d e + """ +``` + + +### Output 2 +``` +indent-style = tab +line-width = 88 +indent-width = 8 +quote-style = Double +line-ending = LineFeed +magic-trailing-comma = Respect +docstring-code = Disabled +docstring-code-line-width = "dynamic" +preview = Disabled +target_version = Py38 +source_type = Python +``` + +```python +# Tests the behavior of the formatter when it comes to tabs inside docstrings +# when using `indent_style="tab` + +# The example below uses tabs exclusively. The formatter should preserve the tab indentation +# of `arg1`. +def tab_argument(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with 2 tabs in front + """ + + +# The `arg1` is intended with spaces. The formatter should not change the spaces to a tab +# because it must assume that the spaces are used for alignment and not indentation. +def space_argument(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + + +def under_indented(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + arg2: Not properly indented + """ + + +def under_indented_tabs(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + arg2: Not properly indented + """ + + +def spaces_tabs_over_indent(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + + +# The docstring itself is indented with spaces but the argument is indented by a tab. +# Keep the tab indentation of the argument, convert th docstring indent to tabs. +def space_indented_docstring_containing_tabs(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg + """ + + +# The docstring uses tabs, spaces, tabs indentation. +# Fallback to use space indentation +def mixed_indentation(arg1: str) -> None: + """ + Arguments: + arg1: super duper arg with a tab and a space in front + """ + + +# The example shows an ascii art. 
The formatter should not change the spaces +# to tabs because it breaks the ASCII art when inspecting the docstring with `inspect.cleandoc(ascii_art.__doc__)` +# when using an indent width other than 8. +def ascii_art(): + r""" + Look at this beautiful tree. + + a + / \ + b c + / \ + d e + """ +``` + + +