Merge pull request #14879 from audrey-inglish/master

Fix: normalize punctuation when computing CleanName so searches without punctuation match (closes #1674)
This commit is contained in:
audrey-inglish 2025-12-08 10:43:37 -07:00 committed by GitHub
parent da3bff3edf
commit 8fd59d6f33
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 256 additions and 2 deletions

View File

@ -1373,14 +1373,54 @@ public sealed class BaseItemRepository
}
}
/// <summary>
/// Gets the clean value for search and sorting purposes.
/// </summary>
/// <remarks>
/// The value has its diacritics removed, every run of characters that are not letters
/// or digits (punctuation, symbols, any whitespace) collapsed into a single ASCII space,
/// leading and trailing separators dropped, and the result lower-cased invariantly.
/// For example <c>"Spider-Man: Homecoming"</c> becomes <c>"spider man homecoming"</c>.
/// A value that contains no letter or digit at all falls back to the trimmed,
/// lower-cased, diacritics-free text so that it does not collapse to an empty string.
/// </remarks>
/// <param name="value">The value to clean.</param>
/// <returns>
/// The cleaned value. A <c>null</c>, empty or whitespace-only <paramref name="value"/> is returned unchanged.
/// </returns>
public static string GetCleanValue(string value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return value;
    }

    var noDiacritics = value.RemoveDiacritics();

    var sb = new StringBuilder(noDiacritics.Length);

    // Set whenever at least one separator character has been seen since the last kept character.
    var needsSeparator = false;

    var index = 0;
    while (index < noDiacritics.Length)
    {
        // Classify whole Unicode scalar values rather than UTF-16 code units, so that
        // letters and digits encoded as surrogate pairs are kept instead of being
        // turned into separators. A lone surrogate is not a valid scalar and is
        // treated as a one-unit separator.
        var isScalar = Rune.TryGetRuneAt(noDiacritics, index, out var rune);
        var width = isScalar ? rune.Utf16SequenceLength : 1;

        if (isScalar && Rune.IsLetterOrDigit(rune))
        {
            // Emit the separator lazily and only between kept characters; this both
            // collapses runs of separators and avoids leading/trailing spaces.
            if (needsSeparator && sb.Length > 0)
            {
                sb.Append(' ');
            }

            needsSeparator = false;
            sb.Append(noDiacritics, index, width);
        }
        else
        {
            needsSeparator = true;
        }

        index += width;
    }

    if (sb.Length == 0)
    {
        // Nothing but punctuation/symbols (e.g. "!!!"): keep the text itself so the
        // clean value stays non-empty and distinguishable from other names.
        return noDiacritics.Trim().ToLowerInvariant();
    }

    return sb.ToString().ToLowerInvariant();
}
private List<(ItemValueType MagicNumber, string Value)> GetItemValuesToSave(BaseItemDto item, List<string> inheritedTags)

View File

@ -0,0 +1,105 @@
using System;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Jellyfin.Database.Implementations;
using Jellyfin.Database.Implementations.Entities;
using Jellyfin.Extensions;
using Jellyfin.Server.Implementations.Item;
using Jellyfin.Server.ServerSetupApp;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
namespace Jellyfin.Server.Migrations.Routines;
/// <summary>
/// Migration to refresh CleanName values for all library items.
/// </summary>
/// <remarks>
/// Every item with a non-empty name has its CleanName recomputed with
/// <see cref="BaseItemRepository.GetCleanValue(string)"/>; rows whose stored value
/// already matches are left untouched.
/// </remarks>
[JellyfinMigration("2025-10-08T12:00:00", nameof(RefreshCleanNames))]
[JellyfinMigrationBackup(JellyfinDb = true)]
public class RefreshCleanNames : IAsyncMigrationRoutine
{
    private readonly IStartupLogger<RefreshCleanNames> _logger;
    private readonly IDbContextFactory<JellyfinDbContext> _dbProvider;

    /// <summary>
    /// Initializes a new instance of the <see cref="RefreshCleanNames"/> class.
    /// </summary>
    /// <param name="logger">The logger.</param>
    /// <param name="dbProvider">Instance of the <see cref="IDbContextFactory{JellyfinDbContext}"/> interface.</param>
    public RefreshCleanNames(
        IStartupLogger<RefreshCleanNames> logger,
        IDbContextFactory<JellyfinDbContext> dbProvider)
    {
        _logger = logger;
        _dbProvider = dbProvider;
    }

    /// <inheritdoc />
    public async Task PerformAsync(CancellationToken cancellationToken)
    {
        const int Limit = 1000;
        var updatedCount = 0;
        var sw = Stopwatch.StartNew();

        // Create, query and dispose the context asynchronously so this async
        // migration never blocks its thread on database I/O.
        var context = await _dbProvider.CreateDbContextAsync(cancellationToken).ConfigureAwait(false);
        await using (context.ConfigureAwait(false))
        {
            var records = await context.BaseItems
                .CountAsync(b => !string.IsNullOrEmpty(b.Name), cancellationToken)
                .ConfigureAwait(false);
            _logger.LogInformation("Refreshing CleanName for {Count} library items", records);

            var processedInPartition = 0;

            await foreach (var item in context.BaseItems
                .Where(b => !string.IsNullOrEmpty(b.Name))
                .OrderBy(e => e.Id)
                .WithPartitionProgress((partition) => _logger.LogInformation("Processed: {Offset}/{Total} - Updated: {UpdatedCount} - Time: {Elapsed}", partition * Limit, records, updatedCount, sw.Elapsed))
                .PartitionEagerAsync(Limit, cancellationToken)
                .WithCancellation(cancellationToken)
                .ConfigureAwait(false))
            {
                try
                {
                    var newCleanName = string.IsNullOrWhiteSpace(item.Name) ? string.Empty : BaseItemRepository.GetCleanValue(item.Name);

                    // Only touch rows whose value actually changes, so unchanged rows are not rewritten.
                    if (!string.Equals(newCleanName, item.CleanName, StringComparison.Ordinal))
                    {
                        _logger.LogDebug(
                            "Updating CleanName for item {Id}: '{OldValue}' -> '{NewValue}'",
                            item.Id,
                            item.CleanName,
                            newCleanName);
                        item.CleanName = newCleanName;
                        updatedCount++;
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a single bad item must not abort the whole migration.
                    _logger.LogWarning(ex, "Failed to update CleanName for item {Id} ({Name})", item.Id, item.Name);
                }

                processedInPartition++;

                // Flush once per Limit items, the same size requested from PartitionEagerAsync.
                if (processedInPartition >= Limit)
                {
                    await context.SaveChangesAsync(cancellationToken).ConfigureAwait(false);

                    // Clear tracked entities to avoid memory growth across partitions
                    context.ChangeTracker.Clear();
                    processedInPartition = 0;
                }
            }

            // Save any remaining changes after the loop
            if (processedInPartition > 0)
            {
                await context.SaveChangesAsync(cancellationToken).ConfigureAwait(false);
                context.ChangeTracker.Clear();
            }

            _logger.LogInformation(
                "Refreshed CleanName for {UpdatedCount} out of {TotalCount} items in {Time}",
                updatedCount,
                records,
                sw.Elapsed);
        }
    }
}

View File

@ -0,0 +1,109 @@
using System;
using AutoFixture;
using AutoFixture.AutoMoq;
using Jellyfin.Server.Implementations.Item;
using MediaBrowser.Controller.Entities.TV;
using Microsoft.Extensions.Configuration;
using Moq;
using Xunit;
namespace Jellyfin.Server.Implementations.Tests.Data
{
/// <summary>
/// Tests that the CleanName produced when mapping an item is normalized so that
/// search terms typed without punctuation match titles that contain punctuation.
/// </summary>
public class SearchPunctuationTests
{
    private readonly IFixture _fixture;
    private readonly BaseItemRepository _repo;

    /// <summary>
    /// Initializes a new instance of the <see cref="SearchPunctuationTests"/> class,
    /// building a <see cref="BaseItemRepository"/> from auto-mocked dependencies.
    /// </summary>
    public SearchPunctuationTests()
    {
        // Virtual path expansion/reversal are mocked as identity functions.
        var appHost = new Mock<MediaBrowser.Controller.IServerApplicationHost>();
        appHost.Setup(x => x.ExpandVirtualPath(It.IsAny<string>()))
            .Returns((string x) => x);
        appHost.Setup(x => x.ReverseVirtualPath(It.IsAny<string>()))
            .Returns((string x) => x);

        var configSection = new Mock<IConfigurationSection>();
        configSection
            .SetupGet(x => x[It.Is<string>(s => s == MediaBrowser.Controller.Extensions.ConfigurationExtensions.SqliteCacheSizeKey)])
            .Returns("0");

        var config = new Mock<IConfiguration>();
        config
            .Setup(x => x.GetSection(It.Is<string>(s => s == MediaBrowser.Controller.Extensions.ConfigurationExtensions.SqliteCacheSizeKey)))
            .Returns(configSection.Object);

        _fixture = new Fixture().Customize(new AutoMoqCustomization { ConfigureMembers = true });
        _fixture.Inject(appHost.Object);
        _fixture.Inject(config.Object);

        _repo = _fixture.Create<BaseItemRepository>();
    }

    /// <summary>
    /// Despite the method name, punctuation is not kept: "Mr. Robot" must be stored as
    /// "mr robot", and the query "Mr Robot" must normalize to that same value.
    /// </summary>
    [Fact]
    public void CleanName_keeps_punctuation_and_search_without_punctuation_passes()
    {
        var series = new Series
        {
            Id = Guid.NewGuid(),
            Name = "Mr. Robot"
        };
        series.SortName = "Mr. Robot";

        var entity = _repo.Map(series);

        Assert.Equal("mr robot", entity.CleanName);

        // Run the punctuation-free query through the real normalization instead of
        // comparing a hand-lowered literal with itself.
        var searchTerm = BaseItemRepository.GetCleanValue("Mr Robot");
        Assert.Equal(entity.CleanName, searchTerm);
    }

    /// <summary>
    /// Punctuation, symbols and diacritics are normalized away for a range of titles.
    /// </summary>
    /// <param name="title">The item name as stored in the library.</param>
    /// <param name="expectedClean">The expected CleanName.</param>
    [Theory]
    [InlineData("Spider-Man: Homecoming", "spider man homecoming")]
    [InlineData("Beyoncé — Live!", "beyonce live")]
    [InlineData("Hello, World!", "hello world")]
    [InlineData("(The) Good, the Bad & the Ugly", "the good the bad the ugly")]
    [InlineData("Wall-E", "wall e")]
    [InlineData("No. 1: The Beginning", "no 1 the beginning")]
    [InlineData("Café-au-lait", "cafe au lait")]
    public void CleanName_normalizes_various_punctuation(string title, string expectedClean)
    {
        AssertCleanName(title, expectedClean);
    }

    /// <summary>
    /// Slashes inside a title are treated as word separators.
    /// </summary>
    /// <param name="title">The item name as stored in the library.</param>
    /// <param name="expectedClean">The expected CleanName.</param>
    [Theory]
    [InlineData("Face/Off", "face off")]
    [InlineData("V/H/S", "v h s")]
    public void CleanName_normalizes_titles_withslashes(string title, string expectedClean)
    {
        AssertCleanName(title, expectedClean);
    }

    /// <summary>
    /// Maps a series named <paramref name="title"/> and asserts both that its CleanName
    /// equals <paramref name="expectedClean"/> and that a punctuation-free search term
    /// normalizes to that same value.
    /// </summary>
    private void AssertCleanName(string title, string expectedClean)
    {
        var series = new Series
        {
            Id = Guid.NewGuid(),
            Name = title
        };
        series.SortName = title;

        var entity = _repo.Map(series);

        Assert.Equal(expectedClean, entity.CleanName);

        // Ensure a search term without punctuation would match: cleaning the already
        // clean term must leave it unchanged, so it compares equal to the stored value.
        var searchTerm = BaseItemRepository.GetCleanValue(expectedClean);
        Assert.Equal(expectedClean, searchTerm);
    }
}
}