From 70057145908a403f00fd1cf883389e98550a8676 Mon Sep 17 00:00:00 2001 From: Bruno Massa Date: Sat, 9 Mar 2024 20:54:31 -0500 Subject: [PATCH 1/3] feat: checklinks command to check all links in a given folder --- source/CheckLinkCommand.cs | 190 ++++++++++++++++++ .../CommandLineOptions/CheckLinkOptions.cs | 35 ++++ source/Program.cs | 11 +- source/SuCoS.csproj | 1 + 4 files changed, 234 insertions(+), 3 deletions(-) create mode 100644 source/CheckLinkCommand.cs create mode 100644 source/Models/CommandLineOptions/CheckLinkOptions.cs diff --git a/source/CheckLinkCommand.cs b/source/CheckLinkCommand.cs new file mode 100644 index 0000000..045e91c --- /dev/null +++ b/source/CheckLinkCommand.cs @@ -0,0 +1,190 @@ +using System.Collections.Concurrent; +using System.Globalization; +using System.Net; +using System.Text.RegularExpressions; +using Serilog; +using SuCoS.Models.CommandLineOptions; + +namespace SuCoS; + +/// +/// Check links of a given site. +/// +public sealed partial class CheckLinkCommand(CheckLinkOptions settings, ILogger logger) +{ + + [GeneratedRegex(@"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9@:%_\+.~#?&\/=]*)")] + private static partial Regex MyRegex(); + private static readonly Regex linkRegex = MyRegex(); + private const int retriesCount = 3; + private readonly TimeSpan retryInterval = TimeSpan.FromSeconds(1); + private HttpClient httpClient = null!; + private readonly ConcurrentBag checkedLinks = []; + private readonly ConcurrentDictionary> linkToFilesMap = []; + private readonly ConcurrentBag failedLinks = []; + + /// + /// Run the app + /// + /// + public async Task Run() + { + var directoryPath = Path.GetFullPath(settings.Source); + // var filter = "*.html"; + + if (!Directory.Exists(directoryPath)) + { + logger.Fatal("Directory '{directoryPath}' doesn't exist.", directoryPath); + return 1; + } + + httpClient = GetHttpClient(); + + var files = GetFiles(directoryPath, settings.Filters); + var linksAreValid = await CheckLinks(directoryPath, files, httpClient); + + if (!linksAreValid) + { + logger.Error("There are failed checks."); + + foreach (var (link, linkfiles) in linkToFilesMap) + { + if (failedLinks.Contains(link)) + { + linkfiles.Sort(); + logger.Error("Link {link} failed and are in these files:\n{files}", link, string.Join("\n", linkfiles)); + } + } + return 1; + } + logger.Information("Done"); + return 0; + } + + private static HttpClient GetHttpClient() + { + var client = new HttpClient(); + client.DefaultRequestHeaders.Add("User-Agent", "C# App"); + return client; + } + + private async Task CheckLinks(string directoryPath, string[] files, HttpClient httpClient) + { + var filesCount = files.Length; + var result = true; + + var options = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }; + await Parallel.ForEachAsync(files, options, async (filePath, token) => + { + var fileNameSanitized = filePath[directoryPath.Length..].Trim('/', '\\'); + var fileText = File.ReadAllText(filePath); + var matches = linkRegex.Matches(fileText); + if (matches.Count == 0) + { + LogInformation("{fileName}: no links found", fileNameSanitized); + return; + } + + LogInformation("{fileName}: {matches} link found", fileNameSanitized, matches.Count.ToString(CultureInfo.InvariantCulture)); + foreach (Match match in matches) + { + var link = match.Value.Trim('.'); + + if (!linkToFilesMap.TryGetValue(link, out var value)) + { + value = []; + linkToFilesMap[link] = value; + } + + if (!value.Contains(fileNameSanitized)) + { + value.Add(fileNameSanitized); + } + if (checkedLinks.Contains(link)) + { + continue; + } + checkedLinks.Add(link); + LogInformation("{fileName}: {link} found", fileNameSanitized, link); + + var linkIsValid = false; + for (var j = 0; j < retriesCount && !linkIsValid; j++) + { + linkIsValid |= await CheckLink(fileNameSanitized, link, httpClient); + if (!linkIsValid && j < retriesCount - 1) + { + LogInformation("{fileName}: {link} retrying...", fileNameSanitized, link); + Thread.Sleep(retryInterval); + } + } + + if (linkIsValid) + { + LogInformation("{fileName}: {link} OK", fileNameSanitized, link); + } + else + { + LogError("{fileName}: {link} FAIL", fileNameSanitized, link); + failedLinks.Add(link); + } + + result &= linkIsValid; + } + }); + + return result; + } + + private async Task CheckLink(string fileName, string link, HttpClient httpClient) + { + try + { + var response = await httpClient.GetAsync(link); + if (response.StatusCode != HttpStatusCode.OK) + { + LogError("{fileName}: {link} failed with: {response}", fileName, link, response.StatusCode); + } + + return response.StatusCode == HttpStatusCode.OK; + } + catch (Exception ex) + { + LogError("{fileName}: {link} failed with: {exMessage}", fileName, link, ex.Message); + failedLinks.Add(link); + return false; + } + } + + private string[] GetFiles(string directoryPath, string filter) + { + logger.Information("Searching files in the directory '{directoryPath}' by '{filter}' filter...", directoryPath, filter); + + var files = Directory.GetFiles(directoryPath, filter, SearchOption.AllDirectories); + + logger.Information("{filesLength} files found", files.Length); + return files; + } + + void LogInformation(string message, string fileName, string? link = null, string? arg = null) + { + if (settings.Verbose && false) + { + logger.Information(message, fileName, link, arg); + } + } + + void LogError(string message, string fileName, string? link = null, string? arg = null) + { + if (settings.Verbose) + { + logger.Error(message, fileName, link, arg); + } + } + void LogError(string message, string fileName, string? link, HttpStatusCode arg) + { + if (settings.Verbose) + { + logger.Error(message, fileName, link, arg); + } + } +} \ No newline at end of file diff --git a/source/Models/CommandLineOptions/CheckLinkOptions.cs b/source/Models/CommandLineOptions/CheckLinkOptions.cs new file mode 100644 index 0000000..d1ef435 --- /dev/null +++ b/source/Models/CommandLineOptions/CheckLinkOptions.cs @@ -0,0 +1,35 @@ +using CommandLine; + +namespace SuCoS.Models.CommandLineOptions; + +/// +/// Command line options for the serve command. +/// +[Verb("checklinks", HelpText = "Starts the server")] +public class CheckLinkOptions +{ + /// + /// How verbose it must be. + /// + [Option('v', "verbose", Required = false, HelpText = "How verbose it must be")] + public bool Verbose { get; init; } + + /// + /// The path of the source files. + /// + [Value(0)] + public string Source { get; init; } = "./"; + + /// + /// How verbose it must be. + /// + [Option('f', "filters", Required = false, HelpText = "File name filters (default: '*.html')")] + public string Filters { get; init; } = "*.html"; + + /// + /// How verbose it must be. + /// + [Option('f', "filters", Required = false, HelpText = "File name filters (default: '*.html')")] + public string Ignore { get; init; } = "*.html"; +} + diff --git a/source/Program.cs b/source/Program.cs index df5e474..7439c52 100644 --- a/source/Program.cs +++ b/source/Program.cs @@ -46,7 +46,7 @@ public class Program(ILogger logger) /// public async Task RunCommandLine(string[] args) { - return await CommandLine.Parser.Default.ParseArguments(args) + return await CommandLine.Parser.Default.ParseArguments(args) .WithParsed(options => { logger = CreateLogger(options.Verbose); @@ -90,8 +90,13 @@ public class Program(ILogger logger) return 1; } return 0; - } - , errs => Task.FromResult(1) + }, + (CheckLinkOptions options) => + { + var command = new CheckLinkCommand(options, logger); + return command.Run(); + }, + errs => Task.FromResult(1) ); } diff --git a/source/SuCoS.csproj b/source/SuCoS.csproj index bf151db..ff63bde 100644 --- a/source/SuCoS.csproj +++ b/source/SuCoS.csproj @@ -6,6 +6,7 @@ enable enable true + true -- GitLab From f41779e3c285b3cd02d461bd57445a7d24ab0c02 Mon Sep 17 00:00:00 2001 From: Bruno Massa Date: Sat, 9 Mar 2024 21:14:05 -0500 Subject: [PATCH 2/3] feat: checklink ignore list argument --- source/CheckLinkCommand.cs | 7 ++++++- .../Models/CommandLineOptions/CheckLinkOptions.cs | 14 +++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/source/CheckLinkCommand.cs b/source/CheckLinkCommand.cs index 045e91c..6557912 100644 --- a/source/CheckLinkCommand.cs +++ b/source/CheckLinkCommand.cs @@ -30,7 +30,6 @@ public sealed partial class CheckLinkCommand(CheckLinkOptions settings, ILogger public async Task Run() { var directoryPath = Path.GetFullPath(settings.Source); - // var filter = "*.html"; if (!Directory.Exists(directoryPath)) { @@ -105,6 +104,12 @@ public sealed partial class CheckLinkCommand(CheckLinkOptions settings, ILogger continue; } checkedLinks.Add(link); + + if (settings.SkipLinks.Contains(link)) + { + continue; + } + LogInformation("{fileName}: {link} found", fileNameSanitized, link); var linkIsValid = false; diff --git a/source/Models/CommandLineOptions/CheckLinkOptions.cs b/source/Models/CommandLineOptions/CheckLinkOptions.cs index d1ef435..8dcfc1a 100644 --- a/source/Models/CommandLineOptions/CheckLinkOptions.cs +++ b/source/Models/CommandLineOptions/CheckLinkOptions.cs @@ -17,19 +17,19 @@ public class CheckLinkOptions /// /// The path of the source files. /// - [Value(0)] - public string Source { get; init; } = "./"; + [Value(0, Default = "./")] + public required string Source { get; init; } /// /// How verbose it must be. /// - [Option('f', "filters", Required = false, HelpText = "File name filters (default: '*.html')")] - public string Filters { get; init; } = "*.html"; - + [Option('f', "filters", Required = false, HelpText = "File name filters", Default = "*.html")] + public required string Filters { get; init; } + /// /// How verbose it must be. /// - [Option('f', "filters", Required = false, HelpText = "File name filters (default: '*.html')")] - public string Ignore { get; init; } = "*.html"; + [Option('s', "skip", Required = false, HelpText = "list of links to skip checking")] + public IEnumerable SkipLinks { get; set; } = new List(); } -- GitLab From fe884dab7690c7eb79c00c35d36daafe5ac1f1cc Mon Sep 17 00:00:00 2001 From: Bruno Massa Date: Tue, 2 Apr 2024 00:23:40 -0500 Subject: [PATCH 3/3] feat: check links url to use local files instead remote links --- source/BaseGeneratorCommand.cs | 2 +- source/CheckLinkCommand.cs | 33 ++++++++++++++++++- source/Helpers/SiteCacheManager.cs | 4 +-- source/Helpers/StopwatchReporter.cs | 4 +-- .../CommandLineOptions/CheckLinkOptions.cs | 19 +++++++---- source/Models/FrontMatter.cs | 2 +- source/Models/FrontMatterResources.cs | 2 +- source/Models/Page.cs | 14 ++++---- source/Models/Resource.cs | 2 +- source/Models/SiteSettings.cs | 2 +- source/Parser/YAMLParser.cs | 5 +-- 11 files changed, 63 insertions(+), 26 deletions(-) diff --git a/source/BaseGeneratorCommand.cs b/source/BaseGeneratorCommand.cs index 20579c7..d70757e 100644 --- a/source/BaseGeneratorCommand.cs +++ b/source/BaseGeneratorCommand.cs @@ -68,7 +68,7 @@ public abstract class BaseGeneratorCommand ArgumentNullException.ThrowIfNull(input); ArgumentNullException.ThrowIfNull(arguments); - List result = new(); + List result = []; var list = (input as ArrayValue)!.Values; var keys = arguments.At(0).ToStringValue().Split('.'); diff --git a/source/CheckLinkCommand.cs b/source/CheckLinkCommand.cs index 6557912..f978458 100644 --- a/source/CheckLinkCommand.cs +++ b/source/CheckLinkCommand.cs @@ -105,7 +105,12 @@ public sealed partial class CheckLinkCommand(CheckLinkOptions settings, ILogger } checkedLinks.Add(link); - if (settings.SkipLinks.Contains(link)) + if (settings.Ignore.Contains(link)) + { + continue; + } + + if (TryLocalFile(settings, directoryPath, fileNameSanitized, link)) { continue; } @@ -140,6 +145,32 @@ public sealed partial class CheckLinkCommand(CheckLinkOptions settings, ILogger return result; } + private bool TryLocalFile(CheckLinkOptions settings, string directoryPath, string fileNameSanitized, string link) + { + if (string.IsNullOrEmpty(settings.InternalURL) || !link.StartsWith(settings.InternalURL)) + { + return false; + } + + // Strip the InternalURL from the link + link = link[settings.InternalURL.Length..]; + + // Handle the link as a local file + var localFilePath = Path.Combine(directoryPath, link); + if (File.Exists(localFilePath)) + { + LogInformation("{fileName}: {link} is a local file", fileNameSanitized, link); + } + else + { + LogError("{fileName}: {link} is a local file but does not exist", fileNameSanitized, link); + failedLinks.Add(link); + } + checkedLinks.Add(link); + + return true; + } + private async Task CheckLink(string fileName, string link, HttpClient httpClient) { try diff --git a/source/Helpers/SiteCacheManager.cs b/source/Helpers/SiteCacheManager.cs index 7365db8..e722bd7 100644 --- a/source/Helpers/SiteCacheManager.cs +++ b/source/Helpers/SiteCacheManager.cs @@ -11,12 +11,12 @@ public class SiteCacheManager /// /// Cache for content templates. /// - public Dictionary<(string?, Kind?, string?), string> contentTemplateCache { get; } = new(); + public Dictionary<(string?, Kind?, string?), string> contentTemplateCache { get; } = []; /// /// Cache for base templates. /// - public Dictionary<(string?, Kind?, string?), string> baseTemplateCache { get; } = new(); + public Dictionary<(string?, Kind?, string?), string> baseTemplateCache { get; } = []; /// /// Cache for tag page. diff --git a/source/Helpers/StopwatchReporter.cs b/source/Helpers/StopwatchReporter.cs index c7d4458..34be0c0 100644 --- a/source/Helpers/StopwatchReporter.cs +++ b/source/Helpers/StopwatchReporter.cs @@ -21,8 +21,8 @@ public class StopwatchReporter public StopwatchReporter(ILogger logger) { this.logger = logger; - stopwatches = new Dictionary(); - itemCounts = new Dictionary(); + stopwatches = []; + itemCounts = []; } /// diff --git a/source/Models/CommandLineOptions/CheckLinkOptions.cs b/source/Models/CommandLineOptions/CheckLinkOptions.cs index 8dcfc1a..a99caec 100644 --- a/source/Models/CommandLineOptions/CheckLinkOptions.cs +++ b/source/Models/CommandLineOptions/CheckLinkOptions.cs @@ -3,9 +3,9 @@ using CommandLine; namespace SuCoS.Models.CommandLineOptions; /// -/// Command line options for the serve command. +/// Command line options for the checklinks command. /// -[Verb("checklinks", HelpText = "Starts the server")] +[Verb("checklinks", HelpText = "Checks links of a given site")] public class CheckLinkOptions { /// @@ -21,15 +21,20 @@ public class CheckLinkOptions public required string Source { get; init; } /// - /// How verbose it must be. + /// File names to be checked. /// [Option('f', "filters", Required = false, HelpText = "File name filters", Default = "*.html")] public required string Filters { get; init; } /// - /// How verbose it must be. + /// List of links to ignore checking. /// - [Option('s', "skip", Required = false, HelpText = "list of links to skip checking")] - public IEnumerable SkipLinks { get; set; } = new List(); -} + [Option('i', "ignore", Required = false, HelpText = "List of links to ignore checking")] + public IEnumerable Ignore { get; init; } = []; + /// + /// Site URL, so it can be checked as local path files. + /// + [Option('u', "url", Required = false, HelpText = "Site URL, so it can be checked as local path files.")] + public string? InternalURL { get; init; } +} diff --git a/source/Models/FrontMatter.cs b/source/Models/FrontMatter.cs index 11c5d94..f415158 100644 --- a/source/Models/FrontMatter.cs +++ b/source/Models/FrontMatter.cs @@ -81,7 +81,7 @@ public class FrontMatter : IFrontMatter public DateTime? GetPublishDate => PublishDate ?? Date; /// - public Dictionary Params { get; set; } = new(); + public Dictionary Params { get; set; } = []; #endregion IFrontMatter diff --git a/source/Models/FrontMatterResources.cs b/source/Models/FrontMatterResources.cs index 96833a7..6e63b9e 100644 --- a/source/Models/FrontMatterResources.cs +++ b/source/Models/FrontMatterResources.cs @@ -17,7 +17,7 @@ public class FrontMatterResources : IFrontMatterResources public string? Name { get; set; } /// - public Dictionary Params { get; set; } = new(); + public Dictionary Params { get; set; } = []; /// public Matcher? GlobMatcher { get; set; } diff --git a/source/Models/Page.cs b/source/Models/Page.cs index d64dd5c..ff2201d 100644 --- a/source/Models/Page.cs +++ b/source/Models/Page.cs @@ -113,7 +113,7 @@ public class Page : IPage /// Other content that mention this content. /// Used to create the tags list and Related Posts section. /// - public ConcurrentBag PagesReferences { get; } = new(); + public ConcurrentBag PagesReferences { get; } = []; /// public IPage? Parent { get; set; } @@ -132,7 +132,7 @@ public class Page : IPage /// /// A list of tags, if any. /// - public ConcurrentBag TagsReference { get; } = new(); + public ConcurrentBag TagsReference { get; } = []; /// /// Just a simple check if the current page is the home page @@ -192,7 +192,7 @@ public class Page : IPage return pagesCached; } - pagesCached = new(); + pagesCached = []; foreach (var permalink in PagesReferences) { var page = Site.OutputReferences[permalink] as IPage; @@ -354,7 +354,7 @@ endif // Create all the aliases if (Aliases is not null) { - AliasesProcessed ??= new(); + AliasesProcessed ??= []; foreach (var alias in Aliases) { AliasesProcessed.Add(CreatePermalink(alias)); @@ -402,12 +402,12 @@ endif foreach (var resourceFilename in resourceFiles) { - Resources ??= new(); + Resources ??= []; var filenameOriginal = Path.GetFileName(resourceFilename); var filename = filenameOriginal; var extention = Path.GetExtension(resourceFilename); var title = filename; - Dictionary resourceParams = new(); + Dictionary resourceParams = []; if (ResourceDefinitions is not null) { @@ -439,7 +439,7 @@ endif .SetValue("counter", counter); title = templateTitle.Render(context); } - resourceParams = resourceDefinition.Params ?? new(); + resourceParams = resourceDefinition.Params ?? []; } } } diff --git a/source/Models/Resource.cs b/source/Models/Resource.cs index e4cdb62..5082bd7 100644 --- a/source/Models/Resource.cs +++ b/source/Models/Resource.cs @@ -21,7 +21,7 @@ public class Resource : IResource public string? Permalink { get; set; } /// - public Dictionary Params { get; set; } = new(); + public Dictionary Params { get; set; } = []; /// /// Default constructor. diff --git a/source/Models/SiteSettings.cs b/source/Models/SiteSettings.cs index 60deca4..e2efd0c 100644 --- a/source/Models/SiteSettings.cs +++ b/source/Models/SiteSettings.cs @@ -33,7 +33,7 @@ public class SiteSettings : IParams #region IParams /// - public Dictionary Params { get; set; } = new(); + public Dictionary Params { get; set; } = []; #endregion IParams } \ No newline at end of file diff --git a/source/Parser/YAMLParser.cs b/source/Parser/YAMLParser.cs index f9b85d2..0f8e42d 100644 --- a/source/Parser/YAMLParser.cs +++ b/source/Parser/YAMLParser.cs @@ -1,5 +1,6 @@ using SuCoS.Helpers; using SuCoS.Models; +using System.Diagnostics.CodeAnalysis; using System.Text; using YamlDotNet.Serialization; @@ -98,13 +99,13 @@ public class YAMLParser : IFrontMatterParser } /// - /// Parse all YAML files for non-matching fields. + /// Parse all YAML files for non-matching fields. /// /// Site or Frontmatter object, that implements IParams /// The type (Site or Frontmatter) /// YAML content /// yamlObject already parsed if available - public void ParseParams(IParams settings, Type type, string yaml, object? yamlObject = null) + public void ParseParams(IParams settings, [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type type, string yaml, object? yamlObject = null) { ArgumentNullException.ThrowIfNull(settings); ArgumentNullException.ThrowIfNull(type); -- GitLab