using System.Collections.Concurrent;
using System.Globalization;
using System.Net;
using System.Text.RegularExpressions;
using Serilog;
using SuCoS.Models.CommandLineOptions;

namespace SuCoS;

/// <summary>
/// Check links of a given site: scans the files under the source directory
/// for http(s) URLs and verifies that each distinct URL answers with 200 OK
/// (or, for URLs under the configured internal URL, that the matching local
/// file exists).
/// </summary>
public sealed partial class CheckLinkCommand(CheckLinkOptions settings, ILogger logger)
{
    [GeneratedRegex(@"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9@:%_\+.~#?&\/=]*)")]
    private static partial Regex MyRegex();

    private static readonly Regex linkRegex = MyRegex();

    /// <summary>Maximum number of HTTP attempts per link.</summary>
    private const int retriesCount = 3;

    /// <summary>Pause between two attempts on the same link.</summary>
    private readonly TimeSpan retryInterval = TimeSpan.FromSeconds(1);

    // Used as a concurrent set: TryAdd is an atomic "first worker wins" test,
    // which a Contains-then-Add pair on a ConcurrentBag cannot provide.
    private readonly ConcurrentDictionary<string, byte> checkedLinks = new(StringComparer.Ordinal);

    // link -> files that reference it. The List values are not thread-safe,
    // so every mutation happens under a lock on the list itself.
    private readonly ConcurrentDictionary<string, List<string>> linkToFilesMap = new(StringComparer.Ordinal);

    // Links (exactly as found in the files) that could not be validated.
    private readonly ConcurrentBag<string> failedLinks = [];

    /// <summary>
    /// Run the app.
    /// </summary>
    /// <returns>
    /// 0 when every checked link is valid; 1 when the source directory does
    /// not exist or at least one link failed.
    /// </returns>
    public async Task<int> Run()
    {
        var directoryPath = Path.GetFullPath(settings.Source);

        if (!Directory.Exists(directoryPath))
        {
            logger.Fatal("Directory '{directoryPath}' doesn't exist.", directoryPath);
            return 1;
        }

        // One client for the whole run, released when the run ends.
        using var httpClient = GetHttpClient();

        var files = GetFiles(directoryPath, settings.Filters);
        var linksAreValid = await CheckLinks(directoryPath, files, httpClient);

        if (!linksAreValid)
        {
            logger.Error("There are failed checks.");

            // Snapshot into a set so the lookup below is O(1) per link.
            var failed = new HashSet<string>(failedLinks, StringComparer.Ordinal);
            foreach (var (link, linkfiles) in linkToFilesMap)
            {
                if (failed.Contains(link))
                {
                    linkfiles.Sort();
                    logger.Error("Link {link} failed and are in these files:\n{files}", link, string.Join("\n", linkfiles));
                }
            }
            return 1;
        }
        logger.Information("Done");
        return 0;
    }

    /// <summary>
    /// Creates the HTTP client used for all remote checks.
    /// </summary>
    private static HttpClient GetHttpClient()
    {
        var client = new HttpClient();
        client.DefaultRequestHeaders.Add("User-Agent", "C# App");
        return client;
    }

    /// <summary>
    /// Scans all <paramref name="files"/> in parallel and validates every
    /// distinct link exactly once.
    /// </summary>
    /// <param name="directoryPath">Absolute root directory of the scan.</param>
    /// <param name="files">Absolute paths of the files to scan.</param>
    /// <param name="httpClient">Client used for remote links.</param>
    /// <returns>true when no link (remote or local) failed.</returns>
    private async Task<bool> CheckLinks(string directoryPath, string[] files, HttpClient httpClient)
    {
        // Materialize once: settings.Ignore is a lazy IEnumerable that would
        // otherwise be re-enumerated for every single link.
        var ignoredLinks = new HashSet<string>(settings.Ignore, StringComparer.Ordinal);

        var options = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount };
        await Parallel.ForEachAsync(files, options, async (filePath, token) =>
        {
            var fileNameSanitized = filePath[directoryPath.Length..].Trim('/', '\\');
            var fileText = await File.ReadAllTextAsync(filePath, token);
            var matches = linkRegex.Matches(fileText);
            if (matches.Count == 0)
            {
                LogInformation("{fileName}: no links found", fileNameSanitized);
                return;
            }

            LogInformation("{fileName}: {matches} link found", fileNameSanitized, matches.Count.ToString(CultureInfo.InvariantCulture));
            foreach (Match match in matches)
            {
                var link = match.Value.Trim('.');

                RegisterLinkOccurrence(link, fileNameSanitized);

                // Only the first worker that sees a link validates it.
                if (!checkedLinks.TryAdd(link, 0))
                {
                    continue;
                }

                if (ignoredLinks.Contains(link))
                {
                    continue;
                }

                if (TryLocalFile(directoryPath, fileNameSanitized, link))
                {
                    continue;
                }

                LogInformation("{fileName}: {link} found", fileNameSanitized, link);

                if (await CheckLinkWithRetries(fileNameSanitized, link, httpClient, token))
                {
                    LogInformation("{fileName}: {link} OK", fileNameSanitized, link);
                }
                else
                {
                    LogError("{fileName}: {link} FAIL", fileNameSanitized, link);
                    failedLinks.Add(link);
                }
            }
        });

        // Derive the verdict from the thread-safe failure bag instead of a
        // captured bool mutated by several workers; this also accounts for
        // local files that were found missing.
        return failedLinks.IsEmpty;
    }

    /// <summary>
    /// Records that <paramref name="fileName"/> references <paramref name="link"/>.
    /// </summary>
    private void RegisterLinkOccurrence(string link, string fileName)
    {
        var referencingFiles = linkToFilesMap.GetOrAdd(link, static _ => new List<string>());
        lock (referencingFiles)
        {
            if (!referencingFiles.Contains(fileName))
            {
                referencingFiles.Add(fileName);
            }
        }
    }

    /// <summary>
    /// Requests <paramref name="link"/> up to <see cref="retriesCount"/> times,
    /// waiting <see cref="retryInterval"/> between attempts.
    /// </summary>
    /// <returns>true as soon as one attempt succeeds; false otherwise.</returns>
    private async Task<bool> CheckLinkWithRetries(string fileName, string link, HttpClient httpClient, CancellationToken cancellationToken)
    {
        for (var attempt = 1; attempt <= retriesCount; attempt++)
        {
            if (await CheckLink(fileName, link, httpClient, cancellationToken))
            {
                return true;
            }

            if (attempt < retriesCount)
            {
                LogInformation("{fileName}: {link} retrying...", fileName, link);
                // Asynchronous wait: does not block a thread-pool thread.
                await Task.Delay(retryInterval, cancellationToken);
            }
        }

        return false;
    }

    /// <summary>
    /// Handles links that point at the site itself by checking the file on
    /// disk instead of issuing an HTTP request.
    /// </summary>
    /// <param name="directoryPath">Absolute root directory of the site files.</param>
    /// <param name="fileNameSanitized">File in which the link was found (for logging).</param>
    /// <param name="link">The link exactly as found in the file.</param>
    /// <returns>
    /// true when the link starts with the configured internal URL and was
    /// therefore handled here (whether or not the file exists); false when
    /// the link must be checked remotely.
    /// </returns>
    private bool TryLocalFile(string directoryPath, string fileNameSanitized, string link)
    {
        var internalUrl = settings.InternalURL;
        if (string.IsNullOrEmpty(internalUrl) || !link.StartsWith(internalUrl, StringComparison.Ordinal))
        {
            return false;
        }

        // Strip the InternalURL from the link. The leading '/' must go too:
        // Path.Combine discards its first argument when the second is rooted.
        var relativePath = link[internalUrl.Length..].TrimStart('/');

        // Handle the link as a local file
        var localFilePath = Path.Combine(directoryPath, relativePath);
        if (File.Exists(localFilePath))
        {
            LogInformation("{fileName}: {link} is a local file", fileNameSanitized, link);
        }
        else
        {
            LogError("{fileName}: {link} is a local file but does not exist", fileNameSanitized, link);
            // Record the link as found in the file, because that is the key
            // the failure summary in Run looks up.
            failedLinks.Add(link);
        }

        return true;
    }

    /// <summary>
    /// Performs a single GET on <paramref name="link"/>.
    /// </summary>
    /// <returns>
    /// true only for a 200 OK answer; false for any other status or when the
    /// request throws (invalid URI, network error, client timeout).
    /// </returns>
    private async Task<bool> CheckLink(string fileName, string link, HttpClient httpClient, CancellationToken cancellationToken = default)
    {
        try
        {
            // Only the status line is needed, so do not buffer the body.
            using var response = await httpClient.GetAsync(link, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
            if (response.StatusCode == HttpStatusCode.OK)
            {
                return true;
            }

            LogError("{fileName}: {link} failed with: {response}", fileName, link, response.StatusCode);
            return false;
        }
        // A client-side timeout also surfaces as a cancellation exception and
        // counts as a failed attempt; only a cancellation requested through
        // the token is allowed to propagate.
        catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
        {
            // Not added to failedLinks here: a later retry may still succeed,
            // and the caller records the link once all attempts are spent.
            LogError("{fileName}: {link} failed with: {exMessage}", fileName, link, ex.Message);
            return false;
        }
    }

    /// <summary>
    /// Lists, recursively, the files under <paramref name="directoryPath"/>
    /// whose name matches <paramref name="filter"/>.
    /// </summary>
    private string[] GetFiles(string directoryPath, string filter)
    {
        logger.Information("Searching files in the directory '{directoryPath}' by '{filter}' filter...", directoryPath, filter);

        var files = Directory.GetFiles(directoryPath, filter, SearchOption.AllDirectories);

        logger.Information("{filesLength} files found", files.Length);
        return files;
    }

    /// <summary>
    /// Writes an information event, but only in verbose mode.
    /// </summary>
    private void LogInformation(string message, params object?[] propertyValues)
    {
        if (settings.Verbose)
        {
            logger.Information(message, propertyValues);
        }
    }

    /// <summary>
    /// Writes an error event, but only in verbose mode; failed links are
    /// always summarized by <see cref="Run"/> regardless of verbosity.
    /// </summary>
    private void LogError(string message, params object?[] propertyValues)
    {
        if (settings.Verbose)
        {
            logger.Error(message, propertyValues);
        }
    }
}
/// <summary>
/// Command line options for the checklinks command.
/// </summary>
[Verb("checklinks", HelpText = "Checks links of a given site")]
public class CheckLinkOptions
{
    /// <summary>
    /// How verbose it must be.
    /// </summary>
    [Option('v', "verbose", Required = false, HelpText = "How verbose it must be")]
    public bool Verbose { get; init; }

    /// <summary>
    /// The path of the source files.
    /// </summary>
    [Value(0, Default = "./")]
    public required string Source { get; init; }

    /// <summary>
    /// File names to be checked.
    /// </summary>
    [Option('f', "filters", Required = false, HelpText = "File name filters", Default = "*.html")]
    public required string Filters { get; init; }

    /// <summary>
    /// List of links to ignore checking.
    /// </summary>
    // The element type is required: a bare IEnumerable cannot be the target
    // of the empty collection expression below.
    [Option('i', "ignore", Required = false, HelpText = "List of links to ignore checking")]
    public IEnumerable<string> Ignore { get; init; } = [];

    /// <summary>
    /// Site URL, so it can be checked as local path files.
    /// </summary>
    [Option('u', "url", Required = false, HelpText = "Site URL, so it can be checked as local path files.")]
    public string? InternalURL { get; init; }
}
GlobMatcher { get; set; } diff --git a/source/Models/Page.cs b/source/Models/Page.cs index d64dd5c3bc6c12a89120f79cbef2c5b993a0fe23..ff2201dd34a31805c4f0aa1a949612ee9ac7d24e 100644 --- a/source/Models/Page.cs +++ b/source/Models/Page.cs @@ -113,7 +113,7 @@ public class Page : IPage /// Other content that mention this content. /// Used to create the tags list and Related Posts section. /// - public ConcurrentBag PagesReferences { get; } = new(); + public ConcurrentBag PagesReferences { get; } = []; /// public IPage? Parent { get; set; } @@ -132,7 +132,7 @@ public class Page : IPage /// /// A list of tags, if any. /// - public ConcurrentBag TagsReference { get; } = new(); + public ConcurrentBag TagsReference { get; } = []; /// /// Just a simple check if the current page is the home page @@ -192,7 +192,7 @@ public class Page : IPage return pagesCached; } - pagesCached = new(); + pagesCached = []; foreach (var permalink in PagesReferences) { var page = Site.OutputReferences[permalink] as IPage; @@ -354,7 +354,7 @@ endif // Create all the aliases if (Aliases is not null) { - AliasesProcessed ??= new(); + AliasesProcessed ??= []; foreach (var alias in Aliases) { AliasesProcessed.Add(CreatePermalink(alias)); @@ -402,12 +402,12 @@ endif foreach (var resourceFilename in resourceFiles) { - Resources ??= new(); + Resources ??= []; var filenameOriginal = Path.GetFileName(resourceFilename); var filename = filenameOriginal; var extention = Path.GetExtension(resourceFilename); var title = filename; - Dictionary resourceParams = new(); + Dictionary resourceParams = []; if (ResourceDefinitions is not null) { @@ -439,7 +439,7 @@ endif .SetValue("counter", counter); title = templateTitle.Render(context); } - resourceParams = resourceDefinition.Params ?? new(); + resourceParams = resourceDefinition.Params ?? 
[]; } } } diff --git a/source/Models/Resource.cs b/source/Models/Resource.cs index e4cdb621b175a771d7680a2b791cb30dbc00700d..5082bd7569eeaa5d9c98e59554e4a18a09d68572 100644 --- a/source/Models/Resource.cs +++ b/source/Models/Resource.cs @@ -21,7 +21,7 @@ public class Resource : IResource public string? Permalink { get; set; } /// - public Dictionary Params { get; set; } = new(); + public Dictionary Params { get; set; } = []; /// /// Default constructor. diff --git a/source/Models/SiteSettings.cs b/source/Models/SiteSettings.cs index 60deca497b3a3a09ce8e10dd6bf06cab608a0c83..e2efd0cc5a045133d99269ee52eb88ea13240949 100644 --- a/source/Models/SiteSettings.cs +++ b/source/Models/SiteSettings.cs @@ -33,7 +33,7 @@ public class SiteSettings : IParams #region IParams /// - public Dictionary Params { get; set; } = new(); + public Dictionary Params { get; set; } = []; #endregion IParams } \ No newline at end of file diff --git a/source/Parser/YAMLParser.cs b/source/Parser/YAMLParser.cs index f9b85d2f5930dfcd428a7359cd2245e6c71e2457..0f8e42dc3f1a7348a22d394ac445678fd7a1a6a4 100644 --- a/source/Parser/YAMLParser.cs +++ b/source/Parser/YAMLParser.cs @@ -1,5 +1,6 @@ using SuCoS.Helpers; using SuCoS.Models; +using System.Diagnostics.CodeAnalysis; using System.Text; using YamlDotNet.Serialization; @@ -98,13 +99,13 @@ public class YAMLParser : IFrontMatterParser } /// - /// Parse all YAML files for non-matching fields. + /// Parse all YAML files for non-matching fields. /// /// Site or Frontmatter object, that implements IParams /// The type (Site or Frontmatter) /// YAML content /// yamlObject already parsed if available - public void ParseParams(IParams settings, Type type, string yaml, object? yamlObject = null) + public void ParseParams(IParams settings, [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type type, string yaml, object? 
yamlObject = null) { ArgumentNullException.ThrowIfNull(settings); ArgumentNullException.ThrowIfNull(type); diff --git a/source/Program.cs b/source/Program.cs index 1ef8b3720db0065ac7815f3318d5ae52257953d5..70bc94a25c1b578d1d6ccad4fa2b22738b0f9124 100644 --- a/source/Program.cs +++ b/source/Program.cs @@ -42,7 +42,7 @@ public class Program(ILogger logger) /// public async Task RunCommandLine(string[] args) { - return await CommandLine.Parser.Default.ParseArguments(args) + return await CommandLine.Parser.Default.ParseArguments(args) .WithParsed(options => { logger = CreateLogger(options.Verbose); @@ -86,8 +86,13 @@ public class Program(ILogger logger) return 1; } return 0; - } - , errs => Task.FromResult(1) + }, + (CheckLinkOptions options) => + { + var command = new CheckLinkCommand(options, logger); + return command.Run(); + }, + errs => Task.FromResult(1) ); } diff --git a/source/SuCoS.csproj b/source/SuCoS.csproj index bf151db29be23f6d19fc808da431a1e0aa70de40..ff63bde3691ebe2b8b31905130187818571adfe3 100644 --- a/source/SuCoS.csproj +++ b/source/SuCoS.csproj @@ -6,6 +6,7 @@ enable enable true + true