Skip to content

Commit

Permalink
Merge pull request #43 from AlexCSDev/0_9_4_1
Browse files (browse the repository at this point in the history)
Fixed #40, #42
  • Loading branch information
Aleksey authored Jan 24, 2021
2 parents: 33790e1 + ed45668; commit ee4bcef
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 15 deletions.
21 changes: 10 additions & 11 deletions PatreonDownloader.Engine/DefaultPlugin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ public async Task<List<string>> ExtractSupportedUrls(string htmlContents)
{
foreach (var imgNode in imgNodeCollection)
{
if (imgNode.Attributes.Count == 0 || !imgNode.Attributes.Contains("src"))
continue;

string url = imgNode.Attributes["src"].Value;

if (IsAllowedUrl(url))
Expand All @@ -207,19 +210,15 @@ public async Task<List<string>> ExtractSupportedUrls(string htmlContents)
{
foreach (var linkNode in linkNodeCollection)
{
if (linkNode.Attributes["href"] != null)
{
var url = linkNode.Attributes["href"].Value;
if (linkNode.Attributes.Count == 0 || !linkNode.Attributes.Contains("href"))
continue;

if (IsAllowedUrl(url))
{
retList.Add(url);
_logger.Debug($"Parsed by default plugin (direct): {url}");
}
}
else
var url = linkNode.Attributes["href"].Value;

if (IsAllowedUrl(url))
{
_logger.Warn($"link with invalid href found, ignoring...");
retList.Add(url);
_logger.Debug($"Parsed by default plugin (direct): {url}");
}
}
}
Expand Down
9 changes: 5 additions & 4 deletions PatreonDownloader.MegaDownloader/Plugin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,14 @@ public async Task<List<string>> ExtractSupportedUrls(string htmlContents)
List<string> retList = new List<string>();
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(htmlContents);
string plainText = string.Join(" ", doc.DocumentNode.Descendants()
string parseText = string.Join(" ", doc.DocumentNode.Descendants()
.Where(n => !n.HasChildNodes && !string.IsNullOrWhiteSpace(n.InnerText))
.Select(n => n.InnerText));
.Select(n => n.InnerText)); //first get a copy of text without all html tags
parseText += doc.DocumentNode.InnerHtml; //now append a copy of this text with all html tags intact (otherwise we lose all <a href=... links)

MatchCollection matchesNewFormat = _newFormatRegex.Matches(plainText);
MatchCollection matchesNewFormat = _newFormatRegex.Matches(parseText);

MatchCollection matchesOldFormat = _oldFormatRegex.Matches(plainText);
MatchCollection matchesOldFormat = _oldFormatRegex.Matches(parseText);

_logger.Debug($"Found NEW:{matchesNewFormat.Count}|OLD:{matchesOldFormat.Count} possible mega links in description");

Expand Down

0 comments on commit ee4bcef

Please sign in to comment.