Skip to content

Commit

Permalink
Add New Filtering Methods - 2.17 Beta 1
Browse files Browse the repository at this point in the history
Adds new functions that can do multi-variable filtering to better detect more types of spam
  • Loading branch information
ThioJoe committed Sep 15, 2022
1 parent 814d656 commit 8d61194
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 14 deletions.
26 changes: 15 additions & 11 deletions Scripts/filter_variables.py

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions Scripts/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,9 @@ def check_against_filter(current, filtersDict, miscData, config, currentCommentD
languages = smartFilter['languages']
sensitive = smartFilter['sensitive']
rootDomainRegex = smartFilter['rootDomainRegex']
accompanyingLinkSpamDict = smartFilter['accompanyingLinkSpamDict']
comboDict = smartFilter['comboDict']

# Spam Lists
spamListCombinedRegex = smartFilter['spamListCombinedRegex']

Expand Down Expand Up @@ -889,6 +892,27 @@ def check_if_only_link(string):
else:
return False

def find_accompanying_link_spam(string):
    """Detect text that is only a video link accompanied by a known spam phrase.

    The first match of ``accompanyingLinkSpamDict['videoLinkRegex']`` is
    located in ``string``; every occurrence of that matched text is removed,
    along with newlines and each entry of ``accompanyingLinkSpamDict['notSpecial']``.
    What remains must be exactly equal to one of the phrases in
    ``accompanyingLinkSpamDict['accompanyingLinkSpamPhrasesList']`` after that
    phrase has been lower-cased and had its spaces removed.

    Note: ``string`` itself is not lower-cased here, so the caller is expected
    to pass already lower-cased text. Spaces in ``string`` are only removed if
    a space is one of the ``notSpecial`` entries.

    Args:
        string: The (normalized, lower-cased) comment text to inspect.

    Returns:
        bool: True if a link is present and the leftover text equals a known
        accompanying phrase, otherwise False.
    """
    linkResult = re.search(accompanyingLinkSpamDict['videoLinkRegex'], string)
    if not linkResult:
        return False

    phrasesList = accompanyingLinkSpamDict['accompanyingLinkSpamPhrasesList']
    notSpecialChars = accompanyingLinkSpamDict['notSpecial']

    # Strip the link text and newlines once, up front. Newline removal used to
    # happen inside the loop below, which meant it was skipped entirely when
    # 'notSpecial' was empty and was needlessly repeated otherwise.
    nonLinkString = string.replace(linkResult.group(0), '').replace('\n', '')
    for char in notSpecialChars:
        nonLinkString = nonLinkString.replace(char, '')

    # Phrases are compared with spaces removed, so "watch this" == "watchthis".
    return any(phrase.lower().replace(' ', '') == nonLinkString for phrase in phrasesList)

def multiVarDetect(text, username):
    """Check whether a username contains every word of any configured combination.

    ``comboDict['multiUsernameAllList']`` is read as an iterable of word lists.
    A username matches when all words of at least one list occur in it as
    substrings. Matching is case-sensitive here, so callers should pass
    lower-cased input if the configured words are lower case.

    Args:
        text: The comment text. Currently unused; kept for interface
            compatibility with existing callers.
        username: The author's channel name to test.

    Returns:
        bool: True if some non-empty combination is fully contained in
        ``username``, otherwise False.
    """
    multiUsernameAllList = comboDict['multiUsernameAllList']
    # An empty combination is skipped: all() over no words is vacuously True
    # and would otherwise flag every single username as spam.
    return any(
        bool(checkList) and all(word in username for word in checkList)
        for checkList in multiUsernameAllList
    )

# ------------------------------------------------------------------------

# Normalize usernames and text, remove multiple whitespace and invisible chars
Expand Down Expand Up @@ -946,6 +970,10 @@ def check_if_only_link(string):
add_spam(current, config, miscData, currentCommentDict, videoID)
elif config['detect_link_spam'] and check_if_only_link(commentTextNormalized.strip()):
add_spam(current, config, miscData, currentCommentDict, videoID)
elif find_accompanying_link_spam(commentTextNormalized.lower()):
add_spam(current, config, miscData, currentCommentDict, videoID)
elif multiVarDetect(commentTextNormalized.lower(), authorChannelName.lower()):
add_spam(current, config, miscData, currentCommentDict, videoID)
elif sensitive and re.search(smartFilter['usernameConfuseRegex'], authorChannelName):
add_spam(current, config, miscData, currentCommentDict, videoID)
elif not sensitive and (findObf(smartFilter['usernameConfuseRegex'], miscData.channelOwnerName, authorChannelName) or authorChannelName == miscData.channelOwnerName):
Expand Down
10 changes: 9 additions & 1 deletion Scripts/prepare_modes.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,12 @@ def prepare_filter_mode_smart(scanMode, config, miscData, sensitive=False):
'cashRegex': cashRegex,
}

# Data used to detect comments that consist of a video link plus a stock phrase:
# the list of phrases, the characters to strip before comparing, and a
# pre-compiled pattern that matches YouTube video links.
accompanyingLinkSpamDict = {
    'accompanyingLinkSpamPhrasesList': filter.accompanyingLinkSpamPhrasesList,
    'notSpecial': filter.notSpecial,
    # The dot in "youtu.be" is escaped so it matches a literal '.' only;
    # unescaped it matched any character (e.g. "youtuxbe").
    'videoLinkRegex': re.compile(r"((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu\.be))(\/(?:[\w\-]+\?v=|embed\/|v\/)?)([\w\-]+)(\S+)?"),
}

print(" Loading Filters [====== ]", end="\r")

# Compile regex with upper case, otherwise many false positive character matches
Expand Down Expand Up @@ -438,7 +444,9 @@ def prepare_filter_mode_smart(scanMode, config, miscData, sensitive=False):
'sensitiveRootDomainRegex': sensitiveRootDomainRegex,
'unicodeCategoriesStrip': unicodeCategoriesStrip,
'spamListCombinedRegex': spamListCombinedRegex,
'threadFiltersDict': threadFiltersDict
'threadFiltersDict': threadFiltersDict,
'accompanyingLinkSpamDict': accompanyingLinkSpamDict,
'comboDict': filter.comboDict
}
print(" ") # Erases line that says "loading filters"

Expand Down
5 changes: 3 additions & 2 deletions YTSpammerPurge.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
### IMPORTANT: I OFFER NO WARRANTY OR GUARANTEE FOR THIS SCRIPT. USE AT YOUR OWN RISK.
### I tested it on my own and implemented some failsafes as best as I could,
### but there could always be some kind of bug. You should inspect the code yourself.
version = "2.17.0-Dev2"
configVersion = 31
version = "2.17.0-Beta1"
configVersion = 32
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
print("Importing Script Modules...")
# Import other module files
Expand Down Expand Up @@ -430,6 +430,7 @@ def primaryInstance(miscData):
# User selects scanning mode, while Loop to get scanning mode, so if invalid input, it will keep asking until valid input
print("\n{:<59}{:<18}{:>7}".format("> At any prompt, enter 'X' to return here", updateStringLabel, updateString))
print("> Enter 'Q' now to quit")
print(f"{F.LIGHTYELLOW_EX}NOTE: This beta version uses a new method for checking updates. It is possible it will not work properly, so you should periodically check for new versions on the GitHub page yourself until the stable release.{S.R}")

print(f"\n\n-------------------------------- {F.YELLOW}Scanning Options{S.R} --------------------------------")
print(f" 1. Scan {F.LIGHTCYAN_EX}specific videos{S.R}")
Expand Down

0 comments on commit 8d61194

Please sign in to comment.