Skip to content

Commit

Permalink
Redesign FilterTemplateOutputHandler to output optional regex
Browse files Browse the repository at this point in the history
Signed-off-by: Gary O'Neall <[email protected]>
  • Loading branch information
goneall committed Dec 8, 2023
1 parent 18a80a0 commit 5aad1e0
Show file tree
Hide file tree
Showing 7 changed files with 766 additions and 16 deletions.
339 changes: 339 additions & 0 deletions TestFiles/GPL-2.0-NL.txt

Large diffs are not rendered by default.

127 changes: 127 additions & 0 deletions TestFiles/GPL-2.0-only.template.txt

Large diffs are not rendered by default.

33 changes: 33 additions & 0 deletions TestFiles/GPL-optional-template.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
THE POSSIBILITY OF SUCH DAMAGES.<<beginOptional>> END OF TERMS AND CONDITIONS

How to Apply These Terms to Your New Programs

If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.

To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.

<<beginOptional>><<<endOptional>>one line to give the program's name and <<var;name="ideaArticle";original="an";match="a brief|an">> idea of what it does.<<beginOptional>>><<endOptional>>

Copyright (C)<<beginOptional>><<<endOptional>> <<var;name="templateYear";original="yyyy";match="yyyy|year">><<beginOptional>>> <<endOptional>><<beginOptional>> <<<endOptional>>name of author<<beginOptional>>><<endOptional>>

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301<<beginOptional>>, <<endOptional>> USA.

Also add information on how to contact you by electronic and paper mail.

If the program is interactive, make it output a short notice like this when it starts in an interactive mode:

Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program.

You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names:

Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker.

<<beginOptional>><<<endOptional>>signature of Ty Coon<<beginOptional>> ><<endOptional>>, 1 April 1989 Ty Coon, President of Vice

<<endOptional>>
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
package org.spdx.utility.compare;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.spdx.licenseTemplate.ILicenseTemplateOutputHandler;
import org.spdx.licenseTemplate.LicenseTemplateRule;
Expand All @@ -35,34 +39,53 @@ public enum VarTextHandling {
REGEX, // Include the regex itself included by the REGEX_ESCAPE strings
}

/**
 * Options for how text enclosed by optional rules is treated when filtering a template.
 */
public enum OptionalTextHandling {
    /** Omit the optional text */
    OMIT,
    /** Retain the optional text */
    ORIGINAL,
    /** Create a regex for the optional text with the REGEX_ESCAPE string tokenizing the words */
    REGEX_USING_TOKENS
}

// how text from "var" rules is rendered in the filtered output
private VarTextHandling varTextHandling;
// how text enclosed by optional rules is rendered in the filtered output
private OptionalTextHandling optionalTextHandling;
// completed segments of filtered text, in the order they were flushed from currentString
private List<String> filteredText = new ArrayList<>();
// segment currently being built; its content is moved into filteredText when flushed
StringBuilder currentString = new StringBuilder();
private int optionalDepth = 0; // nesting depth of optional rules; 0 means not inside any optional text
private Map<Integer, List<String>> optionalTokens = new HashMap<>(); // map of optional depth to the list of tokens collected for the optional text at that depth

/**
* Legacy constructor that takes a boolean in place of a VarTextHandling value.
* Optional text is omitted.
* @param includeVarText if true, include the default variable text (VarTextHandling.ORIGINAL);
*        if false, leave it out (VarTextHandling.OMIT)
* @deprecated use the constructor that takes a VarTextHandling value instead
*/
@Deprecated
public FilterTemplateOutputHandler(boolean includeVarText) {
this(includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT);
this(includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT, OptionalTextHandling.OMIT);
}


/**
 * Creates a handler that omits optional text and treats "var" text as requested.
 *
 * @param varTextHandling include original, exclude, or include the regex (enclosed with "~~~") for "var" text
 */
public FilterTemplateOutputHandler(VarTextHandling varTextHandling) {
    // optional text handling defaults to OMIT for this overload
    this(varTextHandling, OptionalTextHandling.OMIT);
}

/**
 * Creates a handler with explicit choices for both "var" text and optional text.
 *
 * @param varTextHandling include original, exclude, or include the regex (enclosed with "~~~") for "var" text
 * @param optionalTextHandling include optional text, exclude, or include a regex for the optional text
 */
public FilterTemplateOutputHandler(VarTextHandling varTextHandling, OptionalTextHandling optionalTextHandling) {
    this.optionalTextHandling = optionalTextHandling;
    this.varTextHandling = varTextHandling;
}

/* (non-Javadoc)
* @see org.spdx.licenseTemplate.ILicenseTemplateOutputHandler#text(java.lang.String)
*/
@Override
public void text(String text) {
if (optionalDepth <= 0) {
if (optionalDepth <= 0 || OptionalTextHandling.ORIGINAL.equals(optionalTextHandling)) {
// not inside optional text, or optional text is retained as-is: copy the text straight into the current segment
currentString.append(text);
} else if (OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) {
// inside optional text in regex mode: tokenize the text and queue the tokens under the current depth;
// the queued tokens are turned into a regex later by toTokenRegex.
// The map passed to tokenizeLicenseText is a fresh one that is not kept afterwards.
optionalTokens.get(optionalDepth).addAll(Arrays.asList(
LicenseCompareHelper.tokenizeLicenseText(text, new HashMap<Integer, LineColumn>())));
}
// in any other case the text is not recorded
}

Expand All @@ -73,10 +96,16 @@ public void text(String text) {
public void variableRule(LicenseTemplateRule rule) {
if (VarTextHandling.REGEX.equals(varTextHandling) && optionalDepth <= 0) {
currentString.append(REGEX_ESCAPE);
currentString.append('(');
currentString.append(rule.getMatch());
currentString.append(')');
currentString.append(REGEX_ESCAPE);
} else if (VarTextHandling.ORIGINAL.equals(varTextHandling) && optionalDepth <= 0) {
currentString.append(rule.getOriginal());
} else if (optionalDepth > 0 && OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) {
currentString.append('(');
currentString.append(rule.getMatch());
currentString.append(')');
} else {
if (currentString.length() > 0) {
filteredText.add(currentString.toString());
Expand All @@ -90,19 +119,63 @@ public void variableRule(LicenseTemplateRule rule) {
*/
@Override
public void beginOptional(LicenseTemplateRule rule) {
    final boolean regexMode = optionalTextHandling == OptionalTextHandling.REGEX_USING_TOKENS;
    final boolean nested = optionalDepth != 0;

    if (regexMode && nested) {
        // already inside an optional group: emit the tokens queued so far at this depth
        // before the nested group opens, then start collecting afresh
        List<String> pending = optionalTokens.get(optionalDepth);
        currentString.append(toTokenRegex(pending));
        pending.clear();
    } else if (currentString.length() > 0) {
        // close off the text accumulated before the optional section as its own segment
        filteredText.add(currentString.toString());
        currentString.setLength(0);
    }

    if (regexMode) {
        if (!nested) {
            // the outermost optional group starts a REGEX_ESCAPE-delimited section
            currentString.append(REGEX_ESCAPE);
        }
        currentString.append('(');
    }

    // enter the new nesting level with an empty token list
    optionalDepth++;
    optionalTokens.put(optionalDepth, new ArrayList<>());
}

/**
 * Builds a regular expression fragment that matches the given tokens in order.
 * Each token is trimmed, replaced by its normalized form when
 * LicenseCompareHelper.NORMALIZE_TOKENS has an entry for its lower-case form,
 * quoted so it matches literally, and followed by optional whitespace.
 *
 * @param tokens list of tokens
 * @return regular expression with quoted tokens; empty string when the list is empty
 */
private String toTokenRegex(List<String> tokens) {
    StringBuilder sb = new StringBuilder();
    // Iterate the argument itself rather than re-reading optionalTokens.get(optionalDepth),
    // so the result depends only on what the caller passed in
    for (String token : tokens) {
        String literal = token.trim();
        String key = literal.toLowerCase();
        if (LicenseCompareHelper.NORMALIZE_TOKENS.containsKey(key)) {
            literal = LicenseCompareHelper.NORMALIZE_TOKENS.get(key);
        }
        sb.append(Pattern.quote(literal));
        sb.append("\\s*");
    }
    return sb.toString();
}


/* (non-Javadoc)
* @see org.spdx.licenseTemplate.ILicenseTemplateOutputHandler#endOptional(org.spdx.licenseTemplate.LicenseTemplateRule)
*/
@Override
public void endOptional(LicenseTemplateRule rule) {
optionalDepth--;
if (optionalDepth == 0 && currentString.length() > 0) {
if (OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) {
// emit the tokens still queued at this depth, then close the group and mark it optional
currentString.append(toTokenRegex(optionalTokens.get(optionalDepth)));
currentString.append(")?");
if (optionalDepth == 1) {
// closing the outermost optional group: end the REGEX_ESCAPE-delimited section and flush it as a segment
currentString.append(REGEX_ESCAPE);
filteredText.add(currentString.toString());
currentString.setLength(0);
}
} else if (currentString.length() > 0) {
// flush the accumulated text as its own segment
filteredText.add(currentString.toString());
currentString.setLength(0);
}
// drop the token list for the level being closed before leaving it
optionalTokens.remove(optionalDepth);
optionalDepth--;
}

/* (non-Javadoc)
Expand Down
39 changes: 28 additions & 11 deletions src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import org.spdx.licenseTemplate.LicenseTemplateRuleException;
import org.spdx.licenseTemplate.SpdxLicenseTemplateHelper;
import org.spdx.utility.compare.CompareTemplateOutputHandler.DifferenceDescription;
import org.spdx.utility.compare.FilterTemplateOutputHandler.OptionalTextHandling;
import org.spdx.utility.compare.FilterTemplateOutputHandler.VarTextHandling;

/**
Expand Down Expand Up @@ -646,18 +647,33 @@ private static boolean isLicenseSetsEqual(LicenseSet license1, LicenseSet licens
@Deprecated
public static List<String> getNonOptionalLicenseText(String licenseTemplate, boolean includeVarText) throws SpdxCompareException {
return getNonOptionalLicenseText(licenseTemplate,
includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT);
includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT,
OptionalTextHandling.OMIT);
}

/**
* Get the text of a license minus any optional text - note: this include the default variable text
* Get the text of a license minus any optional text
* @param licenseTemplate license template containing optional and var tags
* @param varTextHandling include original, exclude, or include the regex (enclosed with "~~~") for "var" text
* @return list of strings for all non-optional license text.
* @throws SpdxCompareException
*/
public static List<String> getNonOptionalLicenseText(String licenseTemplate, VarTextHandling varTextHandling) throws SpdxCompareException {
FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(varTextHandling);
public static List<String> getNonOptionalLicenseText(String licenseTemplate,
VarTextHandling varTextHandling) throws SpdxCompareException {
return getNonOptionalLicenseText(licenseTemplate, varTextHandling, OptionalTextHandling.OMIT);
}

/**
* Get the text of a license converting variable and optional text according to the options
* @param licenseTemplate license template containing optional and var tags
* @param varTextHandling include original, exclude, or include the regex (enclosed with "~~~") for "var" text
* @param optionalTextHandling include optional text, exclude, or include a regex for the optional text
* @return list of strings for all non-optional license text.
* @throws SpdxCompareException
*/
public static List<String> getNonOptionalLicenseText(String licenseTemplate,
VarTextHandling varTextHandling, OptionalTextHandling optionalTextHandling) throws SpdxCompareException {
FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(varTextHandling, optionalTextHandling);
try {
SpdxLicenseTemplateHelper.parseTemplate(licenseTemplate, filteredOutput);
} catch (LicenseTemplateRuleException e) {
Expand Down Expand Up @@ -686,9 +702,9 @@ public static Pair<Pattern, Pattern> nonOptionalTextToPatterns(List<String> nonO
String lastRegex = "";
while (startWordCount < numberOfWords && startTextIndex < nonOptionalText.size()) {
String line = nonOptionalText.get(startTextIndex++);
if (startPatternBuilder.length() > 0 && line.trim().length() > 0 && !startPatternBuilder.toString().endsWith("}")) {
startPatternBuilder.append(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign
}
// if (startPatternBuilder.length() > 0 && line.trim().length() > 0 && !startPatternBuilder.toString().endsWith("}")) {
// startPatternBuilder.append(".{0").append(regexLimit); //TODO: Replace this with the optional text match itself - requires redesign
// }
String[] regexSplits = line.trim().split(FilterTemplateOutputHandler.REGEX_ESCAPE);
boolean inRegex = false; // if it starts with a regex, it will start with a blank line
for (String regexSplit:regexSplits) {
Expand Down Expand Up @@ -753,9 +769,9 @@ public static Pair<Pattern, Pattern> nonOptionalTextToPatterns(List<String> nonO
(endTextIndex == lastProcessedStartLine && (numberOfWords - endWordCount) < (nonOptionalText.get(endTextIndex).length() - wordsInLastLine)))) { // Check to make sure we're not overlapping the start words
List<String> nonEmptyTokens = new ArrayList<>();
String line = nonOptionalText.get(endTextIndex);
if (endTextReversePattern.size() > 0 && line.trim().length() > 0 && !endTextReversePattern.get(endTextReversePattern.size()-1).endsWith("}")) {
endTextReversePattern.add(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign
}
// if (endTextReversePattern.size() > 0 && line.trim().length() > 0 && !endTextReversePattern.get(endTextReversePattern.size()-1).endsWith("}")) {
// endTextReversePattern.add(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign
// }
String[] regexSplits = line.trim().split(FilterTemplateOutputHandler.REGEX_ESCAPE);
boolean inRegex = false;
for (String regexSplit:regexSplits) {
Expand Down Expand Up @@ -953,7 +969,8 @@ private static String findTemplateWithinText(String text, String template) throw
return null;
}

List<String> templateNonOptionalText = getNonOptionalLicenseText(removeCommentChars(template), VarTextHandling.REGEX);
List<String> templateNonOptionalText = getNonOptionalLicenseText(removeCommentChars(template),
VarTextHandling.REGEX, OptionalTextHandling.REGEX_USING_TOKENS);
if (templateNonOptionalText.size() > 0 && templateNonOptionalText.get(0).startsWith("~~~.")) {
// Change to a non-greedy match
String firstLine = templateNonOptionalText.get(0);
Expand Down
Loading

0 comments on commit 5aad1e0

Please sign in to comment.