From 62209dbc319fabb0723210e07173970454ba0f83 Mon Sep 17 00:00:00 2001 From: Jesse Brault Date: Wed, 25 Jan 2023 15:35:53 +0100 Subject: [PATCH] Tokenizer works with string Component values. --- .../jessebrault/gcp/tokenizer/Counter.java | 24 ++++ .../gcp/tokenizer/DollarScriptletMatcher.java | 97 ++++++---------- .../gcp/tokenizer/GStringMatcher.java | 109 ++++++++++++++++++ .../gcp/tokenizer/MatchResultFsmOutput.java | 23 ---- .../gcp/tokenizer/PatternMatcher.java | 21 ++++ .../gcp/tokenizer/TokenizerFsm.java | 18 ++- .../gcp/tokenizer/GStringMatcherTests.groovy | 44 +++++++ .../gcp/tokenizer/TokenizerTests.groovy | 67 ++++++++++- 8 files changed, 314 insertions(+), 89 deletions(-) create mode 100644 gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/Counter.java create mode 100644 gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/GStringMatcher.java delete mode 100644 gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/MatchResultFsmOutput.java create mode 100644 gcp-impl/src/test/groovy/com/jessebrault/gcp/tokenizer/GStringMatcherTests.groovy diff --git a/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/Counter.java b/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/Counter.java new file mode 100644 index 0000000..c465c18 --- /dev/null +++ b/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/Counter.java @@ -0,0 +1,24 @@ +package com.jessebrault.gcp.tokenizer; + +final class Counter { + + private int count = 0; + + public void increment() { + this.count++; + } + + public void decrement() { + this.count--; + } + + public boolean isZero() { + return this.count == 0; + } + + @Override + public String toString() { + return "Counter(" + this.count + ")"; + } + +} diff --git a/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/DollarScriptletMatcher.java b/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/DollarScriptletMatcher.java index 04c8fe7..8928fc6 100644 --- a/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/DollarScriptletMatcher.java +++ b/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/DollarScriptletMatcher.java @@ -7,10 +7,8 @@ import java.util.Deque; import java.util.Iterator; import java.util.LinkedList; import java.util.function.Function; +import java.util.function.Supplier; -/** - * NOT THREAD SAFE - */ final class DollarScriptletMatcher implements Function { private static final Logger logger = LoggerFactory.getLogger(DollarScriptletMatcher.class); @@ -59,65 +57,46 @@ final class DollarScriptletMatcher implements Function { NO_STRING, G_STRING, SINGLE_QUOTE_STRING } - private static final class Counter { + private static final class StringCharIterator implements Iterator { - private int count = 0; + private final String s; + private int cur; - public void increment() { - this.count++; - } - - public void decrement() { - this.count--; - } - - public boolean isZero() { - return this.count == 0; + public StringCharIterator(String s) { + this.s = s; } @Override - public String toString() { - return "Counter(" + this.count + ")"; + public boolean hasNext() { + return this.cur < s.length(); } - } - - private Deque stateStack; - private Deque counterStack; - - private Counter getCurrentCounter() { - final var currentCounter = this.counterStack.peek(); - if (currentCounter == null) { - throw new IllegalStateException("currentCounter is null"); + @Override + public String next() { + final var c = String.valueOf(s.charAt(this.cur)); + this.cur++; + return c; } - return currentCounter; + } @Override public FsmOutput apply(String s) { - this.stateStack = new LinkedList<>(); - this.counterStack = new LinkedList<>(); + final Deque stateStack = new LinkedList<>(); + final Deque counterStack = new LinkedList<>(); + + final Supplier currentCounterSupplier = () -> { + final var currentCounter = counterStack.peek(); + if (currentCounter == null) { + throw new IllegalStateException("currentCounter is null"); + } + return currentCounter; + }; stateStack.push(State.NO_STRING); counterStack.push(new Counter()); - final Iterator iterator = new Iterator<>() { - - private int cur; - - @Override - public boolean hasNext() { - return this.cur < s.length(); - } - - @Override - public String next() { - final var c = String.valueOf(s.charAt(this.cur)); - this.cur++; - return c; - } - - }; + final Iterator iterator = new StringCharIterator(s); final var entireAcc = new StringBuilder(); @@ -131,7 +110,7 @@ final class DollarScriptletMatcher implements Function { return null; } else { entireAcc.append("{"); - this.getCurrentCounter().increment(); + currentCounterSupplier.get().increment(); } outer: @@ -151,24 +130,24 @@ final class DollarScriptletMatcher implements Function { if (stateStack.peek() == State.NO_STRING) { switch (c0) { - case "{" -> this.getCurrentCounter().increment(); + case "{" -> currentCounterSupplier.get().increment(); case "}" -> { - final var currentCounter = this.getCurrentCounter(); + final var currentCounter = currentCounterSupplier.get(); currentCounter.decrement(); if (currentCounter.isZero()) { - if (this.counterStack.size() == 1) { + if (counterStack.size() == 1) { logger.debug("last Counter is zero; breaking while loop"); break outer; } else { logger.debug("counterStack.size() is greater than 1 and top Counter is zero; " + "popping state and counter stacks."); - this.stateStack.pop(); - this.counterStack.pop(); + stateStack.pop(); + counterStack.pop(); } } } - case "\"" -> this.stateStack.push(State.G_STRING); - case "'" -> this.stateStack.push(State.SINGLE_QUOTE_STRING); + case "\"" -> stateStack.push(State.G_STRING); + case "'" -> stateStack.push(State.SINGLE_QUOTE_STRING); } } else if (stateStack.peek() == State.G_STRING) { switch (c0) { @@ -185,9 +164,9 @@ final class DollarScriptletMatcher implements Function { final var c1 = iterator.next(); entireAcc.append(c1); if (c1.equals("{")) { - this.stateStack.push(State.NO_STRING); - this.counterStack.push(new Counter()); - this.getCurrentCounter().increment(); + stateStack.push(State.NO_STRING); + counterStack.push(new Counter()); + currentCounterSupplier.get().increment(); } } else { throw new IllegalArgumentException("Ill-formed dollarScriptlet (ends with a dollar)"); @@ -195,7 +174,7 @@ final class DollarScriptletMatcher implements Function { } case "\"" -> { logger.debug("popping G_STRING state"); - this.stateStack.pop(); + stateStack.pop(); } } } else if (stateStack.peek() == State.SINGLE_QUOTE_STRING) { @@ -209,7 +188,7 @@ final class DollarScriptletMatcher implements Function { } case "'" -> { logger.debug("popping SINGLE_QUOTE_STRING state"); - this.stateStack.pop(); + stateStack.pop(); } } } else { diff --git a/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/GStringMatcher.java b/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/GStringMatcher.java new file mode 100644 index 0000000..87e6da7 --- /dev/null +++ b/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/GStringMatcher.java @@ -0,0 +1,109 @@ +package com.jessebrault.gcp.tokenizer; + +import com.jessebrault.fsm.stackfunction.StackFunctionFsm; +import com.jessebrault.fsm.stackfunction.StackFunctionFsmBuilder; +import com.jessebrault.fsm.stackfunction.StackFunctionFsmBuilderImpl; + +import java.util.function.Function; +import java.util.regex.Pattern; + +final class GStringMatcher implements Function { + + private static final class GStringMatcherOutput implements FsmOutput { + + private final String entire; + private final String contents; + + public GStringMatcherOutput(String entire, String contents) { + this.entire = entire; + this.contents = contents; + } + + @Override + public String entire() { + return this.entire; + } + + @Override + public String part(int index) { + return switch(index) { + case 1, 3 -> "\""; + case 2 -> this.contents; + default -> throw new IllegalArgumentException(); + }; + } + + } + + private static final PatternMatcher text = new PatternMatcher( + Pattern.compile("^(?:[\\w\\W&&[^$\\\\\"\\n\\r]]|\\\\[\"nrbfst\\\\u]|\\$(?!\\{|[\\w$]+(?:\\.[\\w$]+)*))+") + ); + private static final DollarScriptletMatcher dollarScriptlet = new DollarScriptletMatcher(); + private static final PatternMatcher doubleQuote = new PatternMatcher( + Pattern.compile("^\"") + ); + + private enum State { + START, CONTENTS, DONE + } + + private static StackFunctionFsmBuilder getFsmBuilder() { + return new StackFunctionFsmBuilderImpl<>(); + } + + private static StackFunctionFsm getFsm(StringBuilder acc) { + return getFsmBuilder() + .setInitialState(State.START) + .whileIn(State.START, sc -> { + sc.on(doubleQuote).shiftTo(State.CONTENTS).exec(o -> { + acc.append(o.entire()); + }); + sc.onNoMatch().exec(input -> { + throw new IllegalArgumentException(); + }); + }) + .whileIn(State.CONTENTS, sc -> { + sc.on(text).exec(o -> { + acc.append(o.entire()); + }); + sc.on(dollarScriptlet).exec(o -> { + acc.append(o.entire()); + }); + sc.on(doubleQuote).shiftTo(State.DONE).exec(o -> { + acc.append(o.entire()); + }); + sc.onNoMatch().exec(input -> { + throw new IllegalArgumentException(); + }); + }) + .build(); + } + + @Override + public FsmOutput apply(final String s) { + final var acc = new StringBuilder(); + final var fsm = getFsm(acc); + + String remaining = s; + + // Look-ahead + if (!remaining.startsWith("\"")) { + return null; + } + + while (remaining.length() > 0) { + final var output = fsm.apply(remaining); + if (output == null) { + throw new IllegalStateException("output is null"); + } + if (fsm.getCurrentState() == State.DONE) { + break; + } + remaining = remaining.substring(output.entire().length()); + } + + final var entire = acc.toString(); + return new GStringMatcherOutput(entire, entire.substring(1, entire.length() - 1)); + } + +} diff --git a/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/MatchResultFsmOutput.java b/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/MatchResultFsmOutput.java deleted file mode 100644 index 113312f..0000000 --- a/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/MatchResultFsmOutput.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.jessebrault.gcp.tokenizer; - -import java.util.regex.MatchResult; - -public class MatchResultFsmOutput implements FsmOutput { - - private final MatchResult matchResult; - - public MatchResultFsmOutput(MatchResult matchResult) { - this.matchResult = matchResult; - } - - @Override - public String entire() { - return this.matchResult.group(); - } - - @Override - public String part(int index) { - return this.matchResult.group(index); - } - -} diff --git a/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/PatternMatcher.java b/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/PatternMatcher.java index a9672df..73c6785 100644 --- a/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/PatternMatcher.java +++ b/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/PatternMatcher.java @@ -1,10 +1,31 @@ package com.jessebrault.gcp.tokenizer; import java.util.function.Function; +import java.util.regex.MatchResult; import java.util.regex.Pattern; final class PatternMatcher implements Function { + private static final class MatchResultFsmOutput implements FsmOutput { + + private final MatchResult matchResult; + + public MatchResultFsmOutput(MatchResult matchResult) { + this.matchResult = matchResult; + } + + @Override + public String entire() { + return this.matchResult.group(); + } + + @Override + public String part(int index) { + return this.matchResult.group(index); + } + + } + private final Pattern pattern; public PatternMatcher(Pattern pattern) { diff --git a/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/TokenizerFsm.java b/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/TokenizerFsm.java index 859bb61..d41ea39 100644 --- a/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/TokenizerFsm.java +++ b/gcp-impl/src/main/groovy/com/jessebrault/gcp/tokenizer/TokenizerFsm.java @@ -57,7 +57,7 @@ final class TokenizerFsm { /** * Whitespace */ - private static final PatternMatcher whitespace = new PatternMatcher(Pattern.compile("^[\\s&&[^\n\r]]+")); + private static final PatternMatcher whitespace = new PatternMatcher(Pattern.compile("^\\s+")); /** * Keys and values @@ -66,6 +66,10 @@ final class TokenizerFsm { Pattern.compile("^[\\p{L}0-9_$]+") ); private static final PatternMatcher equals = new PatternMatcher(Pattern.compile("^=")); + private static final PatternMatcher singleQuoteString = new PatternMatcher( + Pattern.compile("^(')((?:[\\w\\W&&[^\\\\'\\n\\r]]|\\\\['nrbfst\\\\u])*)(')") + ); + private static final GStringMatcher gString = new GStringMatcher(); /** * Component ends @@ -147,8 +151,16 @@ final class TokenizerFsm { sc.on(equals).exec(o -> { acc.accumulate(EQUALS, o.entire()); }); - // sc.on(gString) - // sc.on(singleQuoteString) + sc.on(gString).exec(o -> { + acc.accumulate(DOUBLE_QUOTE, o.part(1)); + acc.accumulate(STRING, o.part(2)); + acc.accumulate(DOUBLE_QUOTE, o.part(3)); + }); + sc.on(singleQuoteString).exec(o -> { + acc.accumulate(SINGLE_QUOTE, o.part(1)); + acc.accumulate(STRING, o.part(2)); + acc.accumulate(SINGLE_QUOTE, o.part(3)); + }); sc.on(dollarReference).exec(o -> { acc.accumulate(DOLLAR, o.part(1)); acc.accumulate(GROOVY_REFERENCE, o.part(2)); diff --git a/gcp-impl/src/test/groovy/com/jessebrault/gcp/tokenizer/GStringMatcherTests.groovy b/gcp-impl/src/test/groovy/com/jessebrault/gcp/tokenizer/GStringMatcherTests.groovy new file mode 100644 index 0000000..83ed339 --- /dev/null +++ b/gcp-impl/src/test/groovy/com/jessebrault/gcp/tokenizer/GStringMatcherTests.groovy @@ -0,0 +1,44 @@ +package com.jessebrault.gcp.tokenizer + +import org.junit.jupiter.api.Test + +import static org.junit.jupiter.api.Assertions.assertEquals + +class GStringMatcherTests { + + private final GStringMatcher matcher = new GStringMatcher() + + private void test(String expectedEntire, String input) { + def output = this.matcher.apply(input) + assertEquals(expectedEntire, output.entire()) + assertEquals('"', output.part(1)) + assertEquals(expectedEntire.substring(1, expectedEntire.length() - 1), output.part(2)) + assertEquals('"', output.part(3)) + } + + @Test + void empty() { + test '""', '""' + } + + @Test + void simple() { + test '"abc"', '"abc"' + } + + @Test + void nestedDollarClosureWithGString() { + test '"abc ${ \'def\'.each { "$it " }.join() }"', '"abc ${ \'def\'.each { "$it " }.join() }"' + } + + @Test + void nestedDollarClosureWithGStringTakesOnlyAsNeeded() { + test '"abc ${ \'def\'.each { "$it " }.join() }"', '"abc ${ \'def\'.each { "$it " }.join() }" test="rest"' + } + + @Test + void takesOnlyAsNeeded() { + test '"abc"', '"abc" test="def"' + } + +} diff --git a/gcp-impl/src/test/groovy/com/jessebrault/gcp/tokenizer/TokenizerTests.groovy b/gcp-impl/src/test/groovy/com/jessebrault/gcp/tokenizer/TokenizerTests.groovy index 5b37eee..ce14868 100644 --- a/gcp-impl/src/test/groovy/com/jessebrault/gcp/tokenizer/TokenizerTests.groovy +++ b/gcp-impl/src/test/groovy/com/jessebrault/gcp/tokenizer/TokenizerTests.groovy @@ -1,12 +1,10 @@ package com.jessebrault.gcp.tokenizer -import org.junit.jupiter.api.Disabled import org.junit.jupiter.api.Test import org.slf4j.Logger import org.slf4j.LoggerFactory import static com.jessebrault.gcp.tokenizer.Token.Type.* - import static org.junit.jupiter.api.Assertions.assertEquals import static org.junit.jupiter.api.Assertions.assertTrue @@ -107,8 +105,7 @@ class TokenizerTests { } @Test - @Disabled - void componentWithKeysAndValues() { + void componentWithGString() { test('') { expect COMPONENT_START, '<', 1, 1 expect CLASS_NAME, 'Test', 1, 2 @@ -124,6 +121,23 @@ class TokenizerTests { } } + @Test + void componentWithGStringWithNestedGString() { + test('') { + expect COMPONENT_START, '<', 1, 1 + expect CLASS_NAME, 'Test', 1, 2 + expect WHITESPACE, ' ', 1, 6 + expect KEY, 'test', 1, 7 + expect EQUALS, '=', 1, 11 + expect DOUBLE_QUOTE, '"', 1, 12 + expect STRING, 'abc ${ \'abc\'.collect { "it " }.join() }', 1, 13 + expect DOUBLE_QUOTE, '"', 1, 52 + expect WHITESPACE, ' ', 1, 53 + expect FORWARD_SLASH, '/', 1, 54 + expect COMPONENT_END, '>', 1, 55 + } + } + @Test void newlinesCounted() { test('Hello,\n$person!') { @@ -134,4 +148,49 @@ class TokenizerTests { } } + @Test + void componentWithSingleQuoteString() { + test("") { + expect COMPONENT_START, '<', 1, 1 + expect CLASS_NAME, 'Test', 1, 2 + expect WHITESPACE, ' ', 1, 6 + expect KEY, 'test', 1, 7 + expect EQUALS, '=', 1, 11 + expect SINGLE_QUOTE, "'", 1, 12 + expect STRING, 'Hello, World!', 1, 13 + expect SINGLE_QUOTE, "'", 1, 26 + expect WHITESPACE, ' ', 1, 27 + expect FORWARD_SLASH, '/', 1, 28 + expect COMPONENT_END, '>', 1, 29 + } + } + + @Test + void componentWithFullyQualifiedName() { + test('') { + expect COMPONENT_START, '<', 1, 1 + expect PACKAGE_NAME, 'com', 1, 2 + expect DOT, '.', 1, 5 + expect PACKAGE_NAME, 'jessebrault', 1, 6 + expect DOT, '.', 1, 17 + expect PACKAGE_NAME, 'gcp', 1, 18 + expect DOT, '.', 1, 21 + expect CLASS_NAME, 'Test', 1, 22 + expect WHITESPACE, ' ', 1, 26 + expect FORWARD_SLASH, '/', 1, 27 + expect COMPONENT_END, '>', 1, 28 + } + } + + @Test + void componentWithNewlineWhitespace() { + test('') { + expect COMPONENT_START, '<', 1, 1 + expect CLASS_NAME, 'Test', 1, 2 + expect WHITESPACE, '\n', 1, 6 + expect FORWARD_SLASH, '/', 2, 1 + expect COMPONENT_END, '>', 2, 2 + } + } + }