Tokenizer works with string Component values.

This commit is contained in:
Jesse Brault 2023-01-25 15:35:53 +01:00
parent 3fee229003
commit 62209dbc31
8 changed files with 314 additions and 89 deletions

View File

@ -0,0 +1,24 @@
package com.jessebrault.gcp.tokenizer;
/**
 * A small mutable integer counter that starts at zero and can be moved up or
 * down one step at a time.
 */
final class Counter {

    /** Current value of the counter; a new instance starts at zero. */
    private int value;

    /** Raises the counter by one. */
    public void increment() {
        value += 1;
    }

    /** Lowers the counter by one; the value may become negative. */
    public void decrement() {
        value -= 1;
    }

    /**
     * @return {@code true} when the counter currently holds exactly zero
     */
    public boolean isZero() {
        return 0 == value;
    }

    /**
     * @return a debugging representation of the form {@code Counter(n)}
     */
    @Override
    public String toString() {
        return new StringBuilder("Counter(").append(value).append(')').toString();
    }
}

View File

@ -7,10 +7,8 @@ import java.util.Deque;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.function.Function;
import java.util.function.Supplier;
/**
* NOT THREAD SAFE
*/
final class DollarScriptletMatcher implements Function<String, FsmOutput> {
private static final Logger logger = LoggerFactory.getLogger(DollarScriptletMatcher.class);
@ -59,65 +57,46 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
NO_STRING, G_STRING, SINGLE_QUOTE_STRING
}
private static final class Counter {
private static final class StringCharIterator implements Iterator<String> {
private int count = 0;
private final String s;
private int cur;
public void increment() {
this.count++;
}
public void decrement() {
this.count--;
}
public boolean isZero() {
return this.count == 0;
public StringCharIterator(String s) {
this.s = s;
}
@Override
public String toString() {
return "Counter(" + this.count + ")";
public boolean hasNext() {
return this.cur < s.length();
}
}
private Deque<State> stateStack;
private Deque<Counter> counterStack;
private Counter getCurrentCounter() {
final var currentCounter = this.counterStack.peek();
if (currentCounter == null) {
throw new IllegalStateException("currentCounter is null");
@Override
public String next() {
final var c = String.valueOf(s.charAt(this.cur));
this.cur++;
return c;
}
return currentCounter;
}
@Override
public FsmOutput apply(String s) {
this.stateStack = new LinkedList<>();
this.counterStack = new LinkedList<>();
final Deque<State> stateStack = new LinkedList<>();
final Deque<Counter> counterStack = new LinkedList<>();
final Supplier<Counter> currentCounterSupplier = () -> {
final var currentCounter = counterStack.peek();
if (currentCounter == null) {
throw new IllegalStateException("currentCounter is null");
}
return currentCounter;
};
stateStack.push(State.NO_STRING);
counterStack.push(new Counter());
final Iterator<String> iterator = new Iterator<>() {
private int cur;
@Override
public boolean hasNext() {
return this.cur < s.length();
}
@Override
public String next() {
final var c = String.valueOf(s.charAt(this.cur));
this.cur++;
return c;
}
};
final Iterator<String> iterator = new StringCharIterator(s);
final var entireAcc = new StringBuilder();
@ -131,7 +110,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
return null;
} else {
entireAcc.append("{");
this.getCurrentCounter().increment();
currentCounterSupplier.get().increment();
}
outer:
@ -151,24 +130,24 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
if (stateStack.peek() == State.NO_STRING) {
switch (c0) {
case "{" -> this.getCurrentCounter().increment();
case "{" -> currentCounterSupplier.get().increment();
case "}" -> {
final var currentCounter = this.getCurrentCounter();
final var currentCounter = currentCounterSupplier.get();
currentCounter.decrement();
if (currentCounter.isZero()) {
if (this.counterStack.size() == 1) {
if (counterStack.size() == 1) {
logger.debug("last Counter is zero; breaking while loop");
break outer;
} else {
logger.debug("counterStack.size() is greater than 1 and top Counter is zero; " +
"popping state and counter stacks.");
this.stateStack.pop();
this.counterStack.pop();
stateStack.pop();
counterStack.pop();
}
}
}
case "\"" -> this.stateStack.push(State.G_STRING);
case "'" -> this.stateStack.push(State.SINGLE_QUOTE_STRING);
case "\"" -> stateStack.push(State.G_STRING);
case "'" -> stateStack.push(State.SINGLE_QUOTE_STRING);
}
} else if (stateStack.peek() == State.G_STRING) {
switch (c0) {
@ -185,9 +164,9 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
final var c1 = iterator.next();
entireAcc.append(c1);
if (c1.equals("{")) {
this.stateStack.push(State.NO_STRING);
this.counterStack.push(new Counter());
this.getCurrentCounter().increment();
stateStack.push(State.NO_STRING);
counterStack.push(new Counter());
currentCounterSupplier.get().increment();
}
} else {
throw new IllegalArgumentException("Ill-formed dollarScriptlet (ends with a dollar)");
@ -195,7 +174,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
}
case "\"" -> {
logger.debug("popping G_STRING state");
this.stateStack.pop();
stateStack.pop();
}
}
} else if (stateStack.peek() == State.SINGLE_QUOTE_STRING) {
@ -209,7 +188,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
}
case "'" -> {
logger.debug("popping SINGLE_QUOTE_STRING state");
this.stateStack.pop();
stateStack.pop();
}
}
} else {

View File

@ -0,0 +1,109 @@
package com.jessebrault.gcp.tokenizer;
import com.jessebrault.fsm.stackfunction.StackFunctionFsm;
import com.jessebrault.fsm.stackfunction.StackFunctionFsmBuilder;
import com.jessebrault.fsm.stackfunction.StackFunctionFsmBuilderImpl;
import java.util.function.Function;
import java.util.regex.Pattern;
/**
 * Matches a double-quoted GString at the start of the given input.
 *
 * <p>The input is consumed piece by piece with a small state machine: an opening double
 * quote, then any mix of plain text and dollar scriptlets, then a closing double quote.
 * Characters after the closing quote are not consumed.
 *
 * <p>The resulting {@link FsmOutput} exposes three parts: part 1 and part 3 are the
 * double-quote characters, part 2 is everything between them.
 */
final class GStringMatcher implements Function<String, FsmOutput> {

    /**
     * Output for a successfully matched GString.
     */
    private static final class GStringMatcherOutput implements FsmOutput {

        private final String entire;
        private final String contents;

        /**
         * @param entire the full matched text, including both double quotes
         * @param contents the text between the two double quotes
         */
        public GStringMatcherOutput(String entire, String contents) {
            this.entire = entire;
            this.contents = contents;
        }

        @Override
        public String entire() {
            return this.entire;
        }

        /**
         * @param index 1 for the opening quote, 2 for the contents, 3 for the closing quote
         * @return the requested part
         * @throws IllegalArgumentException if index is not 1, 2, or 3
         */
        @Override
        public String part(int index) {
            return switch (index) {
                case 1, 3 -> "\"";
                case 2 -> this.contents;
                default -> throw new IllegalArgumentException("index must be 1, 2, or 3; given: " + index);
            };
        }

    }

    /**
     * One or more of: any character other than dollar, backslash, double quote, newline or
     * carriage return; a backslash followed by one of {@code " n r b f s t \ u}; or a dollar
     * that is followed neither by an opening brace nor by a dotted identifier-like path.
     */
    private static final PatternMatcher text = new PatternMatcher(
            Pattern.compile("^(?:[\\w\\W&&[^$\\\\\"\\n\\r]]|\\\\[\"nrbfst\\\\u]|\\$(?!\\{|[\\w$]+(?:\\.[\\w$]+)*))+")
    );

    private static final DollarScriptletMatcher dollarScriptlet = new DollarScriptletMatcher();

    private static final PatternMatcher doubleQuote = new PatternMatcher(
            Pattern.compile("^\"")
    );

    private enum State {
        START, CONTENTS, DONE
    }

    private static StackFunctionFsmBuilder<String, State, FsmOutput> getFsmBuilder() {
        return new StackFunctionFsmBuilderImpl<>();
    }

    /**
     * Builds a fresh state machine whose actions append every matched piece to {@code acc}.
     *
     * @param acc accumulator receiving the text of each matched piece, in order
     * @return a state machine starting in {@link State#START}
     */
    private static StackFunctionFsm<String, State, FsmOutput> getFsm(StringBuilder acc) {
        return getFsmBuilder()
                .setInitialState(State.START)
                .whileIn(State.START, sc -> {
                    sc.on(doubleQuote).shiftTo(State.CONTENTS).exec(o -> {
                        acc.append(o.entire());
                    });
                    sc.onNoMatch().exec(input -> {
                        throw new IllegalArgumentException("Ill-formed GString (expected opening double quote)");
                    });
                })
                .whileIn(State.CONTENTS, sc -> {
                    sc.on(text).exec(o -> {
                        acc.append(o.entire());
                    });
                    sc.on(dollarScriptlet).exec(o -> {
                        acc.append(o.entire());
                    });
                    sc.on(doubleQuote).shiftTo(State.DONE).exec(o -> {
                        acc.append(o.entire());
                    });
                    sc.onNoMatch().exec(input -> {
                        throw new IllegalArgumentException("Ill-formed GString (unexpected content)");
                    });
                })
                .build();
    }

    /**
     * Attempts to match a GString at the start of {@code s}.
     *
     * @param s the input to match against
     * @return the match, or {@code null} if {@code s} does not start with a double quote
     * @throws IllegalArgumentException if {@code s} starts with a double quote but the GString
     *         is ill-formed, including when the input ends before the closing double quote
     * @throws IllegalStateException if the state machine yields no output for a piece
     */
    @Override
    public FsmOutput apply(final String s) {
        // Look-ahead: anything not starting with a double quote is simply "no match".
        if (!s.startsWith("\"")) {
            return null;
        }

        final var acc = new StringBuilder();
        final var fsm = getFsm(acc);

        String remaining = s;
        while (!remaining.isEmpty()) {
            final var output = fsm.apply(remaining);
            if (output == null) {
                throw new IllegalStateException("output is null");
            }
            if (fsm.getCurrentState() == State.DONE) {
                break;
            }
            remaining = remaining.substring(output.entire().length());
        }

        // The loop also ends when the input runs out. Without this check an unterminated
        // GString would have its last character stripped as if it were the closing quote
        // (or, for a lone quote, fail with a StringIndexOutOfBoundsException below).
        if (fsm.getCurrentState() != State.DONE) {
            throw new IllegalArgumentException("Ill-formed GString (missing closing double quote)");
        }

        final var entire = acc.toString();
        return new GStringMatcherOutput(entire, entire.substring(1, entire.length() - 1));
    }

}

View File

@ -1,23 +0,0 @@
package com.jessebrault.gcp.tokenizer;
import java.util.regex.MatchResult;
/**
 * {@link FsmOutput} backed by a regular-expression {@link MatchResult}: the entire
 * output is the whole match and each part is the capture group with the same index.
 */
public class MatchResultFsmOutput implements FsmOutput {

    /** The regex match every accessor delegates to. */
    private final MatchResult result;

    /**
     * @param matchResult the match to expose as an {@link FsmOutput}
     */
    public MatchResultFsmOutput(MatchResult matchResult) {
        result = matchResult;
    }

    /**
     * @return the text of the whole match
     */
    @Override
    public String entire() {
        return result.group();
    }

    /**
     * @param index the capture group index
     * @return the text captured by that group
     */
    @Override
    public String part(int index) {
        return result.group(index);
    }

}

View File

@ -1,10 +1,31 @@
package com.jessebrault.gcp.tokenizer;
import java.util.function.Function;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
final class PatternMatcher implements Function<String, FsmOutput> {
private static final class MatchResultFsmOutput implements FsmOutput {
private final MatchResult matchResult;
public MatchResultFsmOutput(MatchResult matchResult) {
this.matchResult = matchResult;
}
@Override
public String entire() {
return this.matchResult.group();
}
@Override
public String part(int index) {
return this.matchResult.group(index);
}
}
private final Pattern pattern;
public PatternMatcher(Pattern pattern) {

View File

@ -57,7 +57,7 @@ final class TokenizerFsm {
/**
* Whitespace
*/
private static final PatternMatcher whitespace = new PatternMatcher(Pattern.compile("^[\\s&&[^\n\r]]+"));
private static final PatternMatcher whitespace = new PatternMatcher(Pattern.compile("^\\s+"));
/**
* Keys and values
@ -66,6 +66,10 @@ final class TokenizerFsm {
Pattern.compile("^[\\p{L}0-9_$]+")
);
private static final PatternMatcher equals = new PatternMatcher(Pattern.compile("^="));
private static final PatternMatcher singleQuoteString = new PatternMatcher(
Pattern.compile("^(')((?:[\\w\\W&&[^\\\\'\\n\\r]]|\\\\['nrbfst\\\\u])*)(')")
);
private static final GStringMatcher gString = new GStringMatcher();
/**
* Component ends
@ -147,8 +151,16 @@ final class TokenizerFsm {
sc.on(equals).exec(o -> {
acc.accumulate(EQUALS, o.entire());
});
// sc.on(gString)
// sc.on(singleQuoteString)
sc.on(gString).exec(o -> {
acc.accumulate(DOUBLE_QUOTE, o.part(1));
acc.accumulate(STRING, o.part(2));
acc.accumulate(DOUBLE_QUOTE, o.part(3));
});
sc.on(singleQuoteString).exec(o -> {
acc.accumulate(SINGLE_QUOTE, o.part(1));
acc.accumulate(STRING, o.part(2));
acc.accumulate(SINGLE_QUOTE, o.part(3));
});
sc.on(dollarReference).exec(o -> {
acc.accumulate(DOLLAR, o.part(1));
acc.accumulate(GROOVY_REFERENCE, o.part(2));

View File

@ -0,0 +1,44 @@
package com.jessebrault.gcp.tokenizer
import org.junit.jupiter.api.Test
import static org.junit.jupiter.api.Assertions.assertEquals
/**
 * Tests that GStringMatcher matches a leading double-quoted string and splits it
 * into opening quote, contents, and closing quote.
 */
class GStringMatcherTests {

    private final GStringMatcher matcher = new GStringMatcher()

    /**
     * Applies the matcher to the input and checks the entire match as well as its three parts.
     *
     * @param expectedEntire the expected full match, including both double quotes
     * @param input the text handed to the matcher
     */
    private void test(String expectedEntire, String input) {
        def result = matcher.apply(input)
        assertEquals(expectedEntire, result.entire())
        assertEquals('"', result.part(1))
        // The expected contents are the expected match without its first and last character.
        def expectedContents = expectedEntire.substring(1, expectedEntire.length() - 1)
        assertEquals(expectedContents, result.part(2))
        assertEquals('"', result.part(3))
    }

    @Test
    void empty() {
        test('""', '""')
    }

    @Test
    void simple() {
        test('"abc"', '"abc"')
    }

    @Test
    void nestedDollarClosureWithGString() {
        test(
                '"abc ${ \'def\'.each { "$it " }.join() }"',
                '"abc ${ \'def\'.each { "$it " }.join() }"'
        )
    }

    @Test
    void nestedDollarClosureWithGStringTakesOnlyAsNeeded() {
        test(
                '"abc ${ \'def\'.each { "$it " }.join() }"',
                '"abc ${ \'def\'.each { "$it " }.join() }" test="rest"'
        )
    }

    @Test
    void takesOnlyAsNeeded() {
        test('"abc"', '"abc" test="def"')
    }

}

View File

@ -1,12 +1,10 @@
package com.jessebrault.gcp.tokenizer
import org.junit.jupiter.api.Disabled
import org.junit.jupiter.api.Test
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import static com.jessebrault.gcp.tokenizer.Token.Type.*
import static org.junit.jupiter.api.Assertions.assertEquals
import static org.junit.jupiter.api.Assertions.assertTrue
@ -107,8 +105,7 @@ class TokenizerTests {
}
@Test
@Disabled
void componentWithKeysAndValues() {
void componentWithGString() {
test('<Test test="test" />') {
expect COMPONENT_START, '<', 1, 1
expect CLASS_NAME, 'Test', 1, 2
@ -124,6 +121,23 @@ class TokenizerTests {
}
}
/**
 * Tokenizes a component whose attribute value is a double-quoted string containing a
 * dollar scriptlet that itself contains a double-quoted string. The whole text between
 * the outer quotes is expected as a single STRING token framed by two DOUBLE_QUOTE tokens.
 *
 * NOTE(review): the two trailing numbers of each expectation look like 1-based line and
 * column; confirm against the expect helper.
 */
@Test
void componentWithGStringWithNestedGString() {
test('<Test test="abc ${ \'abc\'.collect { "it " }.join() }" />') {
expect COMPONENT_START, '<', 1, 1
expect CLASS_NAME, 'Test', 1, 2
expect WHITESPACE, ' ', 1, 6
expect KEY, 'test', 1, 7
expect EQUALS, '=', 1, 11
expect DOUBLE_QUOTE, '"', 1, 12
expect STRING, 'abc ${ \'abc\'.collect { "it " }.join() }', 1, 13
expect DOUBLE_QUOTE, '"', 1, 52
expect WHITESPACE, ' ', 1, 53
expect FORWARD_SLASH, '/', 1, 54
expect COMPONENT_END, '>', 1, 55
}
}
@Test
void newlinesCounted() {
test('Hello,\n$person!') {
@ -134,4 +148,49 @@ class TokenizerTests {
}
}
/**
 * Tokenizes a component whose attribute value is a single-quoted string and expects
 * the text between the quotes as one STRING token framed by two SINGLE_QUOTE tokens.
 *
 * NOTE(review): the two trailing numbers of each expectation look like 1-based line and
 * column; confirm against the expect helper.
 */
@Test
void componentWithSingleQuoteString() {
test("<Test test='Hello, World!' />") {
expect COMPONENT_START, '<', 1, 1
expect CLASS_NAME, 'Test', 1, 2
expect WHITESPACE, ' ', 1, 6
expect KEY, 'test', 1, 7
expect EQUALS, '=', 1, 11
expect SINGLE_QUOTE, "'", 1, 12
expect STRING, 'Hello, World!', 1, 13
expect SINGLE_QUOTE, "'", 1, 26
expect WHITESPACE, ' ', 1, 27
expect FORWARD_SLASH, '/', 1, 28
expect COMPONENT_END, '>', 1, 29
}
}
/**
 * Tokenizes a component written with a fully qualified name and expects each dotted
 * segment before the last as a PACKAGE_NAME token, each dot as a DOT token, and the
 * final segment as the CLASS_NAME token.
 *
 * NOTE(review): the two trailing numbers of each expectation look like 1-based line and
 * column; confirm against the expect helper.
 */
@Test
void componentWithFullyQualifiedName() {
test('<com.jessebrault.gcp.Test />') {
expect COMPONENT_START, '<', 1, 1
expect PACKAGE_NAME, 'com', 1, 2
expect DOT, '.', 1, 5
expect PACKAGE_NAME, 'jessebrault', 1, 6
expect DOT, '.', 1, 17
expect PACKAGE_NAME, 'gcp', 1, 18
expect DOT, '.', 1, 21
expect CLASS_NAME, 'Test', 1, 22
expect WHITESPACE, ' ', 1, 26
expect FORWARD_SLASH, '/', 1, 27
expect COMPONENT_END, '>', 1, 28
}
}
/**
 * Tokenizes a component in which a newline separates the class name from the closing
 * slash. The newline is expected as a WHITESPACE token, and the tokens after it are
 * expected at position 2, 1 and 2, 2.
 *
 * NOTE(review): the two trailing numbers of each expectation look like 1-based line and
 * column; confirm against the expect helper.
 */
@Test
void componentWithNewlineWhitespace() {
test('<Test\n/>') {
expect COMPONENT_START, '<', 1, 1
expect CLASS_NAME, 'Test', 1, 2
expect WHITESPACE, '\n', 1, 6
expect FORWARD_SLASH, '/', 2, 1
expect COMPONENT_END, '>', 2, 2
}
}
}