Tokenizer works with string Component values.
This commit is contained in:
parent
3fee229003
commit
62209dbc31
@ -0,0 +1,24 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
/**
 * A small mutable integer counter that starts at zero and can be stepped up
 * or down by one.
 */
final class Counter {

    /** Current value; may go negative if decremented more often than incremented. */
    private int value;

    /** Adds one to the current value. */
    public void increment() {
        value += 1;
    }

    /** Subtracts one from the current value. */
    public void decrement() {
        value -= 1;
    }

    /**
     * @return {@code true} when the current value is exactly zero
     */
    public boolean isZero() {
        return 0 == value;
    }

    /**
     * @return a debug representation of the form {@code Counter(n)}
     */
    @Override
    public String toString() {
        return new StringBuilder("Counter(").append(value).append(")").toString();
    }

}
|
@ -7,10 +7,8 @@ import java.util.Deque;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
/**
|
||||
* NOT THREAD SAFE
|
||||
*/
|
||||
final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(DollarScriptletMatcher.class);
|
||||
@ -59,65 +57,46 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
NO_STRING, G_STRING, SINGLE_QUOTE_STRING
|
||||
}
|
||||
|
||||
private static final class Counter {
|
||||
private static final class StringCharIterator implements Iterator<String> {
|
||||
|
||||
private int count = 0;
|
||||
private final String s;
|
||||
private int cur;
|
||||
|
||||
public void increment() {
|
||||
this.count++;
|
||||
}
|
||||
|
||||
public void decrement() {
|
||||
this.count--;
|
||||
}
|
||||
|
||||
public boolean isZero() {
|
||||
return this.count == 0;
|
||||
public StringCharIterator(String s) {
|
||||
this.s = s;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Counter(" + this.count + ")";
|
||||
public boolean hasNext() {
|
||||
return this.cur < s.length();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private Deque<State> stateStack;
|
||||
private Deque<Counter> counterStack;
|
||||
|
||||
private Counter getCurrentCounter() {
|
||||
final var currentCounter = this.counterStack.peek();
|
||||
if (currentCounter == null) {
|
||||
throw new IllegalStateException("currentCounter is null");
|
||||
@Override
|
||||
public String next() {
|
||||
final var c = String.valueOf(s.charAt(this.cur));
|
||||
this.cur++;
|
||||
return c;
|
||||
}
|
||||
return currentCounter;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public FsmOutput apply(String s) {
|
||||
this.stateStack = new LinkedList<>();
|
||||
this.counterStack = new LinkedList<>();
|
||||
final Deque<State> stateStack = new LinkedList<>();
|
||||
final Deque<Counter> counterStack = new LinkedList<>();
|
||||
|
||||
final Supplier<Counter> currentCounterSupplier = () -> {
|
||||
final var currentCounter = counterStack.peek();
|
||||
if (currentCounter == null) {
|
||||
throw new IllegalStateException("currentCounter is null");
|
||||
}
|
||||
return currentCounter;
|
||||
};
|
||||
|
||||
stateStack.push(State.NO_STRING);
|
||||
counterStack.push(new Counter());
|
||||
|
||||
final Iterator<String> iterator = new Iterator<>() {
|
||||
|
||||
private int cur;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return this.cur < s.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
final var c = String.valueOf(s.charAt(this.cur));
|
||||
this.cur++;
|
||||
return c;
|
||||
}
|
||||
|
||||
};
|
||||
final Iterator<String> iterator = new StringCharIterator(s);
|
||||
|
||||
final var entireAcc = new StringBuilder();
|
||||
|
||||
@ -131,7 +110,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
return null;
|
||||
} else {
|
||||
entireAcc.append("{");
|
||||
this.getCurrentCounter().increment();
|
||||
currentCounterSupplier.get().increment();
|
||||
}
|
||||
|
||||
outer:
|
||||
@ -151,24 +130,24 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
|
||||
if (stateStack.peek() == State.NO_STRING) {
|
||||
switch (c0) {
|
||||
case "{" -> this.getCurrentCounter().increment();
|
||||
case "{" -> currentCounterSupplier.get().increment();
|
||||
case "}" -> {
|
||||
final var currentCounter = this.getCurrentCounter();
|
||||
final var currentCounter = currentCounterSupplier.get();
|
||||
currentCounter.decrement();
|
||||
if (currentCounter.isZero()) {
|
||||
if (this.counterStack.size() == 1) {
|
||||
if (counterStack.size() == 1) {
|
||||
logger.debug("last Counter is zero; breaking while loop");
|
||||
break outer;
|
||||
} else {
|
||||
logger.debug("counterStack.size() is greater than 1 and top Counter is zero; " +
|
||||
"popping state and counter stacks.");
|
||||
this.stateStack.pop();
|
||||
this.counterStack.pop();
|
||||
stateStack.pop();
|
||||
counterStack.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
case "\"" -> this.stateStack.push(State.G_STRING);
|
||||
case "'" -> this.stateStack.push(State.SINGLE_QUOTE_STRING);
|
||||
case "\"" -> stateStack.push(State.G_STRING);
|
||||
case "'" -> stateStack.push(State.SINGLE_QUOTE_STRING);
|
||||
}
|
||||
} else if (stateStack.peek() == State.G_STRING) {
|
||||
switch (c0) {
|
||||
@ -185,9 +164,9 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
final var c1 = iterator.next();
|
||||
entireAcc.append(c1);
|
||||
if (c1.equals("{")) {
|
||||
this.stateStack.push(State.NO_STRING);
|
||||
this.counterStack.push(new Counter());
|
||||
this.getCurrentCounter().increment();
|
||||
stateStack.push(State.NO_STRING);
|
||||
counterStack.push(new Counter());
|
||||
currentCounterSupplier.get().increment();
|
||||
}
|
||||
} else {
|
||||
throw new IllegalArgumentException("Ill-formed dollarScriptlet (ends with a dollar)");
|
||||
@ -195,7 +174,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
}
|
||||
case "\"" -> {
|
||||
logger.debug("popping G_STRING state");
|
||||
this.stateStack.pop();
|
||||
stateStack.pop();
|
||||
}
|
||||
}
|
||||
} else if (stateStack.peek() == State.SINGLE_QUOTE_STRING) {
|
||||
@ -209,7 +188,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
}
|
||||
case "'" -> {
|
||||
logger.debug("popping SINGLE_QUOTE_STRING state");
|
||||
this.stateStack.pop();
|
||||
stateStack.pop();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -0,0 +1,109 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
import com.jessebrault.fsm.stackfunction.StackFunctionFsm;
|
||||
import com.jessebrault.fsm.stackfunction.StackFunctionFsmBuilder;
|
||||
import com.jessebrault.fsm.stackfunction.StackFunctionFsmBuilderImpl;
|
||||
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
final class GStringMatcher implements Function<String, FsmOutput> {
|
||||
|
||||
private static final class GStringMatcherOutput implements FsmOutput {
|
||||
|
||||
private final String entire;
|
||||
private final String contents;
|
||||
|
||||
public GStringMatcherOutput(String entire, String contents) {
|
||||
this.entire = entire;
|
||||
this.contents = contents;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String entire() {
|
||||
return this.entire;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String part(int index) {
|
||||
return switch(index) {
|
||||
case 1, 3 -> "\"";
|
||||
case 2 -> this.contents;
|
||||
default -> throw new IllegalArgumentException();
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static final PatternMatcher text = new PatternMatcher(
|
||||
Pattern.compile("^(?:[\\w\\W&&[^$\\\\\"\\n\\r]]|\\\\[\"nrbfst\\\\u]|\\$(?!\\{|[\\w$]+(?:\\.[\\w$]+)*))+")
|
||||
);
|
||||
private static final DollarScriptletMatcher dollarScriptlet = new DollarScriptletMatcher();
|
||||
private static final PatternMatcher doubleQuote = new PatternMatcher(
|
||||
Pattern.compile("^\"")
|
||||
);
|
||||
|
||||
private enum State {
|
||||
START, CONTENTS, DONE
|
||||
}
|
||||
|
||||
private static StackFunctionFsmBuilder<String, State, FsmOutput> getFsmBuilder() {
|
||||
return new StackFunctionFsmBuilderImpl<>();
|
||||
}
|
||||
|
||||
private static StackFunctionFsm<String, State, FsmOutput> getFsm(StringBuilder acc) {
|
||||
return getFsmBuilder()
|
||||
.setInitialState(State.START)
|
||||
.whileIn(State.START, sc -> {
|
||||
sc.on(doubleQuote).shiftTo(State.CONTENTS).exec(o -> {
|
||||
acc.append(o.entire());
|
||||
});
|
||||
sc.onNoMatch().exec(input -> {
|
||||
throw new IllegalArgumentException();
|
||||
});
|
||||
})
|
||||
.whileIn(State.CONTENTS, sc -> {
|
||||
sc.on(text).exec(o -> {
|
||||
acc.append(o.entire());
|
||||
});
|
||||
sc.on(dollarScriptlet).exec(o -> {
|
||||
acc.append(o.entire());
|
||||
});
|
||||
sc.on(doubleQuote).shiftTo(State.DONE).exec(o -> {
|
||||
acc.append(o.entire());
|
||||
});
|
||||
sc.onNoMatch().exec(input -> {
|
||||
throw new IllegalArgumentException();
|
||||
});
|
||||
})
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public FsmOutput apply(final String s) {
|
||||
final var acc = new StringBuilder();
|
||||
final var fsm = getFsm(acc);
|
||||
|
||||
String remaining = s;
|
||||
|
||||
// Look-ahead
|
||||
if (!remaining.startsWith("\"")) {
|
||||
return null;
|
||||
}
|
||||
|
||||
while (remaining.length() > 0) {
|
||||
final var output = fsm.apply(remaining);
|
||||
if (output == null) {
|
||||
throw new IllegalStateException("output is null");
|
||||
}
|
||||
if (fsm.getCurrentState() == State.DONE) {
|
||||
break;
|
||||
}
|
||||
remaining = remaining.substring(output.entire().length());
|
||||
}
|
||||
|
||||
final var entire = acc.toString();
|
||||
return new GStringMatcherOutput(entire, entire.substring(1, entire.length() - 1));
|
||||
}
|
||||
|
||||
}
|
@ -1,23 +0,0 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
import java.util.regex.MatchResult;
|
||||
|
||||
public class MatchResultFsmOutput implements FsmOutput {
|
||||
|
||||
private final MatchResult matchResult;
|
||||
|
||||
public MatchResultFsmOutput(MatchResult matchResult) {
|
||||
this.matchResult = matchResult;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String entire() {
|
||||
return this.matchResult.group();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String part(int index) {
|
||||
return this.matchResult.group(index);
|
||||
}
|
||||
|
||||
}
|
@ -1,10 +1,31 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
final class PatternMatcher implements Function<String, FsmOutput> {
|
||||
|
||||
private static final class MatchResultFsmOutput implements FsmOutput {
|
||||
|
||||
private final MatchResult matchResult;
|
||||
|
||||
public MatchResultFsmOutput(MatchResult matchResult) {
|
||||
this.matchResult = matchResult;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String entire() {
|
||||
return this.matchResult.group();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String part(int index) {
|
||||
return this.matchResult.group(index);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private final Pattern pattern;
|
||||
|
||||
public PatternMatcher(Pattern pattern) {
|
||||
|
@ -57,7 +57,7 @@ final class TokenizerFsm {
|
||||
/**
|
||||
* Whitespace
|
||||
*/
|
||||
private static final PatternMatcher whitespace = new PatternMatcher(Pattern.compile("^[\\s&&[^\n\r]]+"));
|
||||
private static final PatternMatcher whitespace = new PatternMatcher(Pattern.compile("^\\s+"));
|
||||
|
||||
/**
|
||||
* Keys and values
|
||||
@ -66,6 +66,10 @@ final class TokenizerFsm {
|
||||
Pattern.compile("^[\\p{L}0-9_$]+")
|
||||
);
|
||||
private static final PatternMatcher equals = new PatternMatcher(Pattern.compile("^="));
|
||||
private static final PatternMatcher singleQuoteString = new PatternMatcher(
|
||||
Pattern.compile("^(')((?:[\\w\\W&&[^\\\\'\\n\\r]]|\\\\['nrbfst\\\\u])*)(')")
|
||||
);
|
||||
private static final GStringMatcher gString = new GStringMatcher();
|
||||
|
||||
/**
|
||||
* Component ends
|
||||
@ -147,8 +151,16 @@ final class TokenizerFsm {
|
||||
sc.on(equals).exec(o -> {
|
||||
acc.accumulate(EQUALS, o.entire());
|
||||
});
|
||||
// sc.on(gString)
|
||||
// sc.on(singleQuoteString)
|
||||
sc.on(gString).exec(o -> {
|
||||
acc.accumulate(DOUBLE_QUOTE, o.part(1));
|
||||
acc.accumulate(STRING, o.part(2));
|
||||
acc.accumulate(DOUBLE_QUOTE, o.part(3));
|
||||
});
|
||||
sc.on(singleQuoteString).exec(o -> {
|
||||
acc.accumulate(SINGLE_QUOTE, o.part(1));
|
||||
acc.accumulate(STRING, o.part(2));
|
||||
acc.accumulate(SINGLE_QUOTE, o.part(3));
|
||||
});
|
||||
sc.on(dollarReference).exec(o -> {
|
||||
acc.accumulate(DOLLAR, o.part(1));
|
||||
acc.accumulate(GROOVY_REFERENCE, o.part(2));
|
||||
|
@ -0,0 +1,44 @@
|
||||
package com.jessebrault.gcp.tokenizer
|
||||
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals
|
||||
|
||||
/**
 * Tests for GStringMatcher: each case feeds an input to the matcher and
 * checks the entire match and its three parts.
 */
class GStringMatcherTests {

    private final GStringMatcher matcher = new GStringMatcher()

    /**
     * Applies the matcher to {@code input} and asserts that the whole match
     * equals {@code expectedEntire}, that parts 1 and 3 are double quotes,
     * and that part 2 is {@code expectedEntire} without its first and last
     * characters.
     */
    private void test(String expectedEntire, String input) {
        def result = matcher.apply(input)
        assertEquals(expectedEntire, result.entire())
        assertEquals('"', result.part(1))
        assertEquals(expectedEntire.substring(1, expectedEntire.length() - 1), result.part(2))
        assertEquals('"', result.part(3))
    }

    @Test
    void empty() {
        test('""', '""')
    }

    @Test
    void simple() {
        test('"abc"', '"abc"')
    }

    @Test
    void nestedDollarClosureWithGString() {
        test(
                '"abc ${ \'def\'.each { "$it " }.join() }"',
                '"abc ${ \'def\'.each { "$it " }.join() }"'
        )
    }

    @Test
    void nestedDollarClosureWithGStringTakesOnlyAsNeeded() {
        // Trailing text after the closing quote must not be consumed.
        test(
                '"abc ${ \'def\'.each { "$it " }.join() }"',
                '"abc ${ \'def\'.each { "$it " }.join() }" test="rest"'
        )
    }

    @Test
    void takesOnlyAsNeeded() {
        // Trailing text after the closing quote must not be consumed.
        test('"abc"', '"abc" test="def"')
    }

}
|
@ -1,12 +1,10 @@
|
||||
package com.jessebrault.gcp.tokenizer
|
||||
|
||||
import org.junit.jupiter.api.Disabled
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.slf4j.Logger
|
||||
import org.slf4j.LoggerFactory
|
||||
|
||||
import static com.jessebrault.gcp.tokenizer.Token.Type.*
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue
|
||||
|
||||
@ -107,8 +105,7 @@ class TokenizerTests {
|
||||
}
|
||||
|
||||
@Test
|
||||
@Disabled
|
||||
void componentWithKeysAndValues() {
|
||||
void componentWithGString() {
|
||||
test('<Test test="test" />') {
|
||||
expect COMPONENT_START, '<', 1, 1
|
||||
expect CLASS_NAME, 'Test', 1, 2
|
||||
@ -124,6 +121,23 @@ class TokenizerTests {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
void componentWithGStringWithNestedGString() {
    // The GString value contains a scriptlet that itself contains a
    // double-quoted string; the whole value is expected as one STRING token.
    test('<Test test="abc ${ \'abc\'.collect { "it " }.join() }" />') {
        expect(COMPONENT_START, '<', 1, 1)
        expect(CLASS_NAME, 'Test', 1, 2)
        expect(WHITESPACE, ' ', 1, 6)
        expect(KEY, 'test', 1, 7)
        expect(EQUALS, '=', 1, 11)
        expect(DOUBLE_QUOTE, '"', 1, 12)
        expect(STRING, 'abc ${ \'abc\'.collect { "it " }.join() }', 1, 13)
        expect(DOUBLE_QUOTE, '"', 1, 52)
        expect(WHITESPACE, ' ', 1, 53)
        expect(FORWARD_SLASH, '/', 1, 54)
        expect(COMPONENT_END, '>', 1, 55)
    }
}
|
||||
|
||||
@Test
|
||||
void newlinesCounted() {
|
||||
test('Hello,\n$person!') {
|
||||
@ -134,4 +148,49 @@ class TokenizerTests {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
void componentWithSingleQuoteString() {
    // A single-quoted value is expected as SINGLE_QUOTE, STRING, SINGLE_QUOTE.
    test("<Test test='Hello, World!' />") {
        expect(COMPONENT_START, '<', 1, 1)
        expect(CLASS_NAME, 'Test', 1, 2)
        expect(WHITESPACE, ' ', 1, 6)
        expect(KEY, 'test', 1, 7)
        expect(EQUALS, '=', 1, 11)
        expect(SINGLE_QUOTE, "'", 1, 12)
        expect(STRING, 'Hello, World!', 1, 13)
        expect(SINGLE_QUOTE, "'", 1, 26)
        expect(WHITESPACE, ' ', 1, 27)
        expect(FORWARD_SLASH, '/', 1, 28)
        expect(COMPONENT_END, '>', 1, 29)
    }
}
|
||||
|
||||
@Test
void componentWithFullyQualifiedName() {
    // Each package segment and each dot is expected as its own token,
    // followed by the class name.
    test('<com.jessebrault.gcp.Test />') {
        expect(COMPONENT_START, '<', 1, 1)
        expect(PACKAGE_NAME, 'com', 1, 2)
        expect(DOT, '.', 1, 5)
        expect(PACKAGE_NAME, 'jessebrault', 1, 6)
        expect(DOT, '.', 1, 17)
        expect(PACKAGE_NAME, 'gcp', 1, 18)
        expect(DOT, '.', 1, 21)
        expect(CLASS_NAME, 'Test', 1, 22)
        expect(WHITESPACE, ' ', 1, 26)
        expect(FORWARD_SLASH, '/', 1, 27)
        expect(COMPONENT_END, '>', 1, 28)
    }
}
|
||||
|
||||
@Test
void componentWithNewlineWhitespace() {
    // A newline inside a component is expected as WHITESPACE, and the
    // tokens after it are expected on line 2.
    test('<Test\n/>') {
        expect(COMPONENT_START, '<', 1, 1)
        expect(CLASS_NAME, 'Test', 1, 2)
        expect(WHITESPACE, '\n', 1, 6)
        expect(FORWARD_SLASH, '/', 2, 1)
        expect(COMPONENT_END, '>', 2, 2)
    }
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user