Adding everything before the big move to a new project.
This commit is contained in:
parent
62209dbc31
commit
610eac927b
@ -0,0 +1,49 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
 * A single token: a typed piece of text together with the position
 * information recorded for it.
 */
public interface Token {

    /**
     * Every kind of token that can be produced.
     */
    enum Type {
        TEXT,

        DOLLAR,
        GROOVY_REFERENCE,
        CURLY_OPEN,
        SCRIPTLET,
        CURLY_CLOSE,
        BLOCK_SCRIPTLET_OPEN,
        EXPRESSION_SCRIPTLET_OPEN,
        SCRIPTLET_CLOSE,

        CLASS_NAME,
        PACKAGE_NAME,
        DOT,

        WHITESPACE,

        KEY,
        EQUALS,

        DOUBLE_QUOTE,
        STRING,
        SINGLE_QUOTE,

        COMPONENT_START,
        FORWARD_SLASH,
        COMPONENT_END,
        ;

        /**
         * Tests membership of this type in a collection of types.
         *
         * @param types the candidate types
         * @return {@code true} if {@code types} contains this type
         */
        boolean isAnyOf(Collection<Type> types) {
            return types.contains(this);
        }

    }

    /**
     * @return the kind of this token
     */
    Type getType();

    /**
     * @return the text this token carries
     */
    CharSequence getText();

    /**
     * @return the input index recorded for this token
     */
    int getInputIndex();

    /**
     * @return the line recorded for this token
     */
    int getLine();

    /**
     * @return the column recorded for this token
     */
    int getCol();

}
|
@ -0,0 +1,28 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.Queue;
|
||||
|
||||
public interface Tokenizer {
|
||||
|
||||
enum State {
|
||||
TEXT,
|
||||
COMPONENT_NAME,
|
||||
COMPONENT_KEYS_AND_VALUES
|
||||
}
|
||||
|
||||
void start(CharSequence input, int startOffset, int endOffset, State initialState);
|
||||
boolean hasNext();
|
||||
Token next();
|
||||
State getCurrentState();
|
||||
|
||||
default Queue<Token> tokenizeAll(CharSequence input, State initialState) {
|
||||
this.start(input, 0, input.length(), initialState);
|
||||
final Queue<Token> tokens = new LinkedList<>();
|
||||
while (this.hasNext()) {
|
||||
tokens.add(this.next());
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
}
|
@ -1,6 +1,5 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.Queue;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@ -8,12 +7,18 @@ final class Accumulator {
|
||||
|
||||
private static final Pattern newline = Pattern.compile("([\n\r])");
|
||||
|
||||
private final Queue<Token> tokens = new LinkedList<>();
|
||||
private final Queue<Token> tokens;
|
||||
private int inputIndex = 0;
|
||||
private int line = 1;
|
||||
private int col = 1;
|
||||
|
||||
public void accumulate(Token.Type type, String text) {
|
||||
this.tokens.add(new Token(type, text, this.line, this.col));
|
||||
public Accumulator(Queue<Token> tokenQueue) {
|
||||
this.tokens = tokenQueue;
|
||||
}
|
||||
|
||||
public void accumulate(Token.Type type, CharSequence text) {
|
||||
this.tokens.add(new TokenImpl(type, text, this.inputIndex, this.line, this.col));
|
||||
this.inputIndex += text.length();
|
||||
final var m = newline.matcher(text);
|
||||
if (m.find()) {
|
||||
this.line += m.groupCount();
|
||||
@ -23,8 +28,4 @@ final class Accumulator {
|
||||
}
|
||||
}
|
||||
|
||||
public Queue<Token> getTokens() {
|
||||
return this.tokens;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -6,47 +6,37 @@ import org.slf4j.LoggerFactory;
|
||||
import java.util.Deque;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
final class DollarScriptletMatcher implements FsmFunction {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(DollarScriptletMatcher.class);
|
||||
|
||||
private static final class DollarScriptletMatcherOutput implements FsmOutput {
|
||||
|
||||
private final String entire;
|
||||
private final String dollar;
|
||||
private final String openingCurly;
|
||||
private final CharSequence entire;
|
||||
private final String scriptlet;
|
||||
private final String closingCurly;
|
||||
|
||||
public DollarScriptletMatcherOutput(
|
||||
String entire,
|
||||
String dollar,
|
||||
String openingCurly,
|
||||
String scriptlet,
|
||||
String closingCurly
|
||||
String scriptlet
|
||||
) {
|
||||
this.entire = entire;
|
||||
this.dollar = dollar;
|
||||
this.openingCurly = openingCurly;
|
||||
this.scriptlet = scriptlet;
|
||||
this.closingCurly = closingCurly;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String entire() {
|
||||
public CharSequence entire() {
|
||||
return this.entire;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String part(int index) {
|
||||
public CharSequence part(int index) {
|
||||
return switch (index) {
|
||||
case 1 -> this.dollar;
|
||||
case 2 -> this.openingCurly;
|
||||
case 1 -> "$";
|
||||
case 2 -> "{";
|
||||
case 3 -> this.scriptlet;
|
||||
case 4 -> this.closingCurly;
|
||||
case 4 -> "}";
|
||||
default -> throw new IllegalArgumentException();
|
||||
};
|
||||
}
|
||||
@ -57,23 +47,23 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
NO_STRING, G_STRING, SINGLE_QUOTE_STRING
|
||||
}
|
||||
|
||||
private static final class StringCharIterator implements Iterator<String> {
|
||||
private static final class CharSequenceIterator implements Iterator<String> {
|
||||
|
||||
private final String s;
|
||||
private final CharSequence input;
|
||||
private int cur;
|
||||
|
||||
public StringCharIterator(String s) {
|
||||
this.s = s;
|
||||
public CharSequenceIterator(CharSequence input) {
|
||||
this.input = input;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return this.cur < s.length();
|
||||
return this.cur < input.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
final var c = String.valueOf(s.charAt(this.cur));
|
||||
final var c = String.valueOf(input.charAt(this.cur));
|
||||
this.cur++;
|
||||
return c;
|
||||
}
|
||||
@ -81,7 +71,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public FsmOutput apply(String s) {
|
||||
public FsmOutput apply(CharSequence s) {
|
||||
final Deque<State> stateStack = new LinkedList<>();
|
||||
final Deque<Counter> counterStack = new LinkedList<>();
|
||||
|
||||
@ -96,7 +86,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
stateStack.push(State.NO_STRING);
|
||||
counterStack.push(new Counter());
|
||||
|
||||
final Iterator<String> iterator = new StringCharIterator(s);
|
||||
final Iterator<String> iterator = new CharSequenceIterator(s);
|
||||
|
||||
final var entireAcc = new StringBuilder();
|
||||
|
||||
@ -156,7 +146,9 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
final var c1 = iterator.next();
|
||||
entireAcc.append(c1);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Ill-formed dollarScriptlet (backslash followed by nothing)");
|
||||
throw new IllegalArgumentException(
|
||||
"Ill-formed dollarScriptlet (backslash followed by nothing)"
|
||||
);
|
||||
}
|
||||
}
|
||||
case "$" -> {
|
||||
@ -183,7 +175,9 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
if (iterator.hasNext()) {
|
||||
entireAcc.append(iterator.next());
|
||||
} else {
|
||||
throw new IllegalArgumentException("Ill-formed dollarScriptlet (backslash followed by nothing)");
|
||||
throw new IllegalArgumentException(
|
||||
"Ill-formed dollarScriptlet (backslash followed by nothing)"
|
||||
);
|
||||
}
|
||||
}
|
||||
case "'" -> {
|
||||
@ -192,7 +186,9 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new IllegalStateException("stateStack contains something which does not equal a state or is null");
|
||||
throw new IllegalStateException(
|
||||
"stateStack contains something which does not equal a state or is null"
|
||||
);
|
||||
}
|
||||
|
||||
logger.debug("entireAcc: {}", entireAcc);
|
||||
@ -202,10 +198,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
|
||||
|
||||
return new DollarScriptletMatcherOutput(
|
||||
entireAcc.toString(),
|
||||
"$",
|
||||
"{",
|
||||
entireAcc.substring(2, entireAcc.length() - 1),
|
||||
"}"
|
||||
entireAcc.substring(2, entireAcc.length() - 1)
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,5 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
 * Shorthand for a function from a {@link CharSequence} to an
 * {@link FsmOutput}, so that matchers can be declared and passed around under
 * one common type.
 *
 * <p>Declared as a functional interface so the compiler rejects any accidental
 * addition of a second abstract method.
 */
@FunctionalInterface
interface FsmFunction extends Function<CharSequence, FsmOutput> {}
|
@ -1,6 +1,6 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
interface FsmOutput {
|
||||
String entire();
|
||||
String part(int index);
|
||||
CharSequence entire();
|
||||
CharSequence part(int index);
|
||||
}
|
||||
|
@ -4,15 +4,14 @@ import com.jessebrault.fsm.stackfunction.StackFunctionFsm;
|
||||
import com.jessebrault.fsm.stackfunction.StackFunctionFsmBuilder;
|
||||
import com.jessebrault.fsm.stackfunction.StackFunctionFsmBuilderImpl;
|
||||
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
final class GStringMatcher implements Function<String, FsmOutput> {
|
||||
final class GStringMatcher implements FsmFunction {
|
||||
|
||||
private static final class GStringMatcherOutput implements FsmOutput {
|
||||
|
||||
private final String entire;
|
||||
private final String contents;
|
||||
private final CharSequence entire;
|
||||
private final CharSequence contents;
|
||||
|
||||
public GStringMatcherOutput(String entire, String contents) {
|
||||
this.entire = entire;
|
||||
@ -20,12 +19,12 @@ final class GStringMatcher implements Function<String, FsmOutput> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String entire() {
|
||||
public CharSequence entire() {
|
||||
return this.entire;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String part(int index) {
|
||||
public CharSequence part(int index) {
|
||||
return switch(index) {
|
||||
case 1, 3 -> "\"";
|
||||
case 2 -> this.contents;
|
||||
@ -47,11 +46,11 @@ final class GStringMatcher implements Function<String, FsmOutput> {
|
||||
START, CONTENTS, DONE
|
||||
}
|
||||
|
||||
private static StackFunctionFsmBuilder<String, State, FsmOutput> getFsmBuilder() {
|
||||
private static StackFunctionFsmBuilder<CharSequence, State, FsmOutput> getFsmBuilder() {
|
||||
return new StackFunctionFsmBuilderImpl<>();
|
||||
}
|
||||
|
||||
private static StackFunctionFsm<String, State, FsmOutput> getFsm(StringBuilder acc) {
|
||||
private static StackFunctionFsm<CharSequence, State, FsmOutput> getFsm(StringBuilder acc) {
|
||||
return getFsmBuilder()
|
||||
.setInitialState(State.START)
|
||||
.whileIn(State.START, sc -> {
|
||||
@ -80,14 +79,14 @@ final class GStringMatcher implements Function<String, FsmOutput> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public FsmOutput apply(final String s) {
|
||||
public FsmOutput apply(final CharSequence s) {
|
||||
final var acc = new StringBuilder();
|
||||
final var fsm = getFsm(acc);
|
||||
|
||||
String remaining = s;
|
||||
CharSequence remaining = s;
|
||||
|
||||
// Look-ahead
|
||||
if (!remaining.startsWith("\"")) {
|
||||
if (!String.valueOf(remaining.charAt(0)).equals("\"")) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -99,7 +98,7 @@ final class GStringMatcher implements Function<String, FsmOutput> {
|
||||
if (fsm.getCurrentState() == State.DONE) {
|
||||
break;
|
||||
}
|
||||
remaining = remaining.substring(output.entire().length());
|
||||
remaining = remaining.subSequence(output.entire().length(), remaining.length());
|
||||
}
|
||||
|
||||
final var entire = acc.toString();
|
||||
|
@ -1,10 +1,9 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
final class PatternMatcher implements Function<String, FsmOutput> {
|
||||
final class PatternMatcher implements FsmFunction {
|
||||
|
||||
private static final class MatchResultFsmOutput implements FsmOutput {
|
||||
|
||||
@ -15,15 +14,20 @@ final class PatternMatcher implements Function<String, FsmOutput> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String entire() {
|
||||
return this.matchResult.group();
|
||||
public CharSequence entire() {
|
||||
return this.matchResult.group(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String part(int index) {
|
||||
public CharSequence part(int index) {
|
||||
return this.matchResult.group(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "MatchResultFsmOutput(" + this.entire() + ")";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private final Pattern pattern;
|
||||
@ -33,7 +37,7 @@ final class PatternMatcher implements Function<String, FsmOutput> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public FsmOutput apply(String s) {
|
||||
public FsmOutput apply(CharSequence s) {
|
||||
final var m = this.pattern.matcher(s);
|
||||
return m.find() ? new MatchResultFsmOutput(m) : null;
|
||||
}
|
||||
|
@ -1,76 +0,0 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
 * An immutable token: a type, its text, and the line and column it was
 * recorded at.
 */
public final class Token {

    /**
     * Every kind of token that can be produced.
     */
    public enum Type {
        TEXT,

        DOLLAR,
        GROOVY_REFERENCE,
        CURLY_OPEN,
        SCRIPTLET,
        CURLY_CLOSE,
        BLOCK_SCRIPTLET_OPEN,
        EXPRESSION_SCRIPTLET_OPEN,
        SCRIPTLET_CLOSE,

        CLASS_NAME,
        PACKAGE_NAME,
        DOT,

        WHITESPACE,

        KEY,
        EQUALS,

        DOUBLE_QUOTE,
        STRING,
        SINGLE_QUOTE,

        COMPONENT_START,
        FORWARD_SLASH,
        COMPONENT_END,
        ;

        /**
         * Tests membership of this type in a collection of types.
         *
         * @param types the candidate types
         * @return {@code true} if {@code types} contains this type
         */
        boolean isAnyOf(Collection<Type> types) {
            return types.contains(this);
        }

    }

    private final Type type;
    private final String text;
    private final int line;
    private final int col;

    /**
     * @param type the kind of token
     * @param text the text the token carries
     * @param line the line to record for the token
     * @param col  the column to record for the token
     */
    public Token(Type type, String text, int line, int col) {
        this.type = type;
        this.text = text;
        this.line = line;
        this.col = col;
    }

    /**
     * @return the kind of this token
     */
    public Type getType() {
        return this.type;
    }

    /**
     * @return the text this token carries
     */
    public String getText() {
        return this.text;
    }

    /**
     * @return the line recorded for this token
     */
    public int getLine() {
        return this.line;
    }

    /**
     * @return the column recorded for this token
     */
    public int getCol() {
        return this.col;
    }

    /**
     * @return a debug rendering of the form {@code Token(type, text, line, col)}
     */
    @Override
    public String toString() {
        final Object[] parts = { this.type, this.text, this.line, this.col };
        return String.format("Token(%s, %s, %d, %d)", parts);
    }

}
|
@ -0,0 +1,49 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
public final class TokenImpl implements Token {
|
||||
|
||||
private final Type type;
|
||||
private final CharSequence text;
|
||||
private final int inputIndex;
|
||||
private final int line;
|
||||
private final int col;
|
||||
|
||||
public TokenImpl(Type type, CharSequence text, int inputIndex, int line, int col) {
|
||||
this.type = type;
|
||||
this.text = text;
|
||||
this.inputIndex = inputIndex;
|
||||
this.line = line;
|
||||
this.col = col;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Type getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharSequence getText() {
|
||||
return text;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getInputIndex() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getLine() {
|
||||
return line;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCol() {
|
||||
return col;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Token(%s, %s, %d, %d, %d)", this.type, this.text, this.inputIndex, this.line, this.col);
|
||||
}
|
||||
|
||||
}
|
@ -1,23 +0,0 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
import java.util.Queue;
|
||||
|
||||
public final class Tokenizer {
|
||||
|
||||
public static Queue<Token> tokenize(final String gcpSrc) {
|
||||
final var acc = new Accumulator();
|
||||
final var fsm = TokenizerFsm.get(acc);
|
||||
|
||||
String remaining = gcpSrc;
|
||||
while (remaining.length() > 0) {
|
||||
final var o = fsm.apply(remaining);
|
||||
if (o == null) {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
remaining = remaining.substring(o.entire().length());
|
||||
}
|
||||
|
||||
return acc.getTokens();
|
||||
}
|
||||
|
||||
}
|
@ -13,78 +13,78 @@ final class TokenizerFsm {
|
||||
/**
|
||||
* Text
|
||||
*/
|
||||
private static final PatternMatcher text = new PatternMatcher(
|
||||
private static final FsmFunction text = new PatternMatcher(
|
||||
Pattern.compile("^(?:[\\w\\W&&[^<$]]|<(?!%|/?\\p{Lu}|/?[\\p{L}0-9_$]+(?:\\.[\\p{L}0-9_$]+)+)|\\$(?![\\w$]+(?:\\.[\\w$]+)*))+")
|
||||
);
|
||||
|
||||
/**
|
||||
* Gsp dollar reference and scriptlets, also used as component values
|
||||
*/
|
||||
private static final PatternMatcher dollarReference = new PatternMatcher(
|
||||
private static final FsmFunction dollarReference = new PatternMatcher(
|
||||
Pattern.compile("^(\\$)([\\w$]+(?:\\.[\\w$]+)*)")
|
||||
);
|
||||
private static final DollarScriptletMatcher dollarScriptlet = new DollarScriptletMatcher();
|
||||
private static final PatternMatcher blockScriptlet = new PatternMatcher(
|
||||
private static final FsmFunction dollarScriptlet = new DollarScriptletMatcher();
|
||||
private static final FsmFunction blockScriptlet = new PatternMatcher(
|
||||
Pattern.compile("^(<%)(.*?)(%>)")
|
||||
);
|
||||
private static final PatternMatcher expressionScriptlet = new PatternMatcher(
|
||||
private static final FsmFunction expressionScriptlet = new PatternMatcher(
|
||||
Pattern.compile("^(<%=)(.*?)(%>)")
|
||||
);
|
||||
|
||||
/**
|
||||
* Component starts
|
||||
*/
|
||||
private static final PatternMatcher openingComponentStart = new PatternMatcher(
|
||||
private static final FsmFunction openingComponentStart = new PatternMatcher(
|
||||
Pattern.compile("^<(?=\\p{Lu}|[\\p{L}0-9_$]+(?:\\.[\\p{L}0-9_$]+)+)")
|
||||
);
|
||||
private static final PatternMatcher closingComponentStart = new PatternMatcher(
|
||||
private static final FsmFunction closingComponentStart = new PatternMatcher(
|
||||
Pattern.compile("^(<)(/)(?=\\p{Lu}|[\\p{L}0-9_$]+(?:\\.[\\p{L}0-9_$]+)+)")
|
||||
);
|
||||
|
||||
/**
|
||||
* Component names
|
||||
*/
|
||||
private static final PatternMatcher className = new PatternMatcher(
|
||||
private static final FsmFunction className = new PatternMatcher(
|
||||
Pattern.compile("^\\p{Lu}[\\p{L}0-9_$]*")
|
||||
);
|
||||
private static final PatternMatcher packageName = new PatternMatcher(
|
||||
private static final FsmFunction packageName = new PatternMatcher(
|
||||
Pattern.compile("^[\\p{L}0-9_$]+(?=\\.)")
|
||||
);
|
||||
private static final PatternMatcher dot = new PatternMatcher(
|
||||
private static final FsmFunction dot = new PatternMatcher(
|
||||
Pattern.compile("^\\.")
|
||||
);
|
||||
|
||||
/**
|
||||
* Whitespace
|
||||
*/
|
||||
private static final PatternMatcher whitespace = new PatternMatcher(Pattern.compile("^\\s+"));
|
||||
private static final FsmFunction whitespace = new PatternMatcher(Pattern.compile("^\\s+"));
|
||||
|
||||
/**
|
||||
* Keys and values
|
||||
*/
|
||||
private static final PatternMatcher key = new PatternMatcher(
|
||||
private static final FsmFunction key = new PatternMatcher(
|
||||
Pattern.compile("^[\\p{L}0-9_$]+")
|
||||
);
|
||||
private static final PatternMatcher equals = new PatternMatcher(Pattern.compile("^="));
|
||||
private static final PatternMatcher singleQuoteString = new PatternMatcher(
|
||||
private static final FsmFunction equals = new PatternMatcher(Pattern.compile("^="));
|
||||
private static final FsmFunction singleQuoteString = new PatternMatcher(
|
||||
Pattern.compile("^(')((?:[\\w\\W&&[^\\\\'\\n\\r]]|\\\\['nrbfst\\\\u])*)(')")
|
||||
);
|
||||
private static final GStringMatcher gString = new GStringMatcher();
|
||||
private static final FsmFunction gString = new GStringMatcher();
|
||||
|
||||
/**
|
||||
* Component ends
|
||||
*/
|
||||
private static final PatternMatcher forwardSlash = new PatternMatcher(Pattern.compile("^/"));
|
||||
private static final PatternMatcher componentEnd = new PatternMatcher(Pattern.compile("^>"));
|
||||
private static final FsmFunction forwardSlash = new PatternMatcher(Pattern.compile("^/"));
|
||||
private static final FsmFunction componentEnd = new PatternMatcher(Pattern.compile("^>"));
|
||||
|
||||
private static FunctionFsmBuilder<String, TokenizerState, FsmOutput> getFsmBuilder() {
|
||||
private static FunctionFsmBuilder<CharSequence, Tokenizer.State, FsmOutput> getFsmBuilder() {
|
||||
return new FunctionFsmBuilderImpl<>();
|
||||
}
|
||||
|
||||
public static FunctionFsm<String, TokenizerState, FsmOutput> get(Accumulator acc) {
|
||||
public static FunctionFsm<CharSequence, Tokenizer.State, FsmOutput> get(Accumulator acc, Tokenizer.State state) {
|
||||
return getFsmBuilder()
|
||||
.setInitialState(TokenizerState.NORMAL)
|
||||
.whileIn(TokenizerState.NORMAL, sc -> {
|
||||
.setInitialState(state)
|
||||
.whileIn(Tokenizer.State.TEXT, sc -> {
|
||||
sc.on(text).exec(o -> {
|
||||
acc.accumulate(TEXT, o.entire());
|
||||
});
|
||||
@ -108,16 +108,16 @@ final class TokenizerFsm {
|
||||
acc.accumulate(SCRIPTLET, o.part(2));
|
||||
acc.accumulate(SCRIPTLET_CLOSE, o.part(3));
|
||||
});
|
||||
sc.on(openingComponentStart).shiftTo(TokenizerState.COMPONENT_NAME).exec(o ->
|
||||
sc.on(openingComponentStart).shiftTo(Tokenizer.State.COMPONENT_NAME).exec(o ->
|
||||
acc.accumulate(COMPONENT_START, o.entire())
|
||||
);
|
||||
sc.on(closingComponentStart).shiftTo(TokenizerState.COMPONENT_NAME).exec(o -> {
|
||||
sc.on(closingComponentStart).shiftTo(Tokenizer.State.COMPONENT_NAME).exec(o -> {
|
||||
acc.accumulate(COMPONENT_START, o.part(1));
|
||||
acc.accumulate(FORWARD_SLASH, o.part(2));
|
||||
});
|
||||
sc.onNoMatch().exec(input -> { throw new IllegalArgumentException(); });
|
||||
})
|
||||
.whileIn(TokenizerState.COMPONENT_NAME, sc -> {
|
||||
.whileIn(Tokenizer.State.COMPONENT_NAME, sc -> {
|
||||
sc.on(packageName).exec(o -> {
|
||||
acc.accumulate(PACKAGE_NAME, o.entire());
|
||||
});
|
||||
@ -130,16 +130,16 @@ final class TokenizerFsm {
|
||||
sc.on(forwardSlash).exec(o -> {
|
||||
acc.accumulate(FORWARD_SLASH, o.entire());
|
||||
});
|
||||
sc.on(componentEnd).shiftTo(TokenizerState.NORMAL).exec(o -> {
|
||||
sc.on(componentEnd).shiftTo(Tokenizer.State.TEXT).exec(o -> {
|
||||
acc.accumulate(COMPONENT_END, o.entire());
|
||||
});
|
||||
sc.on(whitespace).shiftTo(TokenizerState.COMPONENT_KEYS_AND_VALUES).exec(o -> {
|
||||
sc.on(whitespace).shiftTo(Tokenizer.State.COMPONENT_KEYS_AND_VALUES).exec(o -> {
|
||||
acc.accumulate(WHITESPACE, o.entire());
|
||||
});
|
||||
sc.onNoMatch().exec(input -> { throw new IllegalArgumentException(); });
|
||||
})
|
||||
.whileIn(TokenizerState.COMPONENT_KEYS_AND_VALUES, sc -> {
|
||||
sc.on(componentEnd).shiftTo(TokenizerState.NORMAL).exec(o -> {
|
||||
.whileIn(Tokenizer.State.COMPONENT_KEYS_AND_VALUES, sc -> {
|
||||
sc.on(componentEnd).shiftTo(Tokenizer.State.TEXT).exec(o -> {
|
||||
acc.accumulate(COMPONENT_END, o.entire());
|
||||
});
|
||||
sc.on(whitespace).exec(o -> {
|
||||
@ -184,7 +184,7 @@ final class TokenizerFsm {
|
||||
sc.on(forwardSlash).exec(o -> {
|
||||
acc.accumulate(FORWARD_SLASH, o.entire());
|
||||
});
|
||||
sc.on(componentEnd).shiftTo(TokenizerState.NORMAL).exec(o -> {
|
||||
sc.on(componentEnd).shiftTo(Tokenizer.State.TEXT).exec(o -> {
|
||||
acc.accumulate(COMPONENT_END, o.entire());
|
||||
});
|
||||
sc.onNoMatch().exec(input -> { throw new IllegalArgumentException(); });
|
||||
@ -192,6 +192,4 @@ final class TokenizerFsm {
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,62 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
import com.jessebrault.fsm.function.FunctionFsm;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.Queue;
|
||||
|
||||
public final class TokenizerImpl implements Tokenizer {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(TokenizerImpl.class);
|
||||
|
||||
private CharSequence input;
|
||||
private int currentOffset;
|
||||
private int endOffset;
|
||||
|
||||
private Queue<Token> tokens;
|
||||
private FunctionFsm<CharSequence, State, FsmOutput> fsm;
|
||||
|
||||
@Override
|
||||
public void start(CharSequence input, int startOffset, int endOffset, State initialState) {
|
||||
this.input = input;
|
||||
this.currentOffset = startOffset;
|
||||
this.endOffset = endOffset;
|
||||
this.tokens = new LinkedList<>();
|
||||
this.fsm = TokenizerFsm.get(new Accumulator(this.tokens), initialState);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (this.tokens.isEmpty()) {
|
||||
this.getNextTokens();
|
||||
}
|
||||
return !this.tokens.isEmpty();
|
||||
}
|
||||
|
||||
private void getNextTokens() {
|
||||
if (this.currentOffset != this.endOffset) {
|
||||
final var match = this.fsm.apply(this.input.subSequence(this.currentOffset, this.endOffset));
|
||||
if (match == null) {
|
||||
logger.error("match is null!");
|
||||
} else {
|
||||
this.currentOffset += match.entire().length();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token next() {
|
||||
if (this.tokens.isEmpty()) {
|
||||
throw new IllegalStateException("currentAccumulatedTokens is empty");
|
||||
}
|
||||
return this.tokens.remove();
|
||||
}
|
||||
|
||||
@Override
|
||||
public State getCurrentState() {
|
||||
return this.fsm.getCurrentState();
|
||||
}
|
||||
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
package com.jessebrault.gcp.tokenizer;
|
||||
|
||||
/**
 * The states of the tokenizer state machine.
 */
enum TokenizerState {

    NORMAL,

    COMPONENT_NAME,

    COMPONENT_KEYS_AND_VALUES

}
|
6
gcp-impl/src/main/resources/test.gdsl
Normal file
6
gcp-impl/src/main/resources/test.gdsl
Normal file
@ -0,0 +1,6 @@
|
||||
def ctx = context(filetypes: ['gsp'])
|
||||
|
||||
contributor(ctx) {
|
||||
method name: 'foo', params: [bar: 'String'], type: 'int'
|
||||
property name: 'texts', type: 'java.util.List<String>', doc: 'Some texts.'
|
||||
}
|
1
gcp-impl/src/main/resources/test.groovy
Normal file
1
gcp-impl/src/main/resources/test.groovy
Normal file
@ -0,0 +1 @@
|
||||
|
5
gcp-impl/src/main/resources/test.gsp
Normal file
5
gcp-impl/src/main/resources/test.gsp
Normal file
@ -0,0 +1,5 @@
|
||||
def elf = foo('elf')
|
||||
|
||||
texts.each {
|
||||
|
||||
}
|
@ -66,7 +66,7 @@ class TokenizerTests {
|
||||
configure.setResolveStrategy(Closure.DELEGATE_FIRST)
|
||||
configure()
|
||||
|
||||
def r = Tokenizer.tokenize(src)
|
||||
def r = new TokenizerImpl().tokenizeAll(src, Tokenizer.State.TEXT)
|
||||
logger.debug('r: {}', r)
|
||||
logger.debug('configurator.specs: {}', configurator.specs)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user