Adding everything before the big move to a new project.

This commit is contained in:
Jesse Brault 2023-01-27 10:13:29 +01:00
parent 62209dbc31
commit 610eac927b
18 changed files with 293 additions and 199 deletions

View File

@ -0,0 +1,49 @@
package com.jessebrault.gcp.tokenizer;
import java.util.Collection;
/**
 * A single token produced by the tokenizer: its kind, its text, and where
 * that text was found.
 */
public interface Token {

    /**
     * The kinds of token that can be produced. The declaration order is part
     * of the enum's contract (ordinals) and is kept as-is.
     */
    enum Type {
        // Plain text
        TEXT,

        // Dollar references / dollar scriptlets
        DOLLAR, GROOVY_REFERENCE, CURLY_OPEN, SCRIPTLET, CURLY_CLOSE,

        // Block and expression scriptlets
        BLOCK_SCRIPTLET_OPEN, EXPRESSION_SCRIPTLET_OPEN, SCRIPTLET_CLOSE,

        // Component names
        CLASS_NAME, PACKAGE_NAME, DOT,

        WHITESPACE,

        // Keys and values
        KEY, EQUALS, DOUBLE_QUOTE, STRING, SINGLE_QUOTE,

        // Component delimiters
        COMPONENT_START, FORWARD_SLASH, COMPONENT_END;

        /**
         * Tests whether this type is a member of the given collection.
         *
         * @param types the candidate types
         * @return {@code true} if {@code types} contains this type
         */
        boolean isAnyOf(Collection<Type> types) {
            final boolean member = types.contains(this);
            return member;
        }
    }

    /** @return the kind of this token */
    Type getType();

    /** @return the text covered by this token */
    CharSequence getText();

    /** @return the index into the input associated with this token */
    int getInputIndex();

    /** @return the line associated with this token */
    int getLine();

    /** @return the column associated with this token */
    int getCol();
}

View File

@ -0,0 +1,28 @@
package com.jessebrault.gcp.tokenizer;
import java.util.LinkedList;
import java.util.Queue;
/**
 * A restartable, pull-style tokenizer: call {@link #start}, then alternate
 * {@link #hasNext()} and {@link #next()} to drain the tokens.
 */
public interface Tokenizer {

    /** The states the tokenizer can be started in and can report back. */
    enum State {
        TEXT,
        COMPONENT_NAME,
        COMPONENT_KEYS_AND_VALUES
    }

    /**
     * Begins tokenizing a region of the given input.
     *
     * @param input        the character data to tokenize
     * @param startOffset  offset in {@code input} at which to begin
     * @param endOffset    offset in {@code input} at which to stop
     * @param initialState the state to begin in
     */
    void start(CharSequence input, int startOffset, int endOffset, State initialState);

    /** @return whether another token is available from {@link #next()} */
    boolean hasNext();

    /** @return the next token */
    Token next();

    /** @return the state the tokenizer is currently in */
    State getCurrentState();

    /**
     * Convenience method: tokenizes the whole of {@code input} and returns
     * every token in the order produced.
     *
     * @param input        the character data to tokenize
     * @param initialState the state to begin in
     * @return a queue holding all tokens, first token at the head
     */
    default Queue<Token> tokenizeAll(CharSequence input, State initialState) {
        final int inputLength = input.length();
        this.start(input, 0, inputLength, initialState);
        final Queue<Token> collected = new LinkedList<>();
        for (boolean more = this.hasNext(); more; more = this.hasNext()) {
            collected.add(this.next());
        }
        return collected;
    }
}

View File

@ -1,6 +1,5 @@
package com.jessebrault.gcp.tokenizer;
import java.util.LinkedList;
import java.util.Queue;
import java.util.regex.Pattern;
@ -8,12 +7,18 @@ final class Accumulator {
private static final Pattern newline = Pattern.compile("([\n\r])");
private final Queue<Token> tokens = new LinkedList<>();
private final Queue<Token> tokens;
private int inputIndex = 0;
private int line = 1;
private int col = 1;
public void accumulate(Token.Type type, String text) {
this.tokens.add(new Token(type, text, this.line, this.col));
public Accumulator(Queue<Token> tokenQueue) {
this.tokens = tokenQueue;
}
public void accumulate(Token.Type type, CharSequence text) {
this.tokens.add(new TokenImpl(type, text, this.inputIndex, this.line, this.col));
this.inputIndex += text.length();
final var m = newline.matcher(text);
if (m.find()) {
this.line += m.groupCount();
@ -23,8 +28,4 @@ final class Accumulator {
}
}
public Queue<Token> getTokens() {
return this.tokens;
}
}

View File

@ -6,47 +6,37 @@ import org.slf4j.LoggerFactory;
import java.util.Deque;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.function.Function;
import java.util.function.Supplier;
final class DollarScriptletMatcher implements Function<String, FsmOutput> {
final class DollarScriptletMatcher implements FsmFunction {
private static final Logger logger = LoggerFactory.getLogger(DollarScriptletMatcher.class);
private static final class DollarScriptletMatcherOutput implements FsmOutput {
private final String entire;
private final String dollar;
private final String openingCurly;
private final CharSequence entire;
private final String scriptlet;
private final String closingCurly;
public DollarScriptletMatcherOutput(
String entire,
String dollar,
String openingCurly,
String scriptlet,
String closingCurly
String scriptlet
) {
this.entire = entire;
this.dollar = dollar;
this.openingCurly = openingCurly;
this.scriptlet = scriptlet;
this.closingCurly = closingCurly;
}
@Override
public String entire() {
public CharSequence entire() {
return this.entire;
}
@Override
public String part(int index) {
public CharSequence part(int index) {
return switch (index) {
case 1 -> this.dollar;
case 2 -> this.openingCurly;
case 1 -> "$";
case 2 -> "{";
case 3 -> this.scriptlet;
case 4 -> this.closingCurly;
case 4 -> "}";
default -> throw new IllegalArgumentException();
};
}
@ -57,23 +47,23 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
NO_STRING, G_STRING, SINGLE_QUOTE_STRING
}
private static final class StringCharIterator implements Iterator<String> {
private static final class CharSequenceIterator implements Iterator<String> {
private final String s;
private final CharSequence input;
private int cur;
public StringCharIterator(String s) {
this.s = s;
public CharSequenceIterator(CharSequence input) {
this.input = input;
}
@Override
public boolean hasNext() {
return this.cur < s.length();
return this.cur < input.length();
}
@Override
public String next() {
final var c = String.valueOf(s.charAt(this.cur));
final var c = String.valueOf(input.charAt(this.cur));
this.cur++;
return c;
}
@ -81,7 +71,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
}
@Override
public FsmOutput apply(String s) {
public FsmOutput apply(CharSequence s) {
final Deque<State> stateStack = new LinkedList<>();
final Deque<Counter> counterStack = new LinkedList<>();
@ -96,7 +86,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
stateStack.push(State.NO_STRING);
counterStack.push(new Counter());
final Iterator<String> iterator = new StringCharIterator(s);
final Iterator<String> iterator = new CharSequenceIterator(s);
final var entireAcc = new StringBuilder();
@ -156,7 +146,9 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
final var c1 = iterator.next();
entireAcc.append(c1);
} else {
throw new IllegalArgumentException("Ill-formed dollarScriptlet (backslash followed by nothing)");
throw new IllegalArgumentException(
"Ill-formed dollarScriptlet (backslash followed by nothing)"
);
}
}
case "$" -> {
@ -183,7 +175,9 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
if (iterator.hasNext()) {
entireAcc.append(iterator.next());
} else {
throw new IllegalArgumentException("Ill-formed dollarScriptlet (backslash followed by nothing)");
throw new IllegalArgumentException(
"Ill-formed dollarScriptlet (backslash followed by nothing)"
);
}
}
case "'" -> {
@ -192,7 +186,9 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
}
}
} else {
throw new IllegalStateException("stateStack contains something which does not equal a state or is null");
throw new IllegalStateException(
"stateStack contains something which does not equal a state or is null"
);
}
logger.debug("entireAcc: {}", entireAcc);
@ -202,10 +198,7 @@ final class DollarScriptletMatcher implements Function<String, FsmOutput> {
return new DollarScriptletMatcherOutput(
entireAcc.toString(),
"$",
"{",
entireAcc.substring(2, entireAcc.length() - 1),
"}"
entireAcc.substring(2, entireAcc.length() - 1)
);
}

View File

@ -0,0 +1,5 @@
package com.jessebrault.gcp.tokenizer;
import java.util.function.Function;
/**
 * Shared function type for the tokenizer's matchers: accepts the input to
 * match as a {@link CharSequence} and yields an {@link FsmOutput}.
 * Declares no members of its own; it only fixes the type arguments of
 * {@link Function}.
 *
 * NOTE(review): implementations in this package appear to return
 * {@code null} when nothing matches — confirm before relying on it.
 */
interface FsmFunction extends Function<CharSequence, FsmOutput> {}

View File

@ -1,6 +1,6 @@
package com.jessebrault.gcp.tokenizer;
interface FsmOutput {
String entire();
String part(int index);
CharSequence entire();
CharSequence part(int index);
}

View File

@ -4,15 +4,14 @@ import com.jessebrault.fsm.stackfunction.StackFunctionFsm;
import com.jessebrault.fsm.stackfunction.StackFunctionFsmBuilder;
import com.jessebrault.fsm.stackfunction.StackFunctionFsmBuilderImpl;
import java.util.function.Function;
import java.util.regex.Pattern;
final class GStringMatcher implements Function<String, FsmOutput> {
final class GStringMatcher implements FsmFunction {
private static final class GStringMatcherOutput implements FsmOutput {
private final String entire;
private final String contents;
private final CharSequence entire;
private final CharSequence contents;
public GStringMatcherOutput(String entire, String contents) {
this.entire = entire;
@ -20,12 +19,12 @@ final class GStringMatcher implements Function<String, FsmOutput> {
}
@Override
public String entire() {
public CharSequence entire() {
return this.entire;
}
@Override
public String part(int index) {
public CharSequence part(int index) {
return switch(index) {
case 1, 3 -> "\"";
case 2 -> this.contents;
@ -47,11 +46,11 @@ final class GStringMatcher implements Function<String, FsmOutput> {
START, CONTENTS, DONE
}
private static StackFunctionFsmBuilder<String, State, FsmOutput> getFsmBuilder() {
private static StackFunctionFsmBuilder<CharSequence, State, FsmOutput> getFsmBuilder() {
return new StackFunctionFsmBuilderImpl<>();
}
private static StackFunctionFsm<String, State, FsmOutput> getFsm(StringBuilder acc) {
private static StackFunctionFsm<CharSequence, State, FsmOutput> getFsm(StringBuilder acc) {
return getFsmBuilder()
.setInitialState(State.START)
.whileIn(State.START, sc -> {
@ -80,14 +79,14 @@ final class GStringMatcher implements Function<String, FsmOutput> {
}
@Override
public FsmOutput apply(final String s) {
public FsmOutput apply(final CharSequence s) {
final var acc = new StringBuilder();
final var fsm = getFsm(acc);
String remaining = s;
CharSequence remaining = s;
// Look-ahead
if (!remaining.startsWith("\"")) {
if (!String.valueOf(remaining.charAt(0)).equals("\"")) {
return null;
}
@ -99,7 +98,7 @@ final class GStringMatcher implements Function<String, FsmOutput> {
if (fsm.getCurrentState() == State.DONE) {
break;
}
remaining = remaining.substring(output.entire().length());
remaining = remaining.subSequence(output.entire().length(), remaining.length());
}
final var entire = acc.toString();

View File

@ -1,10 +1,9 @@
package com.jessebrault.gcp.tokenizer;
import java.util.function.Function;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
final class PatternMatcher implements Function<String, FsmOutput> {
final class PatternMatcher implements FsmFunction {
private static final class MatchResultFsmOutput implements FsmOutput {
@ -15,15 +14,20 @@ final class PatternMatcher implements Function<String, FsmOutput> {
}
@Override
public String entire() {
return this.matchResult.group();
public CharSequence entire() {
return this.matchResult.group(0);
}
@Override
public String part(int index) {
public CharSequence part(int index) {
return this.matchResult.group(index);
}
@Override
public String toString() {
return "MatchResultFsmOutput(" + this.entire() + ")";
}
}
private final Pattern pattern;
@ -33,7 +37,7 @@ final class PatternMatcher implements Function<String, FsmOutput> {
}
@Override
public FsmOutput apply(String s) {
public FsmOutput apply(CharSequence s) {
final var m = this.pattern.matcher(s);
return m.find() ? new MatchResultFsmOutput(m) : null;
}

View File

@ -1,76 +0,0 @@
package com.jessebrault.gcp.tokenizer;
import java.util.Collection;
/**
 * Immutable token value: a type, the matched text, and the line/column
 * recorded for it.
 */
public final class Token {

    /**
     * The kinds of token. Declaration order is preserved exactly, since it
     * determines the ordinals.
     */
    public enum Type {
        TEXT,

        DOLLAR, GROOVY_REFERENCE, CURLY_OPEN, SCRIPTLET, CURLY_CLOSE,

        BLOCK_SCRIPTLET_OPEN, EXPRESSION_SCRIPTLET_OPEN, SCRIPTLET_CLOSE,

        CLASS_NAME, PACKAGE_NAME, DOT,

        WHITESPACE,

        KEY, EQUALS, DOUBLE_QUOTE, STRING, SINGLE_QUOTE,

        COMPONENT_START, FORWARD_SLASH, COMPONENT_END;

        /**
         * @param types the candidate types
         * @return {@code true} if {@code types} contains this type
         */
        boolean isAnyOf(Collection<Type> types) {
            final boolean member = types.contains(this);
            return member;
        }
    }

    private static final String TO_STRING_FORMAT = "Token(%s, %s, %d, %d)";

    private final Type type;
    private final String text;
    private final int line;
    private final int col;

    /**
     * @param type the kind of token
     * @param text the token's text
     * @param line the line recorded for the token
     * @param col  the column recorded for the token
     */
    public Token(Type type, String text, int line, int col) {
        this.col = col;
        this.line = line;
        this.text = text;
        this.type = type;
    }

    /** @return the column recorded for this token */
    public int getCol() {
        return this.col;
    }

    /** @return the line recorded for this token */
    public int getLine() {
        return this.line;
    }

    /** @return the token's text */
    public String getText() {
        return this.text;
    }

    /** @return the kind of this token */
    public Type getType() {
        return this.type;
    }

    /** @return a string of the form {@code Token(type, text, line, col)} */
    @Override
    public String toString() {
        // String.format is kept (rather than concatenation) so %d formatting is unchanged.
        return String.format(TO_STRING_FORMAT, this.type, this.text, this.line, this.col);
    }
}

View File

@ -0,0 +1,49 @@
package com.jessebrault.gcp.tokenizer;
/**
 * Immutable {@link Token} implementation that simply stores the values it is
 * constructed with.
 */
public final class TokenImpl implements Token {

    private final Type type;
    private final CharSequence text;
    private final int inputIndex;
    private final int line;
    private final int col;

    /**
     * @param type       the kind of token
     * @param text       the token's text
     * @param inputIndex the index into the input recorded for the token
     * @param line       the line recorded for the token
     * @param col        the column recorded for the token
     */
    public TokenImpl(Type type, CharSequence text, int inputIndex, int line, int col) {
        this.type = type;
        this.text = text;
        this.inputIndex = inputIndex;
        this.line = line;
        this.col = col;
    }

    /** @return the kind of this token */
    @Override
    public Type getType() {
        return this.type;
    }

    /** @return the token's text */
    @Override
    public CharSequence getText() {
        return this.text;
    }

    /**
     * @return the input index given at construction (previously this
     *         accessor returned a hard-coded {@code 0}, disagreeing with
     *         {@link #toString()})
     */
    @Override
    public int getInputIndex() {
        return this.inputIndex;
    }

    /** @return the line recorded for this token */
    @Override
    public int getLine() {
        return this.line;
    }

    /** @return the column recorded for this token */
    @Override
    public int getCol() {
        return this.col;
    }

    /** @return a string of the form {@code Token(type, text, inputIndex, line, col)} */
    @Override
    public String toString() {
        return String.format(
                "Token(%s, %s, %d, %d, %d)",
                this.type, this.text, this.inputIndex, this.line, this.col
        );
    }
}

View File

@ -1,23 +0,0 @@
package com.jessebrault.gcp.tokenizer;
import java.util.Queue;
/**
 * Static entry point that tokenizes a complete source string in one call.
 */
public final class Tokenizer {

    /**
     * Repeatedly applies the tokenizer state machine to the not-yet-consumed
     * tail of {@code gcpSrc} until nothing is left, then returns the tokens
     * the accumulator collected.
     *
     * @param gcpSrc the source text to tokenize
     * @return the accumulated tokens
     * @throws IllegalStateException if the state machine yields {@code null}
     *         for some remaining input
     */
    public static Queue<Token> tokenize(final String gcpSrc) {
        final var accumulator = new Accumulator();
        final var machine = TokenizerFsm.get(accumulator);

        for (String rest = gcpSrc; !rest.isEmpty(); ) {
            final var output = machine.apply(rest);
            if (output == null) {
                throw new IllegalStateException();
            }
            // Drop exactly what was just matched.
            final int consumed = output.entire().length();
            rest = rest.substring(consumed);
        }

        return accumulator.getTokens();
    }
}

View File

@ -13,78 +13,78 @@ final class TokenizerFsm {
/**
* Text
*/
private static final PatternMatcher text = new PatternMatcher(
private static final FsmFunction text = new PatternMatcher(
Pattern.compile("^(?:[\\w\\W&&[^<$]]|<(?!%|/?\\p{Lu}|/?[\\p{L}0-9_$]+(?:\\.[\\p{L}0-9_$]+)+)|\\$(?![\\w$]+(?:\\.[\\w$]+)*))+")
);
/**
* Gsp dollar reference and scriptlets, also used as component values
*/
private static final PatternMatcher dollarReference = new PatternMatcher(
private static final FsmFunction dollarReference = new PatternMatcher(
Pattern.compile("^(\\$)([\\w$]+(?:\\.[\\w$]+)*)")
);
private static final DollarScriptletMatcher dollarScriptlet = new DollarScriptletMatcher();
private static final PatternMatcher blockScriptlet = new PatternMatcher(
private static final FsmFunction dollarScriptlet = new DollarScriptletMatcher();
private static final FsmFunction blockScriptlet = new PatternMatcher(
Pattern.compile("^(<%)(.*?)(%>)")
);
private static final PatternMatcher expressionScriptlet = new PatternMatcher(
private static final FsmFunction expressionScriptlet = new PatternMatcher(
Pattern.compile("^(<%=)(.*?)(%>)")
);
/**
* Component starts
*/
private static final PatternMatcher openingComponentStart = new PatternMatcher(
private static final FsmFunction openingComponentStart = new PatternMatcher(
Pattern.compile("^<(?=\\p{Lu}|[\\p{L}0-9_$]+(?:\\.[\\p{L}0-9_$]+)+)")
);
private static final PatternMatcher closingComponentStart = new PatternMatcher(
private static final FsmFunction closingComponentStart = new PatternMatcher(
Pattern.compile("^(<)(/)(?=\\p{Lu}|[\\p{L}0-9_$]+(?:\\.[\\p{L}0-9_$]+)+)")
);
/**
* Component names
*/
private static final PatternMatcher className = new PatternMatcher(
private static final FsmFunction className = new PatternMatcher(
Pattern.compile("^\\p{Lu}[\\p{L}0-9_$]*")
);
private static final PatternMatcher packageName = new PatternMatcher(
private static final FsmFunction packageName = new PatternMatcher(
Pattern.compile("^[\\p{L}0-9_$]+(?=\\.)")
);
private static final PatternMatcher dot = new PatternMatcher(
private static final FsmFunction dot = new PatternMatcher(
Pattern.compile("^\\.")
);
/**
* Whitespace
*/
private static final PatternMatcher whitespace = new PatternMatcher(Pattern.compile("^\\s+"));
private static final FsmFunction whitespace = new PatternMatcher(Pattern.compile("^\\s+"));
/**
* Keys and values
*/
private static final PatternMatcher key = new PatternMatcher(
private static final FsmFunction key = new PatternMatcher(
Pattern.compile("^[\\p{L}0-9_$]+")
);
private static final PatternMatcher equals = new PatternMatcher(Pattern.compile("^="));
private static final PatternMatcher singleQuoteString = new PatternMatcher(
private static final FsmFunction equals = new PatternMatcher(Pattern.compile("^="));
private static final FsmFunction singleQuoteString = new PatternMatcher(
Pattern.compile("^(')((?:[\\w\\W&&[^\\\\'\\n\\r]]|\\\\['nrbfst\\\\u])*)(')")
);
private static final GStringMatcher gString = new GStringMatcher();
private static final FsmFunction gString = new GStringMatcher();
/**
* Component ends
*/
private static final PatternMatcher forwardSlash = new PatternMatcher(Pattern.compile("^/"));
private static final PatternMatcher componentEnd = new PatternMatcher(Pattern.compile("^>"));
private static final FsmFunction forwardSlash = new PatternMatcher(Pattern.compile("^/"));
private static final FsmFunction componentEnd = new PatternMatcher(Pattern.compile("^>"));
private static FunctionFsmBuilder<String, TokenizerState, FsmOutput> getFsmBuilder() {
private static FunctionFsmBuilder<CharSequence, Tokenizer.State, FsmOutput> getFsmBuilder() {
return new FunctionFsmBuilderImpl<>();
}
public static FunctionFsm<String, TokenizerState, FsmOutput> get(Accumulator acc) {
public static FunctionFsm<CharSequence, Tokenizer.State, FsmOutput> get(Accumulator acc, Tokenizer.State state) {
return getFsmBuilder()
.setInitialState(TokenizerState.NORMAL)
.whileIn(TokenizerState.NORMAL, sc -> {
.setInitialState(state)
.whileIn(Tokenizer.State.TEXT, sc -> {
sc.on(text).exec(o -> {
acc.accumulate(TEXT, o.entire());
});
@ -108,16 +108,16 @@ final class TokenizerFsm {
acc.accumulate(SCRIPTLET, o.part(2));
acc.accumulate(SCRIPTLET_CLOSE, o.part(3));
});
sc.on(openingComponentStart).shiftTo(TokenizerState.COMPONENT_NAME).exec(o ->
sc.on(openingComponentStart).shiftTo(Tokenizer.State.COMPONENT_NAME).exec(o ->
acc.accumulate(COMPONENT_START, o.entire())
);
sc.on(closingComponentStart).shiftTo(TokenizerState.COMPONENT_NAME).exec(o -> {
sc.on(closingComponentStart).shiftTo(Tokenizer.State.COMPONENT_NAME).exec(o -> {
acc.accumulate(COMPONENT_START, o.part(1));
acc.accumulate(FORWARD_SLASH, o.part(2));
});
sc.onNoMatch().exec(input -> { throw new IllegalArgumentException(); });
})
.whileIn(TokenizerState.COMPONENT_NAME, sc -> {
.whileIn(Tokenizer.State.COMPONENT_NAME, sc -> {
sc.on(packageName).exec(o -> {
acc.accumulate(PACKAGE_NAME, o.entire());
});
@ -130,16 +130,16 @@ final class TokenizerFsm {
sc.on(forwardSlash).exec(o -> {
acc.accumulate(FORWARD_SLASH, o.entire());
});
sc.on(componentEnd).shiftTo(TokenizerState.NORMAL).exec(o -> {
sc.on(componentEnd).shiftTo(Tokenizer.State.TEXT).exec(o -> {
acc.accumulate(COMPONENT_END, o.entire());
});
sc.on(whitespace).shiftTo(TokenizerState.COMPONENT_KEYS_AND_VALUES).exec(o -> {
sc.on(whitespace).shiftTo(Tokenizer.State.COMPONENT_KEYS_AND_VALUES).exec(o -> {
acc.accumulate(WHITESPACE, o.entire());
});
sc.onNoMatch().exec(input -> { throw new IllegalArgumentException(); });
})
.whileIn(TokenizerState.COMPONENT_KEYS_AND_VALUES, sc -> {
sc.on(componentEnd).shiftTo(TokenizerState.NORMAL).exec(o -> {
.whileIn(Tokenizer.State.COMPONENT_KEYS_AND_VALUES, sc -> {
sc.on(componentEnd).shiftTo(Tokenizer.State.TEXT).exec(o -> {
acc.accumulate(COMPONENT_END, o.entire());
});
sc.on(whitespace).exec(o -> {
@ -184,7 +184,7 @@ final class TokenizerFsm {
sc.on(forwardSlash).exec(o -> {
acc.accumulate(FORWARD_SLASH, o.entire());
});
sc.on(componentEnd).shiftTo(TokenizerState.NORMAL).exec(o -> {
sc.on(componentEnd).shiftTo(Tokenizer.State.TEXT).exec(o -> {
acc.accumulate(COMPONENT_END, o.entire());
});
sc.onNoMatch().exec(input -> { throw new IllegalArgumentException(); });
@ -192,6 +192,4 @@ final class TokenizerFsm {
.build();
}
}

View File

@ -0,0 +1,62 @@
package com.jessebrault.gcp.tokenizer;
import com.jessebrault.fsm.function.FunctionFsm;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.LinkedList;
import java.util.Queue;
/**
 * {@link Tokenizer} that drives the {@link TokenizerFsm} lazily: each time the
 * token buffer runs dry, the state machine is applied once to the remaining
 * input, and whatever tokens it accumulates are handed out one at a time.
 *
 * <p>{@link #start} must be called before any other method; all working
 * state is (re)initialized there.
 */
public final class TokenizerImpl implements Tokenizer {

    private static final Logger logger = LoggerFactory.getLogger(TokenizerImpl.class);

    private CharSequence input;
    private int currentOffset;
    private int endOffset;
    private Queue<Token> tokens;
    private FunctionFsm<CharSequence, State, FsmOutput> fsm;

    /**
     * Resets this tokenizer to work on {@code input[startOffset, endOffset)}.
     *
     * @param input        the character data to tokenize
     * @param startOffset  offset at which tokenizing begins
     * @param endOffset    offset at which tokenizing stops
     * @param initialState the state the state machine starts in
     */
    @Override
    public void start(CharSequence input, int startOffset, int endOffset, State initialState) {
        this.input = input;
        this.currentOffset = startOffset;
        this.endOffset = endOffset;
        this.tokens = new LinkedList<>();
        // The accumulator writes straight into this.tokens, so applying the
        // fsm is what fills the buffer.
        this.fsm = TokenizerFsm.get(new Accumulator(this.tokens), initialState);
    }

    /**
     * @return {@code true} if a token is buffered, or one could be produced
     *         by applying the state machine to the remaining input
     */
    @Override
    public boolean hasNext() {
        if (this.tokens.isEmpty()) {
            this.getNextTokens();
        }
        return !this.tokens.isEmpty();
    }

    /**
     * Applies the state machine once to the unconsumed input, if any, and
     * advances past whatever it matched. A {@code null} match is logged and
     * leaves the offset untouched (best-effort: no exception is raised here).
     */
    private void getNextTokens() {
        if (this.currentOffset == this.endOffset) {
            return;
        }
        final var match = this.fsm.apply(this.input.subSequence(this.currentOffset, this.endOffset));
        if (match == null) {
            logger.error("match is null!");
            return;
        }
        this.currentOffset += match.entire().length();
    }

    /**
     * Returns the next token, refilling the buffer from the remaining input
     * first when necessary, so callers need not have called
     * {@link #hasNext()} immediately beforehand.
     *
     * @return the next token
     * @throws IllegalStateException if no token is buffered and none can be
     *         produced from the remaining input
     */
    @Override
    public Token next() {
        if (!this.hasNext()) {
            throw new IllegalStateException("currentAccumulatedTokens is empty");
        }
        return this.tokens.remove();
    }

    /** @return the state the underlying state machine is currently in */
    @Override
    public State getCurrentState() {
        return this.fsm.getCurrentState();
    }
}

View File

@ -1,7 +0,0 @@
package com.jessebrault.gcp.tokenizer;
/**
 * States of the tokenizer's finite-state machine.
 */
enum TokenizerState {
// Initial state of the machine; also the state re-entered after a component end.
NORMAL,
// Entered after an opening or closing component start has been matched.
COMPONENT_NAME,
// Entered from COMPONENT_NAME once whitespace has been matched.
COMPONENT_KEYS_AND_VALUES
}

View File

@ -0,0 +1,6 @@
// NOTE(review): presumably an IDE GroovyDSL (GDSL) descriptor — confirm.
// Scope the contributions below to the 'gsp' filetype.
def ctx = context(filetypes: ['gsp'])
contributor(ctx) {
// Contribute a method named 'foo' taking a String parameter 'bar', with declared type int.
method name: 'foo', params: [bar: 'String'], type: 'int'
// Contribute a property named 'texts' with declared type java.util.List<String>.
property name: 'texts', type: 'java.util.List<String>', doc: 'Some texts.'
}

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,5 @@
def elf = foo('elf')
texts.each {
}

View File

@ -66,7 +66,7 @@ class TokenizerTests {
configure.setResolveStrategy(Closure.DELEGATE_FIRST)
configure()
def r = Tokenizer.tokenize(src)
def r = new TokenizerImpl().tokenizeAll(src, Tokenizer.State.TEXT)
logger.debug('r: {}', r)
logger.debug('configurator.specs: {}', configurator.specs)