commit 1469b2fd6a67ccb477a6465d26f94aa505053bdd
parent f3ccd3a9ffe7a7a4dba78c2a3ee4f9925f811298
Author: glenda <glenda@9front.local>
Date: Wed, 22 Sep 2021 23:13:01 +0000
more markdown: parse headers, text, page breaks
Diffstat:
M | extra/Markdown.c | | | 216 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------ |
1 file changed, 151 insertions(+), 65 deletions(-)
diff --git a/extra/Markdown.c b/extra/Markdown.c
@@ -10,38 +10,39 @@ long count;
enum {
TEOF = 0,
- TCHAR,
- THEADER,
-};
-
-enum {
- SEOF = 0,
- SNEW,
- SDEFAULT,
- SHEADER,
- SSPACE,
+ TH0, TH1, TH2, TH3, TH4, TH5, TH6, /* header tokens; lnewline seeds TH0 and lheader bumps the type once per call */
+ TWORD, TWBRK, TPBRK, /* text word, plus the break tokens produced by emitwbrk() and emitpbrk() */
+ TUNDEF = -1, /* no token in progress; emit() resets tok.type to this */
};
typedef struct Token Token;
struct Token {
int type;
- int header;
char c;
+ String *s; /* token text: filled with s_putc() by lword/lhword, printed and reset by emit() */
};
-int state;
+void (*lex)(void); /* current lexer state function; main() keeps calling it until it is nil */
long p;
Token tok, *tokens;
+int oldtype; /* type of the last token that went through emit() */
+
+void lnewline(void); /* state entered at the start of input and after each '\n' */
+
+void lheader(void); /* header states: the '#' run, ... */
+void lhspace(void); /* ... blanks inside a header, ... */
+void lhword(void); /* ... and the header text */
+
+void lword(void); /* plain-text states: a word, ... */
+void lspace(void); /* ... and the blanks after it */
-int lex(void);
-int lnew(void);
-int ldefault(void);
-int lheader(void);
-int lspace(void);
char consume(void);
char peek(int);
void emit(void);
+void emitwbrk(void); /* emits TWBRK when the previous token was a TWORD */
+void emitpbrk(void); /* emits TPBRK unless the previous token already was one */
+
/* Rich */
char * newobj(void);
@@ -69,97 +70,157 @@ main(int argc, char **argv)
}
if (n < 0) sysfatal("%r");
- state = SNEW;
- while(state != SEOF) {
- state = lex();
- }
-}
-
-int
-lex(void)
-{
- switch(state) {
- case SNEW: return lnew();
- case SDEFAULT: return ldefault();
- case SHEADER: return lheader();
- case SSPACE: return lspace();
+ tok.s = s_new();
+ tok.type = TUNDEF;
+ oldtype = TUNDEF;
+ lex = lnewline;
+ while(lex != nil) {
+ lex();
}
- fprint(2, "lex err\n");
- return SEOF;
+ tok.type = TEOF;
+ emit();
}
-int
-lnew(void)
+void
+lnewline(void) /* lexer state at the start of a line: picks the next state from the first character */
{
char c;
c = peek(0);
switch (c){
+ case 0: /* 0 ends lexing: main() leaves its loop and emits TEOF */
+ lex = nil;
+ break;
+ case '\n': /* a newline while already at the start of a line */
+ consume();
+ emitpbrk(); /* emits TPBRK unless the previous token already was one */
+ tok.type = TUNDEF;
+ break;
case '#':
- tok.type = THEADER;
- return SHEADER;
+ lex = lheader;
+ consume();
+ tok.type = TH0; /* lheader increments the type on entry, so this first '#' becomes TH1 */
+ break;
default:
- return SDEFAULT;
+ lex = lword; /* the character is not consumed here; lword peeks it again */
+ emitwbrk(); /* emits TWBRK only if the previous emitted token was a TWORD */
+ tok.type = TWORD;
}
}
-int
-ldefault(void)
+void
+lword(void) /* lexer state inside a text word: appends characters to tok.s until ' ', '\n' or 0 */
{
- int newstate;
- tok.c = consume();
- switch (tok.c) {
+ char c;
+ c = peek(0);
+ switch (c) {
case 0:
- tok.type = TEOF;
+ lex = nil; /* stop the loop in main(); the emit() below flushes the word collected so far */
emit();
- newstate = SEOF;
break;
case '\n':
+ lex = lnewline;
+ consume();
+ emit(); /* the word ends at the newline */
+ tok.type = TUNDEF; /* emit() already left it at TUNDEF; restated here */
+ break;
case ' ':
- tok.type = TCHAR;
- tok.c = ' ';
+ lex = lspace;
+ consume();
emit();
- newstate = SSPACE;
+ emitwbrk(); /* oldtype is TWORD after the emit() above, so this emits a TWBRK */
+ tok.type = TWORD; /* both emits reset the type; restore it for the next word */
break;
default:
- tok.type = TCHAR;
- newstate = SDEFAULT;
- emit();
+ s_putc(tok.s, c); /* ordinary character: add it to the current word */
+ consume();
}
- return newstate;
}
-int
+void
+lspace(void) /* lexer state after a word: skips blanks, then hands the next character to lword */
+{
+ char c;
+ c = peek(0);
+ switch (c) {
+ case 0:
+ /* end of input after trailing blanks: stop here so lword does not emit an empty word */
+ lex = nil;
+ break;
+ case ' ':
+ case '\n':
+ /* keep skipping; lheader would reject the pending TWORD type and stop the lexer */
+ lex = lspace;
+ consume();
+ break;
+ default:
+ lex = lword; /* the character is left for lword to peek */
+ tok.type = TWORD;
+ }
+}
+
+void /* lheader: lexer state inside the run of '#' that opens a header line */
lheader(void)
{
char c;
+ if ((tok.type >= TH0) && (tok.type < TH6)) tok.type++; /* one header level per call; lnewline seeded TH0 */
+ else {
+ /* an error: already at TH6 (a seventh '#'), or tok.type is not a header type */
+ lex = nil;
+ return;
+ }
c = peek(0);
switch (c){
case '#':
- tok.header++;
consume();
- return SHEADER;
+ lex = lheader; /* stay in this state; the next call raises the level again */
+ break;
case '\n':
+ /* an error: newline directly after the '#' run */
+ lex = nil;
+ break;
+ case ' ':
consume();
- emit();
- return SNEW;
+ lex = lhspace;
+ break;
default:
+ /* an error: the '#' run is followed by something other than '#' or ' ' */
+ lex = nil;
+ }
+}
+
+void
+lhspace(void) /* lexer state for blanks inside a header line: skips ' ' until header text starts */
+{
+ char c;
+ c = peek(0);
+ switch(c) {
+ case 0:
+ case '\n':
+ /* an error, as in lheader: stop the lexer without consuming anything */
+ lex = nil;
+ break;
+ case ' ':
consume();
- return SHEADER;
+ break;
+ default:
+ lex = lhword; /* the character is left for lhword to peek */
}
}
-int
-lspace(void)
+void
+lhword(void) /* lexer state inside header text: appends to tok.s until '\n' or end of input */
{
char c;
c = peek(0);
- switch (c) {
+ switch(c) {
+ case 0:
+ /* end of input: flush the header collected so far, as lword does for a word */
+ lex = nil;
+ emit();
+ break;
case ' ':
+ s_putc(tok.s, c); /* the blank is kept as part of the header text */
+ consume();
+ lex = lhspace;
+ break;
case '\n':
consume();
- return SSPACE;
+ emit(); /* the header token ends at the newline */
+ lex = lnewline;
+ break;
default:
- return SDEFAULT;
+ s_putc(tok.s, c);
+ consume();
}
}
@@ -180,8 +241,33 @@ peek(int k)
void
emit(void)
{
- print("[%d %c]", tok.type, tok.c);
- /* TODO: should add tokens to tokens array */
+ s_terminate(tok.s); /* make sure the text is NUL-terminated before s_to_c() hands it to print */
+ print("[%d] %s\n", tok.type, s_to_c(tok.s)); /* one line per token: numeric type, then its text */
+
+ /* TODO: should add token to tokens array */
+
+ /* cleaning up tok state: empty the text, remember the type, mark no token in progress */
+ s_reset(tok.s);
+ oldtype = tok.type; /* read by emitwbrk() and emitpbrk() to test the previous token */
+ tok.type = TUNDEF;
+}
+
+void
+emitwbrk(void) /* emits a TWBRK token, but only directly after a TWORD */
+{
+ if (oldtype != TWORD)
+ return;
+ tok.type = TWBRK;
+ emit();
+}
+
+void
+emitpbrk(void) /* emits a TPBRK token unless the previous token was already a TPBRK */
+{
+ if (oldtype == TPBRK)
+ return;
+ tok.type = TPBRK;
+ emit();
+
}
int