Markdown.c (12188B)
1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <thread.h> 5 6 #include "config.h" 7 8 #define SIZE(X) (sizeof(X)/sizeof(*X)) 9 10 #define ATTACH(array, size, new) \ 11 { \ 12 array = realloc(array, sizeof(*array) * (size + 1)); \ 13 array[size] = new; \ 14 size++; \ 15 } 16 17 #define APPEND(t1, t2) \ 18 { \ 19 ATTACH(t1->tokens, t1->count, t2) \ 20 t2 = t1; \ 21 t1 = nil; \ 22 } 23 24 typedef struct Token Token; 25 26 struct Token { 27 int type; 28 union { 29 Rune rune; 30 struct { 31 int count; 32 Token **tokens; 33 }; 34 }; 35 }; 36 37 enum { 38 TNil, 39 40 TRune, 41 42 TSpace, 43 TNewline, 44 TTab, 45 TBraceOpen, 46 TBraceClose, 47 TSqrBraceOpen, 48 TSqrBraceClose, 49 THash, 50 TQuote, 51 52 TWhiteSpace, 53 THMarker, 54 TWord, 55 TWords, 56 TQuoted, 57 TBraced, 58 TSqrBraced, 59 TLink, 60 TText, 61 THeader, 62 TLine, 63 TEmptyLine, 64 TParagraph, 65 66 TMax, 67 }; 68 69 char *names[] = { 70 [TNil] "nil", 71 [TRune] "rune", 72 73 [TSpace] "sp", 74 [TNewline] "nl", 75 [TTab] "tab", 76 [TBraceOpen] "(", 77 [TBraceClose] ")", 78 [TSqrBraceOpen] "[", 79 [TSqrBraceClose] "]", 80 [THash] "#", 81 [TQuote] "\"", 82 83 [TWhiteSpace] "ws", 84 [THMarker] "h_marker", 85 [TWord] "word", 86 [TWords] "words", 87 [TQuoted] "quoted", 88 [TBraced] "()", 89 [TSqrBraced] "[]", 90 [TLink] "link", 91 [TText] "text", 92 [THeader] "h", 93 [TLine] "line", 94 [TEmptyLine] "pb", 95 [TParagraph] "p", 96 }; 97 98 Biobuf *bfdin; 99 100 Token* twrap(int, int, Token **); 101 Token** token1(Token *); 102 103 void input(void *); 104 void header(void *); 105 void pass1(void *); 106 void quote(void *); 107 void pass2(void *); 108 void words(void *); 109 void link(void *); 110 void line(void *); 111 void debug(void *); 112 void output(void *); 113 void clear(void *); 114 115 void freetoken(Token *t); 116 void dbgprinttoken(Biobuf *, Token *, int); 117 void printtoken(Biobuf *, Token *); 118 void printlink(Biobuf *, Token *); 119 char * tokentotext(Token *, int); 120 121 Rune trune(Token *); 122 int ttype(Token *); 123 int ttest(Token **, int *, int); 124 Token ** findtype(Token **, int, int); 125 void tflush(Token *, Channel *); 126 127 void 128 usage(void) 129 { 130 fprint(2, "usage: %s [file]", argv0); 131 threadexitsall("usage"); 132 } 133 134 void 135 threadmain(int argc, char **argv) 136 { 137 ARGBEGIN { 138 default: 139 usage(); 140 }ARGEND 141 142 if (argc > 0) { 143 bfdin = Bopen(argv[0], OREAD); 144 if (bfdin == nil) sysfatal("%r"); 145 } else bfdin = Bfdopen(0, OREAD); 146 147 int n; 148 Channel **c; 149 void (*pipeline[])(void *) = { 150 input, 151 pass1, 152 quote, 153 pass2, 154 words, 155 link, 156 header, 157 line, 158 // debug, 159 output, 160 clear, 161 }; 162 163 n = sizeof(pipeline) / sizeof(*pipeline); 164 165 c = mallocz(sizeof(Channel *) * (n + 1), 1); 166 167 int i; 168 for (i = 1; i < n; i ++) c[i] = chancreate(sizeof(Token *), 64); 169 for (i = 0; i < n; i++) threadcreate(pipeline[i], (void *)(c + i), 64 * 1024); 170 } 171 172 Token * 173 twrap(int type, int count, Token **tokens) 174 { 175 Token *nt = mallocz(sizeof(Token), 1); 176 nt->type = type; 177 nt->count = count; 178 nt->tokens = tokens; 179 return nt; 180 } 181 182 Token ** 183 token1(Token *t) 184 { 185 Token **tt; 186 tt = malloc(sizeof(Token *)); 187 tt[0] = t; 188 return tt; 189 } 190 191 void 192 input(void *v) 193 { 194 Channel **c = v; 195 196 Rune r; 197 while ((r = Bgetrune(bfdin)) != Beof) { 198 Token *t = mallocz(sizeof(Token), 1); 199 t->type = TRune; 200 t->rune = r; 201 send(c[1], &t); 202 } 203 chanclose(c[1]); 204 } 205 206 void 207 header(void *v) 208 { 209 Channel **c = v; 210 Token *t, *nt, **tt; 211 tt = nil; 212 int h = 0; 213 int count = 0; 214 while (recv(c[0], &t) > 0) { 215 if (h == 0) { 216 if (t->type == THMarker) h = 1; 217 else h = -1; 218 } 219 if (h > 0) { 220 if ((t->type == TNewline) || (t->type == TEmptyLine)) { 221 h = 0; 222 nt = twrap(THeader, count, tt); 223 send(c[1], &nt); 224 send(c[1], &t); 225 tt = nil; 226 count = 0; 227 } else ATTACH(tt, count, t) 228 } 229 if (h < 0) { 230 send(c[1], &t); 231 if ((t->type == TNewline) || (t->type == TEmptyLine)) h = 0; 232 } 233 } 234 if (tt != nil) { 235 nt = twrap(THeader, count, tt); 236 send(c[1], &nt); 237 } 238 chanclose(c[1]); 239 } 240 241 void 242 pass1(void *v) 243 { 244 Channel **c = v; 245 Token *t; 246 while (recv(c[0], &t) > 0) { 247 if (ttype(t) == TRune) { 248 switch (trune(t)) { 249 case L'[': 250 t = twrap(TSqrBraceOpen, 1, token1(t)); 251 break; 252 case L']': 253 t = twrap(TSqrBraceClose, 1, token1(t)); 254 break; 255 case L'(': 256 t = twrap(TBraceOpen, 1, token1(t)); 257 break; 258 case L')': 259 t = twrap(TBraceClose, 1, token1(t)); 260 break; 261 case L'\n': 262 t = twrap(TNewline, 1, token1(t)); 263 break; 264 case L' ': 265 t = twrap(TSpace, 1, token1(t)); 266 break; 267 case L'\t': 268 t = twrap(TTab, 1, token1(t)); 269 break; 270 case L'\#': 271 t = twrap(THash, 1, token1(t)); 272 break; 273 case L'\"': 274 t = twrap(TQuote, 1, token1(t)); 275 break; 276 } 277 send(c[1], &t); 278 } 279 } 280 chanclose(c[1]); 281 } 282 283 void 284 quote(void *v) 285 { 286 Channel **c = v; 287 Token *t, *q = nil; 288 while (recv(c[0], &t) > 0) { 289 if (q == nil) { 290 if (ttype(t) == TQuote) { 291 q = twrap(TQuoted, 1, token1(t)); 292 } else send(c[1], &t); 293 } else { 294 if (ttype(t) == TQuote) { 295 ATTACH(q->tokens, q->count, t) 296 send(c[1], &q); 297 q = nil; 298 } else ATTACH(q->tokens, q->count, t) 299 } 300 } 301 if (q != nil) { 302 fprint(2, "missing end quote\n"); 303 send(c[1], &q); 304 } 305 chanclose(c[1]); 306 } 307 308 void 309 pass2(void *v) 310 { 311 Channel **c = v; 312 Token *t[2] = {nil, nil}; 313 while (recv(c[0], &t[0]) > 0) { 314 switch(ttype(t[1])) { 315 case TTab: 316 case TSpace: 317 t[1] = twrap(TWhiteSpace, 1, token1(t[1])); 318 break; 319 case THash: 320 t[1] = twrap(THMarker, 1, token1(t[1])); 321 break; 322 case TRune: 323 t[1] = twrap(TWord, 1, token1(t[1])); 324 break; 325 } 326 327 switch(ttype(t[1])) { 328 case TNewline: 329 if (ttype(t[0]) == TNewline) { 330 t[1] = twrap(TEmptyLine, 1, token1(t[1])); 331 APPEND(t[1], t[0]) 332 } 333 break; 334 case TWhiteSpace: 335 if ((ttype(t[0]) == TSpace) || (ttype(t[0]) == TTab)) { 336 APPEND(t[1], t[0]) 337 } 338 break; 339 case THMarker: 340 if (ttype(t[0]) == THash) { 341 APPEND(t[1], t[0]) 342 } 343 break; 344 case TWord: 345 if ((ttype(t[0]) == TRune) || (ttype(t[0]) == THash)) { 346 APPEND(t[1], t[0]) 347 } 348 break; 349 } 350 351 if (t[1] != nil) send(c[1], &t[1]); 352 t[1] = t[0]; 353 t[0] = nil; 354 } 355 if (t[1] != nil) send(c[1], &t[1]); 356 chanclose(c[1]); 357 } 358 359 void 360 words(void *v) 361 { 362 Channel **c = v; 363 Token *t, **buf; 364 char bf = 0xff; 365 buf = mallocz(sizeof(Token *) * 8, 1); 366 int r = 1; 367 while (bf != 0) { 368 t = nil; 369 if (r > 0) { 370 recv(c[0], &t); 371 } 372 memcpy(buf, buf + 1, 7 * sizeof(Token *)); 373 buf[7] = t; 374 bf = (bf << 1) | (1 & (t != nil)); 375 376 if (ttype(buf[4]) == TWord) { 377 buf[4] = twrap(TWords, 1, token1(buf[4])); 378 } 379 380 if ((ttype(buf[4]) == TWords) && 381 (ttype(buf[5]) == TWhiteSpace) && 382 (ttype(buf[6]) == TWord)) { 383 APPEND(buf[4], buf[5]) 384 APPEND(buf[5], buf[6]) 385 } 386 387 if (buf[0] != nil) { 388 send(c[1], &buf[0]); 389 } 390 } 391 chanclose(c[1]); 392 } 393 394 void 395 link(void *v) 396 { 397 Channel **c = v; 398 Token **tp, *tbuf[7]; 399 int i, j; 400 401 int rlink[] = { TSqrBraceOpen, TWords, TSqrBraceClose, TBraceOpen, TWords, TBraceClose}; 402 403 for (i = 0; i < SIZE(tbuf); i++) tbuf[i] = nil; 404 405 for (;;) { 406 for (i = 0; i < SIZE(tbuf); i++) { 407 if (tbuf[i] == nil) recv(c[0], &tbuf[i]); 408 } 409 410 if (ttest(tbuf, rlink, 6) != 0) { 411 Token *t; 412 Token **t0 = malloc(sizeof(Token *) * 3); 413 Token **t1 = malloc(sizeof(Token *) * 3); 414 Token **tlink = malloc(sizeof(Token *) * 2); 415 416 memcpy(t0, tbuf, sizeof(Token *) * 3); 417 memcpy(t1, tbuf + 3, sizeof(Token *) * 3); 418 419 tlink[0] = twrap(TSqrBraced, 2, t0); 420 tlink[1] = twrap(TBraced, 2, t1); 421 422 t = twrap(TLink, 2, tlink); 423 424 for (j = 0; j < 6; j++) { 425 tbuf[j] = nil; 426 } 427 send(c[1], &t); 428 } 429 430 if (tbuf[0] != nil) send(c[1], &tbuf[0]); 431 tbuf[0] = nil; 432 for (tp = tbuf, i = 1; i < SIZE(tbuf); i++) { 433 if (tbuf[i] != nil) { 434 *tp = tbuf[i]; 435 tbuf[i] = nil; 436 tp++; 437 } 438 } 439 440 if (tbuf[0] == nil) break; 441 } 442 443 chanclose(c[1]); 444 } 445 446 void 447 line(void *v) 448 { 449 Channel **c = v; 450 Token *t, *l = nil; 451 while(recv(c[0], &t) > 0) { 452 if (l == nil) { 453 switch (ttype(t)) { 454 case THMarker: 455 l = twrap(THeader, 1, token1(t)); 456 break; 457 case TWords: 458 case TLink: 459 case TQuoted: 460 case TBraceOpen: 461 case TSqrBraceOpen: 462 case TBraceClose: 463 case TSqrBraceClose: 464 case TBraced: 465 case TSqrBraced: 466 case TWhiteSpace: 467 l = twrap(TLine, 1, token1(t)); 468 break; 469 default: 470 send(c[1], &t); 471 } 472 } 473 else switch(ttype(t)) { 474 case TNewline: 475 case TEmptyLine: 476 send(c[1], &l); 477 l = nil; 478 send(c[1], &t); 479 break; 480 default: 481 ATTACH(l->tokens, l->count, t) 482 } 483 } 484 chanclose(c[1]); 485 } 486 487 void 488 debug(void *v) 489 { 490 Channel **c = v; 491 Token *t; 492 Biobuf *b; 493 b = Bfdopen(2, OWRITE); 494 if (b == nil) sysfatal("debug: %r"); 495 while (recv(c[0], &t) > 0) { 496 dbgprinttoken(b, t, 0); 497 Bflush(b); 498 send(c[1], &t); 499 } 500 chanclose(c[1]); 501 Bflush(b); 502 } 503 504 void 505 output(void *v) 506 { 507 Channel **c = v; 508 Token *t; 509 Biobuf *b; 510 b = Bfdopen(1, OWRITE); 511 while (recv(c[0], &t) > 0) { 512 printtoken(b, t); 513 send(c[1], &t); 514 } 515 chanclose(c[1]); 516 Bflush(b); 517 } 518 519 void 520 clear(void *v) 521 { 522 Channel **c = v; 523 Token *t; 524 while (recv(c[0], &t) > 0) { 525 freetoken(t); 526 } 527 } 528 529 void 530 freetoken(Token *t) 531 { 532 if (ttype(t) != TRune) { 533 for (; t->count > 0; t->count--) freetoken(t->tokens[t->count - 1]); 534 free(t->tokens); 535 } 536 free(t); 537 } 538 539 void 540 dbgprinttoken(Biobuf *b, Token *t, int ind) 541 { 542 int i; 543 char *s; 544 static neednl; 545 546 for (i = 0; i < ind; i++) Bprint(b, " "); 547 548 neednl = 1; 549 switch (t->type) { 550 case TRune: 551 s = tokentotext(t, 1); 552 Bprint(b, "'%s'", s); 553 free(s); 554 break; 555 case TText: 556 s = tokentotext(t, 1); 557 Bprint(b, "text \"%s\"", s); 558 free(s); 559 break; 560 case THMarker: 561 Bprint(b, "h_marker %d", t->count); 562 break; 563 case TQuoted: 564 Bprint(b, "%s ", names[t->type]); 565 case TWord: 566 s = tokentotext(t, 1); 567 Bprint(b, "\"%s\"", s); 568 free(s); 569 break; 570 case TLine: 571 case TLink: 572 case TBraced: 573 case TSqrBraced: 574 case TWords: 575 Bprint(b, "%s\n", names[t->type]); 576 for (i = 0; i < t->count; i++) { 577 dbgprinttoken(b, t->tokens[i], ind + 1); 578 } 579 break; 580 default: 581 Bprint(b, "%s", names[t->type]); 582 } 583 584 if (neednl > 0) { 585 neednl = 0; 586 Bprint(b, "\n"); 587 } 588 } 589 590 void 591 printtoken(Biobuf *b, Token *t) 592 { 593 int i; 594 switch(ttype(t)) { 595 case TWord: 596 Bprint(b, "."); 597 for (i = 0; i < t->count; i++) { 598 Bprint(b, "%C", trune(t->tokens[i])); 599 } 600 Bprint(b, "\n"); 601 break; 602 case TLink: 603 printlink(b, t); 604 break; 605 case TLine: 606 for (i = 0; i < t->count; i++) printtoken(b, t->tokens[i]); 607 Bprint(b, "n\n"); 608 break; 609 case THeader: 610 i = t->tokens[0]->count; 611 if (i > 6) i = 6; 612 i--; 613 614 Bprint(b, "f%s\n", fonts[Fheader1 + i]); 615 for (i = 2; i < t->count; i++) printtoken(b, t->tokens[i]); 616 Bprint(b, "n\n" "f\n"); 617 break; 618 case TEmptyLine: 619 Bprint(b, "n\n"); 620 break; 621 case TWhiteSpace: 622 Bprint(b, "s\n"); 623 break; 624 case TRune: 625 Bprint(b, ".%C\n", t->rune); 626 break; 627 case TNewline: 628 break; 629 default: 630 for (i = 0; i < t->count; i++) printtoken(b, t->tokens[i]); 631 } 632 } 633 634 void 635 printlink(Biobuf *b, Token *t) 636 { 637 char *text, *url; 638 Token *tlink, *ttext, **tt; 639 ttext = t->tokens[0]; 640 tlink = t->tokens[1]; 641 642 tt = findtype(ttext->tokens, ttext->count, TWords); 643 if (tt == nil) { 644 fprint(2, "malformed link\n"); 645 return; 646 } 647 text = tokentotext(*tt, 0); 648 649 tt = findtype(tlink->tokens, tlink->count, TWords); 650 if (tt == nil) { 651 fprint(2, "malformed link\n"); 652 free(text); 653 return; 654 } 655 url = tokentotext(*tt, 0); 656 657 Bprint(b, "l%s\n", url); 658 Bprint(b, ".%s\n", text); 659 Bprint(b, "l\n"); 660 free(url); 661 free(text); 662 } 663 664 Token ** 665 findtype(Token **tt, int count, int type) 666 { 667 int i; 668 for (i = 0; i < count; i++) { 669 if (ttype(tt[i]) == type) return tt + i; 670 } 671 return nil; 672 } 673 674 char * 675 tokentotext(Token *t, int escape) 676 { 677 char *r, *s; 678 int i; 679 switch (t->type) { 680 case TRune: 681 if (escape != 0) { 682 if (t->rune == L'\n') return smprint("\\n"); 683 if (t->rune == L'\t') return smprint("\\t"); 684 if (t->rune == L'"') return smprint("\\%c", '"'); 685 } 686 return smprint("%C", t->rune); 687 case TWhiteSpace: 688 return smprint(" "); 689 default: 690 r = malloc(128); 691 r[0] = '\0'; 692 for (i = 0; i < t->count; i++) { 693 s = tokentotext(t->tokens[i], escape); 694 strncat(r, s, 128); 695 free(s); 696 } 697 return r; 698 } 699 } 700 701 Rune 702 trune(Token *t) 703 { 704 if (t == nil) return Runeerror; 705 switch (t->type) { 706 case TRune: 707 return t->rune; 708 default: 709 return Runeerror; 710 } 711 } 712 713 int 714 ttype(Token *t) 715 { 716 if (t == nil) return 0; 717 return t->type; 718 } 719 720 void 721 tflush(Token *t, Channel *c) 722 { 723 int i; 724 for (i = 0; i < t->count; i++) { 725 send(c, &t->tokens[i]); 726 } 727 } 728 729 int 730 ttest(Token **tt, int *rules, int count) 731 { 732 int i; 733 for (i = 0; i < count; i++) if (rules[i] != ttype(tt[i])) return 0; 734 return 1; 735 }