#include "doc_elt.h"
#include "org_heading.h"
#include "org_text.h"
+#include "org_property.h"
int yywrap (yyscan_t scanner);
void yyerror (char const *);
%option extra-type="struct extra *"
/* Rule Start Conditions */
-%x heading text
- /* %s property */
+%x heading text property property_skipwhitespace property_value property_finish
+%s property_drawer
- /* Start of rules */
-%%
-
- /**
- * Element Break
- */
-[\n]*"\n\n\n" {
- debug_msg (LEXER, 4, "Element Break\n");
- /* Create a temporary data store */
- yyextra->elt = yyextra->curr_elt;
- TOKEN temp = yyextra->curr_type;
-
- /* create a new element */
- yyextra->curr_type = T_ORG_TEXT;
- yyextra->curr_elt = (doc_elt *) org_text_create_empty (&org_text_ops);
- org_text_initversion ( (org_text *) yyextra->curr_elt, yyextra->src);
-
- /* add data to the element */
- size_t size = org_text_get_length ((org_text *)yyextra->curr_elt, yyextra->src);
- char * string = org_text_get_text ((org_text *) yyextra->curr_elt, yyextra->src);
- char * new_string = malloc (sizeof (char) * (size + yyleng));
- strncpy (new_string, string, size);
- strncpy ((new_string + size), yytext, yyleng);
- org_text_set_text((org_text *) yyextra->elt, new_string, size + yyleng, yyextra->src);
-
- /* return the previous element, if there was one */
- if (temp != T_NOTHING)
- {
- return temp;
- }
-}
+%% /* Start of rules */
- /**
+ /*
* Heading
+ * "** Heading blah blah "
*/
-
/* start of a heading */
-^[*]+" " {
+^[*]+" ".*[\n]? {
debug_msg (LEXER, 4, "heading start\n");
/* Create a temporary data store */
TOKEN temp = yyextra->curr_type;
yyextra->elt = yyextra->curr_elt;
- /* create the new element */
- yyextra->curr_elt = (doc_elt *) org_heading_create_empty (&org_heading_ops);
- yyextra->curr_type = T_ORG_HEADING;
- org_heading_initversion ((org_heading *)yyextra->curr_elt, yyextra->src);
- org_heading_set_level ((org_heading *) yyextra->curr_elt, yyleng - 1, yyextra->src);
- BEGIN (heading);
-
- /* return the previous element, if there was one */
- if (yyextra->elt != NULL)
- {
- debug_msg (LEXER, 3, "element return\n");
- return temp;
- }
-}
-
- /* Grab the rest of the heading element */
-<heading>.*"\n"? {
- debug_msg (LEXER, 5, "heading finishing line\n");
-
- /* copy in the data in */
- char *c = malloc (sizeof (char)*(yyleng) + 2);
- strncpy (c, yytext, yyleng);
- org_heading_set_text ((org_heading *)yyextra->curr_elt, c, yyleng, yyextra->src);
-
- /* scan the line for the internal values */
-
- BEGIN (INITIAL);
+ /* create the new element */
+ yyextra->curr_elt = (doc_elt *) org_heading_create_empty (&org_heading_ops);
+ yyextra->curr_type = T_ORG_HEADING;
+ org_heading_initversion ((org_heading *)yyextra->curr_elt, yyextra->src);
+
+ /* Copy the whole matched line (stars, heading text and the optional
+  * newline) into a fresh buffer of exactly yyleng bytes; no terminating
+  * NUL is stored, so the length is passed alongside the pointer.
+  * NOTE(review): the malloc result is not checked, and the heading
+  * presumably takes ownership of the buffer -- confirm. */
+ char *c = malloc (sizeof (char)*(yyleng));
+ strncpy (c, yytext, yyleng);
+ org_heading_set_entire_text ((org_heading *)yyextra->curr_elt, c, yyleng, yyextra->src, yyextra->ctxt);
+
+ /* the rule consumed the entire line; continue scanning in the INITIAL
+  * start condition */
+ BEGIN (INITIAL);
+
+ /* return the previous element, but only when one was in progress
+  * (non-NULL) and its saved type is not T_NOTHING */
+ if (yyextra->elt != NULL && temp != T_NOTHING)
+ {
+ debug_msg (LEXER, 3, "element return\n");
+ return temp;
+ }
}
- /**
+ /*
* Text
+ *
+ * Any paragraph of text, also a catch all for anything without
+ * specific rules
*/
-
/* start of a plain text segment */
^. {
BEGIN(text);
yymore ();
debug_msg (LEXER, 5, "text start line\n");
- }
+}
/* finish a line */
<text>.*"\n"? {
BEGIN(INITIAL);
/* return the previous element, if there was one */
- if (temp != T_NOTHING)
+ if (temp != T_NOTHING && yyextra->elt != NULL)
{
debug_msg (LEXER, 3, "element return\n");
return temp;
BEGIN(INITIAL);
/* return the previous element, if there was one */
- if (temp != T_NOTHING)
+ if (temp != T_NOTHING && yyextra->elt != NULL)
{
debug_msg (LEXER, 3, "element return\n");
return temp;
/* Can still match more text for this element */
}
- /* Close the Lexer */
+ /* Properties
+ * " :ID: 1201324054621536421035 "
+ *
+ * Org mode properties are (key, value) pairs that can appear
+ * anywhere. They will split elements that are currently being
+ * defined. They are recognized as any element that has only
+ * whitespace between it and a colon-surrounded key. Spaces are
+ * allowed as part of the key.
+ */
+ /*start of a property line, with the tag */
+<property_drawer>^" "*":"[^:]*":" {
+ /* Matched the leading spaces and the ":key:" tag of a property line.
+  * This rule is only active in the property_drawer start condition. */
+ debug_msg (LEXER, 3, "property start\n");
+ /* Save the element that was in progress so it can be returned below */
+ TOKEN temp = yyextra->curr_type;
+ yyextra->elt = yyextra->curr_elt;
+
+ /* create the new element */
+ yyextra->curr_elt = (doc_elt *) org_property_create_empty (&org_property_ops);
+ yyextra->curr_type = T_ORG_PROPERTY;
+ org_property_initversion ((org_property *)yyextra->curr_elt, yyextra->src);
+
+ /* The match ends with the closing ':', so yyleng - 1 is that colon's
+  * offset in yytext. The offset is parked in the key-length field for
+  * now; the property_finish rule reads it back and replaces it with the
+  * real key length. */
+ org_property_set_key_length ((org_property *)yyextra->curr_elt, yyextra->src, yyleng - 1);
+ BEGIN(property_skipwhitespace);
+ /* keep this match as a prefix of yytext for the following rules */
+ yymore();
+
+ /* return the previous element, if there was one */
+ if (yyextra->elt != NULL && temp != T_NOTHING)
+ {
+ debug_msg (LEXER, 3, "property element return\n");
+ return temp;
+ }
+}
+
+ /* skip white space */
+<property_skipwhitespace>[ \t]* {
+ /* Consume the blanks between the ":key:" tag and the value. The rule
+  * that enters this start condition called yymore(), so yyleng also
+  * counts the tag: it is the offset in yytext at which the value
+  * starts. That offset is parked in the value-length field until the
+  * property_finish rule converts it into the real value length. */
+ org_property_set_value_length ((org_property *)yyextra->curr_elt, yyextra->src,
+ yyleng);
+ /* keep accumulating the line in yytext */
+ yymore();
+ BEGIN(property_finish);
+}
+
+ /* Finish the property line */
+<property_finish>.*[\n]? {
+  /* Finish a property line and return it as a T_ORG_PROPERTY token.
+   *
+   * The two preceding property rules called yymore(), so yytext now
+   * holds the whole line (yyleng bytes).  Until this rule runs, the
+   * element's length fields hold offsets into that line rather than
+   * lengths:
+   *   - key length   = offset of the ':' that closes the key
+   *   - value length = offset of the first character of the value
+   * This action copies the line, then converts both offsets into the
+   * real (string, length) pairs for the key and the value.
+   */
+  org_property *prop = (org_property *) yyextra->curr_elt;
+
+  /* The key and value strings handed to the element below point into
+   * this copy.  It is exactly yyleng bytes with no terminating NUL, so
+   * consumers must rely on the stored lengths.
+   * NOTE(review): the malloc result is not checked, as in the other
+   * rules of this file. */
+  char *line = malloc (sizeof (char) * (yyleng));
+  strncpy (line, yytext, yyleng);
+
+  size_t key_end = org_property_get_key_length (prop, yyextra->src);
+  size_t value_start = org_property_get_value_length (prop, yyextra->src);
+
+  org_property_set_text (prop, yyextra->src, line, yyleng);
+  org_property_set_value_string (prop, yyextra->src, line + value_start);
+
+  /* Trim the value: walk back over the line terminator and any trailing
+   * blanks.  value_end is one past the last kept character and never
+   * drops below value_start, so an empty value yields a length of 0
+   * instead of wrapping around, and a final line without a newline
+   * keeps its last character. */
+  size_t value_end = yyleng;
+  while (value_end > value_start)
+    {
+      char last = line[value_end - 1];
+      if (last != ' ' && last != '\t' && last != '\n')
+        break;
+      value_end--;
+    }
+  org_property_set_value_length (prop, yyextra->src, value_end - value_start);
+
+  /* Walk back from the closing ':' to just after the opening ':' so the
+   * key excludes both markup colons. */
+  size_t key_start = key_end;
+  while (key_start > 0 && line[key_start - 1] != ':')
+    key_start--;
+  org_property_set_key_string (prop, yyextra->src, line + key_start);
+  org_property_set_key_length (prop, yyextra->src, key_end - key_start);
+
+#if LEXER_PRINTLEVEL <= 5
+  /* dump the parsed key and value to stderr */
+  fwrite (org_property_get_key_string (prop, yyextra->src), sizeof (char),
+          org_property_get_key_length (prop, yyextra->src), stderr);
+  fwrite (org_property_get_value_string (prop, yyextra->src), sizeof (char),
+          org_property_get_value_length (prop, yyextra->src), stderr);
+#endif
+
+  /* return the property; later lines are still inside the drawer */
+  debug_msg (LEXER, 3, "Property Return\n");
+  BEGIN(property_drawer);
+
+  /* Publish the finished property through yyextra->elt and leave no
+   * element in progress. */
+  yyextra->curr_type = T_NOTHING;
+  yyextra->elt = yyextra->curr_elt;
+  yyextra->curr_elt = NULL;
+
+  return T_ORG_PROPERTY;
+}
+
+ /* Drawer Parser
+ * ":BEGIN:"
+ * ":END:"
+ *
+ * A fully recursive element. They can start on any line. Since the
+ * syntax is recursive, separate tokens are needed for both the start
+ * and end of a drawer.
+ *
+ * Current drawer support is just to create a property with no key.
+ */
+ /* Begin a drawer */
+^[ ]*":PROPERTIES:"[ ]*[\n]? {
+  debug_msg (LEXER, 3, "PROPERTIES drawer start\n");
+
+  /* Set aside whatever element was in progress; it is returned at the
+   * end of this action if there was one. */
+  TOKEN prev_type = yyextra->curr_type;
+  yyextra->elt = yyextra->curr_elt;
+
+  /* The drawer line itself is represented as a property element. */
+  yyextra->curr_elt = (doc_elt *) org_property_create_empty (&org_property_ops);
+  yyextra->curr_type = T_ORG_PROPERTY;
+  org_property *drawer = (org_property *) yyextra->curr_elt;
+  org_property_initversion (drawer, yyextra->src);
+
+  /* Store a copy of the matched line (yyleng bytes) as the element text. */
+  char *line = malloc (sizeof (char) * (yyleng));
+  strncpy (line, yytext, yyleng);
+  org_property_set_text (drawer, yyextra->src, line, yyleng);
+
+  /* Locate the last ':' on the line.  The 10 characters before it are
+   * the word PROPERTIES, which becomes the key without its markup
+   * colons. */
+  int colon = yyleng - 1;
+  for (; colon >= 0; colon--)
+    {
+      if (line[colon] == ':')
+        break;
+    }
+  org_property_set_key (drawer, yyextra->src, line + colon - 10, 10);
+
+  /* A drawer line carries no value. */
+  org_property_set_value (drawer, yyextra->src, NULL, 0);
+
+  /* Following lines are scanned inside the property drawer. */
+  BEGIN(property_drawer);
+
+  /* Emit the element that was set aside, if any. */
+  if (prev_type != T_NOTHING && yyextra->elt != NULL)
+    {
+      debug_msg (LEXER, 3, "drawer element return\n");
+      return prev_type;
+    }
+}
+
+ /* Finish A drawer. */
+<property_drawer>^[ ]*":END:"[ ]*"\n"? {
+  debug_msg (LEXER, 3, "END Drawer\n");
+
+  /* Set aside whatever element was in progress; it is returned at the
+   * end of this action if there was one. */
+  TOKEN prev_type = yyextra->curr_type;
+  yyextra->elt = yyextra->curr_elt;
+
+  /* The ":END:" line is represented as a property element. */
+  yyextra->curr_elt = (doc_elt *) org_property_create_empty (&org_property_ops);
+  yyextra->curr_type = T_ORG_PROPERTY;
+  org_property *drawer_end = (org_property *) yyextra->curr_elt;
+  org_property_initversion (drawer_end, yyextra->src);
+
+  /* Store a copy of the matched line (yyleng bytes) as the element text. */
+  char *line = malloc (sizeof (char) * (yyleng));
+  strncpy (line, yytext, yyleng);
+  org_property_set_text (drawer_end, yyextra->src, line, yyleng);
+
+  /* Locate the last ':' on the line.  The 3 characters before it are
+   * the word END, which becomes the key without its markup colons. */
+  int colon = yyleng - 1;
+  for (; colon >= 0; colon--)
+    {
+      if (line[colon] == ':')
+        break;
+    }
+  org_property_set_key (drawer_end, yyextra->src, line + colon - 3, 3);
+
+  /* A drawer line carries no value. */
+  org_property_set_value (drawer_end, yyextra->src, NULL, 0);
+
+  /* The drawer is closed: go back to the INITIAL start condition. */
+  BEGIN(INITIAL);
+
+  /* Emit the element that was set aside, if any. */
+  if (prev_type != T_NOTHING && yyextra->elt != NULL)
+    {
+      debug_msg (LEXER, 3, "drawer element return\n");
+      return prev_type;
+    }
+}
+
+ /*
+ * End Of File Wrap up
+ *
+ * Close the Lexer and wrap up the last element
+ */
<<EOF>> {
debug_msg (LEXER, 5, "EOF\n");
if (yyextra->curr_elt != NULL)
%%
- /* User Code Section */
+/**
+ * @brief End-of-input hook for the scanner.
+ *
+ * The scanner argument is not used. The function always returns 1,
+ * which tells the scanner to stop at the end of the current input.
+ */
int
yywrap (yyscan_t scanner)
{
+ /* Tell lex to stop processing at the end of a file */
return 1;
}
/**
* @brief Parse a title line, setting all the proper substrings
*/
+static void
parse_title_line (org_heading * h, char * line, size_t len)
{
int i = 0;
bool exit = false;
while ( i < len)
{
-
if (next_substr.string[0] == ':')
{
/* grab all characters untill there is no tag */
#include "org_document.h"
#include "org_heading.h"
#include "org_text.h"
+#include "org_property.h"
-static void rec_parse_document (yyscan_t scanner, org_document *this);
+static void rec_parse_document (yyscan_t scanner, org_document *this, parse_ctxt *ctxt);
-static doc_elt *rec_parse_heading(yyscan_t scanner, org_heading *dc, int level);
+static doc_elt *rec_parse_heading(yyscan_t scanner, org_heading *dc, int level, parse_ctxt *ctxt);
/* This parser needs some serious love. Right now it assumes that the
* only type of elements which can be nested are headings.
*/
org_document *
-org_parse_file_stream (FILE * file, doc_src src)
+org_parse_file_stream (FILE * file, doc_src src, parse_ctxt *ctxt)
{
assert (file);
debug_msg (PARSER, 3, "Parsing File\n");
e.curr_elt = NULL;
e.curr_type = T_NOTHING;
e.src = src;
+ e.ctxt = ctxt;
yyset_extra (&e, scanner);
/* Initialize doc_tree */
org_document *document = org_document_create_empty (&org_document_ops);
/* call the recursive function */
- rec_parse_document (scanner, document);
+ rec_parse_document (scanner, document, ctxt);
/* Destroy scanner */
yylex_destroy (scanner);
}
static void
-rec_parse_document (yyscan_t scanner, org_document *this)
+rec_parse_document (yyscan_t scanner, org_document *this, parse_ctxt *ctxt)
{
/* 1. get the text element
* 2 check type
/* next level is at least more than this one */
org_document_add_heading_last (this, src, (org_heading *) elt);
elt = (doc_elt *)
- rec_parse_heading(scanner, (org_heading *) elt, next_level);
+ rec_parse_heading(scanner, (org_heading *) elt, next_level, ctxt);
}
}
- else if (tok == T_ORG_TEXT)
+ else if (tok == T_ORG_TEXT || tok == T_ORG_PROPERTY)
{
debug_msg (PARSER, 3, "Got Text\n");
- /* eat up all text elements below this one */
+
org_document_add_text_last (this, src, (org_text *) elt);
/* Get the next element from the scanner */
tok = yylex (scanner);
elt = yyget_extra (scanner) -> elt;
}
+ else
+ {
+ debug_msg (PARSER, 2, "Got unknown element, skipping");
+ /* Get the next element from the scanner */
+ tok = yylex (scanner);
+ elt = yyget_extra (scanner) -> elt;
+ }
if (tok == T_QUIT || elt == NULL)
{
}
static doc_elt *
-rec_parse_heading(yyscan_t scanner, org_heading *this, int this_level)
+rec_parse_heading(yyscan_t scanner, org_heading *this, int this_level, parse_ctxt *ctxt)
{
/* 1. get the text element
* 2 check type
{
debug_msg (PARSER, 3, "Adding Sub-Heading\n");
/* next level is at least more than this one */
- org_heading_add_subheading_last (this, src, (org_heading *) elt);
+ org_heading_add_subheading_last (this, src, elt);
elt =
- rec_parse_heading(scanner, (org_heading *)elt, next_level);
+ rec_parse_heading(scanner, (org_heading *)elt, next_level, ctxt);
}
}
else if (tok == T_ORG_TEXT)
{
debug_msg (PARSER, 3, "Got Text\n");
- /* eat up all text elements below this one */
- org_heading_add_subtext_last (this, src, (org_text *) elt);
+ org_heading_add_subtext_last (this, src, elt);
/* Get the next element from the scanner */
tok = yylex (scanner);
elt = yyget_extra (scanner)-> elt;
}
+ else if (tok == T_ORG_PROPERTY)
+ {
+ debug_msg (PARSER, 3, "Got property\n");
+ /* if the property was a UID, add it to the heading. Only
+ * recognize the first ID below a heading */
+ if (doc_elt_get_key((doc_elt *)this)->length == 0)
+ {
+ org_property *p = (org_property *) elt;
+ char *key = org_property_get_key_string (p, src);
+ size_t length = org_property_get_key_length (p, src);
+ if (length == 2) /* length of ID */
+ {
+ if (strncmp (key, "id", 2) == 0
+ || strncmp (key, "ID", 2) == 0)
+ {
+ debug_msg (PARSER, 3, "Setting heading key to property");
+ org_heading_set_key (this, org_property_get_value_string (p, src),
+ org_property_get_value_length (p, src));
+ }
+ }
+ }
+
+ /* add the property as a text element */
+ org_heading_add_subtext_last (this, src, elt);
+
+ /* Get the next element from the scanner */
+ tok = yylex (scanner);
+ elt = yyget_extra (scanner)-> elt;
+ }
+ else
+ {
+ debug_msg (PARSER, 2, "Got unknown element, skipping");
+ /* Get the next element from the scanner */
+ tok = yylex (scanner);
+ elt = yyget_extra (scanner) -> elt;
+ }
if (tok == T_QUIT || elt == NULL)
{