Merge pull request #2212 from TwlyY29/bibtex-parser
Added a bibtex parser that extracts identifiers of entries in bib-files
commit
8abe5342c5
|
@ -14,6 +14,7 @@ parsers = \
|
|||
parsers/asciidoc.c \
|
||||
parsers/asm.c \
|
||||
parsers/basic.c \
|
||||
parsers/bibtex.c \
|
||||
parsers/c.c \
|
||||
parsers/cobol.c \
|
||||
parsers/iniconf.c \
|
||||
|
|
|
@ -65,6 +65,7 @@
|
|||
GoParser, \
|
||||
JsonParser, \
|
||||
ZephirParser, \
|
||||
PowerShellParser
|
||||
PowerShellParser, \
|
||||
BibtexParser
|
||||
|
||||
#endif /* CTAGS_MAIN_PARSERS_H */
|
||||
|
|
|
@ -0,0 +1,431 @@
|
|||
/*
|
||||
* Copyright (c) 2000-2001, Jérôme Plût
|
||||
* Copyright (c) 2006, Enrico Tröger
|
||||
* Copyright (c) 2019, Mirco Schönfeld
|
||||
*
|
||||
* This source code is released for free distribution under the terms of the
|
||||
* GNU General Public License.
|
||||
*
|
||||
* This module contains functions for generating tags for source files
|
||||
* for the BibTex formatting system.
|
||||
* https://en.wikipedia.org/wiki/BibTeX
|
||||
*/
|
||||
|
||||
/*
|
||||
* INCLUDE FILES
|
||||
*/
|
||||
#include "general.h" /* must always come first */
|
||||
#include <ctype.h> /* to define isalpha () */
|
||||
#include <string.h>
|
||||
|
||||
#include "debug.h"
|
||||
#include "entry.h"
|
||||
#include "keyword.h"
|
||||
#include "parse.h"
|
||||
#include "read.h"
|
||||
#include "routines.h"
|
||||
#include "vstring.h"
|
||||
|
||||
/*
 *	 MACROS
 */
#define isType(token,t) (bool) ((token)->type == (t))
#define isKeyword(token,k) (bool) ((token)->keyword == (k))
/* Character-class test for identifier characters.  The argument is cast
 * through unsigned char before the <ctype.h> classifiers: passing a
 * negative value other than EOF to isalpha()/isdigit() is undefined
 * behavior (CERT STR37-C).  Callers guard against EOF before using this. */
#define isIdentChar(c) \
	(isalpha ((unsigned char) (c)) || isdigit ((unsigned char) (c)) || \
	 (c) == '_' || (c) == '-' || (c) == '+')
|
||||
|
||||
/*
 *	 DATA DECLARATIONS
 */

/*
 *	Used to specify type of keyword.
 *	One value per recognised BibTeX entry type; listed in the same order
 *	as the bibKind enumeration and the BibKeywordTable below.
 */
enum eKeywordId {
	KEYWORD_article,
	KEYWORD_book,
	KEYWORD_booklet,
	KEYWORD_conference,
	KEYWORD_inbook,
	KEYWORD_incollection,
	KEYWORD_inproceedings,
	KEYWORD_manual,
	KEYWORD_mastersthesis,
	KEYWORD_misc,
	KEYWORD_phdthesis,
	KEYWORD_proceedings,
	KEYWORD_string,
	KEYWORD_techreport,
	KEYWORD_unpublished
};
typedef int keywordId; /* to allow KEYWORD_NONE */

/* Token types produced by readToken(). */
enum eTokenType {
	/* 0..255 are the byte's value.  Some are named for convenience */
	TOKEN_OPEN_CURLY = '{',
	/* above is special types */
	TOKEN_UNDEFINED = 256,
	TOKEN_KEYWORD,		/* "@<word>" whose word matches a known entry type */
	TOKEN_IDENTIFIER	/* any other identifier-like word */
};
typedef int tokenType;

/* One lexical token plus the input location where it started; the
 * location is copied into the tag entry when the token becomes a tag. */
typedef struct sTokenInfo {
	tokenType		type;
	keywordId		keyword;
	vString *		string;		/* owned by the token; freed in deleteToken() */
	unsigned long	lineNumber;
	MIOPos			filePosition;
} tokenInfo;
|
||||
|
||||
/*
 *	DATA DEFINITIONS
 */

/* Language id assigned to this parser; recorded in initialize() and used
 * by readToken() for keyword lookups. */
static langType Lang_bib;

/*
 *	Tag kinds emitted by this parser.  Members are declared in the same
 *	order as enum eKeywordId and as the BibKinds table below; the Assert
 *	in BibtexParser() checks that BibKinds has BIBTAG_COUNT entries.
 */
typedef enum {
	BIBTAG_ARTICLE,
	BIBTAG_BOOK,
	BIBTAG_BOOKLET,
	BIBTAG_CONFERENCE,
	BIBTAG_INBOOK,
	BIBTAG_INCOLLECTION,
	BIBTAG_INPROCEEDINGS,
	BIBTAG_MANUAL,
	BIBTAG_MASTERSTHESIS,
	BIBTAG_MISC,
	BIBTAG_PHDTHESIS,
	BIBTAG_PROCEEDINGS,
	BIBTAG_STRING,
	BIBTAG_TECHREPORT,
	BIBTAG_UNPUBLISHED,
	BIBTAG_COUNT
} bibKind;

/* Kind table, indexed by bibKind; one entry per BibTeX entry type. */
static kindDefinition BibKinds [] = {
	{ true,	'a',	"article",			"article"			},
	{ true,	'b',	"book",				"book"				},
	{ true,	'B',	"booklet",			"booklet"			},
	{ true,	'c',	"conference",		"conference"		},
	{ true,	'i',	"inbook",			"inbook"			},
	{ true,	'I',	"incollection",		"incollection"		},
	{ true,	'j',	"inproceedings",	"inproceedings"		},
	{ true,	'm',	"manual",			"manual"			},
	{ true,	'M',	"mastersthesis",	"mastersthesis"		},
	{ true,	'n',	"misc",				"misc"				},
	{ true,	'p',	"phdthesis",		"phdthesis"			},
	{ true,	'P',	"proceedings",		"proceedings"		},
	{ true,	's',	"string",			"string"			},
	{ true,	't',	"techreport",		"techreport"		},
	{ true,	'u',	"unpublished",		"unpublished"		}
};

/* Maps entry-type names (matched case-insensitively, without the leading
 * '@') to keyword ids; same order as enum eKeywordId. */
static const keywordTable BibKeywordTable [] = {
	/* keyword			keyword ID */
	{ "article",		KEYWORD_article		},
	{ "book",			KEYWORD_book		},
	{ "booklet",		KEYWORD_booklet		},
	{ "conference",		KEYWORD_conference	},
	{ "inbook",			KEYWORD_inbook		},
	{ "incollection",	KEYWORD_incollection	},
	{ "inproceedings",	KEYWORD_inproceedings	},
	{ "manual",			KEYWORD_manual		},
	{ "mastersthesis",	KEYWORD_mastersthesis	},
	{ "misc",			KEYWORD_misc		},
	{ "phdthesis",		KEYWORD_phdthesis	},
	{ "proceedings",	KEYWORD_proceedings	},
	{ "string",			KEYWORD_string		},
	{ "techreport",		KEYWORD_techreport	},
	{ "unpublished",	KEYWORD_unpublished	}
};
|
||||
|
||||
/*
|
||||
* FUNCTION DEFINITIONS
|
||||
*/
|
||||
|
||||
static tokenInfo *newToken (void)
|
||||
{
|
||||
tokenInfo *const token = xMalloc (1, tokenInfo);
|
||||
|
||||
token->type = TOKEN_UNDEFINED;
|
||||
token->keyword = KEYWORD_NONE;
|
||||
token->string = vStringNew ();
|
||||
token->lineNumber = getInputLineNumber ();
|
||||
token->filePosition = getInputFilePosition ();
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
/* Release a token allocated by newToken(), including its string buffer. */
static void deleteToken (tokenInfo *const tok)
{
	vStringDelete (tok->string);
	eFree (tok);
}
|
||||
|
||||
/*
|
||||
* Tag generation functions
|
||||
*/
|
||||
/*
 *	 Tag generation functions
 */

/* Emit a tag entry of the given kind for "token", carrying the token's
 * recorded source location.  Does nothing when the kind is disabled. */
static void makeBibTag (tokenInfo *const token, bibKind kind)
{
	tagEntryInfo e;

	if (! BibKinds [kind].enabled)
		return;

	initTagEntry (&e, vStringValue (token->string), kind);
	e.lineNumber   = token->lineNumber;
	e.filePosition = token->filePosition;

	makeTagEntry (&e);
}
|
||||
|
||||
/*
|
||||
* Parsing functions
|
||||
*/
|
||||
|
||||
/*
|
||||
* Read a C identifier beginning with "firstChar" and places it into
|
||||
* "name".
|
||||
*/
|
||||
static void parseIdentifier (vString *const string, const int firstChar)
|
||||
{
|
||||
int c = firstChar;
|
||||
Assert (isIdentChar (c));
|
||||
do
|
||||
{
|
||||
vStringPut (string, c);
|
||||
c = getcFromInputFile ();
|
||||
} while (c != EOF && isIdentChar (c));
|
||||
if (c != EOF)
|
||||
ungetcToInputFile (c); /* unget non-identifier character */
|
||||
}
|
||||
|
||||
/*
 * Read the next token from the input file into "token".
 *
 * Whitespace is skipped and '%' line comments are discarded.  An '@'
 * followed by an alphabetic character is read as "@<identifier>" and
 * classified as TOKEN_KEYWORD when the identifier (without the '@')
 * matches a known BibTeX entry type, otherwise TOKEN_IDENTIFIER.  Any
 * other identifier character starts a plain TOKEN_IDENTIFIER; any other
 * byte becomes its own single-character token type.
 *
 * Returns false on end of file, true otherwise.
 */
static bool readToken (tokenInfo *const token)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:

	do
	{
		c = getcFromInputFile ();
	}
	while (c == '\t' || c == ' ' || c == '\n');

	token->lineNumber = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	token->type = (unsigned char) c;
	switch (c)
	{
		case EOF: return false;

		case '@':
			/*
			 * All Bib entries start with an at symbol.
			 * Check if the next character is an alpha character
			 * else it is not a potential tex tag.
			 */
			c = getcFromInputFile ();
			if (! isalpha (c))
				ungetcToInputFile (c);
			else
			{
				vStringPut (token->string, '@');
				parseIdentifier (token->string, c);
				/* + 1 skips the leading '@' for the keyword lookup. */
				token->keyword = lookupCaseKeyword (vStringValue (token->string) + 1, Lang_bib);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
		case '%':
			skipToCharacterInInputFile ('\n'); /* % are single line comments */
			goto getNextChar;	/* (unreachable "break" after goto removed) */
		default:
			if (isIdentChar (c))
			{
				parseIdentifier (token->string, c);
				token->type = TOKEN_IDENTIFIER;
			}
			break;
	}
	return true;
}
|
||||
|
||||
/* Duplicate every field of "src" into "dest", deep-copying the string
 * buffer so the two tokens remain independent. */
static void copyToken (tokenInfo *const dest, tokenInfo *const src)
{
	vStringCopy (dest->string, src->string);
	dest->type         = src->type;
	dest->keyword      = src->keyword;
	dest->lineNumber   = src->lineNumber;
	dest->filePosition = src->filePosition;
}
|
||||
|
||||
/*
|
||||
* Scanning functions
|
||||
*/
|
||||
|
||||
/*
 * Parse one entry of the form
 *
 *     @article{identifier,
 *       author="John Doe"}
 *
 * "token" holds the "@article"-style keyword on entry; the identifier
 * following the opening curly brace (trailing whitespace stripped) is
 * emitted as a tag of the given kind, located at the keyword token.
 *
 * Returns true when the caller should stop (end of file was reached, or
 * no identifier followed the opening brace), false otherwise.
 */
static bool parseTag (tokenInfo *const token, bibKind kind)
{
	bool eof = false;
	tokenInfo *const name = newToken ();
	vString *const id = vStringNew ();

	if (isType (token, TOKEN_KEYWORD))
	{
		/* Remember the keyword token: the tag is reported from here. */
		copyToken (name, token);
		if (! readToken (token))
		{
			eof = true;
			goto out;
		}
	}

	if (isType (token, TOKEN_OPEN_CURLY))
	{
		if (! readToken (token))
		{
			eof = true;
			goto out;
		}
		if (! isType (token, TOKEN_IDENTIFIER))
		{
			/* should find an identifier for the bib item at first place */
			eof = true;
			goto out;
		}
		vStringCat (id, token->string);
		vStringStripTrailing (id);
		if (vStringLength (id) > 0)
		{
			vStringCopy (name->string, id);
			makeBibTag (name, kind);
		}
	}

out:
	deleteToken (name);
	vStringDelete (id);
	return eof;
}
|
||||
|
||||
/*
 * Main loop: scan the whole input file, handing every recognised
 * "@<entry-type>" keyword token to parseTag() with the matching tag
 * kind.  Stops at end of file or when parseTag() reports it hit one.
 *
 * The keyword-to-kind mapping relies on enum eKeywordId and bibKind
 * declaring their members in the same order (KEYWORD_article <->
 * BIBTAG_ARTICLE, ... KEYWORD_unpublished <-> BIBTAG_UNPUBLISHED), so
 * the fifteen per-keyword switch cases collapse to a single offset
 * computation.
 */
static void parseBibFile (tokenInfo *const token)
{
	bool eof = false;

	while (! eof && readToken (token))
	{
		if (isType (token, TOKEN_KEYWORD)
			&& token->keyword >= KEYWORD_article
			&& token->keyword <= KEYWORD_unpublished)
		{
			eof = parseTag (token,
							(bibKind) (token->keyword - KEYWORD_article));
		}
	}
}
|
||||
|
||||
/* Parser "initialize" callback: records the language id assigned to
 * BibTeX so readToken() can look keywords up via lookupCaseKeyword(). */
static void initialize (const langType language)
{
	Lang_bib = language;
}
|
||||
|
||||
/* Parser entry point: allocate a scratch token, scan the whole input
 * file for BibTeX entries, then release the token. */
static void findBibTags (void)
{
	tokenInfo *const token = newToken ();

	parseBibFile (token);

	deleteToken (token);
}
|
||||
|
||||
/* Create parser definition structure */
|
||||
extern parserDefinition* BibtexParser (void)
|
||||
{
|
||||
Assert (ARRAY_SIZE (BibKinds) == BIBTAG_COUNT);
|
||||
static const char *const extensions [] = { "bib", NULL };
|
||||
parserDefinition *const def = parserNew ("BibTeX");
|
||||
def->extensions = extensions;
|
||||
/*
|
||||
* New definitions for parsing instead of regex
|
||||
*/
|
||||
def->kindTable = BibKinds;
|
||||
def->kindCount = ARRAY_SIZE (BibKinds);
|
||||
def->parser = findBibTags;
|
||||
def->initialize = initialize;
|
||||
def->keywordTable = BibKeywordTable;
|
||||
def->keywordCount = ARRAY_SIZE (BibKeywordTable);
|
||||
return def;
|
||||
}
|
|
@ -11,6 +11,7 @@ filetypes = \
|
|||
filedefs/filetypes.asciidoc \
|
||||
filedefs/filetypes.asm \
|
||||
filedefs/filetypes.batch \
|
||||
filedefs/filetypes.bibtex \
|
||||
filedefs/filetypes.c \
|
||||
filedefs/filetypes.caml \
|
||||
filedefs/filetypes.Clojure.conf \
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
# For complete documentation of this file, please see Geany's main documentation
|
||||
|
||||
[settings]
|
||||
# highlights commented lines
|
||||
lexer_filetype=LaTeX
|
||||
# default extension used when saving files
|
||||
extension=bib
|
|
@ -10,6 +10,7 @@ Arduino=*.ino;*.pde;
|
|||
Asciidoc=*.asciidoc;*.adoc;
|
||||
ASM=*.asm;*.asm51;*.a51;*.s;*.S;*.sx;
|
||||
Batch=*.bat;*.cmd;*.nt;
|
||||
BibTeX=*.bib;
|
||||
CAML=*.ml;*.mli;
|
||||
C=*.c;*.xpm;
|
||||
C++=*.cpp;*.cxx;*.c++;*.cc;*.h;*.hpp;*.hxx;*.h++;*.hh;*.C;*.H;
|
||||
|
@ -43,7 +44,7 @@ Java=*.java;*.jsp;
|
|||
Javascript=*.js;
|
||||
JSON=*.json;
|
||||
Kotlin=*.kt;*.kts;
|
||||
LaTeX=*.tex;*.sty;*.idx;*.ltx;*.latex;*.aux;*.bib;
|
||||
LaTeX=*.tex;*.sty;*.idx;*.ltx;*.latex;*.aux;
|
||||
Lisp=*.lisp;
|
||||
Lua=*.lua;
|
||||
Make=*.mak;*.mk;GNUmakefile;makefile;Makefile;makefile.*;Makefile.*;
|
||||
|
|
|
@ -160,6 +160,7 @@ static void init_builtin_filetypes(void)
|
|||
FT_INIT( SQL, SQL, "SQL", NULL, FILE, MISC );
|
||||
FT_INIT( COBOL, COBOL, "COBOL", NULL, SOURCE_FILE, COMPILED );
|
||||
FT_INIT( LATEX, LATEX, "LaTeX", NULL, SOURCE_FILE, MARKUP );
|
||||
FT_INIT( BIBTEX, BIBTEX, "BibTeX", NULL, SOURCE_FILE, MARKUP );
|
||||
FT_INIT( VHDL, VHDL, "VHDL", NULL, SOURCE_FILE, COMPILED );
|
||||
FT_INIT( VERILOG, VERILOG, "Verilog", NULL, SOURCE_FILE, COMPILED );
|
||||
FT_INIT( DIFF, DIFF, "Diff", NULL, FILE, MISC );
|
||||
|
|
|
@ -105,6 +105,7 @@ typedef enum
|
|||
GEANY_FILETYPES_COFFEESCRIPT,
|
||||
GEANY_FILETYPES_GO,
|
||||
GEANY_FILETYPES_ZEPHIR,
|
||||
GEANY_FILETYPES_BIBTEX,
|
||||
/* ^ append items here */
|
||||
GEANY_MAX_BUILT_IN_FILETYPES /* Don't use this, use filetypes_array->len instead */
|
||||
}
|
||||
|
|
|
@ -525,6 +525,20 @@ static void add_top_level_items(GeanyDocument *doc)
|
|||
NULL);
|
||||
break;
|
||||
}
|
||||
case GEANY_FILETYPES_BIBTEX:
|
||||
{
|
||||
tag_list_add_groups(tag_store,
|
||||
&(tv_iters.tag_function), _("Articles"), ICON_NONE,
|
||||
&(tv_iters.tag_macro), _("Book Chapters"), ICON_NONE,
|
||||
&(tv_iters.tag_class), _("Books & Conference Proceedings"), ICON_NONE,
|
||||
&(tv_iters.tag_member), _("Conference Papers"), ICON_NONE,
|
||||
&(tv_iters.tag_variable), _("Theses"), ICON_NONE,
|
||||
&(tv_iters.tag_namespace), _("Strings"), ICON_NONE,
|
||||
&(tv_iters.tag_externvar), _("Unpublished"), ICON_NONE,
|
||||
&(tv_iters.tag_other), _("Other"), ICON_NONE,
|
||||
NULL);
|
||||
break;
|
||||
}
|
||||
case GEANY_FILETYPES_MATLAB:
|
||||
{
|
||||
tag_list_add_groups(tag_store,
|
||||
|
|
|
@ -124,6 +124,23 @@ static TMParserMapEntry map_LATEX[] = {
|
|||
{'n', tm_tag_namespace_t},
|
||||
{'s', tm_tag_struct_t},
|
||||
};
|
||||
static TMParserMapEntry map_BIBTEX[] = {
|
||||
{'a', tm_tag_function_t},
|
||||
{'b', tm_tag_class_t},
|
||||
{'B', tm_tag_class_t},
|
||||
{'c', tm_tag_member_t},
|
||||
{'i', tm_tag_macro_t},
|
||||
{'I', tm_tag_macro_t},
|
||||
{'j', tm_tag_member_t},
|
||||
{'m', tm_tag_other_t},
|
||||
{'M', tm_tag_variable_t},
|
||||
{'n', tm_tag_other_t},
|
||||
{'p', tm_tag_variable_t},
|
||||
{'P', tm_tag_class_t},
|
||||
{'s', tm_tag_namespace_t},
|
||||
{'t', tm_tag_other_t},
|
||||
{'u', tm_tag_externvar_t},
|
||||
};
|
||||
|
||||
static TMParserMapEntry map_ASM[] = {
|
||||
{'d', tm_tag_macro_t},
|
||||
|
@ -531,6 +548,7 @@ static TMParserMap parser_map[] = {
|
|||
MAP_ENTRY(PHP),
|
||||
MAP_ENTRY(PYTHON),
|
||||
MAP_ENTRY(LATEX),
|
||||
MAP_ENTRY(BIBTEX),
|
||||
MAP_ENTRY(ASM),
|
||||
MAP_ENTRY(CONF),
|
||||
MAP_ENTRY(SQL),
|
||||
|
|
|
@ -109,6 +109,7 @@ enum
|
|||
TM_PARSER_JSON,
|
||||
TM_PARSER_ZEPHIR,
|
||||
TM_PARSER_POWERSHELL,
|
||||
TM_PARSER_BIBTEX,
|
||||
TM_PARSER_COUNT
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue