Data Structures | |
struct | OpenPosTableItem |
Open parts of speech table entry. More... | |
struct | Lexicon |
Lexicon. More... | |
struct | LexiconAccess |
Lexicon access result. More... | |
struct | LexiconSearch |
Lexicon Search result. More... | |
Defines | |
#define | POS_DELIMITER_CHAR "#" |
Delimiter character used between lemma graphy and lemma part of speech. | |
#define | LEXICON_WHITE_CHAR ' ' |
White character used by the lexicon (for example between "carte" and "bleue" for the "carte bleue" entry). | |
#define | LEXICON_FILE_EXT ".slplex" |
Default lexicon header file extension. | |
Typedefs | |
typedef LexicalEntryIndex | Lemma |
Lemma. | |
typedef unsigned short | Morpho |
Part of speech. | |
typedef unsigned int | Frequency |
Frequency of occurrence. | |
typedef double | Probability |
Probability of occurrence. | |
Functions | |
int | lexiconCreate (Lexicon *lexicon, const LexicalMemoryType memory_type, const LexicalDataType data_type) |
gboolean | lexiconContains_CHARACTER (const Lexicon *lexicon, const char *sequence, const Lemma *lemma, const char *part_of_speech, const Frequency *frequency, const Probability *probability) |
gboolean | lexiconContains_UNSIGNED_LONG (const Lexicon *lexicon, const LongArray *sequence, const Lemma *lemma, const Morpho *part_of_speech, const Frequency *frequency, const Probability *probability) |
gboolean | lexiconInsert_CHARACTER (Lexicon *lexicon, const char *sequence, const Lemma *lemma, const char *part_of_speech, const Frequency *frequency, const Probability *probability, LexicalEntryIndex *size, const gboolean by_force) |
gboolean | lexiconInsert_UNSIGNED_LONG (Lexicon *lexicon, const LongArray *sequence, const Lemma *lemma, const Morpho *part_of_speech, const Frequency *frequency, const Probability *probability, LexicalEntryIndex *size, const gboolean by_force) |
int | lexiconExport (const Lexicon *lexicon, const char *input_filename) |
int | lexiconImport (Lexicon *lexicon, const char *basefilename) |
int | lexiconSave (const Lexicon *lexicon, const char *input_filename) |
gboolean | lexiconNormalizeProba (Lexicon *lexicon) |
gboolean | lexiconIsNormalized (const Lexicon *lexicon) |
void | lexiconApplyLogOnProba (Lexicon *lexicon) |
int | lexiconLoad (Lexicon *lexicon, const char *filename) |
LexiconSearch | lexiconLookFor_CHARACTER (const Lexicon *lexicon, const char *graphy) |
LexiconSearch | lexiconLookFor_UNSIGNED_LONG (const Lexicon *lexicon, const LongArray *graphy) |
gboolean | lexiconSearchNext (LexicalAssocMem *associative_memory, LexiconSearch *lexicon_search) |
LexiconAccess | lexiconAccess (const Lexicon *lexicon, const LexicalEntryIndex index) |
LexicalEntry | lexiconAccessGetGraphy (const LexiconAccess *lexicon_access) |
char * | lexiconAccessGetPartOfSpeech (const LexiconAccess *lexicon_access) |
size_t | lexiconGetSize (const Lexicon *lexicon) |
void | lexiconFree (Lexicon *lexicon) |
void | lexiconDump (Lexicon *lexicon, int(*print)(const char *,...), const char *delimiter, gboolean all, gboolean numeration) |
SlpTK Library 0.6.0
<lexicon.h>
Antonin Merçay (revision on 22.12.2004)
typedef unsigned int Frequency |
Frequency of occurrence.
Total number of occurrences of a given Lexicon entry in a given corpus
typedef LexicalEntryIndex Lemma |
Lemma.
The lemma field is represented as the corresponding entry index in the Lexicon
typedef unsigned short Morpho |
Part of speech.
Internal part of speech (PoS) field representation in the Lexicon
typedef double Probability |
Probability of occurrence.
Probability of occurrence of a given lexicon entry
LexiconAccess lexiconAccess | ( | const Lexicon * | lexicon, | |
const LexicalEntryIndex | index | |||
) |
Access a lexicon entry identified by its index
[in] | lexicon | The source lexicon |
[in] | index | The entry index |
Accede_Lexique
LexicalEntry lexiconAccessGetGraphy | ( | const LexiconAccess * | lexicon_access | ) |
Return the graphy associated to a given entry via a lexicon access result
[in] | lexicon_access | The lexicon access result |
char * lexiconAccessGetPartOfSpeech | ( | const LexiconAccess * | lexicon_access | ) |
Return the string representation of the part of speech associated to a given entry via a lexicon access result
[in] | lexicon_access | The lexicon access result |
NULL
void lexiconApplyLogOnProba | ( | Lexicon * | lexicon | ) |
Convert each value of the lexicon probability field to its logarithm
[in] | lexicon | The lexicon to treat |
Log_Proba
gboolean lexiconContains_CHARACTER | ( | const Lexicon * | lexicon, | |
const char * | sequence, | |||
const Lemma * | lemma, | |||
const char * | part_of_speech, | |||
const Frequency * | frequency, | |||
const Probability * | probability | |||
) |
Check if a specified entry is stored in a lexicon. Only provided (non-NULL) fields are checked. If a field provided to the function is not handled by the lexicon, it is ignored.
[in] | lexicon | The source lexicon |
[in] | sequence | The entry graphy (required) |
[in] | lemma | The entry lemma (optional) |
[in] | part_of_speech | The entry part_of_speech (optional) |
[in] | frequency | The entry frequency (optional) |
[in] | probability | The entry probability (optional) |
TRUE
) or not (FALSE
)Dans_Lexique
& Dans_Lexique_Ulong
gboolean lexiconContains_UNSIGNED_LONG | ( | const Lexicon * | lexicon, | |
const LongArray * | sequence, | |||
const Lemma * | lemma, | |||
const Morpho * | part_of_speech, | |||
const Frequency * | frequency, | |||
const Probability * | probability | |||
) |
Check if a specified entry is stored in a lexicon. Only provided (non-NULL) fields are checked. If a field provided to the function is not handled by the lexicon, it is ignored.
[in] | lexicon | The source lexicon |
[in] | sequence | The entry graphy (required) |
[in] | lemma | The entry lemma (optional) |
[in] | part_of_speech | The entry part_of_speech (optional) |
[in] | frequency | The entry frequency (optional) |
[in] | probability | The entry probability (optional) |
TRUE
) or not (FALSE
)Dans_Lexique
& Dans_Lexique_Ulong
int lexiconCreate | ( | Lexicon * | lexicon, | |
const LexicalMemoryType | memory_type, | |||
const LexicalDataType | data_type | |||
) |
Allocate and initialize a new lexicon
[in] | lexicon | The lexicon to create |
[in] | memory_type | The type of memory that stores the graphies |
[in] | data_type | The type of stored graphies |
char
strings.Init_Lexique
void lexiconDump | ( | Lexicon * | lexicon, | |
int(*)(const char *,...) | print, | |||
const char * | delimiter, | |||
gboolean | all, | |||
gboolean | numeration | |||
) |
Dump the content of a lexicon
[in] | lexicon | The lexicon to dump |
[in] | print | The print function to use |
[in] | delimiter | The delimiter string to dump between fields |
[in] | all | Also dump delimiters for fields that are not handled by the lexicon |
[in] | numeration | Print the numeration for each entry |
Liste_Lexique
int lexiconExport | ( | const Lexicon * | lexicon, | |
const char * | input_filename | |||
) |
Save the content of a lexicon in a set of human-readable ASCII files. The operation generates:
input_filename
that defines the lexicon properties like the memory type, the handled fields or the open parts of speech information;
[in] | lexicon | The lexicon to export |
[in] | input_filename | The input filename |
Exporte_Lexique
void lexiconFree | ( | Lexicon * | lexicon | ) |
Free the memory allocated to a lexicon
[in] | lexicon | The lexicon to free |
Libere_Lexique
size_t lexiconGetSize | ( | const Lexicon * | lexicon | ) |
Returns the number of entries stored in a lexicon
[in] | lexicon | The source lexicon |
int lexiconImport | ( | Lexicon * | lexicon, | |
const char * | basefilename | |||
) |
Load a lexicon from the content of a set of human-readable ASCII files. See lexiconExport for more information about the required files.
[in] | lexicon | The lexicon where to import |
[in] | basefilename | The lexicon header filename |
Importe_Lexique
gboolean lexiconInsert_CHARACTER | ( | Lexicon * | lexicon, | |
const char * | sequence, | |||
const Lemma * | lemma, | |||
const char * | part_of_speech, | |||
const Frequency * | frequency, | |||
const Probability * | probability, | |||
LexicalEntryIndex * | size, | |||
const gboolean | by_force | |||
) |
Insert an entry into a lexicon. The first insertion sets which fields are handled by the lexicon. Afterwards, a warning message is displayed each time a handled field is not specified by the function. If a similar entry is already stored by the lexicon and the insertion is not forced (by_force
parameter set to FALSE
), the entry is not inserted, an error message is printed and the function returns TRUE
.
[in] | lexicon | The lexicon where to insert |
[in] | sequence | The entry graphy (required) |
[in] | lemma | The entry lemma (optional) |
[in] | part_of_speech | The entry part of speech (optional) |
[in] | frequency | The entry frequency (optional) |
[in] | probability | The entry probability (optional) |
[out] | size | The number of entries in the lexicon after the insertion (optional) |
[in] | by_force | Set if the insertion is forced in case of duplicates or not. If set to TRUE , the function acts as if duplicates were never detected. |
Insere_Lexique
, Insere_Lexique_De_Force
, Insere_Lexique_Ulong
& Insere_Lexique_De_Force_Ulong
gboolean lexiconInsert_UNSIGNED_LONG | ( | Lexicon * | lexicon, | |
const LongArray * | sequence, | |||
const Lemma * | lemma, | |||
const Morpho * | part_of_speech, | |||
const Frequency * | frequency, | |||
const Probability * | probability, | |||
LexicalEntryIndex * | size, | |||
const gboolean | by_force | |||
) |
Insert an entry into a lexicon. The first insertion sets which fields are handled by the lexicon. Afterwards, a warning message is displayed each time a handled field is not specified by the function. If a similar entry is already stored by the lexicon and the insertion is not forced (by_force
parameter set to FALSE
), the entry is not inserted, an error message is printed and the function returns TRUE
.
[in] | lexicon | The lexicon where to insert |
[in] | sequence | The entry graphy (required) |
[in] | lemma | The entry lemma (optional) |
[in] | part_of_speech | The entry part of speech (optional) |
[in] | frequency | The entry frequency (optional) |
[in] | probability | The entry probability (optional) |
[out] | size | The number of entries in the lexicon after the insertion (optional) |
[in] | by_force | Set if the insertion is forced in case of duplicates or not. If set to TRUE , the function acts as if duplicates were never detected. |
Insere_Lexique
, Insere_Lexique_De_Force
, Insere_Lexique_Ulong
& Insere_Lexique_De_Force_Ulong
gboolean lexiconIsNormalized | ( | const Lexicon * | lexicon | ) |
Check if the probability field of a lexicon is normalized
[in] | lexicon | The lexicon to check |
TRUE
if the given lexicon is normalized, FALSE
otherwise.TRUE
if the provided lexicon doesn't handle the probability fieldNormalise_Proba
int lexiconLoad | ( | Lexicon * | lexicon, | |
const char * | filename | |||
) |
Load the content of a lexicon from a set of binary files. See lexiconSave for more information on the required files.
[in] | lexicon | The lexicon where to load |
[in] | filename | The lexicon header filename |
Read_Lexique
LexiconSearch lexiconLookFor_CHARACTER | ( | const Lexicon * | lexicon, | |
const char * | graphy | |||
) |
Search the first lexicon entry with a given graphy. Since several entries may have the same graphy, all results can be iteratively obtained using lexiconSearchNext.
[in] | lexicon | The lexicon where to search |
[in] | graphy | The searched graphy |
Recherche_Lexique
& Recherche_Lexique_Ulong
LexiconSearch lexiconLookFor_UNSIGNED_LONG | ( | const Lexicon * | lexicon, | |
const LongArray * | graphy | |||
) |
Search the first lexicon entry with a given graphy. Since several entries may have the same graphy, all results can be iteratively obtained using lexiconSearchNext.
[in] | lexicon | The lexicon where to search |
[in] | graphy | The searched graphy |
Recherche_Lexique
& Recherche_Lexique_Ulong
gboolean lexiconNormalizeProba | ( | Lexicon * | lexicon | ) |
Normalize the probability field of a lexicon
[in] | lexicon | The lexicon to normalize |
TRUE
if the given lexicon was already normalized, FALSE
otherwise.TRUE
if the provided lexicon doesn't handle the probability fieldNormalise_Proba
int lexiconSave | ( | const Lexicon * | lexicon, | |
const char * | input_filename | |||
) |
Save the content of a lexicon in a set of binary files. The operation generates:
input_filename
that defines the lexicon properties like the memory type, the handled fields or the open parts of speech information;
[in] | lexicon | The lexicon to save |
[in] | input_filename | The input filename |
Write_Lexique
gboolean lexiconSearchNext | ( | LexicalAssocMem * | associative_memory, | |
LexiconSearch * | lexicon_search | |||
) |
Carry on a graphy-based search into a lexicon. Each time this function is called, the fields of the provided search result are updated to reflect the properties of the next relevant entry, until it returns FALSE
, telling that all corresponding results have been output.
[in] | associative_memory | The associative memory on which to perform the search |
[in] | lexicon_search | The lexicon search result to update |
Suivant_Lexique