Data Structures | |
| struct | OpenPosTableItem |
| Open parts of speech table entry. More... | |
| struct | Lexicon |
| Lexicon. More... | |
| struct | LexiconAccess |
| Lexicon access result. More... | |
| struct | LexiconSearch |
| Lexicon Search result. More... | |
Defines | |
| #define | POS_DELIMITER_CHAR "#" |
| Delimiter character used between lemma graphy and lemma part of speech. | |
| #define | LEXICON_WHITE_CHAR ' ' |
| White character used by the lexicon (for example between "carte" and "bleue" for the "carte bleue" entry). | |
| #define | LEXICON_FILE_EXT ".slplex" |
| Default lexicon header file extension. | |
Typedefs | |
| typedef LexicalEntryIndex | Lemma |
| Lemma. | |
| typedef unsigned short | Morpho |
| Part of speech. | |
| typedef unsigned int | Frequency |
| Frequency of occurrence. | |
| typedef double | Probability |
| Probability of occurrence. | |
Functions | |
| int | lexiconCreate (Lexicon *lexicon, const LexicalMemoryType memory_type, const LexicalDataType data_type) |
| gboolean | lexiconContains_CHARACTER (const Lexicon *lexicon, const char *sequence, const Lemma *lemma, const char *part_of_speech, const Frequency *frequency, const Probability *probability) |
| gboolean | lexiconContains_UNSIGNED_LONG (const Lexicon *lexicon, const LongArray *sequence, const Lemma *lemma, const Morpho *part_of_speech, const Frequency *frequency, const Probability *probability) |
| gboolean | lexiconInsert_CHARACTER (Lexicon *lexicon, const char *sequence, const Lemma *lemma, const char *part_of_speech, const Frequency *frequency, const Probability *probability, LexicalEntryIndex *size, const gboolean by_force) |
| gboolean | lexiconInsert_UNSIGNED_LONG (Lexicon *lexicon, const LongArray *sequence, const Lemma *lemma, const Morpho *part_of_speech, const Frequency *frequency, const Probability *probability, LexicalEntryIndex *size, const gboolean by_force) |
| int | lexiconExport (const Lexicon *lexicon, const char *input_filename) |
| int | lexiconImport (Lexicon *lexicon, const char *basefilename) |
| int | lexiconSave (const Lexicon *lexicon, const char *input_filename) |
| gboolean | lexiconNormalizeProba (Lexicon *lexicon) |
| gboolean | lexiconIsNormalized (const Lexicon *lexicon) |
| void | lexiconApplyLogOnProba (Lexicon *lexicon) |
| int | lexiconLoad (Lexicon *lexicon, const char *filename) |
| LexiconSearch | lexiconLookFor_CHARACTER (const Lexicon *lexicon, const char *graphy) |
| LexiconSearch | lexiconLookFor_UNSIGNED_LONG (const Lexicon *lexicon, const LongArray *graphy) |
| gboolean | lexiconSearchNext (LexicalAssocMem *associative_memory, LexiconSearch *lexicon_search) |
| LexiconAccess | lexiconAccess (const Lexicon *lexicon, const LexicalEntryIndex index) |
| LexicalEntry | lexiconAccessGetGraphy (const LexiconAccess *lexicon_access) |
| char * | lexiconAccessGetPartOfSpeech (const LexiconAccess *lexicon_access) |
| size_t | lexiconGetSize (const Lexicon *lexicon) |
| void | lexiconFree (Lexicon *lexicon) |
| void | lexiconDump (Lexicon *lexicon, int(*print)(const char *,...), const char *delimiter, gboolean all, gboolean numeration) |
SlpTK Library 0.6.0
<lexicon.h> Antonin Merçay (revision on 22.12.2004)
| typedef unsigned int Frequency |
Frequency of occurrence.
Total number of occurrences of a given Lexicon entry in a given corpus
| typedef LexicalEntryIndex Lemma |
Lemma.
The lemma field is represented as the corresponding entry index in the Lexicon
| typedef unsigned short Morpho |
Part of speech.
Internal part of speech (PoS) field representation in the Lexicon
| typedef double Probability |
Probability of occurrence.
Probability of occurrence of a given lexicon entry
| LexiconAccess lexiconAccess | ( | const Lexicon * | lexicon, | |
| const LexicalEntryIndex | index | |||
| ) |
Access a lexicon entry identified by its index
| [in] | lexicon | The source lexicon |
| [in] | index | The entry index |
Accede_Lexique | LexicalEntry lexiconAccessGetGraphy | ( | const LexiconAccess * | lexicon_access | ) |
Return the graphy associated to a given entry via a lexicon access result
| [in] | lexicon_access | The lexicon access result |
| char * lexiconAccessGetPartOfSpeech | ( | const LexiconAccess * | lexicon_access | ) |
Return the string representation of the part of speech associated to a given entry via a lexicon access result
| [in] | lexicon_access | The lexicon access result |
NULL | void lexiconApplyLogOnProba | ( | Lexicon * | lexicon | ) |
Convert each value of the lexicon probability field to its logarithm
| [in] | lexicon | The lexicon to treat |
Log_Proba | gboolean lexiconContains_CHARACTER | ( | const Lexicon * | lexicon, | |
| const char * | sequence, | |||
| const Lemma * | lemma, | |||
| const char * | part_of_speech, | |||
| const Frequency * | frequency, | |||
| const Probability * | probability | |||
| ) |
Check if a specified entry is stored in a lexicon. Only the provided (non-NULL) fields are checked. If a field provided to the function is not handled by the lexicon, it is ignored.
| [in] | lexicon | The source lexicon |
| [in] | sequence | The entry graphy (required) |
| [in] | lemma | The entry lemma (optional) |
| [in] | part_of_speech | The entry part_of_speech (optional) |
| [in] | frequency | The entry frequency (optional) |
| [in] | probability | The entry probability (optional) |
TRUE) or not (FALSE)Dans_Lexique & Dans_Lexique_Ulong | gboolean lexiconContains_UNSIGNED_LONG | ( | const Lexicon * | lexicon, | |
| const LongArray * | sequence, | |||
| const Lemma * | lemma, | |||
| const Morpho * | part_of_speech, | |||
| const Frequency * | frequency, | |||
| const Probability * | probability | |||
| ) |
Check if a specified entry is stored in a lexicon. Only the provided (non-NULL) fields are checked. If a field provided to the function is not handled by the lexicon, it is ignored.
| [in] | lexicon | The source lexicon |
| [in] | sequence | The entry graphy (required) |
| [in] | lemma | The entry lemma (optional) |
| [in] | part_of_speech | The entry part_of_speech (optional) |
| [in] | frequency | The entry frequency (optional) |
| [in] | probability | The entry probability (optional) |
TRUE) or not (FALSE)Dans_Lexique & Dans_Lexique_Ulong | int lexiconCreate | ( | Lexicon * | lexicon, | |
| const LexicalMemoryType | memory_type, | |||
| const LexicalDataType | data_type | |||
| ) |
Allocate and initialize a new lexicon
| [in] | lexicon | The lexicon to create |
| [in] | memory_type | The type of memory that stores the graphies |
| [in] | data_type | The type of stored graphies |
char strings.Init_Lexique | void lexiconDump | ( | Lexicon * | lexicon, | |
| int(*)(const char *,...) | print, | |||
| const char * | delimiter, | |||
| gboolean | all, | |||
| gboolean | numeration | |||
| ) |
Dump the content of a lexicon
| [in] | lexicon | The lexicon to dump |
| [in] | print | The print function to use |
| [in] | delimiter | The delimiter string to dump between fields |
| [in] | all | Dump delimiters for not handled fields |
| [in] | numeration | Print the numeration for each entry |
Liste_Lexique | int lexiconExport | ( | const Lexicon * | lexicon, | |
| const char * | input_filename | |||
| ) |
Save the content of a lexicon in a set of human-readable ASCII files. The operation generates:
input_filename that defines the lexicon properties like the memory type, the handled fields or the open parts of speech information;
| [in] | lexicon | The lexicon to export |
| [in] | input_filename | The input filename |
Exporte_Lexique | void lexiconFree | ( | Lexicon * | lexicon | ) |
Free the memory allocated to a lexicon
| [in] | lexicon | The lexicon to free |
Libere_Lexique | size_t lexiconGetSize | ( | const Lexicon * | lexicon | ) |
Returns the number of entries stored in a lexicon
| [in] | lexicon | The source lexicon |
| int lexiconImport | ( | Lexicon * | lexicon, | |
| const char * | basefilename | |||
| ) |
Load a lexicon from the content of a set of human-readable ASCII files. See lexiconExport for more information about the required files.
| [in] | lexicon | The lexicon where to import |
| [in] | basefilename | The lexicon header filename |
Importe_Lexique | gboolean lexiconInsert_CHARACTER | ( | Lexicon * | lexicon, | |
| const char * | sequence, | |||
| const Lemma * | lemma, | |||
| const char * | part_of_speech, | |||
| const Frequency * | frequency, | |||
| const Probability * | probability, | |||
| LexicalEntryIndex * | size, | |||
| const gboolean | by_force | |||
| ) |
Insert an entry into a lexicon. The first insertion sets which fields are handled by the lexicon. Afterwards, a warning message is displayed each time a handled field is not specified by the function. If a similar entry is already stored in the lexicon and the insertion is not forced (by_force parameter set to FALSE), the entry is not inserted, an error message is printed and the function returns TRUE.
| [in] | lexicon | The lexicon where to insert |
| [in] | sequence | The entry graphy (required) |
| [in] | lemma | The entry lemma (optional) |
| [in] | part_of_speech | The entry part of speech (optional) |
| [in] | frequency | The entry frequency (optional) |
| [in] | probability | The entry probability (optional) |
| [out] | size | The number of entries in the lexicon after the insertion (optional) |
| [in] | by_force | Set if the insertion is forced in case of duplicates or not. If set to TRUE, the function acts as if duplicates were never detected. |
Insere_Lexique, Insere_Lexique_De_Force, Insere_Lexique_Ulong & Insere_Lexique_De_Force_Ulong | gboolean lexiconInsert_UNSIGNED_LONG | ( | Lexicon * | lexicon, | |
| const LongArray * | sequence, | |||
| const Lemma * | lemma, | |||
| const Morpho * | part_of_speech, | |||
| const Frequency * | frequency, | |||
| const Probability * | probability, | |||
| LexicalEntryIndex * | size, | |||
| const gboolean | by_force | |||
| ) |
Insert an entry into a lexicon. The first insertion sets which fields are handled by the lexicon. Afterwards, a warning message is displayed each time a handled field is not specified by the function. If a similar entry is already stored in the lexicon and the insertion is not forced (by_force parameter set to FALSE), the entry is not inserted, an error message is printed and the function returns TRUE.
| [in] | lexicon | The lexicon where to insert |
| [in] | sequence | The entry graphy (required) |
| [in] | lemma | The entry lemma (optional) |
| [in] | part_of_speech | The entry part of speech (optional) |
| [in] | frequency | The entry frequency (optional) |
| [in] | probability | The entry probability (optional) |
| [out] | size | The number of entries in the lexicon after the insertion (optional) |
| [in] | by_force | Set if the insertion is forced in case of duplicates or not. If set to TRUE, the function acts as if duplicates were never detected. |
Insere_Lexique, Insere_Lexique_De_Force, Insere_Lexique_Ulong & Insere_Lexique_De_Force_Ulong | gboolean lexiconIsNormalized | ( | const Lexicon * | lexicon | ) |
Check if the probability field of a lexicon is normalized
| [in] | lexicon | The lexicon to check |
TRUE if the given lexicon is normalized, FALSE otherwise. TRUE if the provided lexicon doesn't handle the probability field. Normalise_Proba | int lexiconLoad | ( | Lexicon * | lexicon, | |
| const char * | filename | |||
| ) |
Load the content of a lexicon from a set of binary files. See lexiconSave for more information on the required files.
| [in] | lexicon | The lexicon where to load |
| [in] | filename | The lexicon header filename |
Read_Lexique | LexiconSearch lexiconLookFor_CHARACTER | ( | const Lexicon * | lexicon, | |
| const char * | graphy | |||
| ) |
Search for the first lexicon entry with a given graphy. Since several entries may have the same graphy, all results can be obtained iteratively using lexiconSearchNext.
| [in] | lexicon | The lexicon where to search |
| [in] | graphy | The searched graphy |
Recherche_Lexique & Recherche_Lexique_Ulong | LexiconSearch lexiconLookFor_UNSIGNED_LONG | ( | const Lexicon * | lexicon, | |
| const LongArray * | graphy | |||
| ) |
Search for the first lexicon entry with a given graphy. Since several entries may have the same graphy, all results can be obtained iteratively using lexiconSearchNext.
| [in] | lexicon | The lexicon where to search |
| [in] | graphy | The searched graphy |
Recherche_Lexique & Recherche_Lexique_Ulong | gboolean lexiconNormalizeProba | ( | Lexicon * | lexicon | ) |
Normalize the probability field of a lexicon
| [in] | lexicon | The lexicon to normalize |
TRUE if the given lexicon was already normalized, FALSE otherwise. TRUE if the provided lexicon doesn't handle the probability field. Normalise_Proba | int lexiconSave | ( | const Lexicon * | lexicon, | |
| const char * | input_filename | |||
| ) |
Save the content of a lexicon in a set of binary files. The operation generates:
input_filename that defines the lexicon properties like the memory type, the handled fields or the open parts of speech information;
| [in] | lexicon | The lexicon to save |
| [in] | input_filename | The input filename |
Write_Lexique | gboolean lexiconSearchNext | ( | LexicalAssocMem * | associative_memory, | |
| LexiconSearch * | lexicon_search | |||
| ) |
Continue a graphy-based search in a lexicon. Each time this function is called, the fields of the provided search result are updated to reflect the properties of the next relevant entry, until it returns FALSE, indicating that all corresponding results have been output.
| [in] | associative_memory | The associative memory on which to perform the search |
| [in] | lexicon_search | The lexicon search result to update |
Suivant_Lexique
1.4.7