.. _text2data-py: ############ text2data.py ############ ************************************** class ConversionMappingDef(XmlObject): ************************************** def __init__(self, typedef, schema, xmldata): ============================================= Initializes the conversion mapping by processing the schema and xml documents:: >>> from simo.builder.importers.text2data import ConversionMappingDef >>> tdf = open('../../simulator/xml/schemas/Typedefs_SIMO.xsd') >>> typedef = tdf.read() >>> tdf.close() >>> sf = open('../../simulator/xml/schemas/text2data.xsd') >>> schema = sf.read() >>> sf.close() >>> xml = u''' ... ... ... comp_unit ... ... 0 ... ... 1 ... 1 ... 7 ... ... stratum ... ... 2 ... ... 2 ... 2 ... ... tree ... ... 1 ... ... ... comp_unit ... 1 ... ... 3 ... ... ... ... ... '' -1 ... ! # ... ... ... MAIN_GROUP ... ... 4 5 6 7 8 ... ... ... ... ... ... comp_unit ... USE_RESTRICTION_HARVEST ... 0 ... ... ... comp_unit ... USE_RESTRICTION_SILVIC ... 0 ... ... ... ... ... Y-koordinaatti ... LAT ... ... 1 ... 9 ... double ... ... ... 0.001 ... ... ... ... ... Pääryhmä ... MAIN_GROUP ... ... 1 ... 13 ... int ... ... ... ... ... 1 ... 1 ... ... ... 2 ... 2 ... ... ... ... ... ... ... Inventointipäivä ... inventory_date ... ... 1 ... 15 ... date ... ... ... gregorian ... ... ... ... ... Tekstivariable ... text_test ... ... 1 ... 16 ... string ... ... ... ... ... ... invalid ... invalid ... ... 99 ... 1 ... double ... ... ... 1 ... ... ... ''' >>> class Lexicon(object): ... def get_level_ind(self, level): ... if level=='comp_unit': ... return 2 ... else: ... return 3 ... def get_variable_ind(self, level, var, active=False): ... if var == 'MAIN_GROUP': ... return (1, 1) ... elif var == 'LAT': ... return (1, 2) ... elif var == 'inventory_date': ... return (1, 3) ... elif var == 'text_test': ... return (1, 4) ... elif var == 'USE_RESTRICTION_HARVEST': ... return (1, 5) ... elif var == 'USE_RESTRICTION_SILVIC': ... return (1, 6) ... else: ... 
return (None, None) >>> cmd = ConversionMappingDef(typedef) >>> cmd.schema = schema >>> try: ... cmd.xml = ('testxml', xml, Lexicon()) ... except ValueError, e: ... print e errors in xml to object conversion >>> cmd.errors # doctest: +NORMALIZE_WHITESPACE set(["Link id position (1) is the same as data id position. This conflict will result in import errors and data corruption for conversion mapping 'testxml'", "invalid rowtype for variable 'invalid' for conversion mapping 'testxml'", "Data level 'comp_unit' is not the child level of simulation level in lexicon for conversion mapping 'testxml'"]) >>> cmd.xml['testxml'][:19] u'<?xml version="1.0"' >>> cm = cmd.obj['testxml'] >>> urs = cm.defaults['comp_unit']['USE_RESTRICTION_SILVIC'] >>> print urs['varind'], urs['value'] (1, 6) 0 >>> urh = cm.defaults['comp_unit']['USE_RESTRICTION_HARVEST'] >>> print urh['varind'], urh['value'] (1, 5) 0 >>> len(cm.defaults['comp_unit'].keys()) 2 >>> len(cm.level_list) 3 >>> ldef = cm.level_list[0] >>> ldef.level_name 'comp_unit' >>> ldef.rowtype_value [1] >>> ldef.id_pos [0] >>> ldef.id_delimiter >>> ldef.linkid_pos >>> ldef.rowtype_pos 1 >>> ldef.new_object_row 1 >>> ldef.date_pos 7 >>> ldef = cm.level_list[2] >>> ldef.linkid_pos ('comp_unit', 1) >>> ldef.rowtype_pos >>> cm.none_val ['', '-1'] >>> cm.comment_prefix ['!', '#'] >>> cm.object_rejection {'MAIN_GROUP': [{'oper': 'in', 'criteria': [4, 5, 6, 7, 8]}]} >>> cm.attributes {'comp_unit': ['LAT', 'MAIN_GROUP', 'inventory_date', 'text_test']} >>> numvar = cm.mapping[1][9][0] >>> numvar.var_type 'numerical' >>> isinstance(numvar.map, list) True >>> numvar.map[0].var_ind (1, 2) >>> numvar.map[0].to_var 'LAT' >>> numvar.map[0].to_val >>> numvar.map[0].conv_fact 0.001 >>> numvar.map[0].epoch_year >>> numvar.from_data_type 'double' >>> textvar = cm.mapping[1][16][0] >>> textvar.var_type 'text' >>> isinstance(textvar.map, list) True >>> textvar.map[0].var_ind (1, 4) >>> textvar.map[0].to_var 'text_test' >>> textvar.map[0].to_val >>> textvar.map[0].conv_fact 
>>> textvar.map[0].epoch_year >>> textvar.from_data_type 'string' >>> catvar = cm.mapping[1][13][0] >>> catvar.var_type 'categorical' >>> isinstance(catvar.map, dict) True >>> catvar.map[1][0].var_ind (1, 1) >>> catvar.map[1][0].to_var 'MAIN_GROUP' >>> catvar.map[1][0].to_val 1 >>> catvar.map[1][0].conv_fact >>> catvar.map[1][0].epoch_year >>> catvar.from_data_type 'int' >>> datevar = cm.mapping[1][15][0] >>> datevar.var_type 'date' >>> isinstance(datevar.map, list) True >>> datevar.map[0].var_ind (1, 3) >>> datevar.map[0].to_val >>> datevar.map[0].to_var 'inventory_date' >>> datevar.map[0].conv_fact >>> datevar.map[0].epoch_year 'gregorian' >>> datevar.from_data_type 'date' >>> cm.validator >>> cmd.errors # doctest: +NORMALIZE_WHITESPACE set(["Link id position (1) is the same as data id position. This conflict will result in import errors and data corruption for conversion mapping 'testxml'", "invalid rowtype for variable 'invalid' for conversion mapping 'testxml'", "Data level 'comp_unit' is not the child level of simulation level in lexicon for conversion mapping 'testxml'"]) >>> cmd.warnings #doctest: +NORMALIZE_WHITESPACE ["Different row type indicator positions given for different data levels. This may cause data import errors for conversion mapping 'testxml'", "Different row type indicator positions given for different data levels. This may cause data import errors for conversion mapping 'testxml'"] **************************** class ValueConv(Persistent): **************************** Value conversion definition for imported data values. ************************** class Mapping(Persistent): ************************** Mapping definition for imported data; given for each variable and contains the variable type and the map between the original and imported values. 
Attributes: - var_type: categorical, numerical, text or date - map: either a dictionary (categorical variable; key is the original value, value is a list of ValueConv objects) or a list (numerical, text, date) of ValueConv objects - from_data_type: the data type the data comes in as *************************** class LevelDef(Persistent): *************************** Data level definition for imported data. Attributes: - level_name: the name of the data level - rowtype_value: a list of row type values mapping to this level - id_pos: the index number(s) of data field(s) containing the id - id_delimiter: delimiter for creating an id for the object, if the id consists of multiple values - linkid_pos: the index number of the data field containing the id of the parent data object - new_object_row: the row type value that triggers the creation of a new data object on the data level ************************************ class ConversionMapping(Persistent): ************************************ Data conversion definition for data import. Attributes: - name - level_list: list of LevelDef instances - none_val: list of original values treated as None values - object_rejection: dictionary of conditions of object rejection - comment_prefix: list of prefixes that mark an original row as a comment - attributes: dictionary of level variable names by level name - mapping: a dictionary keyed by row type number and then by row position, each entry holding the Mapping instances for that position - validator: ConversionMappingDef instance used during the object construction to provide lexicon validation, set to None at the end def __init__(self, ns, root, validator): ======================================== Parses the XML data into a class instance. def _check_rowtype_pos(self): ============================= Sanity check for the rowtype_pos definitions, which should be equal for all row types. A warning is generated if they are not. def _convert_var(self, elem, ns): ================================= Construct mapping definitions for a single variable. 
def _extract_levels(self, ns, elem): ==================================== Parse level information from an XML element. def _set_rejection_criteria(self, elem, ns): ============================================ Set criteria for rejecting input for an object. def _create_categorical_mapping(self, elem, ns, rowtype, rowpos, vartype, fromdt, defval, level, toname): ========================================================================================================= Import data value to SIMO data value mappings for categorical variables. def _extract_id_const(self, ns, elem): ====================================== Parse level id_rowpos element information. This information is used for constructing object ids. def _parse_linkids(self, ns, elem): =================================== Parse link_id_rowpos element into a link structure which links individual objects to a single top level object.