Table Of Contents

Previous topic

operationmapping.py

Next topic

operationconversion.py

This Page

text2data.py

class ConversionMappingDef(XmlObject):

def __init__(self, typedef, schema, xmldata):

Initializes the conversion mapping by processing the schema and xml documents:

>>> from simo.builder.importers.text2data import ConversionMappingDef
>>> tdf = open('../../simulator/xml/schemas/Typedefs_SIMO.xsd')
>>> typedef = tdf.read()
>>> tdf.close()
>>> sf = open('../../simulator/xml/schemas/text2data.xsd')
>>> schema = sf.read()
>>> sf.close()
>>> xml = u'''<conversion_mapping xmlns="http://www.simo-project.org/simo"
...    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
...    xsi:schemaLocation="http://www.simo-project.org/simo
...    ../schemas/conversion_mapping.xsd">
...    <data_levels>
...       <level>
...          <name>comp_unit</name>
...          <id_rowpos>
...             <pos>0</pos>
...          </id_rowpos>
...          <rowtype_rowpos>1</rowtype_rowpos>
...          <rowtype_value>1</rowtype_value>
...          <date_rowpos>7</date_rowpos>
...          <sublevel>
...             <name>stratum</name>
...             <id_rowpos>
...                <pos>2</pos>
...             </id_rowpos>
...             <rowtype_rowpos>2</rowtype_rowpos>
...             <rowtype_value>2</rowtype_value>
...             <sublevel>
...                <name>tree</name>
...                <id_rowpos>
...                   <pos>1</pos>
...                </id_rowpos>
...                <link_id_rowpos>
...                   <level>comp_unit</level>
...                   <pos>1</pos><!--should be 0-->
...                </link_id_rowpos>
...                <rowtype_value>3</rowtype_value>
...             </sublevel>
...          </sublevel>
...       </level>
...    </data_levels>
...    <none_value_indicator>'' -1</none_value_indicator>
...    <comment_prefix>! #</comment_prefix>
...    <object_rejection>
...       <SIMO_variable>
...          <name>MAIN_GROUP</name>
...          <reject_criterion oper="in">
...             <enum>4 5 6 7 8</enum>
...          </reject_criterion>
...       </SIMO_variable>
...    </object_rejection>
...    <defaults>
...       <variable>
...          <level>comp_unit</level>
...          <name>USE_RESTRICTION_HARVEST</name>
...          <value>0</value>
...       </variable>
...       <variable>
...          <level>comp_unit</level>
...          <name>USE_RESTRICTION_SILVIC</name>
...          <value>0</value>
...       </variable>
...    </defaults>
...    <variable>
...       <name>
...          <from>Y-koordinaatti</from>
...          <to>LAT</to>
...       </name>
...       <row_type>1</row_type>
...       <row_position>9</row_position>
...       <from_datatype>double</from_datatype>
...       <none_to_value/>
...       <numerical>
...          <conversion_factor>0.001</conversion_factor>
...       </numerical>
...    </variable>
...    <variable>
...       <name>
...          <from>Pääryhmä</from>
...          <to>MAIN_GROUP</to>
...       </name>
...       <row_type>1</row_type>
...       <row_position>13</row_position>
...       <from_datatype>int</from_datatype>
...       <none_to_value/>
...       <categorical>
...          <value_mapping>
...             <value>
...                <from>1</from>
...                <to>1</to>
...             </value>
...             <value>
...                <from>2</from>
...                <to>2</to>
...             </value>
...          </value_mapping>
...       </categorical>
...    </variable>
...    <variable>
...       <name>
...          <from>Inventointipäivä</from>
...          <to>inventory_date</to>
...       </name>
...       <row_type>1</row_type>
...       <row_position>15</row_position>
...       <from_datatype>date</from_datatype>
...       <none_to_value/>
...       <date>
...          <epoch_year>gregorian</epoch_year>
...       </date>
...    </variable>
...    <variable>
...       <name>
...          <from>Tekstivariable</from>
...          <to>text_test</to>
...       </name>
...       <row_type>1</row_type>
...       <row_position>16</row_position>
...       <from_datatype>string</from_datatype>
...       <none_to_value/>
...       <text/>
...    </variable>
...    <variable>
...       <name>
...          <from>invalid</from>
...          <to>invalid</to>
...       </name>
...       <row_type>99</row_type>
...       <row_position>1</row_position>
...       <from_datatype>double</from_datatype>
...       <none_to_value/>
...       <numerical>
...          <conversion_factor>1</conversion_factor>
...       </numerical>
...    </variable>
... </conversion_mapping>'''
>>> class Lexicon(object):
...     def get_level_ind(self, level):
...         if level=='comp_unit':
...             return 2
...         else:
...             return 3
...     def get_variable_ind(self, level, var, active=False):
...         if var == 'MAIN_GROUP':
...             return (1, 1)
...         elif var == 'LAT':
...             return (1, 2)
...         elif var == 'inventory_date':
...             return (1, 3)
...         elif var == 'text_test':
...             return (1, 4)
...         elif var == 'USE_RESTRICTION_HARVEST':
...             return (1, 5)
...         elif var == 'USE_RESTRICTION_SILVIC':
...             return (1, 6)
...         else:
...             return (None, None)
>>> cmd = ConversionMappingDef(typedef)
>>> cmd.schema = schema
>>> try:
...     cmd.xml = ('testxml', xml, Lexicon())
... except ValueError, e:
...     print e
errors in xml to object conversion
>>> cmd.errors 
set(["Link id position (1) is the same as data id position.
      This conflict will result in import errors and data corruption
      for conversion mapping 'testxml'",
     "invalid rowtype for variable 'invalid' for conversion mapping
     'testxml'",
     "Data level 'comp_unit' is not the child level of simulation level
     in lexicon for conversion mapping 'testxml'"])
>>> cmd.xml['testxml'][:19]
u'<conversion_mapping'

def xml_to_obj(self, root, lexicon):

>>> cm = cmd.obj['testxml']
>>> urs = cm.defaults['comp_unit']['USE_RESTRICTION_SILVIC']
>>> print urs['varind'], urs['value']
(1, 6) 0
>>> urh = cm.defaults['comp_unit']['USE_RESTRICTION_HARVEST']
>>> print urh['varind'], urh['value']
(1, 5) 0
>>> len(cm.defaults['comp_unit'].keys())
2
>>> len(cm.level_list)
3
>>> ldef = cm.level_list[0]
>>> ldef.level_name
'comp_unit'
>>> ldef.rowtype_value
[1]
>>> ldef.id_pos
[0]
>>> ldef.id_delimiter

>>> ldef.linkid_pos

>>> ldef.rowtype_pos
1
>>> ldef.new_object_row
1
>>> ldef.date_pos
7
>>> ldef = cm.level_list[2]
>>> ldef.linkid_pos
('comp_unit', 1)
>>> ldef.rowtype_pos

>>> cm.none_val
['', '-1']
>>> cm.comment_prefix
['!', '#']
>>> cm.object_rejection
{'MAIN_GROUP': [{'oper': 'in', 'criteria': [4, 5, 6, 7, 8]}]}
>>> cm.attributes
{'comp_unit': ['LAT', 'MAIN_GROUP', 'inventory_date', 'text_test']}
>>> numvar = cm.mapping[1][9][0]
>>> numvar.var_type
'numerical'
>>> isinstance(numvar.map, list)
True
>>> numvar.map[0].var_ind
(1, 2)
>>> numvar.map[0].to_var
'LAT'
>>> numvar.map[0].to_val

>>> numvar.map[0].conv_fact
0.001
>>> numvar.map[0].epoch_year

>>> numvar.from_data_type
'double'

>>> textvar = cm.mapping[1][16][0]
>>> textvar.var_type
'text'
>>> isinstance(textvar.map, list)
True
>>> textvar.map[0].var_ind
(1, 4)
>>> textvar.map[0].to_var
'text_test'
>>> textvar.map[0].to_val

>>> textvar.map[0].conv_fact

>>> textvar.map[0].epoch_year

>>> textvar.from_data_type
'string'

>>> catvar = cm.mapping[1][13][0]
>>> catvar.var_type
'categorical'
>>> isinstance(catvar.map, dict)
True
>>> catvar.map[1][0].var_ind
(1, 1)
>>> catvar.map[1][0].to_var
'MAIN_GROUP'
>>> catvar.map[1][0].to_val
1
>>> catvar.map[1][0].conv_fact

>>> catvar.map[1][0].epoch_year

>>> catvar.from_data_type
'int'
>>> datevar = cm.mapping[1][15][0]
>>> datevar.var_type
'date'
>>> isinstance(datevar.map, list)
True
>>> datevar.map[0].var_ind
(1, 3)
>>> datevar.map[0].to_val

>>> datevar.map[0].to_var
'inventory_date'
>>> datevar.map[0].conv_fact

>>> datevar.map[0].epoch_year
'gregorian'
>>> datevar.from_data_type
'date'
>>> cm.validator

>>> cmd.errors 
set(["Link id position (1) is the same as data id position.
      This conflict will result in import errors and data corruption
      for conversion mapping 'testxml'",
     "invalid rowtype for variable 'invalid' for conversion mapping
     'testxml'",
     "Data level 'comp_unit' is not the child level of simulation level
     in lexicon for conversion mapping 'testxml'"])
>>> cmd.warnings 
["Different row type indicator positions given for different data levels.
  This may cause data import errors for conversion mapping 'testxml'",
 "Different row type indicator positions given for different data levels.
  This may cause data import errors for conversion mapping 'testxml'"]

class ValueConv(Persistent):

Value conversion definition for imported data values.

class Mapping(Persistent):

Mapping definition for imported data; given for each variable and contains the variable type and the map between the original and imported values.

Attributes:

  • var_type: categorical, numerical, date or text
  • map: either a dictionary (categorical variable; key is the original value, value is a list of ValueConv objects) or a list (numerical, date and text variables) of ValueConv objects
  • from_data_type: the data type the data comes in as

class LevelDef(Persistent):

Data level definition for imported data.

Attributes:

  • level_name: the name of the data level

  • rowtype_value: a list of row type values mapping to this level

  • id_pos: the index number(s) of data field(s) containing the id

  • id_delimiter: delimiter for creating an id for the object, if the id consists of multiple values

  • linkid_pos: the name of the parent data level and the index number of the data field containing the id of the parent data object

  • rowtype_pos: the index number of the data field containing the row type indicator

  • new_object_row: the row type value that triggers the creation of a new data object on the data level

  • date_pos: the index number of the data field containing the date

class ConversionMapping(Persistent):

Data conversion definition for data import.

Attributes:

  • name
  • level_list: list of LevelDef instances
  • none_val: list of original values treated as None values
  • object_rejection: dictionary of conditions of object rejection
  • comment_prefix: when an original row is treated as comment
  • attributes: dictionary of level variable names by level name
  • mapping: a nested dictionary keyed first by row type number and then by row position; each entry is a list of Mapping instances
  • validator: ConversionMappingDef instance used during the object construction to provide lexicon validation, set to None at the end

def __init__(self, ns, root, validator):

Parses the XML data into a class instance

def _check_rowtype_pos(self):

Sanity check for the rowtype_pos definitions, which should be equal for all data levels. A warning is generated if they are not.

def _convert_var(self, elem, ns):

Construct mapping definitions for a single variable.

def _extract_levels(self, ns, elem):

Parse level information from an XML element.

def _set_rejection_criteria(self, elem, ns):

Set criteria for rejecting input for an object.

def _create_categorical_mapping(self, elem, ns, rowtype, rowpos, vartype, fromdt, defval, level, toname):

Import data value to SIMO data value mappings for categorical variables.

def _extract_id_const(self, ns, elem):

Parse level id_rowpos element information. This information is used for constructing object ids.

def _parse_linkids(self, ns, elem):

Parse link_id_rowpos element into link structure which links individual objects to a single top level object.