How to create a tree in python


data structures - How can I implement a tree in Python?

Hi you may give itertree a try (I'm the author).

The package goes in the direction of anytree package but with a bit different focus. The performance on huge trees (>100000 items) is much better and it deals with iterators to have effective filter mechanism.

>>>from itertree import * >>>root=iTree('root') >>># add some children: >>>root.append(iTree('Africa',data={'surface':30200000,'inhabitants':1257000000})) >>>root.append(iTree('Asia', data={'surface': 44600000, 'inhabitants': 4000000000})) >>>root.append(iTree('America', data={'surface': 42549000, 'inhabitants': 1009000000})) >>>root.append(iTree('Australia&Oceania', data={'surface': 8600000, 'inhabitants': 36000000})) >>>root.append(iTree('Europe', data={'surface': 10523000 , 'inhabitants': 746000000})) >>># you might use __iadd__ operator for adding too: >>>root+=iTree('Antarktika', data={'surface': 14000000, 'inhabitants': 1100}) >>># for building next level we select per index: >>>root[0]+=iTree('Ghana',data={'surface':238537,'inhabitants':30950000}) >>>root[0]+=iTree('Niger', data={'surface': 1267000, 'inhabitants': 23300000}) >>>root[1]+=iTree('China', data={'surface': 9596961, 'inhabitants': 1411780000}) >>>root[1]+=iTree('India', data={'surface': 3287263, 'inhabitants': 1380004000}) >>>root[2]+=iTree('Canada', data={'type': 'country', 'surface': 9984670, 'inhabitants': 38008005}) >>>root[2]+=iTree('Mexico', data={'surface': 1972550, 'inhabitants': 127600000 }) >>># extend multiple items: >>>root[3]. extend([iTree('Australia', data={'surface': 7688287, 'inhabitants': 25700000 }), iTree('New Zealand', data={'surface': 269652, 'inhabitants': 4900000 })]) >>>root[4]+=iTree('France', data={'surface': 632733, 'inhabitants': 67400000 })) >>># select parent per TagIdx - remember in itertree you might put items with same tag multiple times: >>>root[TagIdx('Europe'0)]+=iTree('Finland', data={'surface': 338465, 'inhabitants': 5536146 }) 

The created tree can be rendered:

>>>root.render() iTree('root') └──iTree('Africa', data=iTData({'surface': 30200000, 'inhabitants': 1257000000})) └──iTree('Ghana', data=iTData({'surface': 238537, 'inhabitants': 30950000})) └──iTree('Niger', data=iTData({'surface': 1267000, 'inhabitants': 23300000})) └──iTree('Asia', data=iTData({'surface': 44600000, 'inhabitants': 4000000000})) └──iTree('China', data=iTData({'surface': 9596961, 'inhabitants': 1411780000})) └──iTree('India', data=iTData({'surface': 3287263, 'inhabitants': 1380004000})) └──iTree('America', data=iTData({'surface': 42549000, 'inhabitants': 1009000000})) └──iTree('Canada', data=iTData({'surface': 9984670, 'inhabitants': 38008005})) └──iTree('Mexico', data=iTData({'surface': 1972550, 'inhabitants': 127600000})) └──iTree('Australia&Oceania', data=iTData({'surface': 8600000, 'inhabitants': 36000000})) └──iTree('Australia', data=iTData({'surface': 7688287, 'inhabitants': 25700000})) └──iTree('New Zealand', data=iTData({'surface': 269652, 'inhabitants': 4900000})) └──iTree('Europe', data=iTData({'surface': 10523000, 'inhabitants': 746000000})) └──iTree('France', data=iTData({'surface': 632733, 'inhabitants': 67400000})) └──iTree('Finland', data=iTData({'surface': 338465, 'inhabitants': 5536146})) └──iTree('Antarktika', data=iTData({'surface': 14000000, 'inhabitants': 1100})) 

E. g. Filtering can be done like this:

>>>item_filter = Filter.iTFilterData(data_key='inhabitants', data_value=iTInterval(0, 20000000)) >>>iterator=root.iter_all(item_filter=item_filter) >>>for i in iterator: >>> print(i) iTree("'New Zealand'", data=iTData({'surface': 269652, 'inhabitants': 4900000}), subtree=[]) iTree("'Finland'", data=iTData({'surface': 338465, 'inhabitants': 5536146}), subtree=[]) iTree("'Antarktika'", data=iTData({'surface': 14000000, 'inhabitants': 1100}), subtree=[]) 

python - A general tree implementation?

I've published a Python [3] tree implementation on my site: http://www.quesucede.com/page/show/id/python_3_tree_implementation.

Hope it is of use,

Ok, here's the code:

import uuid def sanitize_id(id): return id.strip().replace(" ", "") (_ADD, _DELETE, _INSERT) = range(3) (_ROOT, _DEPTH, _WIDTH) = range(3) class Node: def __init__(self, name, identifier=None, expanded=True): self. __identifier = (str(uuid.uuid1()) if identifier is None else sanitize_id(str(identifier))) self.name = name self.expanded = expanded self.__bpointer = None self.__fpointer = [] @property def identifier(self): return self.__identifier @property def bpointer(self): return self.__bpointer @bpointer.setter def bpointer(self, value): if value is not None: self.__bpointer = sanitize_id(value) @property def fpointer(self): return self.__fpointer def update_fpointer(self, identifier, mode=_ADD): if mode is _ADD: self.__fpointer.append(sanitize_id(identifier)) elif mode is _DELETE: self.__fpointer.remove(sanitize_id(identifier)) elif mode is _INSERT: self.__fpointer = [sanitize_id(identifier)] class Tree: def __init__(self): self.nodes = [] def get_index(self, position): for index, node in enumerate(self. nodes): if node.identifier == position: break return index def create_node(self, name, identifier=None, parent=None): node = Node(name, identifier) self.nodes.append(node) self.__update_fpointer(parent, node.identifier, _ADD) node.bpointer = parent return node def show(self, position, level=_ROOT): queue = self[position].fpointer if level == _ROOT: print("{0} [{1}]".format(self[position].name, self[position].identifier)) else: print("\t"*level, "{0} [{1}]".format(self[position].name, self[position].identifier)) if self[position].expanded: level += 1 for element in queue: self.show(element, level) # recursive call def expand_tree(self, position, mode=_DEPTH): # Python generator. Loosly based on an algorithm from 'Essential LISP' by # John R. Anderson, Albert T. Corbett, and Brian J. Reiser, page 239-241 yield position queue = self[position]. fpointer while queue: yield queue[0] expansion = self[queue[0]].fpointer if mode is _DEPTH: queue = expansion + queue[1:] # depth-first elif mode is _WIDTH: queue = queue[1:] + expansion # width-first def is_branch(self, position): return self[position].fpointer def __update_fpointer(self, position, identifier, mode): if position is None: return else: self[position].update_fpointer(identifier, mode) def __update_bpointer(self, position, identifier): self[position].bpointer = identifier def __getitem__(self, key): return self.nodes[self.get_index(key)] def __setitem__(self, key, item): self.nodes[self.get_index(key)] = item def __len__(self): return len(self.nodes) def __contains__(self, identifier): return [node.identifier for node in self.nodes if node.identifier is identifier] if __name__ == "__main__": tree = Tree() tree. create_node("Harry", "harry") # root node tree.create_node("Jane", "jane", parent = "harry") tree.create_node("Bill", "bill", parent = "harry") tree.create_node("Joe", "joe", parent = "jane") tree.create_node("Diane", "diane", parent = "jane") tree.create_node("George", "george", parent = "diane") tree.create_node("Mary", "mary", parent = "diane") tree.create_node("Jill", "jill", parent = "george") tree.create_node("Carol", "carol", parent = "jill") tree.create_node("Grace", "grace", parent = "bill") tree.create_node("Mark", "mark", parent = "jane") print("="*80) tree.show("harry") print("="*80) for node in tree.expand_tree("harry", mode=_WIDTH): print(node) print("="*80) 

How can I implement a tree in Python? Are there built-in data structures in Python like there are in Java? En Python

I'm trying to build a generic tree. Are there built-in data structures in Python to implement a tree?

Python does not have a fairly wide range of "built-in" data structures, as Java does. However, since Python is dynamic, a shared tree is easy to create. For example, a binary tree could be:

 class Tree(object): def __init__(self): self.left = None self.right = None self.data = None 

You can use it like this:

 root = Tree() root.data = "root" root.left = Tree() root.left.data = "left" root.right = Tree() root.right. data = "right" 

I recommend https://pypi.python.org/pypi/anytree

example

 from anytree import Node, RenderTree udo = Node("Udo") marc = Node("Marc", parent =udo) lian = Node("Lian", parent=marc) dan = Node("Dan", parent=udo) jet = Node("Jet", parent=dan) jan = Node("Jan", parent=dan ) joe = Node("Joe", parent=dan) print(udo) Node('/Udo') print(joe) Node('/Udo/Dan/Joe') for pre, fill, node in RenderTree(udo) : print("%s%s" % (pre, node.name)) Udo ├── Marc │ └── Lian └── Dan ├── Jet ├── Jan └── Joe print(dan.children) (Node('/Udo/Dan/Jet'), Node('/Udo/Dan/Jan'), Node('/Udo/Dan/Joe')) 

Features

anytree also has a powerful API:

  • easy tree creation
  • simple wood modification
  • pre-order tree iteration
  • tree iteration after order
  • allow relative and absolute node paths
  • going from one node to another.
  • tree rendering (see example above)
  • connecting/disconnecting units

A general tree is a node with zero or more children, each of which is a regular (tree). This is not the same as a binary tree, they are different data structures, although both share some terminology.

Python does not have a built-in data structure for generic trees, but it is easily implemented using classes.

 class Tree(object): "Generic tree node." def __init__(self, name='root', children=None): self.name = name self.children = [] if children is not None: for child in children: self.add_child(child) def __repr__(self): return self.name def add_child(self, node): assert isinstance(node, Tree) self.children.append(node) # * # /|\ # 1 2 + # / \ # 3 4 t = Tree('*' , [Tree('1'), Tree('2'), Tree('+', [Tree('3'), Tree('4')])]) 

You can try:

 from collections import defaultdict def tree(): return defaultdict(tree) users = tree() users['harold']['username'] = 'hrldcpr' users['handler']['username '] = 'matthandlersux' 

As suggested here: https://gist. github.com/2012250

There are no trees inside, but you can easily build them by subclassing the node type from a list and writing traversal methods. If you do this, I found bisect useful.

There are also many PyPi implementations you can browse.

If I remember correctly, the Python standard library does not include tree data structures for the same reason that the .NET base class library does: memory locality decreases, resulting in more cache misses. On modern processors, it's generally faster to just bring a large chunk of memory into the cache, and pointer-rich data structures negate the benefit.

 class Node: """ Class Node """ def __init__(self, value): self.left = None self.data = value self.right = None class Tree: """ Class tree will provide a tree as well as utility functions. """ def createNode(self, data): """ Utility function to create a node. """ return Node(data) def insert(self, node , data): """ Insert function will insert a node into tree. Duplicate keys are not allowed. """ #if tree is empty , return a root node if node is None: return self.createNode(data) # if data is smaller than parent , insert it into left side if data < node.data: node.left = self.insert(node.left, data) elif data > node.data: node.right = self.insert(node.right, data) return node def search(self, node, data): """ Search function will search a node into tree. """ # if root is None or root is the search data. if node is None or node.data == data: return node if node.data < data: return self.search(node.right, data) else: return self.search(node.left, data) def deleteNode(self, node,data): """ Delete function will delete a node into tree. Not complete , may need some more scenarion that we can handle Now it is handling only leaf. """ # Check if tree is empty. if node is None: return None # searching key into BST. if data < node.data: node.left = self.deleteNode(node.left, data) elif data > node.data: node.right = self.deleteNode(node.right, data) else: # reach to the node that need to delete from BST. if node.left is None and node.right is None: del node if node.left == None: temp = node.right del node return temp elif node.right == None: temp = node.left del node return temp return node def traverseInorder(self, root): """ traverse function will print all the node in the tree. """ if root is not None: self.traverseInorder(root.left) print root.data self.traverseInorder(root. right) def traversePreorder(self, root): """ traverse function will print all the node in the tree. """ if root is not None: print root.data self.traversePreorder(root.left) self.traversePreorder(root .right) def traversePostorder(self, root): """ traverse function will print all the node in the tree. """ if root is not None: self.traversePreorder(root.left) self.traversePreorder(root.right) print root.data def main(): root = None tree = Tree() root = tree.insert(root, 10) print root tree.insert(root, 20) tree.insert(root, 30) tree.insert(root , 40) tree.insert(root, 70) tree.insert(root, 60) tree.insert(root, 80) print "Traverse Inorder" tree. traverseInorder(root) print "Traverse Preorder" tree.traversePreorder(root) print "Traverse Postorder" tree.traversePostorder(root) if __name__ == "__main__": main() 

I have implemented the root tree as a dictionary {child:parent} . So, for example, with the root node 0 , the tree might look like this:

 tree={1:0, 2:0, 3:1, 4:2, 5:3} 

from any node to the root, which was relevant for the problem I was working on.

I have implemented trees using nested dicts. This is pretty easy to do and has worked for me with fairly large datasets. I have posted a sample below and you can see more in google code

 def addBallotToTree(self, tree, ballotIndex, ballot=""): """Add one ballot to the tree. The root of the tree is a dictionary that has as keys the indicies of all continuing and winning candidates. For each candidate, the value is also a dictionary, and the keys of that dictionary include "n" and "bi". tree[c]["n"] is the number of ballots that rank candidate c first. tree[c][" bi"] is a list of ballot indices where the ballots rank c first. If candidate c is a winning candidate, then that portion of the tree is expanded to indicate the breakdown of the subsequently ranked candidates. In this situation, additional keys are added to the tree[c] dictionary corresponding to subsequently ranked candidates. tree[c]["n"] is the number of ballots that rank candidate c first. tree[c]["bi"] is a list of ballot indices where the tree[c][d]["n"] is the number of ballots that rank c first and d second.tree[c][d]["bi"] is a list of the corresponding ballot indices . Where the second ranked candidates is also a winner, then the tree is expanded to the next level. Losing candidates are ignored and treated as if they do not appear on the ballots. For example, tree[c][d]["n"] is the total number of ballots where candidate c is the first non-losing candidate, c is a winner, and d is the next non-losing candidate. This will include the following ballots, where x represents a losing candidate: [cd] [xcd] [cxd] [xcxxd] During the count, the tree is dynamically updated as candidates change their status. The parameter "tree" to this method may be the root of the tree or may be a sub-tree. """ if ballot == "": # Add the complete ballot to the tree weight, ballot = self.b.getWeightedBallot(ballotIndex) else: # When ballot is not "", we are adding a truncated ballot to the tree, # because a higher-ranked candidate is a winner.weight = self.b.getWeight(ballotIndex) # Get the top choice among candidates still in the running # Note that we can't use Ballots.getTopChoiceFromWeightedBallot since # we are looking for the for c in ballot: if c in self.continuing | self.winners: break # c is the top choice so stop else: c = None # no candidates left on this ballot if c is None: # This will happen if the ballot contains only winning and losing # candidates.The ballot index will not need to be transferred # again so it can be thrown away.return # Create space if necessary.if not tree.has_key(c): tree[ c] = {} tree[c]["n"] = 0 tree[c]["bi"] = [] tree[c]["n"] += weight if c in self.winners: # Because candidate is a winner, a portion of the ballot goes to # the next candidate. Pass on a truncated ballot so that the same # candidate doesn't get counted twice. i = ballot.index(c) ballot2 = ballot[i+1:] self.addBallotToTree(tree[c], ballotIndex, ballot2) else: # Candidate is in continuing so we stop here. tree[c]["bi"].append(ballotIndex) 

Greg Hugill's answer is great, but if you need more nodes per level, you can use a list to create them. And then use a method to access them by name or order (like id)

 class node(object): def __init__(self): self.name=None self.node=[] self.otherInfo = None self.prev =None def nex(self,child): "Gets a node by number" return self.node[child] def prev(self): return self.prev def goto(self,data): "Gets the node by name" for child in range(0,len(self.node)): if(self.node[child].name==data): return self.node[child] def add(self): node1=node() self.node .append(node1) node1.prev=self return node1 

Now just create a root and create it: ex:

 tree=node() #create a node tree.name="root" #name it root tree. otherInfo="blue" #or what ever tree=tree.add () #add a node to the root tree.name="node1" #name it root / child1 tree=tree.add() tree.name="grandchild1" root / child1 / grandchild1 tree=tree.prev() tree= tree.add() tree.name="gchild2" root / child1 / \ grandchild1 gchild2 tree=tree.prev() tree=tree.prev() tree=tree.add() tree=tree.name="child2" root / \ child1 child2 / \ grandchild1 gchild2 tree=tree.prev() tree=tree.goto("child1") or tree=tree.nex(0) tree.name="changed" root / \ changed child2 / \ grandchild1 gchild2 

This should be enough to get you started figuring out how to make this work

I have published a Python tree implementation [3] on my website: http://www.quesucede.com/page/show/id/python_3_tree_implementation .

Hope this is helpful,

Ok, here is the code:

 import uuid def sanitize_id(id): return id.strip().replace(" ", "") (_ADD, _DELETE, _INSERT) = range( 3) (_ROOT, _DEPTH, _WIDTH) = range(3) class Node: def __init__(self, name, identifier=None, expanded=True): self. __identifier = (str(uuid.uuid1()) if identifier is None else sanitize_id(str(identifier))) self.name = name self.expanded = expanded self.__bpointer = None self.__fpointer = [] @property def identifier(self): return self.__identifier @property def bpointer(self): return self.__bpointer @bpointer.setter def bpointer(self, value): if value is not None: self.__bpointer = sanitize_id(value) @property def fpointer(self): return self.__fpointer def update_fpointer(self, identifier, mode =_ADD): if mode is _ADD: self.__fpointer.append(sanitize_id(identifier)) elif mode is _DELETE: self.__fpointer.remove(sanitize_id(identifier)) elif mode is _INSERT: self.__fpointer = [sanitize_id(identifier) ] class s Tree: def __init__(self): self.nodes = [] def get_index(self, position): for index, node in enumerate(self.nodes): if node.identifier == position: break return index def create_node(self , name, identifier=None, parent=None): node = Node(name, identifier) ​​self.nodes.append(node) self.__update_fpointer(parent, node. identifier, _ADD) node.bpointer = parent return node def show( self, position, level=_ROOT): queue = self[position].fpointer if level == _ROOT: print("{0} [{1}]".format(self[position].name, self[position]. identifier)) else: print("\t"*level, "{0} [{1}]".format(self[position].name, self[position].identifier)) if self[position].expanded: level += 1 for element in queue: self.show(element, level) # recursive call def expand_tree(self, position, mode=_DEPTH): # Python generator. Loosly based on an algorithm from 'Essential LISP' by # John R. Anderson, Albert T. Corbett, and Brian J. Reiser, page 239-241 yield position queue = self[position].fpointer while queue: yield queue[0] expansion = self[queue[0]].fpointer if mode is _DEPTH: queue = expansion + queue[1:] # depth-first elif mode is _WIDTH: queue = queue[1:] + expansion # width-first def is_branch(self, position): return self[position].fpointer def __update_fpointer(self, position, identifier, mode): if position is None: return else: self[position]. update_fpointer(identifier, mode) def __update_bpointer(self, position, identifier): self[position].bpointer = identifier def __getitem__(self, key): return self.nodes[self.get_index(key) ] def __setitem__(self, key, item): self.nodes[self.get_index(key)] = item def __len__(self): return len(self.nodes) def __contains__(self, identifier): return [node.identifier for node in self.nodes if node.identifier is identifier] if __name__ == "__main__": tree = Tree() tree.create_node("Harry", "harry") # root node tree.create_node("Jane", " jane", parent = "harry") tree.create_node("Bill", "bi ll", parent = "harry") tree.create_node("Joe", "joe", parent = "jane") tree.create_node("Diane", "diane", parent = "jane") tree.create_node(" George", "george", parent = "diane") tree.create_node("Mary", "mary", parent = "diane") tree.create_node("Jill", "jill", parent = "george") tree .create_node("Carol", "carol", parent = "jill") tree.create_node("Grace", "grace", parent = "bill") tree.create_node("Mark", "mark", parent = " jane") print("="*80) tree. show("harry") print("="*80) for node in tree.expand_tree("harry", mode=_WIDTH): print(node) print(" ="*80) 
 class Tree(dict): """A tree implementation using python's autovivification feature.""" def __missing__(self, key): value = self[key] = type(self)() return value #cast a (nested ) dict to a (nested) Tree class def __init__(self, data={}): for k, data in data.items(): if isinstance(data, dict): self[k] = type(self)(data ) else: self[k] = data 

works like a dictionary but provides as many nested dicts as you want. Try this:

 your_tree = Tree() your_tree['a']['1']['x'] = '@' your_tree['a']['1']['y'] = '#' your_tree['a']['2']['x'] = '$' your_tree['a']['3'] = '%' your_tree['b'] = '*' 

will supply a nested dict... that works like a tree.

 {'a': {'1': {'x': '@', 'y': '#'}, '2': {'x': '$'}, '3': '% '}, 'b': '*'} 

… If you already have a dict, it will cast every level to the tree:

 d = {'foo': {'amy': {'what': 'runs '} } } tree = Tree(d) print(d['foo']['amy']['what']) # returns 'runs' d['foo']['amy']['when'] = 'now' # add new branch 

So you can keep editing/adding/removing each signal level as you wish. All dict methods for traversal etc still apply.

What operations do you need? Python often has a good solution using a dict or a list with the bisect module.

PyPI has many tree implementations, and many types of trees are almost trivial to implement in pure Python. However, this is rarely necessary.

 def iterative_bfs(graph, start): '''iterative breadth first search from start''' bfs_tree = {start: {"parents":[], "children":[], "level":0}} q = [start] while q: current = q.pop(0) for v in graph[current]: if not v in bfs_tree: bfs_tree[v]={"parents":[current], "children":[], "level": bfs_tree[current]["level"] + 1} bfs_tree[current]["children"].append(v) q.append(v) else: if bfs_tree[v]["level"] > bfs_tree [current]["level"]: bfs_tree[current]["children"].append(v) bfs_tree[v]["parents"].append(current) 
  • matplotlib: how to change color of data points based on some variable
  • Function change list values, not variable values ​​in Python
  • Fix depth tree in Python
  • print out python list from python binary tree
  • Get tree decomposition from ordering ordering and chordal graph
  • Python Tkinter: tree selection
  • Optimize binary tree diameter search in Python
  • Tree creator and print nodes in hierarchy
  • How to draw this tree template in python?
  • All possible sections (clusters) of the tree
  • Smooth out a repeating tree

Let's write and understand a Decision Tree in Python from scratch! Part 4.

Data structures / Sudo Null IT News

This article is the fourth in a series. Links to previous articles: first, second, third

4.1 Data structures

A data structure is a representation of how individual data is organized.

Array



Individual data are presented in one row. To identify one piece of data, for example, what order this particular piece of an array is, its identifier is needed.

 #Python array implementation example a = [2,6,4,5,1,8] 

Table, two-dimensional array


The data is arranged in several columns, the individual elements of the two-dimensional array also form rows.

To identify a piece of data in this case, two identifiers are required: for example, a column number and an element number.

 #Python table implementation example a = [ [2,6,4,5,1,8], [4,4,1,3,4,2], [5,3,6,6,5,3], [7,8,0,9,5,3], ] 

Tree


This is a data structure where individual data appears to be connected by a line.

These lines are a route that directly connects one data to another. And one such route, for example, will be a tree-like data structure.

Often such a structure is depicted as a diagram resembling a tree growing from top to bottom. The lines are called edges or branches, the data is called nodes (node), the data after which the line does not continue is called leaves (leaf), and the data located at the very top is called the root node or simply the root.

 # An example implementation of a Tree in Python containing a list of child nodes. # Written in the format [Value,list of child arrays]. For example, the tree in the picture above will be written in order from top to bottom, left to right. # In addition to such a record, there are options using classes, parent nodes, and others. tree=\ [2,[ [6,[]], [four,[ [5,[ [6,[]], ]], [eight,[]], [one,[]], ]], ]] # Function to convert a tree structure to a symbolic one def tstr(node,indent=""): s = indent+str(node[0])+"\n" for c in node[1]: # Loop on child node s += tstr(c,indent+"+-") pass return s print(tstr(tree)) # Conclusion #2 # +-6 # +-4 # +-+-5 # +-+-+-6 # +-+-8 # +-+-1 # You can not create the whole tree at once, but create several variables for the nodes # Create all leaf nodes. The number in the variable indicates the node line and its serial number in this line from left to right. n10 = [6,[]] n21 = [8,[]] n22 = [1,[]] n40 = [6,[]] # Create parent nodes for already created child nodes. n20 = [5,[n40]] n11 = [4,[n20,n21,n22]] n00 = [2,[n10,n11]] # Display the resulting Tree, specifying the node that should be considered the root. print(tstr(n11)) # Conclusion # four # +-5 # +-+-6 # +-8 # +-1 

Network or graphs


This is a structure where one node can have multiple edges. Individual data is also called nodes, and lines are also called edges. Often, graphs do not have a root node, as in a tree structure.

 # Graph implementation in Python import pandas as pd # In our case, the node name and its value are the same. # If the name of the node and its value do not match, you will need to get the value of the node. nodes = [2,6,4,5,8,1] # Specify links between nodes in the form of a matrix. If there is an edge from node 2 (first row) to node 6 (second row), # The value of the 1st row of the 2nd column of the matrix will be 1, and if the edge does not pass, then it will be 0. Such a matrix is ​​called an adjacency matrix. df = pd.DataFrame( [ # 2,6,4,5,8,1 [ 0,1,1,0,0,0], # from node 2 [ 1,0,0,1,0,0], # from node 6 [ 1,0,0,1,1,1], # from node 4 [ 0,1,1,0,0,0], # from node 5 [ 0,0,1,0,0,0], # from node 8 [ 0,0,1,0,0,0], # from node 1 ],columns=nodes,index=nodes) print(df) # Conclusion # 2 6 4 5 8 1 # 2 0 1 1 0 0 0 #6 1 0 0 1 0 0 #4 1 0 0 1 1 1 # 5 0 1 1 0 0 0 # 8 0 0 1 0 0 0 # 1 0 0 1 0 0 0 # Using the matplotlib and networkx libraries, draw a graph. import matplotlib.pyplot as plt import networkx as nx plt.figure(figsize=(4,4)) plt.axis("off") nx.draw_networkx(nx.from_pandas_adjacency(df)) plt.show() 

Output:

4.2 Decision Tree implementation example in Python

Decision Tree, as the name implies, can be represented as a tree structure.
The following data is stored in the nodes: a list of child nodes, branching rules, the path from one node to another within the Decision Tree.

As shown below, the root node connects all data. The numerical value [...] attached to a node is the number of the original data from which this decision tree is generated. From the root node, only those data that satisfy the conditions of the child nodes “go down”. For example, you can see this if you look at the nodes “going golf” and “not going golf” in the first decision tree.

The implementation in python is, for example, as follows. Let's make one node an associative array, name is a symbolic representation of that node's state, df is the data associated with that node, and edges is a list of child nodes.

 # Tree structure data tree = { # name: name of this node "name":"decision tree "+df0.columns[-1]+" "+str(cstr(df0.iloc[:,-1])), # df: data associated with this node "df":df0, # edges: list of edges (branches) coming out of this node. If the node is a leaf node, that is, if it has no edges, the array remains empty. "edges":[], } 

The tstr function that texts this tree structure will look like this.

 # Lambda expression for distribution of values, argument - pandas.Series, return value - array with the number of each of the values # From the input s, use value_counts() to find the frequency of each of the values, and as long as there are items in our dictionary, the loop triggered by items() will run. # So that the output does not change with each run of the loop, we use sorted, which changes the order from largest to smallest # As a result, an array is generated containing a string of the following components: key (k) and value (v). cstr = lambda s:[k+":"+str(v) for k,v in sorted(s.value_counts().items())] # Tree textification method: argument - tree (tree data structure), indent (indentation of child nodes), # And the return value will be a textual representation of tree. This method is called recursively to convert everything in the tree into characters. def tstr(tree,indent=""): # Create a symbolic representation of this node. # If this node is a leaf node (the number of elements in the edge array is 0), # The frequency distribution of the last column of df data associated with the tree is converted to symbols. s = indent+tree["name"]+str(cstr(tree["df"].iloc[:,-1]) if len(tree["edges"])==0 else "")+"\ n" # Loop through all branches of this node. for e in tree["edges"]: # Add the symbolic representation of the child node to the symbolic representation of the parent node. # Add more characters to this node's indent. s += tstr(e,indent+" ") pass return s

The Decision Tree, textified by the tstr function, will look like this. The root node displays the text (decision tree Golf) that we set at the very beginning when creating the tree variable, as well as the frequency distribution of "going / not going to golf" from all data. Each node below displays the branching rules, and if the node is a leaf node, the go/no go golf frequency distribution is displayed based on the data associated with that node.


Learn more