Create Interactive hierarchy diagram from pandas/dictionary

Steve_Greenwood :

I have data that maps each employee to their manager (Person: Manager):

data = {'PersonX':'Person1', 'PersonY':'Person1', 'PersonZ':'Person2', 'Person1':'Person100','Person2':'Person100' }

I am trying to draw a clean-looking hierarchy chart from the above data; being able to filter the data within the visualization itself would be a bonus.

The data I receive varies in size: sometimes it contains only 5 people, and sometimes more than 5000 records.

I have tried the approaches below, but they come nowhere close to producing an interactive graph.

Code -

Try 1 -

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

# Reporting lines as a {person: manager} mapping.
d = {'PersonX': 'Person1', 'PersonY': 'Person1', 'PersonZ': 'Person2', 'Person1': 'Person100', 'Person2': 'Person100'}
# One row per reporting line: the person and the manager they report to.
df = pd.DataFrame(d.items(), columns=['Person', 'Manager'])
# One edge per person/manager pair, using networkx's default graph type.
G = nx.from_pandas_edgelist(df, source='Person', target='Manager')
# Static rendering only; matplotlib.pyplot must be imported for plt.show().
nx.draw(G, with_labels=True)
plt.show()

Network X graph

Try 2 -

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from scipy.cluster import hierarchy

# Relies on `df` (columns 'Person' and 'Manager') built in the previous attempt.
# Each column is label-encoded independently into integer codes.
# NOTE(review): these codes are arbitrary integers, so the clustering below
# presumably does not reflect the actual reporting structure.
df2 = df.apply(LabelEncoder().fit_transform)
df2.set_index('Manager', inplace=True)
# Compute the Ward linkage once and reuse it for plotting instead of
# recomputing it inside the dendrogram call.
Z = hierarchy.linkage(df2, 'ward')
hierarchy.dendrogram(Z)
plt.show()

Attempt 2nd

Try 3 -

def _dot_quote(name):
    """Return *name* as a double-quoted DOT ID with embedded quotes escaped."""
    return '"{0}"'.format(str(name).replace('"', '\\"'))


# Emit the hierarchy as a Graphviz DOT digraph on stdout.
# Relies on `d`, the {person: manager} mapping defined earlier.
print('strict digraph tree {')
for person, manager in d.items():
    # IDs are quoted because an unquoted DOT identifier cannot contain
    # spaces or punctuation, which real names usually do.
    print('    {0} -> {1};'.format(_dot_quote(person), _dot_quote(manager)))
print('}')

And ran the script, piping its output to Graphviz:

test.py | dot -Tpng -otree.png

GraphViz

Steve_Greenwood :

I went with the following code to create an interactive graph. It is a work in progress, but I wanted to post it so that others can use it if needed.

import pandas as pd
import dash
import dash_html_components as html
import dash_cytoscape as cyto
from matplotlib import colors as mcolors
from itertools import zip_longest
from ast import literal_eval

# Candidate colors, keyed by name.  Only the CSS4 names are used because the
# names end up as 'line-color' values in the Cytoscape stylesheet below;
# matplotlib's BASE_COLORS shorthands ('b', 'k', ...) are not CSS color names
# and would produce style rules the browser side cannot interpret.
colors = dict(mcolors.CSS4_COLORS)
# Sort colors by hue, saturation, value and name.
by_hsv = sorted((tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name)
                for name, color in colors.items())
# Color names only, in the sorted order above.
sorted_names = [name for _hsv, name in by_hsv]

# Dash application object that hosts the page.
app = dash.Dash(__name__)

# Stylesheet handed to the Cytoscape component: a base rule for nodes, a base
# rule for edges, then one class rule per palette color so that any element
# carrying that color name as a class gets the matching line color.
_node_rule = {
    'selector': 'node',
    'style': {
        'opacity': 0.9,
        'height': 15,
        'width': 15,
        'background-color': '#222222',
        'label': 'data(label)',
    },
}

_edge_rule = {
    'selector': 'edge',
    'style': {
        'curve-style': 'bezier',
        'opacity': 0.3,
        'width': 2,
    },
}

_color_rules = [
    {'selector': '.' + color, 'style': {'line-color': color}}
    for color in sorted_names
]

default_stylesheet = [_node_rule, _edge_rule] + _color_rules

# Example data for illustration.
# (The real data came from an Excel file with two columns, Manager and Person.)
# Each manager is listed with their direct reports; the two parallel lists
# used by the rest of the script are derived from this single mapping.
reports_by_manager = {
    'Person A': ['Person D', 'Person E', 'Person F',
                 'Person G', 'Person H', 'Person I'],
    'Person B': ['Person J', 'Person K', 'Person L',
                 'Person M', 'Person N', 'Person O'],
    'Person C': ['Person P', 'Person Q', 'Person R',
                 'Person S', 'Person T', 'Person U'],
    'Person V': ['Person A', 'Person W', 'Person X',
                 'Person B', 'Person C'],
}

# managers[i] is the manager of person[i].
managers = [boss
            for boss, reports in reports_by_manager.items()
            for _ in reports]

person = [report
          for reports in reports_by_manager.values()
          for report in reports]

# Creating a dataframe with the illustration data
df = pd.DataFrame(list(zip(person, managers)), columns=['Person', 'Manager'])

# Giving a color to each manager in the dataframe.
# Managers are taken in order of first appearance so the assignment is the
# same on every run, and the palette is sampled at evenly spaced positions so
# that different managers get clearly different colors.  Indexing wraps
# around, so having more managers than palette entries reuses colors instead
# of leaving some managers without one.
manager_names = list(dict.fromkeys(df['Manager']))
palette_step = max(1, len(sorted_names) // max(1, len(manager_names)))
color_of = {
    name: sorted_names[(position * palette_step) % len(sorted_names)]
    for position, name in enumerate(manager_names)
}
df['colors'] = df['Manager'].map(color_of)

# Creating the nodes: exactly one element per distinct name, because element
# ids must be unique.  Someone who is both a report and a manager takes the
# color of the team they manage; everyone else takes their manager's color.
node_class = {**dict(zip(df['Person'], df['colors'])), **color_of}
nodes = [
    {'data': {'id': name, 'label': name}, 'classes': color}
    for name, color in node_class.items()
]

# Creating the edges: one per distinct (manager, person) pair, pointing from
# the manager to the report and tagged with the manager's color class.
# The elements are built as dicts directly rather than as strings parsed back
# into dicts, so names containing quotes or backslashes cannot corrupt them.
edge_rows = df[['Manager', 'Person', 'colors']].drop_duplicates()
edges = [
    {'data': {'source': manager, 'target': name}, 'classes': color}
    for manager, name, color in edge_rows.itertuples(index=False, name=None)
]

# The page consists of a single Cytoscape component showing all edges and
# nodes, styled with the stylesheet defined above and arranged with the
# 'breadthfirst' layout.
graph_view = cyto.Cytoscape(
    id='cytoscape',
    elements=edges + nodes,
    stylesheet=default_stylesheet,
    layout={'name': 'breadthfirst'},
    style={'height': '95vh', 'width': '100%'},
)

app.layout = html.Div([graph_view])

if __name__ == '__main__':
    # Start the Dash server with debug mode enabled when run as a script.
    app.run_server(debug=True)

Output was a webpage -

enter image description here

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=336618&siteId=1