3  Metabolic Graphs

# Report memory usage before the saved workspace is loaded.
gc()
          used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 2767419 147.8    5141138 274.6  4916595 262.6
Vcells 4561965  34.9   10146329  77.5  8388589  64.0
# Restore the objects stored in the workspace file, then report memory usage
# again so the footprint of the loaded objects is visible.
load(file='metadag_work_space.RData')
gc()
           used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells  2867110 153.2    5141138 274.6  4916595 262.6
Vcells 38144536 291.1   49087077 374.6 38165243 291.2

We present here some analysis examples of the metabolic graphs generated in GraphML format.

3.1 Metabolic graphs for each organism

Read the individual metabolic graphs generated for Homo sapiens (KEGG id: hsa) in the directory (Individuals/hsa).

# Experiment identifier; every result of this run lives under data/<experiment>.
experiment <- "0a845f74-826e-3b46-aed9-e7ecf74db262/"
path_exp <- paste0("data/", experiment)
# Files generated for the individual organism hsa.
files_hsa <- dir(path = paste0(path_exp, "Individuals/hsa"))
files_hsa
 [1] "hsa_mDAG.graphml"       "hsa_mDAG.pdf"           "hsa_mDAG.svg"          
 [4] "hsa_mDAG_adj.csv"       "hsa_mDAG_biggerDAG.pdf" "hsa_mDAG_biggerDAG.svg"
 [7] "hsa_mDAG_nl.csv"        "hsa_mDAG_structure.csv" "hsa_R_adj.csv"         
[10] "hsa_R_nl.csv"           "hsa_RC.graphml"         "hsa_RC.pdf"            
[13] "hsa_RC.svg"             "hsa_summary.txt"       
files_Individual_hsa Description
hsa_mDAG.graphml m-DAG GraphML format
hsa_mDAG.pdf m-DAG pdf graphic
hsa_mDAG.svg m-DAG svg graphic
hsa_mDAG_adj.csv csv file with the adjacency matrix of the m-DAG
hsa_mDAG_biggerDAG.pdf pdf graphic with the biggest connected component of the m-DAG
hsa_mDAG_biggerDAG.svg svg graphic with the biggest connected component of the m-DAG
hsa_mDAG_nl.csv csv file with the node (MBBs) labels of the m-DAG
hsa_mDAG_structure.csv csv file with all connected components of the m-DAG
hsa_R_adj.csv csv file with the adjacency matrix of the reaction graph
hsa_R_nl.csv csv file with the node (reactions) labels of the reaction graph
hsa_RC.graphml reaction graph GraphML format
hsa_RC.pdf reaction graph pdf graphic
hsa_RC.svg reaction graph svg graphic
hsa_summary.txt text summary file with the number of MBBs, reactions, etc. in the previous graphs

3.2 Pan & core metabolic graphs

Pan and core metabolic graphs for every group were generated. For instance, one can read the pan and core metabolic graphs generated for the group Algae in the directory (Groups/Algae).

# Contents of the directory that holds the Algae group results.
files_Algae <- dir(path = paste0(path_exp, "Groups/Algae"))
files_Algae
[1] "core" "pan" 

The global core reaction graph, which is the core of all the organisms’ reaction graphs in this Eukaryotes test, is empty.

# Read the global core reaction graph from its GraphML file and print a summary.
# read_graph() is the current igraph name; read.graph() is a deprecated alias.
graph_core_RC <- read_graph(
  paste0(path_exp, "Global/core/core_RC.graphml"),
  format = "graphml"
)
summary(graph_core_RC)
IGRAPH 089f3be D--- 0 0 -- 
+ attr: color (v/c), label (v/c), id (v/c)

The global core reaction graph has 0 vertices and 0 edges. It is an empty graph.

The core reaction graph for the Algae group is:

# Show the core reaction graph of the Algae group. The figure is taken from the
# Algae group directory so that it matches the accompanying text and caption.
# NOTE(review): file name follows the <group>_core_<graph>.pdf pattern used for
# the Algae core m-DAG figure; confirm the file exists in this experiment.
knitr::include_graphics(
  paste0(path_exp, "Groups/Algae/core/Algae_core_RC.pdf"))

Algae core reaction graph

The global core m-DAG, i.e., the core of all organisms in this example, is empty.

# Read the global core m-DAG from its GraphML file and print a summary.
# read_graph() is the current igraph name; read.graph() is a deprecated alias.
graph_core_mDAG <- read_graph(
  paste0(path_exp, "Global/core/core_mDAG.graphml"),
  format = "graphml"
)
summary(graph_core_mDAG)
IGRAPH 08b0d3a D--- 0 0 -- 
+ attr: color (v/c), label (v/c), id (v/c)

The global core m-DAG has 0 vertices and 0 edges. It is an empty graph.

The core metabolic DAG for the Algae group is:

# Show the core m-DAG figure generated for the Algae group.
knitr::include_graphics(
  paste0(path_exp, "Groups/Algae/core/Algae_core_mDAG.pdf")
)

Core m-DAG for Algae

The global pan reaction graph for the Animals Kingdom is:

# Read the pan reaction graph of the Animals Kingdom and print a summary.
# read_graph() is the current igraph name; read.graph() is a deprecated alias.
graph_pan_RC <- read_graph(
  paste0(path_exp,
         "TaxonomyLevels/Kingdom/Animals/pan/Animals_pan_RC.graphml"),
  format = "graphml"
)
summary(graph_pan_RC)
IGRAPH 08d0e67 D--- 4556 5798 -- 
+ attr: color (v/c), label (v/c), id (v/c), id (e/c)

This pan reaction graph has 4556 nodes and 5798 edges.

3.3 Graph’s topology

From the GraphML files, one can extract topological information. Some examples are as follows.

The diagram below illustrates the distribution of node degrees for an m-DAG.

# Read the m-DAG of Homo sapiens (hsa) from its GraphML file and print a summary.
# read_graph() is the current igraph name; read.graph() is a deprecated alias.
graph_mDAG <- read_graph(
  paste0(path_exp, "Individuals/hsa/hsa_mDAG.graphml"),
  format = "graphml"
)
summary(graph_mDAG)
IGRAPH 08e076b D--- 1026 1086 -- 
+ attr: color (v/c), label (v/c), id (v/c), id (e/c)
# Bar chart with the number of nodes observed for each degree (mode = "all").
barplot(
  height = table(igraph::degree(graph_mDAG, mode = "all")),
  main = "Frequency of Node Degrees",
  xlab = "Degree",
  ylab = "Frequency",
  col = "blue",
  ylim = c(0, 350)
)

The connected components of every graph as well as the size of every connected component can be obtained as:

# Weakly connected components of the m-DAG, followed by the structure of the
# returned list.
compo <- components(graph_mDAG, mode = "weak")
str(compo)
List of 3
 $ membership: num [1:1026] 1 1 1 1 1 1 1 1 1 1 ...
 $ csize     : num [1:167] 589 1 1 1 1 1 4 3 4 3 ...
 $ no        : num 167
# Size of every weakly connected component, in component-id order.
compo$csize
  [1] 589   1   1   1   1   1   4   3   4   3   2   3   3   1   1   1   2   6
 [19]   3   1   3   6   1   1   1   1   1   3   1   6   2   1   1   1   2   1
 [37]   1  14   1  16   1   6   2   2   4   1   1   1   1   1   1   1   1   1
 [55]  13   1   1   1   1   2   6   5   5   2   2  10   1   1   1   2   2   1
 [73]   1   1  62   6   2   1   2   1   1   1   2   1   2  14   3   1   1   1
 [91]   1   1   1   1   1   1   3   6   1   3   1   3   2   1   1   1   2   2
[109]   3   1   1   2   5   1   1   2   3   2   1   1   2   3   4   1   1   2
[127]   1   1   2   1   1   1   1   1   3   1   2   2   1   6   1   1   1   2
[145]   1   1   1   1   1   2   7   1  15   3   1   1   1   1   2   1   3   1
[163]   1   1   1   1   2
# Id of the largest weakly connected component. which.max() already returns the
# position of the first maximum, so no comparison against max() is needed.
k <- which.max(compo$csize)
k
[1] 1
# Number of nodes assigned to each component id.
table(compo$membership)

  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
589   1   1   1   1   1   4   3   4   3   2   3   3   1   1   1   2   6   3   1 
 21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40 
  3   6   1   1   1   1   1   3   1   6   2   1   1   1   2   1   1  14   1  16 
 41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60 
  1   6   2   2   4   1   1   1   1   1   1   1   1   1  13   1   1   1   1   2 
 61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80 
  6   5   5   2   2  10   1   1   1   2   2   1   1   1  62   6   2   1   2   1 
 81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 
  1   1   2   1   2  14   3   1   1   1   1   1   1   1   1   1   3   6   1   3 
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 
  1   3   2   1   1   1   2   2   3   1   1   2   5   1   1   2   3   2   1   1 
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 
  2   3   4   1   1   2   1   1   2   1   1   1   1   1   3   1   2   2   1   6 
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 
  1   1   1   2   1   1   1   1   1   2   7   1  15   3   1   1   1   1   2   1 
161 162 163 164 165 166 167 
  3   1   1   1   1   1   2 
# Positions of the nodes whose component id is k (the largest component).
vertex <- which(compo$membership == k)
length(vertex)
[1] 589
# Subgraph induced by the nodes of the largest component, and its node count.
Big_Component <- induced_subgraph(graph = graph_mDAG, vids = vertex)
igraph::vcount(Big_Component)
[1] 589
# Number of edges in the subgraph of the largest component.
igraph::ecount(Big_Component)
[1] 774

And the plot of the biggest connected component of the m-DAG in Homo sapiens is:

# Show the pre-rendered figure of the biggest component of the hsa m-DAG.
knitr::include_graphics(
  paste0(path_exp, "Individuals/hsa/hsa_mDAG_biggerDAG.pdf")
)

# path_exp="data/result_bb261b6e-95c6-3e39-b82b-b68eea80e30b/data/"
# Names of the entries under Individuals/; the first entry is dropped
# (0000_RefPw, according to the original note).
list_names <- dir(paste0(path_exp, "Individuals/"))[-1]
length(list_names)
[1] 884
# Path of the m-DAG GraphML file of every organism. The generated files are
# named <organism>_mDAG.graphml (lower-case "m"); the suffix must match that
# case exactly, otherwise the files are not found on case-sensitive systems.
graphs_list <- paste0(path_exp, "Individuals/", list_names, "/",
                      list_names, "_mDAG.graphml")
# Show the pre-rendered reaction graph figure of organism cang.
knitr::include_graphics(
  paste0(path_exp, "Individuals/cang/cang_RC.pdf"))

3.4 Graph statistics

The number of connected components in each generated m-DAG, together with their frequency in the entire set of m-DAGs, can be obtained as follows:

# Read one m-DAG from a GraphML file.
#
# x: path to the GraphML file.
# Returns the graph object produced by read_graph().
# read_graph() is the current igraph name; read.graph() is a deprecated alias.
read_mDAG <- function(x) {
  read_graph(file = x, format = "graphml")
}
# Sizes of the weakly connected components of a graph, largest first.
#
# x: graph object accepted by components().
# Returns the component sizes sorted in decreasing order.
mDAG_components <- function(x) {
  component_sizes <- components(x, mode = "weak")$csize
  sort(component_sizes, decreasing = TRUE)
}

# For every m-DAG file, collect the sorted weak-component sizes and the total
# degree of each node.
compo_list <- lapply(graphs_list, function(path) {
  mdag <- read_mDAG(path)
  list(
    mDAG_components = mDAG_components(mdag),
    degree_count = igraph::degree(mdag, mode = "total")
  )
})

names(compo_list) <- list_names
# Largest number of weak components found in any single m-DAG. vapply() keeps
# the result an integer vector whatever the input looks like, unlike sapply().
n <- max(vapply(compo_list, function(x) length(x[[1]]), integer(1)))
n
[1] 234
# Pad the component-size vector of every organism with NA up to length n, so
# that all vectors have the same length.
size_compo_list <- lapply(compo_list, function(entry) {
  sizes <- entry[[1]]
  c(sizes, rep(NA, n - length(sizes)))
})

# One column per organism, n rows each (component sizes padded with NA).
aux <- do.call(bind_cols, size_compo_list)
# Long format: one row per (organism, component). Every column of `aux` is an
# organism, so all columns are pivoted rather than a hard-coded range of
# organism codes that breaks as soon as the organism set changes.
weak_components_size <- aux %>%
  pivot_longer(everything(), names_to = "Organism", values_to = "csize") %>%
  arrange(Organism, desc(csize))
# Each organism contributes exactly n rows, sorted by decreasing size with the
# NA padding last, so the rank inside an organism is 1..n repeated per organism.
weak_components_size$index <- rep(seq_len(n), times = ncol(aux))
# Attach the taxonomy and keep only real components of organisms with a Kingdom.
weak_components_size <- weak_components_size %>%
  left_join(meta_taxo, by = "Organism") %>%
  filter(!is.na(Kingdom), !is.na(csize))
weak_components_size_raw <- weak_components_size
Organism <- names(compo_list)

# Count how often each MBB label occurs for one organism.
#
# org: organism code compared against the Organism column of `Results`.
# Returns a data frame with the columns MBB, size (number of occurrences, in
# decreasing order) and Organism.
# Reads the global table `Results`: columns whose name contains "_rev" are
# dropped, and the labels are taken from column 6 onwards of the first
# matching row.
size_MBB <- function(org) {
  org_rows <- Results %>%
    filter(Organism == org) %>%
    select(!contains("_rev"))
  labels <- as.character(org_rows[1, 6:ncol(org_rows)])
  # Discard real missing values as well as the literal string "NA".
  labels <- labels[!is.na(labels) & labels != "NA"]
  label_counts <- sort(table(labels), decreasing = TRUE)
  tt <- data.frame(label_counts, org)
  names(tt) <- c("MBB", "size", "Organism")
  tt
}


# MBB size table of every organism, stacked into a single data frame; the last
# line prints the largest MBB size found.
size_list_raw <- lapply(Organism, size_MBB)
names(size_list_raw) <- Organism
size_MBB_df <- do.call(rbind, size_list_raw)
max(size_MBB_df$size)
[1] 757
# Attach the Kingdom of every organism and drop the rows without one.
size_MBB_df <- size_MBB_df %>%
  left_join(
    meta_taxo %>% select(Organism, Kingdom),
    by = "Organism"
  ) %>%
  filter(!is.na(Kingdom))

We can visualize the sizes of the weak components of each m-DAG, using colors to represent the different Kingdoms; we also show the results on a log-log scale:

# One colour per Kingdom; the colours are named after the sorted Kingdom values.
COLOR_KINGDOM <- c("red", "yellow", "green", "black")
colors_kingdom <- weak_components_size %>%
  distinct(Organism, Kingdom)
names(COLOR_KINGDOM) <- sort(unique(colors_kingdom$Kingdom))

# p1: weak component sizes of every organism in decreasing order (one line per
# organism, coloured by Kingdom) on linear axes.
# The y limits are set on the y scale itself; adding ylim() after
# scale_y_continuous() would replace that scale and emit a message.
# The colour scale receives the named palette directly: ggplot2 matches named
# values to the Kingdom levels, so one entry per Kingdom is sufficient.
p1 <- ggplot(data = weak_components_size) +
  geom_line(
    mapping = aes(x = index, y = csize, group = Organism, color = Kingdom),
    na.rm = TRUE
  ) +
  scale_x_continuous(trans = "identity") +
  scale_y_continuous(trans = "identity", limits = c(0, 640)) +
  ggtitle("Plot of weak components size decreasing order.") +
  ylab("Weak components size") +
  xlab("Order") +
  scale_color_manual(values = COLOR_KINGDOM)

# p2: the same curves with both axes on a log10 scale.
p2 <- ggplot(data = weak_components_size) +
  geom_line(
    mapping = aes(x = index, y = csize, group = Organism, color = Kingdom),
    na.rm = TRUE
  ) +
  scale_y_continuous(trans = "log10") +
  scale_x_continuous(trans = "log10") +
  scale_color_manual(values = COLOR_KINGDOM) +
  ggtitle("Plot log10-log10 of size  weak components decreasing order.") +
  ylab("Log10 weak component size") +
  xlab("Log10 order")
p1

p2

A table with the frequencies of the weak connected components sizes, displayed by Kingdom, can be obtained as follows:

# Cross-tabulate component size (rows) against Kingdom (columns).
aux <- table(weak_components_size$csize, weak_components_size$Kingdom)
# Columns are selected by Kingdom name rather than by position, so a different
# ordering of the Kingdom values cannot silently mislabel the counts.
table_wcc_size <- tibble(
  Order = seq_len(nrow(aux)),
  Wcc_size = as.integer(rownames(aux)),
  Animals = aux[, "Animals"],
  Fungi = aux[, "Fungi"],
  Plants = aux[, "Plants"],
  Protists = aux[, "Protists"]
)
knitr::kable(table_wcc_size,
             caption = "Weak Connected Componet Size") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  scroll_box(height = "300px", width = "100%")
Weak Connected Componet Size
Order Wcc_size Animals Fungi Plants Protists
1 1 57634 15311 15024 4462
2 2 13528 2118 2908 932
3 3 7018 1070 911 317
4 4 2459 313 292 96
5 5 1962 215 389 81
6 6 1778 154 323 49
7 7 633 46 111 15
8 8 278 57 110 7
9 9 174 76 151 22
10 10 671 6 124 7
11 11 411 14 6 20
12 12 109 12 4 5
13 13 369 92 132 7
14 14 774 9 2 2
15 15 234 1 5 6
16 16 79 6 1 1
17 17 55 8 1 1
18 18 4 5 9 5
19 19 5 3 87 4
20 20 9 3 0 1
21 21 13 2 12 1
22 22 7 2 2 0
23 23 21 1 3 0
24 24 19 1 3 1
25 25 20 0 2 1
26 26 12 0 39 0
27 27 1 1 11 0
28 28 5 2 77 0
29 29 40 1 0 1
30 30 0 1 1 0
31 31 72 8 1 0
32 32 11 7 1 0
33 33 1 0 0 2
34 34 6 0 0 0
35 35 2 0 0 0
36 36 3 0 0 0
37 37 5 1 2 0
38 38 0 0 0 1
39 39 5 0 0 0
40 40 5 1 0 0
41 41 4 0 0 0
42 42 1 0 0 0
43 43 2 0 0 1
44 44 6 0 0 0
45 45 1 0 0 0
46 46 5 0 0 0
47 47 9 0 0 0
48 48 1 0 0 0
49 49 1 1 0 0
50 50 1 0 0 0
51 51 4 0 0 0
52 53 50 1 0 1
53 54 6 0 0 0
54 55 1 0 0 0
55 56 8 0 0 0
56 57 2 0 0 0
57 58 12 1 0 0
58 59 6 0 0 0
59 60 30 1 0 0
60 61 5 0 0 0
61 62 15 0 0 0
62 63 4 0 0 0
63 64 2 0 0 0
64 65 34 0 0 0
65 66 6 0 0 0
66 67 1 0 0 0
67 69 1 0 0 0
68 70 2 0 0 1
69 71 1 0 0 0
70 72 1 0 0 0
71 73 6 0 0 0
72 77 2 0 0 1
73 78 1 0 0 1
74 83 0 0 0 2
75 84 1 0 0 0
76 87 1 0 0 0
77 88 1 0 0 1
78 90 2 0 0 1
79 91 1 0 0 0
80 94 4 0 0 1
81 95 1 0 0 0
82 96 1 0 0 1
83 97 0 0 0 1
84 99 1 0 0 1
85 100 2 0 0 0
86 101 1 0 0 0
87 102 0 0 0 1
88 103 3 0 0 1
89 104 0 0 0 2
90 105 0 0 0 1
91 107 0 0 0 1
92 108 1 0 0 2
93 110 2 0 0 0
94 111 1 0 0 0
95 112 0 1 0 0
96 113 2 0 0 0
97 114 1 0 0 1
98 116 1 0 0 0
99 117 3 0 0 0
100 120 1 0 0 0
101 121 2 0 0 0
102 125 1 0 0 0
103 127 1 0 0 1
104 128 0 0 0 2
105 153 0 1 0 0
106 166 0 0 0 1
107 175 0 0 0 2
108 181 0 0 0 1
109 183 0 0 0 2
110 184 1 0 0 0
111 187 0 0 0 1
112 193 1 0 0 1
113 195 0 0 0 1
114 196 0 0 0 1
115 198 1 0 0 0
116 203 0 0 0 1
117 205 0 0 0 1
118 210 2 0 0 0
119 219 1 0 0 1
120 220 1 0 0 0
121 229 1 0 0 0
122 231 0 0 0 1
123 232 1 0 0 0
124 233 0 1 0 0
125 235 1 0 0 0
126 238 3 0 0 0
127 243 1 0 0 0
128 245 0 1 0 0
129 246 1 1 0 0
130 248 0 1 0 0
131 249 0 2 0 0
132 250 0 2 0 0
133 251 0 1 0 0
134 252 1 2 0 0
135 253 1 0 0 0
136 254 0 1 0 0
137 255 2 0 0 0
138 256 2 1 0 0
139 257 4 0 0 0
140 258 1 1 0 0
141 259 1 0 0 0
142 260 2 1 0 1
143 261 2 1 0 0
144 262 2 0 0 0
145 263 4 1 0 1
146 264 4 0 0 0
147 265 3 4 0 2
148 266 2 1 0 0
149 267 2 1 0 0
150 268 2 3 0 1
151 269 3 1 0 0
152 270 3 0 1 0
153 271 1 1 0 0
154 272 2 2 0 0
155 273 4 3 0 1
156 274 2 1 0 0
157 275 4 2 0 0
158 276 0 2 0 0
159 277 4 4 0 0
160 278 3 3 0 1
161 279 5 0 0 1
162 280 1 0 0 0
163 281 3 4 0 0
164 282 4 1 0 0
165 283 5 1 0 0
166 284 4 1 0 0
167 285 5 1 0 0
168 286 4 2 0 1
169 287 1 0 0 0
170 288 2 3 0 0
171 289 1 0 0 0
172 290 3 0 0 0
173 291 1 1 0 0
174 292 1 3 0 1
175 293 5 1 0 0
176 294 6 0 0 0
177 295 1 2 0 0
178 296 5 1 0 0
179 297 4 1 0 0
180 298 2 1 0 0
181 299 2 0 1 1
182 300 2 0 0 0
183 301 4 2 0 0
184 302 1 0 0 1
185 303 3 0 0 0
186 304 3 0 0 0
187 305 3 0 0 0
188 306 2 1 0 0
189 307 4 1 0 0
190 308 1 0 0 0
191 310 1 0 0 0
192 311 1 0 1 0
193 313 2 2 0 0
194 315 1 0 0 0
195 316 1 0 0 0
196 317 1 0 1 0
197 318 0 2 0 0
198 319 1 0 0 0
199 320 3 1 1 0
200 321 1 0 0 0
201 322 3 0 0 0
202 323 2 0 0 0
203 325 1 0 0 0
204 327 2 1 0 0
205 328 5 1 1 0
206 330 0 1 0 0
207 332 1 0 0 0
208 333 1 1 1 1
209 335 1 0 0 0
210 338 1 0 0 0
211 339 0 0 1 1
212 340 1 1 0 0
213 341 1 0 0 0
214 342 1 0 0 1
215 343 2 1 0 0
216 344 0 0 1 0
217 345 2 0 0 0
218 346 1 0 0 0
219 347 2 0 0 0
220 349 2 0 0 0
221 350 1 0 0 0
222 351 1 0 0 0
223 352 3 0 0 0
224 353 1 0 0 0
225 354 1 0 0 0
226 355 2 0 0 0
227 356 1 1 0 0
228 357 1 2 0 0
229 358 2 1 0 0
230 359 0 0 1 0
231 360 1 0 1 0
232 361 1 3 0 0
233 362 1 2 0 0
234 363 2 3 0 0
235 365 2 1 0 0
236 366 1 0 0 0
237 367 1 0 0 0
238 368 2 1 0 0
239 369 3 1 0 0
240 370 0 2 0 0
241 371 0 2 0 0
242 372 1 0 0 0
243 373 2 2 0 0
244 374 3 1 0 0
245 375 2 1 0 0
246 376 0 1 0 0
247 377 0 1 0 0
248 378 2 0 0 0
249 380 0 3 0 0
250 381 2 1 0 0
251 382 2 1 0 0
252 383 3 1 1 0
253 384 0 1 0 0
254 385 0 1 0 0
255 387 2 2 0 0
256 388 1 2 0 0
257 389 1 0 0 0
258 391 2 0 0 0
259 392 0 1 0 0
260 393 1 1 0 0
261 394 3 0 0 0
262 396 1 0 0 0
263 397 0 2 0 0
264 398 2 0 0 0
265 399 1 1 0 0
266 400 0 0 1 0
267 401 1 1 0 0
268 402 0 1 0 0
269 404 1 1 0 0
270 405 3 1 0 0
271 406 0 1 0 0
272 407 0 2 0 0
273 408 1 1 1 0
274 409 2 0 0 0
275 411 3 0 0 0
276 412 1 0 0 0
277 413 2 1 0 0
278 415 1 0 0 0
279 416 2 0 0 0
280 417 0 1 0 0
281 418 0 1 0 0
282 419 0 1 0 0
283 420 0 1 0 0
284 421 1 2 0 0
285 423 0 2 0 0
286 424 0 1 0 0
287 426 2 0 0 0
288 428 1 0 0 0
289 431 0 1 0 0
290 432 1 0 0 0
291 433 3 0 0 0
292 435 1 0 0 0
293 438 0 1 0 0
294 439 0 1 0 0
295 443 1 0 0 0
296 444 1 0 0 0
297 445 2 1 0 0
298 446 2 0 0 0
299 447 1 0 0 0
300 452 1 0 0 0
301 453 1 0 0 0
302 455 1 0 0 0
303 459 1 0 0 0
304 462 1 0 0 0
305 463 2 0 0 0
306 465 1 0 0 0
307 468 1 0 0 0
308 470 1 0 0 0
309 471 0 0 1 0
310 473 0 0 1 0
311 475 2 0 0 0
312 480 1 0 0 0
313 481 1 0 0 0
314 482 1 0 0 0
315 483 1 0 0 0
316 485 0 0 1 0
317 487 1 0 0 0
318 491 3 0 0 0
319 492 1 0 0 0
320 493 3 0 0 0
321 496 2 0 0 0
322 497 3 0 0 0
323 500 0 0 1 0
324 501 1 0 0 0
325 502 5 0 0 0
326 503 1 0 0 0
327 504 1 0 1 0
328 507 2 0 0 0
329 508 2 0 0 0
330 509 1 0 0 0
331 510 1 0 0 0
332 511 2 0 0 0
333 512 5 0 0 0
334 513 2 0 0 0
335 514 0 0 2 0
336 515 4 0 5 0
337 516 4 0 0 0
338 517 5 0 1 0
339 518 1 0 0 0
340 519 3 0 1 0
341 520 2 0 0 0
342 521 2 0 1 0
343 523 5 0 1 0
344 524 3 0 1 0
345 525 2 0 1 0
346 526 0 0 1 0
347 527 2 0 0 0
348 528 2 0 4 0
349 529 7 0 0 0
350 530 1 0 2 0
351 531 2 0 1 0
352 532 2 0 2 0
353 533 2 0 2 0
354 534 2 0 0 0
355 535 2 0 2 0
356 536 2 0 0 0
357 537 1 0 0 0
358 538 3 0 0 0
359 539 1 0 1 0
360 540 0 0 4 0
361 541 2 0 0 0
362 542 0 0 2 0
363 544 0 0 3 0
364 545 0 0 1 0
365 546 1 0 1 0
366 547 0 0 1 0
367 548 1 0 1 0
368 549 1 0 2 0
369 550 0 0 6 0
370 551 4 0 2 0
371 553 1 0 1 0
372 554 0 0 3 0
373 555 2 0 9 0
374 556 0 0 1 0
375 557 2 0 4 0
376 558 0 0 2 0
377 559 1 0 3 0
378 560 1 0 4 0
379 561 0 0 4 0
380 562 3 0 2 0
381 563 3 0 0 0
382 564 1 0 2 0
383 565 1 0 0 0
384 566 0 0 1 0
385 567 3 0 3 0
386 568 3 0 0 0
387 569 5 0 1 0
388 570 4 0 2 0
389 571 2 0 2 0
390 572 1 0 2 0
391 573 2 0 2 0
392 574 2 0 4 0
393 575 5 0 4 0
394 576 6 0 1 0
395 577 7 0 4 0
396 578 7 0 0 0
397 579 2 0 0 0
398 580 3 0 0 0
399 581 3 0 3 0
400 582 7 0 2 0
401 583 2 0 0 0
402 584 3 0 1 0
403 585 4 0 0 0
404 586 3 0 0 0
405 587 1 0 1 0
406 588 1 0 0 0
407 589 1 0 0 0
408 590 1 0 0 0
409 591 5 0 0 0
410 592 2 0 0 0
411 594 3 0 0 0
412 595 2 0 0 0
413 597 0 0 1 0
414 598 2 0 0 0
415 601 2 0 0 0
416 602 1 0 0 0
417 603 1 0 0 0
418 604 2 0 0 0
419 605 1 0 0 0
420 611 1 0 0 0
421 618 1 0 0 0
422 640 1 0 0 0

A table with the frequencies of the MBBs sizes displayed by Kingdom can be obtained as follows:

# Number of MBBs of each size per Kingdom. The grouping is dropped explicitly
# so the reshaping below works on an ungrouped table and summarise() does not
# emit its regrouping message.
size_MBB_df_table <- size_MBB_df %>%
  group_by(Kingdom, size) %>%
  summarise(n = n(), .groups = "drop")

# One row per size and one column per Kingdom; missing combinations count as 0.
table_MBB_size <- size_MBB_df_table %>%
  pivot_wider(names_from = Kingdom, values_from = n,
              values_fill = list(n = 0)) %>%
  arrange(size)
knitr::kable(table_MBB_size, caption = "MBB Size: Frequency by Kingdom") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  scroll_box(height = "300px", width = "100%")
MBB Size: Frequency by Kingdom
size Animals Fungi Plants Protists
1 408027 74872 109638 17773
2 7426 2464 2306 678
3 5087 464 758 303
4 2613 324 632 115
5 948 340 49 56
6 390 131 274 46
7 190 167 27 16
8 544 70 127 27
9 137 7 124 10
10 492 20 2 10
11 91 29 132 8
12 44 3 6 4
13 21 4 0 2
14 15 10 0 1
15 195 2 0 5
16 23 6 0 3
17 7 5 1 0
18 410 122 118 3
19 3 7 0 0
20 4 0 0 7
21 1 0 0 0
22 0 0 0 2
23 3 0 0 0
24 5 1 0 0
25 2 1 0 2
27 1 0 1 0
29 1 2 0 0
30 0 1 0 1
31 2 4 0 2
32 0 4 0 1
33 1 2 0 0
34 3 0 0 0
35 5 0 0 0
36 1 0 0 1
37 1 2 0 1
39 1 0 0 0
44 1 0 0 4
45 0 1 0 0
46 1 0 0 0
47 0 0 0 3
48 0 0 0 2
49 1 0 0 1
50 1 0 0 0
51 1 0 0 0
53 2 0 0 0
54 1 0 0 0
56 1 0 0 0
57 3 0 0 0
92 0 0 0 1
103 0 0 0 1
107 0 0 0 1
117 0 0 0 1
118 0 0 0 1
119 0 0 0 1
120 0 0 0 3
121 0 0 0 5
136 1 0 0 0
139 0 0 0 1
162 0 0 0 1
181 1 0 0 0
186 0 0 0 1
189 0 0 0 1
203 0 0 0 1
222 0 0 1 0
224 0 0 0 1
226 0 0 0 1
232 0 0 0 1
233 1 0 0 0
234 0 0 0 1
240 0 0 0 3
241 0 0 0 1
250 1 0 0 0
259 0 0 1 0
276 1 0 0 1
284 0 1 0 1
285 0 0 1 0
286 1 0 0 0
291 0 0 0 1
296 0 1 0 1
300 0 0 1 0
306 0 0 0 1
307 0 1 0 0
314 0 0 1 0
326 1 0 0 0
329 0 0 1 0
331 0 0 0 1
332 0 1 0 0
336 0 0 1 0
340 0 0 0 1
343 0 0 1 0
344 0 1 0 0
348 0 0 0 1
350 0 1 0 0
360 1 0 0 0
361 0 0 1 0
363 0 0 0 1
364 0 1 0 0
371 0 0 0 1
373 1 0 0 0
375 1 0 0 0
381 0 1 1 0
387 0 1 0 0
390 0 1 0 0
392 0 0 0 1
393 1 0 0 0
401 0 2 0 0
402 0 1 0 0
403 1 0 0 0
409 0 1 0 0
410 0 1 1 0
417 1 1 0 0
419 0 1 0 0
420 0 0 1 1
421 0 0 1 0
422 0 2 0 0
423 1 1 0 0
427 0 1 0 0
428 1 1 0 0
429 1 0 0 0
430 0 1 0 0
433 0 1 0 0
437 1 1 0 0
438 0 1 0 0
441 0 1 0 0
443 1 1 0 0
444 0 1 0 0
446 0 1 0 0
447 1 2 0 0
449 1 1 0 0
450 1 0 0 0
451 0 3 0 0
452 1 0 0 0
454 0 1 0 0
455 0 1 0 0
456 1 0 0 0
457 1 1 0 0
458 1 1 0 0
459 2 0 0 0
461 1 1 0 0
462 1 2 0 0
463 0 3 0 0
465 1 0 0 0
466 1 0 0 0
467 1 0 0 0
468 0 2 0 0
469 2 0 0 0
470 1 0 0 0
471 1 1 0 0
473 2 1 0 0
474 3 0 0 0
476 0 2 0 0
477 1 0 0 0
478 1 1 0 0
479 1 0 0 0
480 0 1 0 0
481 2 0 0 0
482 2 0 0 0
483 2 0 0 0
484 2 1 0 1
485 0 2 0 0
486 1 1 0 0
487 6 0 0 0
488 1 2 0 0
489 1 0 0 0
490 1 0 0 0
491 2 1 0 0
492 2 0 0 0
493 1 1 0 0
494 0 1 0 0
495 2 1 0 0
496 4 1 0 0
498 5 3 0 0
499 2 0 0 0
500 2 0 0 0
501 1 1 0 0
502 3 3 0 0
503 1 0 0 0
504 6 0 0 1
505 5 2 0 0
506 5 2 0 0
507 1 2 0 0
508 4 0 0 0
509 0 1 0 0
510 4 2 0 0
511 3 1 0 0
512 1 0 0 0
513 6 1 0 0
514 5 0 0 0
515 1 1 0 0
516 1 0 0 0
517 6 0 0 0
518 6 0 0 0
519 0 1 0 0
521 0 1 0 0
522 5 1 0 0
523 1 2 0 0
524 2 2 0 0
525 2 2 0 0
526 0 1 0 0
527 5 2 0 0
528 6 3 0 0
529 2 1 0 0
530 1 0 0 0
531 3 1 0 0
532 0 1 0 0
533 4 0 0 0
534 3 0 0 0
535 1 0 0 0
536 1 0 0 0
537 1 0 0 0
538 1 0 0 0
539 0 1 0 0
540 1 2 0 0
541 1 1 0 0
542 1 2 0 0
543 0 2 0 0
544 1 2 0 0
545 1 0 0 0
547 0 0 1 0
548 1 0 0 0
550 0 1 0 0
551 2 0 0 0
552 1 0 0 0
553 0 1 0 0
554 1 1 0 0
555 1 2 0 0
556 1 0 0 0
558 1 0 0 0
559 0 2 0 0
561 0 1 0 0
563 2 2 0 0
564 1 0 0 0
565 1 0 0 0
566 1 1 0 0
567 2 1 0 0
568 1 0 0 0
569 1 0 0 0
572 1 2 0 0
574 1 1 0 0
575 0 1 0 0
576 1 3 0 0
577 1 0 0 0
578 1 0 0 0
579 2 1 0 0
580 1 0 0 0
582 1 0 0 0
583 3 0 0 0
584 1 3 0 0
585 1 1 0 0
586 0 2 0 0
587 0 1 0 0
589 0 1 0 0
590 0 1 0 0
591 2 1 0 0
593 1 0 1 0
594 1 0 0 0
595 1 0 0 0
596 2 0 0 0
598 2 0 0 0
599 1 0 0 0
600 1 1 0 0
602 2 0 0 0
603 1 0 0 0
604 2 0 0 0
606 1 1 0 0
607 1 0 0 0
608 0 1 0 0
611 2 0 1 0
613 3 0 1 0
614 1 0 0 0
616 2 1 0 0
617 1 1 0 0
618 1 0 1 0
619 3 0 0 0
620 2 0 0 0
621 1 0 0 0
622 0 0 1 0
623 2 0 0 0
625 2 0 0 0
626 2 0 0 0
628 1 0 0 0
629 3 0 0 0
633 1 0 1 0
634 1 0 1 0
635 1 0 1 0
636 1 0 0 0
637 2 0 0 0
638 2 0 1 0
639 1 0 1 0
640 0 0 1 0
641 1 0 0 0
642 2 0 1 0
643 2 0 1 0
644 1 0 1 0
645 1 0 0 0
646 1 0 1 0
647 3 0 3 0
650 3 0 1 0
651 1 0 1 0
652 1 0 3 0
653 0 0 1 0
654 1 0 2 0
655 0 0 1 0
656 0 0 2 0
658 3 0 0 0
659 0 0 3 0
660 2 0 4 0
661 2 0 5 0
662 0 0 1 0
663 3 0 0 0
664 2 0 2 0
665 0 0 4 0
666 2 0 1 0
667 1 0 2 0
668 0 0 3 0
669 0 0 5 0
670 0 0 5 0
671 2 0 7 0
672 2 0 6 0
673 0 0 6 0
674 1 0 4 0
675 0 0 19 0
676 1 0 9 0
677 2 0 5 0
678 0 0 1 0
679 1 0 2 0
680 4 0 0 0
681 1 0 1 0
682 0 0 1 0
683 2 0 0 0
684 3 0 0 0
685 3 0 0 0
686 2 0 0 0
687 1 0 0 0
689 1 0 0 0
692 1 0 0 0
693 4 0 0 0
695 4 0 0 0
696 3 0 0 0
697 4 0 0 0
698 4 0 0 0
699 4 0 0 0
700 10 0 0 0
701 5 0 0 0
702 9 0 0 0
703 8 0 0 0
704 19 0 0 0
705 19 0 0 0
706 7 0 0 0
707 6 0 0 0
708 4 0 0 0
709 4 0 0 0
710 20 0 0 0
711 6 0 0 0
712 17 0 0 0
713 9 0 0 0
714 10 0 0 0
715 4 0 0 0
716 5 0 0 0
717 3 0 0 0
718 8 0 0 0
719 4 0 0 0
721 2 0 0 0
723 1 0 0 0
724 2 0 0 0
726 1 0 0 0
757 1 0 0 0

3.4.1 More statistics

The following plots show the sizes of the weakly connected components grouped by Kingdom.

# Convert Kingdom to a factor before it is used as the x variable and colour
# in the plots below.
weak_components_size$Kingdom <- factor(weak_components_size$Kingdom)
#weak_components_size


# Jitter plot of every weak component size, one column of points per Kingdom.
g <- ggplot(weak_components_size) +
  xlab("") +  # No title on the x axis
  ylab("Size of connected component") +  # y axis label
  geom_jitter(aes(x = Kingdom, y = csize, color = Kingdom),
             size = 1) +  # Colour the points by 'Kingdom' and set the point size to 1
  scale_y_continuous(breaks = seq(0, 640, by = 100)) +  # y axis breaks every 100, from 0 to 640
  theme_minimal() + # Minimal theme
  ggtitle("")  # Empty plot title
  g

# Size of the biggest weak component of every organism, with its Kingdom.
weak_components_size_max = weak_components_size %>%
  group_by(Organism) %>%
  summarise(csize = max(csize),Kingdom=first(Kingdom))
# Build the violin plot with jittered points on top
g <- ggplot(weak_components_size_max, aes(x = Kingdom, y = csize)) +
  geom_violin(aes(fill = Kingdom), trim = FALSE, alpha = 0.5) +  # Violin filled by 'Kingdom', not trimmed
  geom_jitter(aes(color = Kingdom), size = 1, width = 0.2) +  # Jittered points to reduce overlap
  xlab("") +  # No title on the x axis
  ylab("Size of biggest connected component") +  
  scale_y_continuous(breaks = seq(0, 640, by = 100)) +  
  theme_minimal() +  # theme background white
  ggtitle("")  #
#Size of the largest weakly connected component by Kingdom \n (Violin Plot with Jittered Points)
# Display the plot
g