-
,
. .
: . .
-
2007
2
3
1. 4
1.1. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4
1.2. . . . . . . . . . . . . . . . . . . . . . . . . 5
1.3. . . . . . . . . . . . . . . . . . . . . . . . . 7
1.4. . . . . . . . . . . . . . . . . . . . . . . . . . 7
1.5. . . . . . . . . . . . . . . . . . . . . . . . 9
1.6. . . . . . . . . . . . . . . . . . . . . . . 10
1.7. . . . . . . . . . . . . . . . . . 12
2. -
15
2.1. . . . . . . 15
2.2. 16
2.3. . . 17
2.4. 19
2.5. 22
3.
25
3.1. -
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25
3.1.1. . . . . . . . . . . . . . . . . . . . . 26
3.1.2. . . . . . . . . . . . . . . . . . . . . 27
3.1.3. . . . . . . . . . . . . . . . . . . . 27
3.1.4. . . . . . . . . . . . . . . . . 27
3.2. . . . 28
34
35
3
-
[1]. -
[1]
.
-
, -
[1]. ,
[2], - [3] [4]
[1] .
,
.
. ,
(-
[5],
- [5]),
-
.
, -
,
, .
,
-
. ,
.
4 1.
1.1.
1. [5, . 3] , -
.
. .
$\Sigma^{+} = \Sigma^{*} \setminus \{\varepsilon\}$.
2. [5, . 3] w, |w|
w, w.
3. [5, . 4] w
u, x y, u = xwy.
4. [5, . 4] w
u, y, u = wy.
5. [5, . 4] w
u, x, u = xw.
6. x
Fact(x).
7. x
Suff(x).
8. [5, . 4] k w
w[1 . . . k].
9. k- w x[k].
51.2.
10. [5, . 116] x
y $R_y(x) = x^{-1}\,\mathrm{Suff}(y) = \{\, s \mid xs \in \mathrm{Suff}(y) \,\}$.
, x y
y,
x.
11. [5, . 116] u v -
y ($u \equiv_y v$), $R_y(u) = R_y(v)$.
12. [5, . 116] u y
u y end posy(u).
1. [5, 2.3.1] u, v Fact(y) |u| |v|. :
u v, Ry(v) Ry(u);
Ry(v) = Ry(u), end posy(u) = end posy(v) u Suff(v).
2. [5, 2.3.2] u, v, w Fact(y). u
v, v w, u y w, u y v y w.
3. [5, 2.3.3] u, v .
u v ,
:
$R_y(u) \subseteq R_y(v)$;
$R_y(v) \subseteq R_y(u)$;
$R_y(u) \cap R_y(v) = \emptyset$.
-
, (-
).
13.
C y $\mathrm{Repr}_y(C) = \{\, x \in \mathrm{Fact}(y) \mid R_y(x) = C \,\}$.
-
.
6 14.
C y Repry(C), -
.
15.
C y Repry(C), -
.
4.
,
. :
wmax wmin -
C y. , s Repry(C)
, s wmax |s| |wmin|.
s Repry(C). , s Suff(wmax) |s| |wmin|.
14 wmax Repry(C) |wmax| |s|. 1
, s Suff(wmax). , 15
|s| |wmin|, .
s Suff(wmax) |s| |wmin|. , s Repry(C).
15 1 wmin Suff(s). 14
15 , wmax y wmin. , 2 wmax y s y wmin,
.
.
5. -
.
Ry(x). wmax
, lenmin . ,
4 $\mathrm{Repr}_y(R_y(x)) = \{\, z \mid z \in \mathrm{Suff}(w_{max}),\ |z| \ge len_{min} \,\}$.
6. C len -
y len, , Rw(y) = C.
, y1 y2 len ,
C = Rw(y1) = Rw(y2). , y1 = y2. 1 y1
Suff(y2), y2 Suff(y1). , y1 y2
, , , y1 = y2,
.
71.3.
16. [5, . 118] Fact(w)
sw, w,
x Fact(w) \ {} x
x.
7. x2 = sw(x1). -
Rw(x2) Rw(x1).
16 , x2 x1
|x2| < |x1|.
y, x1, -
x2. y = x1[|x1| |x2|
1 . . . |x1|].
16 , y w x1 x2
Rw(x2). 2 , y 6w x1, ,
y Rw(y) = Rw(x1).
|y| = 1 + |x2|, .
8. x Fact(w) -
Rw(x) Rw(sw(x)).
16 sw(x) x, |sw(x)| < |x|
$R_w(x) \ne R_w(s_w(x))$.
-
1.
1.4.
17. T w -
. -
( ). ,
, . |w|+1
, , . -
, (-
)
() w.
8 18. [1, . 122]
,
.
.
19. [1, . 122] v -
v v.
20. v -
.
21. u
RT (u) , -
u , -
u.
22. , v
Rw(x), RT (v) = Rw(x).
23. v u ,
RT (v) = RT (u).
. 1 aabb. -
.
b b
b b
b b
a
a
. 1. aabb
9. v -
v.
9 v x. , RT (v) =
Rw(x).
, y RT (v) , y Rw(x). y
RT (v) , xy w, 10
, y Rw(x).
10. w -
, , .
Rw(x) . Rw(x) ,
x w. , x w, ,
v x.
9.
1.5.
() ([5,
. 108], [1, . 121]). -
.
24. [5, . 108]
w, , ,
, .
. 2 aabb.
.
25. [1, . 122] ,
(u, v), (u, v) .
u (u, v)
, .
. 10 ,
w
( ).
10
abb
bb
b b
a
. 2. aabb
1.6.
26. [5, . 121] A w
-
, w .
. 3 aabb.
b
b
b
b b
a
a
. 3. aabb
27. , s -
Rw(x), RA(s) = Rw(x).
11. [5]
-
w.
11
, , -
, ,
.
12. , -
s, -
.
, -
, -
26, 13 9.
28. s , -
x.
s , s -
, s(x).
suffix(s).
29. s
reprmax(s).
. , -
([5, . 126]) ( -
- -
), , ,
, -
.
13. s -
. s
1 + reprmax(suffix(s)).
28 29,
7.
14. x -
s1 . s2 ,
, x[2 . . . |x|].
:
|x| = reprmax(suffix(s1)) + 1, s2 suffix(s1)
12
|x| > reprmax(suffix(s1)) + 1, s2 s1
, |x| > reprmax(suffix(s1))+1. 28
16 , x[2 . . . |x|] Rw(x). -
, , x[2 . . . |x|],
s1.
, |x| = reprmax(suffix(s1))+1. ,
x[2 . . . |x|] Rw(sw(x)), 16 -
, , x[2 . . . |x|],
suffix(s1).
30. [5, . 132] , s -
, -
:
s
s
1.7.
31. [5, . 132] A
w +,
, -
.
. 4 aabb.
b b
a bb
abb
. 4. aabb
13
15. A w.
A x s1 s2.
$x = w[\,|w| - |y| - |x| + 1 \ldots |w| - |y|\,]$, $y \in R_A(s_2)$.
, A x
s1 s2 y RA(s2), , xy RA(s1). ,
, , xy Suff(w).
xy w,
$xy = w[\,|w| - |xy| + 1 \ldots |w|\,]$.
, $x = w[\,|w| - |xy| + 1 \ldots |w| - |y|\,] = w[\,|w| - |x| - |y| + 1 \ldots |w| - |y|\,]$, .
32. $\mathrm{longest}(s) = \max_{x \in R_A(s)} |x|$, -
, , -
s.
33. -
fork, :
s , fork(s) = s
s , fork(s) = t, t -
, s
16. w.
c s1 s2. :
w -
s1 fork(s2) y,
c.
$y = w[\,|w| - \mathrm{longest}(s_2) \ldots |w| - \mathrm{longest}(\mathrm{fork}(s_2))\,]$.
, s2
fork(s2), , , , -
.
-
31.
, 1 +
longest(s2) = |y|+ longest(fork(s2)), 15.
14
, w -
fork longest,
O(|w|).
15
2.
2.1.
34. , u
s , RT (u) = RA(s).
. ,
-
, -
.
17. w -
w.
-
26.
s
, .
.
.
,
.
6, -
. , ,
16
, s,
.
, s
( ).
.
18. -
s, |r|, s
, r s.
2.2.
19. [5, . 132] -
.
20. [5, . 132]
w
w.
21. -
,
.
19.
. ,
-
.
, -
, ,
s, |r|, s
, r s.
17
2.3.
.
-
.
1 public SuffixTrie buildTrie(SuffixAutomaton auto)
{
2 SuffixTrie = new SuffixTrie();
3 int root = walk(trie , auto , auto.
getStartState ());
4 trie.setRootNode(root);
5 return trie;
6 }
7
8 private int walk(SuffixTrie trie ,
9 SuffixAutomaton auto ,
10 int state)
11 {
12 int node = trie.createNode();
13 if (auto.isFinal(state)) {
14 trie.setLeaf(node , true);
15 }
16 for ( Edge edge: auto.getEdges(state)) {
17 int newState = fork(edge.getTarget());
18 int newNode = walk(trie , auto , newState);
19 trie.addEdge(node , newNode , edge.getChar
());
18
20 }
21 return node;
22 }
, buildTrie(A)
A. , walk(T , A, s)
T , s A,
.
-
x, x RA(s) , x
walk .
.
x = . RA(s) , s -
. 13-15 , node
, RA(s).
.
, x 6= . RA(s) -
, A,
s.
, x RA(s), -
T x.
x RA(s) , A, s, -
x. , , , A
s x[1]. ,
16-20. , -
newNode x[2 . . . |x|]. ,
x.
, x 6 RA(s), -
T x.
. , -
x. , node
x[1]. 19. -
, newNode
x[2 . . . |x|], ,
A x[2 . . . |x|], s2, -
, x, s,
19
x 6 RA(s). , ,
node x .
. , -
O(n), n .
2.4.
.
.
1 public SuffixTree buildTree(
CompactSuffixAutomaton auto) {
2 SuffixTree = new SuffixTree();
3 int root = walk(tree , auto , auto.
getStartState ());
4 tree.setRootNode(root);
5 return tree;
6 }
7
8 private int walk(SuffixTree tree ,
9 CompactSuffixAutomaton auto ,
10 int state)
11 {
12 int node = tree.createNode();
13 if (auto.isFinal(state)) {
14 tree.setLeaf(node , true);
15 }
20
16 for ( Edge edge: auto.getEdges(state)) {
17 int newNode = walk(tree , auto , edge.
getTarget());
18 tree.addEdge(node , newNode ,
19 edge.getBegin(),
20 edge.getEnd ());
21 }
22 return node;
23 }
, ,
, , . ,
. ,
, , -
, .
.
22. ,
walk, , .
, node, walk,
, , A
state .
, . ,
, , 13-15 node
, .
. O(n),
n .
, ,
.
, , -
, ,
,
w.
21
15, -
w -
.
.
fork longest.
2.
-
.
1 public SuffixTree buildTree(SuffixAutomaton auto)
{
2 SuffixTree = new SuffixTree();
3 int root = walk(tree , auto , auto.
getStartState ());
4 tree.setRootNode(root);
5 return tree;
6 }
7
8 private int walk(SuffixTree tree ,
9 SuffixAutomaton auto ,
10 int state)
11 {
12 int node = tree.createNode();
13 if (auto.isFinal(state)) {
14 tree.setLeaf(node , true);
15 }
16 for ( Edge edge: auto.getEdges(state)) {
17 int newState = fork(edge.getTarget());
22
18 int b = length () - longest(edge.getTarget
()) - 1;
19 int e = length () - longest(newState);
20 int newNode = walk(tree , auto , newState);
21 tree.addEdge(node , newNode , b, e);
22 }
23 return node;
24 }
, .
2.5.
,
w :
w O(|w|);
, s
fork(s) longest(s);
, -
.
, w
:
;
,
.
, -
, .
23
, , -
O(|w|) fork
longest, , , .
. 1 , -
.
,
.
1.
{a, b}
. .
100000 0.177 0.125
200000 0.388 0.266
300000 0.596 0.428
400000 0.819 0.580
500000 1.035 0.730
600000 1.264 0.884
700000 1.470 1.047
800000 1.694 1.216
900000 1.923 1.342
1000000 2.153 1.502
.
-
, -
24
. ,
,
.
, ,
,
.
25
3.
-
. , -
. , -
.
3.1.
, :
;
, ;
;
;
.
-
.
,
-
, .
, -
, , .
26
, O(|w|) -
. ,
, .
-
. -
.
, -
. -
. v1 v2
, :
v1 v2 s1 s2 ,
s1 s2;
v1 v2 ,
v1 v2.
, .
, -
num , ,
lenmin ,
len
num + len lenmin.
, O(|w|) -
O(1).
.
3.1.1.
, v -
s , RT (v) =
Suff(w) = RA(s).
, , -
, 0 , -
.
27
3.1.2.
, -
.
v
, RT (v).
s ,
v. v ,
RA(s). , , , s -
.
, v -
, s
.
-
,
O(1).
3.1.3.
.
v .
14 , :
len v
s, ,
s, len 1;
-
s, , -
suffix(s), len 1.
, O(1).
3.1.4.
v
, c.
28
x v.
v s -
.
s
c, xc w, , ,
, v ,
c.
,
s t c. f = fork(t). -
31 33 ,
s f .
, 21 16 , -
s f -
y, c. -
, $y = w[\,|w| - \mathrm{longest}(s_2) \ldots |w| - \mathrm{longest}(\mathrm{fork}(s_2))\,]$.
, fork longest,
v , -
c, , O(1)
c,
.
3.2.
(LCP ) .
1 s , -
.
si =
s[i . . . |s|] sj = s[j . . . |s|].
,
O(|s|) , -
O(log |s|) O(1).
29
,
:
;
;
k ;
;
.
LCP
.
s. , -
.
-
.
s,
.
, ,
-
.
, ,
,
3.1.
[6],
O(log |s|).
:
s;
30
-
.
, -
;
.
:
t1 t2
;
, -
, t1 t2;
.
, ,
. , :
s
;
.
-
2.
:
1 public void init(String str) {
2 globalTime = 0;
3 time2depth = new int[4 * str.length () + 1];
4 suffix2time = new int[str.length () + 1];
5 SuffixAutomaton auto = new SuffixAutomaton (
str);
6 walk(auto , auto.getStartState () , 0);
31
7 }
8
9 private void walk(SuffixAutomaton auto ,
10 int state ,
11 int depth)
12 {
13 if (auto.isFinal(state)) {
14 suffix2time[depth ] = globalTime;
15 }
16 time2depth[globalTime ++] = depth;
17 for ( Edge edge: auto.getEdges(state)) {
18 int target = edge.getTarget();
19 int nextState = fork(target);
20 dfs(nextState , depth + 1 +
21 longest(target) - longest(nextState))
;
22 time2depth[globalTime ++] = depth;
23 }
24 }
, -
time2depth suffix2time. , walk
.
, -
. ,
walk node -
.
node , -
state . , node -
, , depth.
14 suffix2time , -
depth.
32
16 time2depth -
node.
17 - 23 node
. node, -
time2depth 22.
. 2 ,
, 1.3 - 1.9 .
2. -
LCP .
O(log |w|).
{a, b}
. .
100000 0.163 0.234
200000 0.323 0.495
300000 0.511 0.769
400000 0.683 1.017
500000 0.852 1.289
600000 1.039 1.552
700000 1.213 1.826
800000 1.381 2.103
900000 1.563 2.381
1000000 1.735 2.631
-
,
.
33
, -
,
. -
.
34
.
-
.
-
( -) -
, . , -
. -
,
, .
, -
. -
,
.
,
,
-
.
-
. , -
.
, -
.
, :
, ,
. , -
, , -
.
35
1. . , . -
. .: ; -
, 2003.
2. Weiner P. Linear pattern matching algorithms // Proc. of the 14th IEEE
Symp. on Switching and Automata Theory. 1973, pp. 1-11.
3. McCreight E. M. A space-economical suffix tree construction algorithm // J.
ACM. 1976. Vol. 23, pp. 262-272.
4. Ukkonen E. On-line construction of suffix trees // Algorithmica. 1995. Vol. 14,
pp. 249-260.
5. Lothaire M. Applied Combinatorics on Words // Encyclopedia of
Mathematics and its Applications, 2005. Vol. 90. Cambridge University
Press, Cambridge.
6. Bender M., Farach-Colton M. The LCA Problem Revisited / LATIN 2000,
pp. 88-94.
7. ., ., . -
, . .: , 2002.
8. ., ., . : .
.: , 2000.
9. Sartaj Sahni Dr. Data Structures, Algorithms, & Applications in
Java. Suffix Trees. CISE Department Chair at University of Florida.
http://www.cise.ufl.edu/~sahni/dsaaj/enrich/c16/suffix.htm
10. Edelkamp S. Suffix tree // Dictionary of Algorithms and Data
Structures. U.S. National Institute of Standards and Technology. 2007.
http://www.nist.gov/dads/HTML/suffixtree.html
36
11. Blumer A., Blumer J., Ehrenfeucht A., Haussler D., McConnell R. Linear
size finite automata for the set of all subwords of a word: an outline of results
// Bull. Eur. Assoc. Theoret. Comput. Sci. 1983. Vol. 21, pp. 12-20.
12. Blumer A., Blumer J., Ehrenfeucht A., Haussler D., McConnell R. The
smallest automaton recognizing the subwords of a text // Bull. Eur. Assoc.
Theoret. Comput. Sci. 1985. Vol. 40(1), pp. 31-55.
13. Eilenberg S. Automata, Languages, and Machines. Vol A. Academic Press.
1974.
14. Kuich W., Salomaa A. Semirings, Automata, Languages. Springer-Verlag.
1986.
15. Alonso L., Remy J. L., Schott R. A linear-time algorithm for the generation
of trees // Algorithmica. 1997. Vol 17(2), pp. 162-182.
16. Devroye L., Szpankowski W., Rais B. A note on the height of suffix trees
// SIAM J. Comput. 1992. Vol. 21, pp. 48-53.
17. Farach M. Optimal suffix tree construction with large alphabets // In 38th
Foundations of Computer Science (FOCS). 1997, pp. 137-143.
Top Related