OpenShot Library | libopenshot-audio 0.2.0
juce_TextDiff.cpp
1/*
2 ==============================================================================
3
4 This file is part of the JUCE library.
5 Copyright (c) 2017 - ROLI Ltd.
6
7 JUCE is an open source library subject to commercial or open-source
8 licensing.
9
10 The code included in this file is provided under the terms of the ISC license
11 http://www.isc.org/downloads/software-support-policy/isc-license. Permission
12 To use, copy, modify, and/or distribute this software for any purpose with or
13 without fee is hereby granted provided that the above copyright notice and
14 this permission notice appear in all copies.
15
16 JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
17 EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
18 DISCLAIMED.
19
20 ==============================================================================
21*/
22
23namespace juce
24{
25
27{
28 enum { minLengthToMatch = 3,
29 maxComplexity = 16 * 1024 * 1024 };
30
32 {
34 : text (s.getCharPointer()), start (0), length (s.length()) {}
35
37 : text (t), start (s), length (len) {}
38
39 void incrementStart() noexcept { ++text; ++start; --length; }
40
42 int start, length;
43 };
44
45 static void addInsertion (TextDiff& td, String::CharPointerType text, int index, int length)
46 {
48 c.insertedText = String (text, (size_t) length);
49 c.start = index;
50 c.length = 0;
51 td.changes.add (c);
52 }
53
54 static void addDeletion (TextDiff& td, int index, int length)
55 {
57 c.start = index;
58 c.length = length;
59 td.changes.add (c);
60 }
61
62 static void diffSkippingCommonStart (TextDiff& td, StringRegion a, StringRegion b)
63 {
64 for (;;)
65 {
66 auto ca = *a.text;
67 auto cb = *b.text;
68
69 if (ca != cb || ca == 0)
70 break;
71
72 a.incrementStart();
73 b.incrementStart();
74 }
75
76 diffRecursively (td, a, b);
77 }
78
79 static void diffRecursively (TextDiff& td, StringRegion a, StringRegion b)
80 {
81 int indexA = 0, indexB = 0;
82 auto len = findLongestCommonSubstring (a.text, a.length, indexA,
83 b.text, b.length, indexB);
84
85 if (len >= minLengthToMatch)
86 {
87 if (indexA > 0 && indexB > 0)
88 diffSkippingCommonStart (td, StringRegion (a.text, a.start, indexA),
89 StringRegion (b.text, b.start, indexB));
90 else if (indexA > 0)
91 addDeletion (td, b.start, indexA);
92 else if (indexB > 0)
93 addInsertion (td, b.text, b.start, indexB);
94
95 diffRecursively (td, StringRegion (a.text + (indexA + len), a.start + indexA + len, a.length - indexA - len),
96 StringRegion (b.text + (indexB + len), b.start + indexB + len, b.length - indexB - len));
97 }
98 else
99 {
100 if (a.length > 0) addDeletion (td, b.start, a.length);
101 if (b.length > 0) addInsertion (td, b.text, b.start, b.length);
102 }
103 }
104
105 static int findLongestCommonSubstring (String::CharPointerType a, const int lenA, int& indexInA,
106 String::CharPointerType b, const int lenB, int& indexInB) noexcept
107 {
108 if (lenA == 0 || lenB == 0)
109 return 0;
110
111 if (lenA * lenB > maxComplexity)
112 return findCommonSuffix (a, lenA, indexInA,
113 b, lenB, indexInB);
114
115 auto scratchSpace = sizeof (int) * (2 + 2 * (size_t) lenB);
116
117 if (scratchSpace < 4096)
118 {
119 auto* scratch = (int*) alloca (scratchSpace);
120 return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
121 }
122
123 HeapBlock<int> scratch (scratchSpace);
124 return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
125 }
126
127 static int findLongestCommonSubstring (String::CharPointerType a, const int lenA, int& indexInA,
128 String::CharPointerType b, const int lenB, int& indexInB,
129 const size_t scratchSpace, int* const lines) noexcept
130 {
131 zeromem (lines, scratchSpace);
132
133 auto* l0 = lines;
134 auto* l1 = l0 + lenB + 1;
135
136 int loopsWithoutImprovement = 0;
137 int bestLength = 0;
138
139 for (int i = 0; i < lenA; ++i)
140 {
141 auto ca = a.getAndAdvance();
142 auto b2 = b;
143
144 for (int j = 0; j < lenB; ++j)
145 {
146 if (ca != b2.getAndAdvance())
147 {
148 l1[j + 1] = 0;
149 }
150 else
151 {
152 auto len = l0[j] + 1;
153 l1[j + 1] = len;
154
155 if (len > bestLength)
156 {
157 loopsWithoutImprovement = 0;
158 bestLength = len;
159 indexInA = i;
160 indexInB = j;
161 }
162 }
163 }
164
165 if (++loopsWithoutImprovement > 100)
166 break;
167
168 std::swap (l0, l1);
169 }
170
171 indexInA -= bestLength - 1;
172 indexInB -= bestLength - 1;
173 return bestLength;
174 }
175
176 static int findCommonSuffix (String::CharPointerType a, int lenA, int& indexInA,
177 String::CharPointerType b, int lenB, int& indexInB) noexcept
178 {
179 int length = 0;
180 a += lenA - 1;
181 b += lenB - 1;
182
183 while (length < lenA && length < lenB && *a == *b)
184 {
185 --a;
186 --b;
187 ++length;
188 }
189
190 indexInA = lenA - length;
191 indexInB = lenB - length;
192 return length;
193 }
194};
195
197{
198 TextDiffHelpers::diffSkippingCommonStart (*this, original, target);
199}
200
202{
203 for (auto& c : changes)
204 text = c.appliedTo (text);
205
206 return text;
207}
208
213
214String TextDiff::Change::appliedTo (const String& text) const noexcept
215{
216 return text.replaceSection (start, length, insertedText);
217}
218
219//==============================================================================
220//==============================================================================
221#if JUCE_UNIT_TESTS
222
223class DiffTests : public UnitTest
224{
225public:
226 DiffTests() : UnitTest ("TextDiff class", "Text") {}
227
228 static String createString (Random& r)
229 {
230 juce_wchar buffer[500] = { 0 };
231
232 for (int i = r.nextInt (numElementsInArray (buffer) - 1); --i >= 0;)
233 {
234 if (r.nextInt (10) == 0)
235 {
236 do
237 {
238 buffer[i] = (juce_wchar) (1 + r.nextInt (0x10ffff - 1));
239 }
240 while (! CharPointer_UTF16::canRepresent (buffer[i]));
241 }
242 else
243 buffer[i] = (juce_wchar) ('a' + r.nextInt (3));
244 }
245
246 return CharPointer_UTF32 (buffer);
247 }
248
249 void testDiff (const String& a, const String& b)
250 {
251 TextDiff diff (a, b);
252 auto result = diff.appliedTo (a);
253 expectEquals (result, b);
254 }
255
256 void runTest() override
257 {
258 beginTest ("TextDiff");
259
260 auto r = getRandom();
261
262 testDiff (String(), String());
263 testDiff ("x", String());
264 testDiff (String(), "x");
265 testDiff ("x", "x");
266 testDiff ("x", "y");
267 testDiff ("xxx", "x");
268 testDiff ("x", "xxx");
269
270 for (int i = 1000; --i >= 0;)
271 {
272 auto s = createString (r);
273 testDiff (s, createString (r));
274 testDiff (s + createString (r), s + createString (r));
275 }
276 }
277};
278
279static DiffTests diffTests;
280
281#endif
282
283} // namespace juce
Holds a resizable array of primitive or copy-by-value objects.
Definition juce_Array.h:60
void add(const ElementType &newElement)
Appends a new element at the end of the array.
Definition juce_Array.h:375
Wraps a pointer to a null-terminated UTF-8 character string, and provides various methods to operate ...
A random number generator.
Definition juce_Random.h:39
int nextInt() noexcept
Returns the next random 32 bit integer.
The JUCE String class!
Definition juce_String.h:43
bool isEmpty() const noexcept
Returns true if the string contains no characters.
CharPointer_UTF8 CharPointerType
This is the character encoding type used internally to store the string.
Calculates and applies a sequence of changes to convert one text string into another.
TextDiff(const String &original, const String &target)
Creates a set of diffs for converting the original string into the target.
Array< Change > changes
The list of changes required to perform the transformation.
String appliedTo(String text) const
Applies this sequence of changes to the original string, producing the target string that was specifi...
This is a base class for classes that perform a unit test.
Describes a change, which can be either an insertion or deletion.
String appliedTo(const String &original) const noexcept
Returns the result of applying this change to a string.
String insertedText
If this change is a deletion, this string will be empty; otherwise, it'll be the text that should be ...
bool isDeletion() const noexcept
Returns true if this change is a deletion, or false for an insertion.