Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
GenDB3
lib.intervals
Commits
983e9275
Commit
983e9275
authored
Jan 13, 2014
by
Lukas Jelonek
Browse files
Added an interval based sequence cache that allows lazy loading of arbitrary sequences
parent
bb3f6673
Changes
12
Hide whitespace changes
Inline
Side-by-side
pom.xml
View file @
983e9275
...
...
@@ -65,6 +65,12 @@
<version>
1.3
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
org.mockito
</groupId>
<artifactId>
mockito-all
</artifactId>
<version>
1.9.5
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
com.google.guava
</groupId>
<artifactId>
guava
</artifactId>
...
...
src/main/java/de/cebitec/common/sequencetools/intervals/cache/CacheCheckResult.java
0 → 100644
View file @
983e9275
/*
* Copyright (C) 2013 Lukas Jelonek <ljelonek at cebitec.uni-bielefeld.de>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package
de.cebitec.common.sequencetools.intervals.cache
;
import
de.cebitec.common.sequencetools.intervals.Interval
;
import
java.util.Collections
;
import
java.util.List
;
/**
 * Result of checking whether an interval is completely covered by the cache.
 * <p>
 * The result wraps the list of parts that are not cached. An empty list means that the checked interval is
 * completely cached.
 *
 * @author Lukas Jelonek <ljelonek at cebitec.uni-bielefeld.de>
 */
class CacheCheckResult {

    /** The parts that are missing from the cache; never null. */
    private final List<Interval<Integer>> list;

    /**
     * Creates a result for the given missing parts.
     *
     * @param list The parts that are not cached. Must not be null; pass an empty list if nothing is missing.
     * @throws NullPointerException If <code>list</code> is null.
     */
    CacheCheckResult(List<Interval<Integer>> list) {
        if (list == null) {
            throw new NullPointerException("list must not be null");
        }
        this.list = list;
    }

    /**
     * Returns the parts that are missing from the cache.
     *
     * @return The list handed to the constructor (not a copy); never null, empty if everything is cached.
     */
    List<Interval<Integer>> getMissingIntervals() {
        // The constructor rejects null, so no null fallback is needed here.
        return list;
    }

    /**
     * Tells whether the checked interval is completely cached.
     *
     * @return true if there are no missing parts.
     */
    boolean isCached() {
        return list.isEmpty();
    }
}
src/main/java/de/cebitec/common/sequencetools/intervals/cache/IntervalCache.java
0 → 100644
View file @
983e9275
/*
* Copyright (C) 2013 Lukas Jelonek <ljelonek at cebitec.uni-bielefeld.de>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package
de.cebitec.common.sequencetools.intervals.cache
;
import
de.cebitec.common.sequencetools.intervals.Interval
;
/**
 * An IntervalCache avoids loading whole, possibly large, contiguous sequences of data when only (small) parts of them
 * are needed at a time. An IntervalCache needs at least three callbacks to work: a fetch callback that resolves the
 * lazily loaded parts of the sequence, a merge callback that joins adjacent parts into one sequence, and an extract
 * callback that retrieves subsequences.
 * <p>
 * Examples for sequences are lists or strings.
 *
 * @see IntervalCaches
 *
 * @param <T> The sequence type
 * @author Lukas Jelonek <ljelonek at cebitec.uni-bielefeld.de>
 */
public interface IntervalCache<T> {

    /**
     * Callback that loads a part of the sequence.
     *
     * @param <T> The sequence type
     */
    interface FetchCallback<T> {

        /**
         * Retrieves a part of the sequence.
         *
         * @param interval The part of the sequence to load.
         * @return The loaded part.
         * @throws IndexOutOfBoundsException If a non-existent part of the cached sequence is requested.
         */
        T fetch(Interval<Integer> interval) throws IndexOutOfBoundsException;
    }

    /**
     * Callback that joins two neighbouring parts of the sequence.
     *
     * @param <T> The sequence type
     */
    interface MergeCallback<T> {

        /**
         * Merges two adjacent sequences and returns the merged sequence.
         *
         * @param left The part that comes first.
         * @param right The part that directly follows <code>left</code>.
         * @return The merged sequence.
         */
        T merge(T left, T right);
    }

    /**
     * Callback that cuts a subsequence out of a sequence.
     *
     * @param <T> The sequence type
     */
    interface ExtractCallback<T> {

        /**
         * Extracts a subinterval from a sequence.
         *
         * @param interval The subinterval in the coordinate system of the <code>whole</code> sequence.
         * @param whole The sequence to extract from.
         * @return The extracted subsequence.
         */
        T extract(Interval<Integer> interval, T whole);
    }

    /**
     * Callback that measures a sequence.
     *
     * @param <T> The sequence type
     */
    interface LengthCallback<T> {

        /**
         * Calculates the length of a sequence.
         *
         * @param value The sequence to measure.
         * @return The length of <code>value</code>.
         */
        int length(T value);
    }

    /**
     * Retrieves a part of the cached sequence.
     *
     * @param interval The part of the sequence that is requested.
     * @return The requested subsequence.
     */
    T get(Interval<Integer> interval);

    /**
     * Clears the cache.
     */
    void invalidate();
}
src/main/java/de/cebitec/common/sequencetools/intervals/cache/IntervalCacheImpl.java
0 → 100644
View file @
983e9275
/*
* Copyright (C) 2013 Lukas Jelonek <ljelonek at cebitec.uni-bielefeld.de>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package
de.cebitec.common.sequencetools.intervals.cache
;
import
de.cebitec.common.sequencetools.intervals.Interval
;
import
static
de
.
cebitec
.
common
.
sequencetools
.
intervals
.
Intervals
.*;
import
java.util.Collections
;
import
java.util.Iterator
;
import
java.util.List
;
import
java.util.NavigableMap
;
import
java.util.SortedMap
;
import
java.util.TreeMap
;
/**
 * {@link IntervalCache} implementation that keeps the cached parts of a sequence in a {@link NavigableMap}, keyed by
 * the start position of each part.
 * <p>
 * Every part is wrapped in an {@link IntervalCacheItem}, which references its data only weakly to cope with small
 * memory footprint requirements. While the cache works on a range it locks the affected items (they then hold a
 * strong reference to their data) and unlocks them afterwards, also when the operation fails. Locks are not counted:
 * a single unlock releases an item no matter how often it was locked before.
 * <p>
 * NOTE(review): access to the underlying map is not synchronized, so instances presumably have to be confined to a
 * single thread - confirm against the callers.
 *
 * @param <T> The type of the cached sequences.
 * @author Lukas Jelonek <ljelonek at cebitec.uni-bielefeld.de>
 */
class IntervalCacheImpl<T> implements IntervalCache<T> {

    /** The cached items, keyed by the start position of the interval they were added with. */
    private final NavigableMap<Integer, IntervalCacheItem<T>> map = new TreeMap<>();
    private final T defaultValue;
    private final IntervalCache.FetchCallback<T> fetchCallback;
    private final IntervalCache.MergeCallback<T> mergeCallback;
    private final IntervalCache.ExtractCallback<T> extractCallback;
    private final IntervalCache.LengthCallback<T> lengthCallback;

    /**
     * Creates an empty cache.
     *
     * @param defaultValue The value that is returned for empty intervals. May be null.
     * @param fetchCallback The callback that fetches the needed sequences. Must not be null.
     * @param mergeCallback The callback that merges the needed sequences. Must not be null.
     * @param extractCallback The callback that extracts subsequences. Must not be null.
     * @param lengthCallback The callback that calculates the length of a sequence. May be null, in which case the
     * length of added values is not validated.
     * @throws NullPointerException If one of the mandatory callbacks is null.
     */
    IntervalCacheImpl(T defaultValue, FetchCallback<T> fetchCallback, MergeCallback<T> mergeCallback,
            ExtractCallback<T> extractCallback, LengthCallback<T> lengthCallback) {
        if (fetchCallback == null || mergeCallback == null || extractCallback == null) {
            throw new NullPointerException("fetchCallback, mergeCallback and extractCallback must not be null");
        }
        this.defaultValue = defaultValue;
        this.fetchCallback = fetchCallback;
        this.mergeCallback = mergeCallback;
        this.extractCallback = extractCallback;
        this.lengthCallback = lengthCallback;
    }

    /**
     * Adds an unlocked value with the given interval to the underlying map.
     *
     * @param interval The interval that is covered by the value.
     * @param value The data for the interval.
     * @throws IllegalArgumentException See {@link #addValue(Interval, Object, boolean)}.
     */
    void addValue(Interval<Integer> interval, T value) {
        addValue(interval, value, false);
    }

    /**
     * Adds a value with the given interval to the underlying map.
     *
     * @param interval The interval that is covered by the value.
     * @param value The data for the interval.
     * @param lock If true the new item is locked against garbage collection before it is stored.
     * @throws IllegalArgumentException If a length callback is present and reports a length for the value that differs
     * from the length of the interval, or if the interval overlaps an item that is already cached.
     */
    void addValue(Interval<Integer> interval, T value, boolean lock) {
        if (lengthCallback != null && interval.getLength() != lengthCallback.length(value)) {
            throw new IllegalArgumentException("Value and interval must have the same length.");
        }
        // Pin the neighbourhood while checking for overlaps. The finally block makes sure that a rejected value
        // does not leave the existing items locked (and thereby strongly referenced) forever.
        lockInterval(interval);
        try {
            SortedMap<Integer, IntervalCacheItem<T>> subMap = getSubMap(interval);
            removeGarbageCollectedItems(subMap.values());
            for (IntervalCacheItem<T> item : subMap.values()) {
                if (operations().overlap(interval, item)) {
                    throw new IllegalArgumentException("Overlapping intervals can not be added.");
                }
            }
        } finally {
            unlockInterval(interval);
        }
        IntervalCacheItem<T> item = new IntervalCacheItem<>(mergeCallback, interval, value);
        if (lock) {
            item.lock();
        }
        map.put(interval.getStart(), item);
    }

    /**
     * Gives access to the underlying map.
     *
     * @return The live internal map, not a copy.
     */
    NavigableMap<Integer, IntervalCacheItem<T>> getMap() {
        return map;
    }

    /**
     * Locks all items in the given interval from garbage collection. If an item already was garbage collected it is
     * silently removed.
     *
     * @param interval The range whose items are locked.
     */
    private void lockInterval(Interval<Integer> interval) {
        SortedMap<Integer, IntervalCacheItem<T>> subMap = getSubMap(interval);
        for (Iterator<IntervalCacheItem<T>> it = subMap.values().iterator(); it.hasNext();) {
            IntervalCacheItem<T> item = it.next();
            try {
                item.lock();
            } catch (IllegalStateException ex) {
                // lock() signals that the data is gone; the item is useless, so drop it from the map.
                it.remove();
            }
        }
    }

    /**
     * Unlocks all items in the given interval for garbage collection.
     *
     * @param interval The range whose items are unlocked.
     */
    private void unlockInterval(Interval<Integer> interval) {
        SortedMap<Integer, IntervalCacheItem<T>> subMap = getSubMap(interval);
        for (IntervalCacheItem<T> item : subMap.values()) {
            item.unlock();
        }
    }

    /**
     * Removes all items that were garbage collected, using the iterator of the given iterable.
     *
     * @param items The items to check; its iterator must support removal.
     */
    private void removeGarbageCollectedItems(Iterable<? extends IntervalCacheItem<?>> items) {
        for (Iterator<? extends IntervalCacheItem<?>> it = items.iterator(); it.hasNext();) {
            IntervalCacheItem<?> item = it.next();
            if (item.getStatus() == IntervalCacheItem.Status.GarbageCollected) {
                it.remove();
            }
        }
    }

    /**
     * Retrieves a part of the cached sequence, loading missing parts through the fetch callback first.
     *
     * @param interval The requested part of the sequence.
     * @return The requested subsequence, or the default value if the interval is empty.
     */
    @Override
    public T get(Interval<Integer> interval) {
        // entry point to the interval cache api, force zeroopen intervals.
        interval = interval.as(Interval.Type.ZeroOpen);
        if (interval.isEmpty()) {
            return defaultValue;
        }
        CacheCheckResult cached = isCached(interval);
        if (!cached.isCached()) {
            loadMissingIntervals(cached.getMissingIntervals());
        }
        return _get(interval);
    }

    /**
     * Uses the fetch callback to load the missing intervals into the cache and merges them with their neighbours.
     *
     * @param missing The intervals that have to be loaded.
     */
    final void loadMissingIntervals(List<Interval<Integer>> missing) {
        Interval<Integer> enclose = operations().enclose(missing);
        lockInterval(enclose);
        try {
            for (Interval<Integer> interval : missing) {
                T value = fetchCallback.fetch(interval);
                addValue(interval, value, true);
            }
            // Widen the range by one position on both sides so that items directly next to the loaded range are
            // part of the sub map and take part in the merge.
            enclose = createInterval(enclose.getStart() - 1, enclose.getEnd() + 1);
            mergeAdjacentIntervals(enclose);
        } finally {
            // Release the locks even if fetching, adding or merging failed; otherwise the locked items would
            // keep their data strongly referenced forever.
            unlockInterval(enclose);
        }
    }

    /**
     * Merges all adjacent items in the given interval. The right item of each adjacent pair is merged into the left
     * one and removed from the map.
     *
     * @param interval The range in which adjacent items are merged.
     */
    final void mergeAdjacentIntervals(Interval<Integer> interval) {
        SortedMap<Integer, IntervalCacheItem<T>> subMap = getSubMap(interval);
        IntervalCacheItem<T> previous = null;
        for (Iterator<IntervalCacheItem<T>> it = subMap.values().iterator(); it.hasNext();) {
            IntervalCacheItem<T> current = it.next();
            if (previous != null && operations().adjacent(previous, current)) {
                // previous grows to cover current and stays the left neighbour for the next iteration.
                previous.merge(current);
                it.remove();
            } else {
                previous = current;
            }
        }
    }

    /**
     * Looks up the item that is used to answer a request for the given interval: the first item of the sub map for
     * that interval.
     * <p>
     * NOTE(review): the first item is returned without checking that it actually covers the interval. If the covering
     * item was removed after garbage collection, an unrelated item left of the interval may be picked - confirm and
     * add a containment check if that is the case.
     *
     * @param interval The requested interval.
     * @return The item, or null if the sub map is empty or the item's data was garbage collected.
     */
    private IntervalCacheItem<T> getItem(Interval<Integer> interval) {
        SortedMap<Integer, IntervalCacheItem<T>> subMap = getSubMap(interval);
        if (subMap.isEmpty()) {
            return null;
        }
        IntervalCacheItem<T> item = subMap.get(subMap.firstKey());
        if (item.getStatus() == IntervalCacheItem.Status.GarbageCollected) {
            return null;
        }
        return item;
    }

    /**
     * Extracts the given interval from the cache. If no usable item is found the interval is loaded again and the
     * lookup is repeated.
     *
     * @param interval The requested interval.
     * @return The extracted subsequence.
     */
    private T _get(Interval<Integer> interval) {
        lockInterval(interval);
        try {
            IntervalCacheItem<T> item = getItem(interval);
            if (item == null) {
                loadMissingIntervals(Collections.singletonList(interval));
                return _get(interval);
            }
            // The extract callback expects coordinates relative to the data it is given, so the request is shifted
            // by the start of the item (presumably towards zero - see Intervals.operations().shift).
            Interval<Integer> shift = operations().shift(interval, item.getStart());
            T data = item.getData();
            return extractCallback.extract(shift, data);
        } finally {
            // Always release the locks, also when the fetch or extract callback throws.
            unlockInterval(interval);
        }
    }

    /**
     * Retrieves a submap that contains all items within the interval or a larger interval if the explicit values are
     * not present in the internal map, i.e. for [5,10] it may return [5,10] but also [min,10] or [5,max] or [min,max]
     * or other values.
     * <p>
     * The lower bound is the greatest key that is less than or equal to the interval start (inclusive), the upper
     * bound is the least key that is greater than or equal to the interval end (exclusive). The result is a live view
     * of the internal map (or the map itself), so removals through it change the cache.
     *
     * @param interval The interval to look up.
     * @return The view described above; never null.
     */
    private SortedMap<Integer, IntervalCacheItem<T>> getSubMap(Interval<Integer> interval) {
        interval = interval.as(Interval.Type.ZeroOpen);
        Integer floorKey = map.floorKey(interval.getStart());
        Integer ceilingKey = map.ceilingKey(interval.getEnd());
        if (floorKey == null && ceilingKey == null) {
            return map;
        }
        if (floorKey == null) {
            return map.headMap(ceilingKey);
        }
        if (ceilingKey == null) {
            return map.tailMap(floorKey);
        }
        return map.subMap(floorKey, ceilingKey);
    }

    /**
     * Checks which parts of the given interval are not covered by cached items.
     *
     * @param interval The interval to check.
     * @return The check result holding the missing parts.
     */
    CacheCheckResult isCached(Interval<Integer> interval) {
        List<Interval<Integer>> missingparts = operations().complement(interval, getSubMap(interval).values());
        return new CacheCheckResult(missingparts);
    }

    /**
     * Clears the cache by removing all items from the underlying map.
     */
    @Override
    public void invalidate() {
        map.clear();
    }
}
src/main/java/de/cebitec/common/sequencetools/intervals/cache/IntervalCacheItem.java
0 → 100644
View file @
983e9275
/*
* Copyright (C) 2013 Lukas Jelonek <ljelonek at cebitec.uni-bielefeld.de>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package
de.cebitec.common.sequencetools.intervals.cache
;
import
de.cebitec.common.sequencetools.intervals.Interval
;
import
java.lang.ref.WeakReference
;
import
java.util.Objects
;
import
static
de
.
cebitec
.
common
.
sequencetools
.
intervals
.
Intervals
.*;
/**
*
* @author Lukas Jelonek <ljelonek at cebitec.uni-bielefeld.de>
*/
class
IntervalCacheItem
<
T
>
implements
Interval
<
Integer
>
{
// The interval covered by this item; replaced by the union when another item is merged in.
private
Interval
<
Integer
>
interval
;
// Weak reference to the cached data; the referent may be cleared by the garbage collector while unlocked.
private
WeakReference
<
T
>
data
;
// Strong reference to the data while the item is locked, null while it is unlocked.
private
T
lock
;
// Callback used by merge() to combine the data of two adjacent items.
private
final
IntervalCache
.
MergeCallback
<
T
>
mergeCallback
;
/**
 * State of the weakly referenced data of a cache item.
 */
enum Status {

    /** The data is still available. */
    Cached,

    /** The data was reclaimed by the garbage collector. */
    GarbageCollected
}
/**
 * Creates an unlocked item that references the given data weakly.
 *
 * @param mergeCallback The callback that is used to merge the data of adjacent items.
 * @param interval The interval that is covered by the data.
 * @param data The data to cache. Must not be null.
 * @throws NullPointerException If <code>data</code> is null.
 */
IntervalCacheItem(IntervalCache.MergeCallback<T> mergeCallback, Interval<Integer> interval, T data) {
    this.data = new WeakReference<>(Objects.requireNonNull(data));
    this.interval = interval;
    this.mergeCallback = mergeCallback;
}
/**
 * Reports whether the weakly referenced data is still available.
 *
 * @return {@link Status#GarbageCollected} if the weak reference was cleared, {@link Status#Cached} otherwise.
 */
Status getStatus() {
    if (data.get() == null) {
        return Status.GarbageCollected;
    }
    return Status.Cached;
}
/**
 * Returns the interval that is currently covered by this item.
 *
 * @return The wrapped interval.
 */
Interval<Integer> getInterval() {
    return this.interval;
}
/**
 * Returns the cached data.
 *
 * @return The referent of the weak reference, or null if it was already garbage collected.
 */
T getData() {
    final T referent = data.get();
    return referent;
}
/**
 * Delegates to the wrapped interval.
 *
 * @return The type of the wrapped interval.
 */
@Override
public Type getType() {
    return this.interval.getType();
}
/**
 * Delegates to the wrapped interval.
 *
 * @return The length of the wrapped interval.
 */
@Override
public Integer getLength() {
    return this.interval.getLength();
}
/**
 * Delegates to the wrapped interval.
 *
 * @return The start of the wrapped interval.
 */
@Override
public Integer getStart() {
    return this.interval.getStart();
}
/**
 * Delegates to the wrapped interval.
 *
 * @return The end of the wrapped interval.
 */
@Override
public Integer getEnd() {
    return this.interval.getEnd();
}
/**
 * Delegates to the wrapped interval.
 *
 * @return true if the wrapped interval is empty.
 */
@Override
public boolean isEmpty() {
    return this.interval.isEmpty();
}
/**
 * Locks the data against garbage collection by keeping a hard reference to the weakly referenced value.
 *
 * @throws IllegalStateException If the data already was garbage collected; the item is left unlocked in this case.
 */
void lock() {
    final T referent = data.get();
    this.lock = referent;
    if (referent == null) {
        throw new IllegalStateException("Locking of cache item not possible as it already got garbage collected.");
    }
}
/**
 * Drops the hard reference that was created by {@link #lock()}, so that the data is only weakly referenced by this
 * item again. Does nothing if the item is not locked.
 */
void unlock() {
    lock = null;
}
/**
 * Merges the content of the next item into this item. Afterwards this item covers the union of both intervals and
 * references the merged data; the given item is not modified.
 * <p>
 * The lock state of this item is preserved: if it was locked before, it is locked on the merged data afterwards,
 * otherwise it is unlocked again, also when the merge fails.
 *
 * @param item The item to merge; it has to pass the <code>leftOf(this, item)</code> and
 * <code>adjacent(this, item)</code> checks.
 * @throws UnsupportedOperationException If the given item fails one of these checks.
 * @throws IllegalStateException If the data of this item or of the given item already was garbage collected.
 */
void merge(IntervalCacheItem<T> item) {
    if (!(operations().leftOf(this, item) && operations().adjacent(this, item))) {
        throw new UnsupportedOperationException("Merging an string cache item that is not "
                + "adjacent and right of the given item is not allowed.");
    }
    // Remember whether a caller holds a lock on this item, so that the merge does not silently release it.
    final boolean wasLocked = lock != null;
    // Pins the own data for the duration of the merge; throws if it is already gone.
    lock();
    try {
        Interval<Integer> union = operations().union(this, item);
        T odata = item.getData();
        if (odata == null) {
            // Handing null to the merge callback could silently corrupt the cached data.
            throw new IllegalStateException(
                    "Merging of cache items not possible as the item to merge already got garbage collected.");
        }
        T merged = mergeCallback.merge(lock, odata);
        data = new WeakReference<>(merged);
        interval = union;
        // Keep the merged data strongly reachable; released below unless the item was locked before.
        lock = merged;
    } finally {
        if (!wasLocked) {
            unlock();
        }
    }
}
/**
 * Delegates the conversion to the wrapped interval.
 *
 * @param newType The requested interval type.
 * @return The result of converting the wrapped interval; a plain interval, not a cache item.
 */
@Override
public Interval<Integer> as(Type newType) {
    final Interval<Integer> converted = this.interval.as(newType);
    return converted;
}
/**
 * Computes the hash code from the wrapped interval and the weak reference object that holds the data.
 *
 * @return The hash code.
 */
@Override
public int hashCode() {
    final int intervalHash = Objects.hashCode(interval);
    final int dataHash = Objects.hashCode(data);
    // Same arithmetic as seeding with 7 and folding both values in with the factor 59.
    return 59 * (59 * 7 + intervalHash) + dataHash;
}
@Override
public
boolean
equals
(
Object
obj
)
{
if
(
obj
==
null
)
{
return
false
;
}
if
(
getClass
()
!=
obj
.
getClass
())
{
return
false
;