Merge pull request #152 from icewind1991/chapter-parsing

Chapter recognition improvements
This commit is contained in:
inorichi 2016-02-16 21:08:10 +01:00
commit 96d498e7e5
2 changed files with 32 additions and 2 deletions

View File

@ -15,7 +15,8 @@ public class ChapterRecognition {
private static final Pattern withAlphaPostfix = Pattern.compile("(\\d+[\\.,]?\\d*\\s*)([a-z])($|\\b)"); private static final Pattern withAlphaPostfix = Pattern.compile("(\\d+[\\.,]?\\d*\\s*)([a-z])($|\\b)");
private static final Pattern cleanNumber = Pattern.compile("(\\d+[\\.,]?\\d+)($|\\b)"); private static final Pattern cleanNumber = Pattern.compile("(\\d+[\\.,]?\\d+)($|\\b)");
private static final Pattern uncleanNumber = Pattern.compile("(\\d+[\\.,]?\\d*)"); private static final Pattern uncleanNumber = Pattern.compile("(\\d+[\\.,]?\\d*)");
private static final Pattern withColon = Pattern.compile("(\\d+[\\.,]?\\d*\\s*:)"); private static final Pattern withColon = Pattern.compile("(\\d+[\\.,]?\\d*\\s*:)([^\\d]|$)");
private static final Pattern startingNumber = Pattern.compile("^(\\d+[\\.,]?\\d*)");
private static final Pattern pUnwanted = private static final Pattern pUnwanted =
Pattern.compile("(\\b|\\d)(v|ver|vol|version|volume)\\.?\\s*\\d+\\b"); Pattern.compile("(\\b|\\d)(v|ver|vol|version|volume)\\.?\\s*\\d+\\b");
@ -84,7 +85,7 @@ public class ChapterRecognition {
// Try to remove the manga name from the chapter, and try again // Try to remove the manga name from the chapter, and try again
String mangaName = replaceIrrelevantCharacters(manga.title); String mangaName = replaceIrrelevantCharacters(manga.title);
String nameWithoutManga = difference(mangaName, name); String nameWithoutManga = difference(mangaName, name).trim();
if (!nameWithoutManga.isEmpty()) { if (!nameWithoutManga.isEmpty()) {
matcher = uncleanNumber.matcher(nameWithoutManga); matcher = uncleanNumber.matcher(nameWithoutManga);
occurrences = getAllOccurrences(matcher); occurrences = getAllOccurrences(matcher);
@ -121,6 +122,19 @@ public class ChapterRecognition {
return; return;
} }
} }
// check for a number either at the start or right after the manga title
matcher = startingNumber.matcher(name);
if (matcher.find()) {
chapter.chapter_number = Float.parseFloat(matcher.group(1));
return;
}
matcher = startingNumber.matcher(nameWithoutManga);
if (matcher.find()) {
chapter.chapter_number = Float.parseFloat(matcher.group(1));
return;
}
} }
/** /**

View File

@ -179,4 +179,20 @@ public class ChapterRecognitionTest {
ChapterRecognition.parseChapterNumber(c, randomManga); ChapterRecognition.parseChapterNumber(c, randomManga);
assertThat(c.chapter_number).isEqualTo(-1f); assertThat(c.chapter_number).isEqualTo(-1f);
} }
/**
 * A chapter name that carries a time-like "00:00" suffix after the
 * "404:" part is expected to be recognised as chapter 404.
 */
@Test
public void testChapterWithTime() {
    Chapter timedChapter = createChapter("Fairy Tail 404: 00:00");

    ChapterRecognition.parseChapterNumber(timedChapter, randomManga);

    assertThat(timedChapter.chapter_number).isEqualTo(404f);
}
/**
 * With the manga title set to "Kuroko no Basket", a chapter name that
 * holds a plain "002" right after the title and another number ("840")
 * further along is expected to be recognised as chapter 2.
 */
@Test
public void testPlainNumberInTitle() {
    Chapter titledChapter = createChapter("Kuroko no Basket 002 Monday at 840 on the Rooftop");

    // Use a dedicated manga so the title matches the chapter name prefix.
    Manga kurokoManga = new Manga();
    kurokoManga.title = "Kuroko no Basket";

    ChapterRecognition.parseChapterNumber(titledChapter, kurokoManga);

    assertThat(titledChapter.chapter_number).isEqualTo(2f);
}
} }