From b5f093b4b7f3366c3c123a7c88df201e835802cd Mon Sep 17 00:00:00 2001 From: Sumner Date: Wed, 23 Oct 2019 18:39:21 +0200 Subject: [PATCH] added encode to notebook --- jupyter/LabeledRanges.ipynb | 707 ++++++++++++++---------------------- 1 file changed, 272 insertions(+), 435 deletions(-) diff --git a/jupyter/LabeledRanges.ipynb b/jupyter/LabeledRanges.ipynb index 7082b55..3f230ab 100644 --- a/jupyter/LabeledRanges.ipynb +++ b/jupyter/LabeledRanges.ipynb @@ -2,36 +2,37 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "from ntai import Labeler\n", "# from ntai.ranges.labeled_ranges import LabeledRange, LabeledRanges\n", - "from lrng import LabeledRange, LabeledRanges" + "from lrng import LabeledRange, LabeledRanges\n", + "import numpy as np" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "l = Labeler(label_order=['b', 'a'], processes=10)" + "l = Labeler(label_order=['b', 'a'], processes=10, use_other_class=False)" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 3, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -42,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -51,215 +52,25 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1]])" + "array([[0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1]])" ] }, - "execution_count": 7, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -267,269 +78,295 @@ "source": [ "seq=[i for i in range(300-100)]\n", "enc = l.encode(seq, crngs, 100)\n", - "enc" + "np.array(enc)[:10]" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[[0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [0, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 1, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1]]" + "LabeledRanges(\n", + "\tLabeledRange('b', 150, 250)\n", + "\tLabeledRange('a', 100, 200)\n", + ")" ] }, - "execution_count": 8, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "enc.tolist()" + "rel = l.label(\n", + " [\n", + " 'chr1', 100, 300, 'name', '0', '-'\n", + " ],\n", + " {\n", + " 'chr1': {\n", + " '-': LabeledRanges([\n", + " ['a', 100, 200], ['b',150,250],\n", + " ['a', 10, 20], ['b',15,25]\n", + " ])\n", + " }\n", + " }\n", + ")\n", + "rel" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "LabeledRanges(\n", - "\tLabeledRange('a', 100, 200)\n", - "\tLabeledRange('b', 150, 250)\n", - ")" + "[[0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [0, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 1],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [1, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0]]" ] }, - "execution_count": 6, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "l.label(\n", + "l.encode(\n", + " 'a'*200, \n", + "# rel, \n", " [\n", - " 'chr1', 100, 300, 'name', '0', '-'\n", - " ],\n", - " {\n", - " 'chr1': {\n", - " '-': LabeledRanges([\n", - " ['a', 100, 200], ['b',150,250],\n", - " ['a', 10, 20], ['b',15,25]\n", - " ])\n", - " }\n", - " }\n", + " ['a', 100, 200],\n", + " ['b', 150, 250],\n", + "# ['c', 100, 200],\n", + " ], \n", + " 100\n", ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "LabeledRanges(\n", + "\tLabeledRange('b', 150, 250)\n", + "\tLabeledRange('a', 100, 200)\n", + ")" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rel + ['c', 100, 200]" + ] }, { "cell_type": "code", @@ -555,7 +392,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.4" }, "toc": { "base_numbering": 1, -- GitLab