How do I fix my code so that it prints the largest and lowest values in a matrix? - sorting

def largestlowest(matrix):
largeset = 0
lowest = 0
for row in range(len(matrix)):
for col in range(len(matrix[row]-1)):
if matrix[col] > matrix[col+1]:
largest = matrix[col]
if matrix[col] < matrix[col+1]:
lowest = matrix[col]
else:
print("there is no min/max value")
return largest and lowest
print(largestlowest([[4,8,2,9,34,57,22,44], [1,2,8,9,1,2,55,3,22,4]]))

Don't forget to mention the language. I assume this is python. Also try to run and test the code before posting it. This code is not running at all.
This is the running version of the code you submitted in python:
def largestlowest(matrix):
largeset = 0
lowest = 0
for row in range(len(matrix)):
for col in range(len(matrix[row])-1):
if (matrix[row][col] > matrix[row][col+1]):
largest = matrix[row][col]
if (matrix[row][col] < matrix[row][col+1]):
lowest = matrix[row][col]
return (largest, lowest)
print(largestlowest([[4,8,2,9,34,57,22,44], [1,2,8,9,1,2,55,3,22,4]]))
Note that to access an element of a list you should call it by both row and column : matrix[row][col].
There are a few things wrong:
You go up to (len(matrix[row])-1) and therefore do not include the
last column.
Then you compare neighboring elements matrix[row][col] > matrix[row][col+1] instead of comparing all of them with the lowest and the largest.
You assign the lowest to 0, which will always be lower than all of your elements
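return a and b does not return both values: the and operator yields a when a is falsy and b otherwise, so only one value comes back. Return a tuple instead: return a, b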
I recommend you search each step (e.g how to call a matrix element in python). You will find plenty of information online. Try to run the code as quickly as possible and solve the errors in the console.
Finally, the code that does what you want (I think):
def largestlowest(matrix):
    """Describe the largest and lowest elements of a matrix.

    Args:
        matrix: A list of rows, each row a list of numbers. Rows may
            differ in length and may be empty.

    Returns:
        A message naming the largest and lowest element, or an error
        message when the matrix holds no elements at all.
    """
    # Flatten first so that an empty leading row (e.g. [[], [1, 2]])
    # cannot raise IndexError the way seeding from matrix[0][0] would.
    values = [item for row in matrix for item in row]
    if not values:
        return "Matrix should have at least one element!"
    return "Largest element of matrix: %d, lowest element of matrix: %d" % (
        max(values),
        min(values),
    )
print(largestlowest([[4,8,2,9,34,57,22,44], [1,2,8,9,1,2,55,3,22,4]]))
print(largestlowest([]))

Your code was actually comparing the wrong values:
if matrix[col] > matrix[col+1]: isn't comparing the current value to the largest, it is comparing it to the one after it. The same thing was happening for the small.
Also, you were handling your data incorrectly. matrix[col] is an array itself, not an individual element (at least, according to the example you gave).
The code below is commented and works. It outputs:
LARGEST: 57
LOWEST: 1
# Function
def largestlowest(matrix):
    """Return the largest and lowest values found in a matrix.

    Args:
        matrix: An iterable of rows, each row an iterable of mutually
            comparable values. Rows may be empty or differ in length.

    Returns:
        A ``(largest, lowest)`` tuple.

    Raises:
        ValueError: If the matrix contains no elements at all.
    """
    found_any = False
    largest = lowest = None
    # Loop through every array in the matrix, then through each item in it
    for arr in matrix:
        for item in arr:
            if not found_any:
                # Seed both extremes from the first element seen. Fixed
                # sentinels such as -10000/10000 give wrong answers as soon
                # as every value lies outside that range.
                largest = lowest = item
                found_any = True
                continue
            if item > largest:
                largest = item
            if item < lowest:
                lowest = item
    if not found_any:
        raise ValueError("matrix must contain at least one element")
    return largest, lowest
# Example main function
def main():
    """Run largestlowest on the sample matrix and print both extremes."""
    # Sample data taken from the question
    sample = [
        [4, 8, 2, 9, 34, 57, 22, 44],
        [1, 2, 8, 9, 1, 2, 55, 3, 22, 4],
    ]
    biggest, smallest = largestlowest(sample)
    # Report each extreme on its own line
    for label, value in (("LARGEST: ", biggest), ("LOWEST: ", smallest)):
        print(label, value)
# Call the main function
main()

Related

Specific Max Sum of the elements of an Int array - C/C++

Let's say we have an array: 7 3 1 1 6 13 8 3 3
I have to find the maximum sum of this array such that:
if i add 13 to the sum: i cannot add the neighboring elements from each side: 6 1 and 8 3 cannot be added to the sum
i can skip as many elements as necessary to make the sum max
My algorithm was this:
I take the max element of the array and add that to the sum
I make that element and the neighbor elements -1
I keep doing this until it's not possible to find anymore max
The problem is that for some specific test cases this algorithm is wrong.
Lets see this one: 15 40 45 35
according to my algorithm:
I take 45 and make neighbors -1
The program ends
The correct way to do it is 15 + 35 = 50
This problem can be solved with dynamic programming.
Let A be the array, let DP[m] be the max sum in {A[1]~A[m]}
Every element of A has only two states: it is either added to the sum or it is not. Suppose we have already determined DP[1]~DP[m-1], and now consider {A[1]~A[m]}. If A[m] is added to the sum, then A[m-1] and A[m-2] cannot be added, so the max sum in this case is A[m]+DP[m-3] (note: DP[m-3] is the max sum in {A[1]~A[m-3]}). If A[m] is not added to the sum, the max sum is DP[m-1]. So we just need to compare A[m]+DP[m-3] and DP[m-1]; the bigger of the two is DP[m]. The reasoning is the same as in mathematical induction.
So the DP equation is DP[m] = max{ DP[m-3]+A[m], DP[m-1] },DP[size(A)] is the result
The complexity is O(n); the pseudocode is as follows:
DP[1] = A[1];
DP[2] = max(A[1], A[2]);
DP[3] = max(A[1], A[2], A[3]);
for(i = 4; i <= size(A); i++) {
DP[i] = DP[i-3] + A[i];
if(DP[i] < DP[i-1])
DP[i] = DP[i-1];
}
It's solvable with a dynamic programming approach, taking O(N) time and O(N) space. Implementation following:
// Memoized maximum sum over the suffix A[pos..N-1], where picking an element
// forbids picking the two elements on either side of it.
// Relies on names declared elsewhere: N (array size), A (input values),
// ret (memo table) and visited (memo flags) -- assumed indexable by 0..N-1.
int max_sum(int pos){
    if( pos >= N ){ // N = array_size; an empty suffix contributes nothing
        return 0;
    }
    if( visited[pos] ){ // if this state is already checked
        return ret[pos]; // ret[i] contains the result for the suffix starting at i
    }
    // taking this item: the next element that may be picked is at pos+3
    int take = A[pos] + max_sum(pos+3);
    // skipping this item: best sum of the remaining suffix
    int skip = max_sum(pos+1);
    ret[pos] = max(take, skip);
    visited[pos] = true;
    return ret[pos];
}
int main(){
    // clear the visited array
    // and other initializations
    cout << max_sum(0) << endl; // best sum over the whole array
}
The above problem is max independent set problem (with twist) in a path graph which has dynamic programming solution in O(N).
Recurrence relation for solving it : -
Max(N) = maximum(Max(N-3) + A[N] , Max(N-1))
Explanation: If we have to select the maximum set from N elements, then we can either select the Nth element together with the maximum set from the first N-3 elements, or select the maximum set from the first N-1 elements, excluding the Nth element.
Pseudo Code : -
Max(1) = A[1];
Max(2) = maximum(A[1],A[2]);
Max(3) = maximum(A[3],Max(2));
for(i=4;i<=N;i++) {
Max(i) = maximum(Max(i-3)+A[i],Max(i-1));
}
As suggested, this is a dynamic programming problem.
First, some notation, Let:
A be the array, of integers, of length N
A[a..b) be the subset of A containing the elements at index a up to
but not including b (the half open interval).
M be an array such that M[k] is the specific max sum of A[0..k)
such that M[N] is the answer to our original problem.
We can describe an element of M (M[n]) by its relation to one or more elements of M (M[k]) where k < n. And this lends itself to a nice linear time algorithm. So what is this relationship?
The base cases are as follows:
M[0] is the max specific sum of the empty list, which must be 0.
M[1] is the max specific sum for a single element, so must be
that element: A[0].
M[2] is the max specific sum of the first two elements. With only
two elements, we can either pick the first or the second, so we better
pick the larger of the two: max(A[0], A[1]).
Now, how do we calculate M[n] if we know M[0..n)? Well, we have a choice to make:
Either we add A[n-1] (the last element in A[0..n)) or we don't. We don't know for
certain whether adding A[n-1] in will make for a larger sum, so we try both and take
the max:
If we don't add A[n-1] what would the sum be? It would be the same as the
max specific sum immediately before it: M[n-1].
If we do add A[n-1] then we can't have the previous two elements in our
solution, but we can have any elements before those. We know that M[n-1] and
M[n-2] might have used those previous two elements, but M[n-3] definitely
didn't, because it is the max in the range A[0..n-3). So we get
M[n-3] + A[n-1].
We don't know which one is bigger though, (M[n-1] or M[n-3] + A[n-1]), so to find
the max specific sum at M[n] we must take the max of those two.
So the relation becomes:
M[0] = 0
M[1] = A[0]
M[2] = max {A[0], A[1]}
M[n] = max {M[n-1], M[n-3] + A[n-1]} where n > 2
Note a lot of answers seem to ignore the case for the empty list, but it is
definitely a valid input, so should be accounted for.
The simple translation of the solution in C++ is as follows:
(Take special note of the fact that the size of m is one bigger than the size of a)
// Returns the maximum sum of elements of `a` such that choosing an element
// excludes the two elements on either side of it. Returns 0 for an empty
// input.
//
// m[k] holds the answer for the first k elements of `a`, so m has one more
// entry than `a` and m.back() is the answer for the whole input.
int max_specific_sum(std::vector<int> a)
{
    const std::vector<int>::size_type n = a.size();
    // The base cases read a[0] and a[1]; guard them so inputs of size 0 and 1
    // do not index out of bounds.
    if (n == 0)
        return 0;
    std::vector<int> m( n + 1 );
    m[0] = 0;
    m[1] = a[0];
    if (n >= 2)
        m[2] = std::max(a[0], a[1]);
    // Either skip a[i-1] (m[i-1]) or take it on top of the best answer that
    // cannot contain its two left neighbours (m[i-3]).
    for(std::vector<int>::size_type i = 3; i <= n; ++i)
        m[i] = std::max(m[i-1], m[i-3] + a[i-1]);
    return m.back();
}
BUT This implementation has a linear space requirement in the size of A. If you look at the definition of M[n], you will see that it only relies on M[n-1] and M[n-3] (and not the whole preceding list of elements), and this means you need only store the previous 3 elements in M, resulting in a constant space requirement. (The details of this implementation are left to the OP).

Algorithm - Find the first missing integer in the sequence of integers

Find the first missing integer in the sequence of integers
[4,5,1,2,6,7] missing is 3
Then when there is repeated integers
[1,2,2,2,5,8,9] still missing 3
When you also have negative
[-2,0, 1,2,] missing -1
[1,2,3,4,5] missing 6 or 0
Can anyone help me find a good algorithm to cover all these cases. I have an algorithm which covers first 2 cases but not sure how to cover all the cases in effective manner.
What I consider the classic O(n) solution for this problem is to rely on the fact that the array can contain at most N unique numbers, where N is the input's length. Therefore the range for our record is restricted to N.
Since you seem to allow the expected sequence to start anywhere, including negative numbers, we can start by iterating once over the array and recording, L, the lowest number seen. Now use L as an offset so that 0 + L equals the first number we expect to be present.
Initialise an array record of length (N + 1) and set each entry to false. Iterate over the input and for each entry, A[i], if (A[i] - L) is not greater than N, set record[ A[i] - L ] to true. For example:
[-2, 0, 1, 2] ->
N = 4
L = -2
-2 -> -2 - (-2) = 0
-> record[0] = true
0 -> 0 - (-2) = 2
-> record[2] = true
1 -> 1 - (-2) = 3
-> record[3] = true
2 -> 2 - (-2) = 4
-> record[4] = true
record -> [true, false, true, true, true]
Now iterate over the record. Output the first entry at index i that is set to false as i + L. In our example above, this would be:
record[1] is false
output: 1 + (-2) -> -1
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdlib.h>
// Reads n followed by n integers from standard input and prints the smallest
// integer between the input's minimum and maximum that does not occur in it.
// If every value in that range is present, prints maximum + 1.
// Returns 1 without printing when the input is unreadable, empty, or memory
// cannot be allocated.
int main()
{
    int n, i;
    // Reject unreadable or empty input: there is no range to search
    if (scanf("%d", &n) != 1 || n <= 0)
        return 1;
    int *a = malloc((size_t)n * sizeof *a);
    if (a == NULL)
        return 1;
    //Reading elements
    for (i = 0; i < n; i++) {
        if (scanf("%d", &a[i]) != 1) {
            free(a);
            return 1;
        }
    }
    //Finding the minimum and maximum from given elements.
    //Both start from a[0] so that all-negative input is handled correctly.
    int min = a[0], max = a[0];
    for (i = 1; i < n; i++) {
        if (a[i] > max)
            max = a[i];
        if (a[i] < min)
            min = a[i];
    }
    //One counter per value in [min, max], hence the +1; computed in
    //long long so that a wide range cannot overflow int.
    long long len = (long long)max - min + 1;
    //Creating a new array with every counter set to 0
    int *b = calloc((size_t)len, sizeof *b);
    if (b == NULL) {
        free(a);
        return 1;
    }
    //The corresponding index value is incremented based on the given numbers
    for (i = 0; i < n; i++)
        b[(long long)a[i] - min]++;
    //Finding the missed value; with no gap in [min, max] the answer is max + 1
    long long miss = (long long)max + 1;
    for (long long k = 0; k < len; k++) {
        if (b[k] == 0) {
            miss = min + k;
            break;
        }
    }
    printf("%lld", miss);
    free(b);
    free(a);
    return 0;
}
Code Explanation:
1.Find the minimum and maximum in the given numbers.
2.Create a count array spanning the range from the minimum to the maximum, initialized to 0, which maintains the count of the given numbers.
3.Now by iterating, for each given element increment the corresponding index by 1.
4.Finally iterate through the count array and find the first missing number.
This might help you in solving your problem. Correct me if i'm wrong.
I think, it will be easy to solve sort of problems using data-structure like TreeMap in JAVA, e.g:
treeMap.put(array[i], treeMap.get(array[i]) == null ? 1 : treeMap.get(array[i]) + 1);
So, you are putting key and value to the TreeMap the key represent the digit itself e.g, 1,2,3... and the value represent the occurrence times.
Thus, and by taking advantage of this data-structure (Sort elements for us) you can loop through this data-structure and check which key is missing in the sequence, e.g:
for key in treeMap
if(key > currentIndex) // this is the missing digit
if(loop-completed-without-missing-key) // it's not in the array.
Add the numbers to a running array and keep them sorted.
You may also have optional minimum and maximum bounds for the array (to handle your third case, "6 is missing even if not in array"
On examination of a new number:
- try inserting it in the sorting array.
- already present: discard
- below minimum or above maximum: nullify minimum or maximum accordingly
- otherwise add in proper position.
To handle an array: sort it, compare first and last elements to expected minimum / maximum. Nullify minimum if greater than first element, nullify maximum if smaller than last element.
There might be a special case if minimum and maximum are both below first or both above last:
min=5 max=8 array = [ 10, 11, 13 ]
Here 5, 6, 7, 8 and 12 are missing, but what about 9? Should it be considered missing?
When checking for missing numbers include:
- if minimum is not null, all numbers from minimum to first element.
- if maximum is not null, all numbers from last element to maximum.
- if (last - first) = number of elements, no numbers are missing
(total numbers examined minus array size is duplicate count)
- otherwise walk the array and report all missing numbers: when
checking array[i], if array[i]-array[i-1] != 1 you have a gap.
only "first" missing
You still have to manage the whole array even if you're only interested in one missing number. For if you discarded part of the array, and the missing number arrived, then the new missing number might well have been in the discarded part of the array.
However you might keep trace of what the smallest missing number is, and recalculate with cost of o(log n) only when/if it arrives; then you'd be able to tell which is it in o(1) time. To quickly zero on that missing number, consider that there is a gap between arr[i] and arr[j] iff arr[j]-arr[i] > j-i.
So you can use the bisection method: start with i = first, j = last; if gap(i,j) then c = ceil((i+j)/2). If gap(i, c) then j = c, else i = c, and repeat until j-i = 1. At that point arr[i]+1 is your smallest missing number.

Length of maximum subarray such that 1st element is greater than last element

I have been given an array. I need to find the length of maximum subarray in which the first element is greater than the last element.
For example 5 4 3 2 1. Length of max subarray is 5 since first element 5 is greater than last element 1.
Another example, 5 10 4 7 9 8. Length is again 5 and array starts from 10 and goes till last element.
I know the naive approach ie O(n²) but need a better approach.
Here is linear algorithm O(n):
Description of Algorithm
First collect all indices i for which the following is true:
For any j > i : ai < aj
So they are sort of minima, in the sense that there are no lesser-or-equal values at the right of them.
Keep a reference to the left most of these, call that index j. That variable will serve as the end index (inclusive) of a range.
Now start from the left with index i, and as soon as ai > aj, go to the next "minimum" (so j increases) while this condition keeps holding (ai > aj). For the last j where this holds: this could be a solution: verify if this is the longest range so far found.
Whenever j < i, also take the next "minimum".
Whenever there are no more minima, stop the algorithm.
Visual Representation
The idea is represented here in a graph-representation of the array values, where the red markers are the minima, and the green markers are possible candidates for where a solution sub array could start:
Code
Here is an implementation of that algorithm in JavaScript: you can enter array values, and the largest sub-array will be displayed as you type:
/**
 * Search `a` for the longest contiguous sub-array whose first element is
 * greater than its last element.
 *
 * @param {Array} a - values to scan (compared with < and >)
 * @returns {Array} a slice of `a` holding the longest range found, or an
 *     empty array when no range longer than one element qualifies
 */
function longestSubArray(a) {
    // Preprocessing: collect all i, for which holds:
    // if j > i, then a[i] < a[j]
    // The array is scanned right to left, so indices are pushed in
    // descending order and pop() later yields them left to right.
    var minima = [-1] // Add a special value first (see later)
    var last = Infinity;
    for (var i = a.length - 1; i >= 0; i--) {
        if (a[i] < last) {
            last = a[i];
            minima.push(i);
            // Optimisation: It is of no use to find more minima if
            // this value is less than the first value in the array
            if (last < a[0]) break;
        }
    }
    // Get first value from minima. This will be the rightmost
    // value that is less than a[0], or if such does not exist,
    // the minimum value in the array.
    var j = minima.pop();
    var maxSize = 1;
    var maxStart = 0;
    // Look for ranges that start at i:
    for (i = 0; i < a.length; i++) {
        // Check if range (i, j) fulfills the requirement.
        // The `j <= i` alternative only skips end indices that are not to
        // the right of i: j - i + 1 is then at most 1, so the size check
        // below cannot succeed for them.
        while (j !== -1 && (a[i] > a[j] || j <= i) ) {
            // Check if range (i, j) is the largest so far
            if (j - i + 1 > maxSize) {
                maxSize = j - i + 1;
                maxStart = i;
            }
            // Take an end index that is more to the right,
            // but which will represent a higher value also:
            j = minima.pop(); // could be -1: which means "all done"
        }
        if (j == -1) break;
    }
    // maxSize is still its initial value 1 when no range was accepted
    if (maxSize == 1) return []; // no solution
    return a.slice(maxStart, maxStart+maxSize);
}
// I/O handling -- not relevant to the algorithm:
var input = document.querySelector('input');
var output = document.querySelector('span');
// Recompute and display the longest sub-array every time the text changes.
input.oninput = function () {
    // Translate text input to array of integers.
    // match() returns null (not an empty array) when the text contains no
    // integer, e.g. after the field is cleared, so fall back to [] instead
    // of calling .map on null.
    var a = (input.value.match(/-?\d+/g) || []).map(Number);
    // Apply function
    var sub = longestSubArray(a);
    // Output result
    output.textContent = sub.join(' ');
}
Enter array: <input size="60"><br>
Longest sub-array: <span></span>
See here for the same function in Python.
Proof of Linear Time Complexity
The algorithm has a linear time complexity:
The loop for collecting the minima iterates at most n times
The loop on i iterates at most n times
The inner loop will in total iterate at most n times, even though it could iterate more than once per i, the array minima is being shortened on every iteration.
So: O(n).
You can try applying a caterpillar method which would be O(n):
The idea is to use two indices, one for the head and another for the tail as well as a maximum length variable. Then, you try to advance the head as long as the condition is valid: The minimum value in the subsequence from the head to the end of your array is less than the value of the tail.
If you can't advance the head, because the condition doesn't fulfill, then you advance the tail. Thus the resemblance to a caterpillar which advances his head and then its tail as it moves.
That minimum can be pre-calculated in a previous iteration over the array.
This is a possible implementation in python:
def subSeqLen(arr):
    """Return the length of the longest subarray whose first element is
    greater than its last element.

    Uses two indices ("caterpillar" method): ``tail`` marks the start of the
    candidate subarray and ``head`` its end.

    Args:
        arr: A sequence of mutually comparable values.

    Returns:
        The length of the longest qualifying subarray, or 0 when there is
        none (including for an empty or single-element input).
    """
    n = len(arr)
    if n == 0:
        # arr[-1] below would raise IndexError on an empty sequence.
        return 0
    head = 0
    tail = 0
    maxLength = 0
    # minFromLast[i] is the minimum of arr[i:], built right to left.
    # range() is used instead of the Python 2-only xrange().
    minFromLast = [arr[-1]] * n
    for i in range(n - 2, -1, -1):
        minFromLast[i] = min(minFromLast[i + 1], arr[i])
    while tail <= head < n:
        if arr[tail] > arr[head] and head - tail + 1 > maxLength:
            maxLength = head - tail + 1
        # Advance the head only while something to its right is still small
        # enough to possibly close a subarray starting at tail.
        if head < n - 1 and minFromLast[head + 1] <= arr[tail]:
            head += 1
        else:
            tail += 1
            head = max(head, tail)
    return maxLength
Create another array of pairs such that the first element is same as your array and the second is indices of the element, so for array :
5 4 3 2 1
The new array will be like this
5,1 4,2 3,3 2,4 1,5
Now sort the array of pairs, so it will look like this :
1,5 2,4 3,3 4,2 5,1
Now build a Segment tree using indices of the sorted array of pairs.
Now iterate over the array and for each element find its counterpart in the sorted array (this can be done in O(1)). Suppose its index there is j; now perform a range query on the segment [1, j] (all elements in that segment are lower than the current element, because the array of pairs is sorted) to find the maximum index value in O(log(n)). Finally, the answer is the maximum value among all segment lengths.
Complexity is O(nlog(n)).
EDIT: Looks like a very simple O(n) solution exists.
There are two algorithms you can use in order to solve the problem:
Binary-search, Stack, O(n * log(n))
Let's take some j and consider all valid subarrays ending with the element in the j-th position. The best answer for this j is the smallest i such that i < j and a[i] > a[j]. Iterating over all such i for every j gives O(n²) complexity.
There is one simple idea to keep in mind in order to improve the complexity. If we have i1 and a[i1] then we will never consider another i2 as a starting point of a valid subarray when i1 < i2 and a[i1] > a[i2] because i1 gives better result. Lets store such pairs (a[i],i) in data structure d.
In any point of time d will look like this (a[i0], i0), (a[i1], i1) ... (a[in], in). Both ip and a[ip] increase from left to right.
When we are considering next j as an end of valid subarray we may look for current best answer in d. All i-s in d are lower than current j. We have to look for the left-most pair (a[i], i) where a[i] > a[j] getting j - i + 1 as a current answer. Since pairs are sorted we can use binary search.
In case a[j] is greater than all a[i] from d then we have a new pair (a[j], j) to be appended to d.
ans <- 0
d <- [] // contains (val, idx) structures
for j = 0..n-1
l <- 0, r <- len(d) - 1
while l <= r
mid = (l + r) / 2 // integer division
if d[mid].val > a[j]
r <- mid - 1
else
l <- mid + 1
if l = len(d)
append (a[j], j) to d
else
curAns <- j - d[l].idx + 1
ans <- max(ans, curAns)
Overall complexity is O(n * log(n)).
Segment Tree, O(n * log(n))
Take a look at Abdenaceur Lichiheb's answer
There is no restriction on the limits of a values as they can be easily sorted and then mapped to indices in the sorted array.
If by maximum you mean by length then really easy solution exists.
Input-
First line n.
Second line n numbers.
Code-
// Reads n and then n integers from standard input and prints one length:
// the largest count of elements seen from one new running maximum up to
// (but not including) the next one, or up to the end of the input.
// NOTE(review): `fast`, `IINF` and `maximize` are not defined in this
// snippet -- presumably they come from the author's template (an I/O
// speed-up, a large "infinity" constant, and l = max(l, l1)); confirm
// before compiling.
int main(){
    fast;
    int l=0; // best run length recorded so far
    int n;
    cin>>n;
    int a[n];
    for(int i=0;i<n;i++){
        cin>>a[i];
    }
    int m=-IINF; // running maximum of the values seen so far
    int l1=0; // length of the run that started at the latest running maximum
    for(int i=0;i<n;i++){
        if(a[i]>m){
            // New running maximum: record the finished run, start a new one
            m=a[i];
            maximize(l,l1);
            l1=1;
        }
        else{
            l1+=1;
            //cout<<l1<<" ";
        }
    }
    // The last run is never passed to maximize() inside the loop, so it is
    // compared here.
    if(l1>l){
        cout<<l1<<endl;
        return 0;
    }
    cout<<l<<endl;
    return 0;
}

array- having some issues [duplicate]

An interesting interview question that a colleague of mine uses:
Suppose that you are given a very long, unsorted list of unsigned 64-bit integers. How would you find the smallest non-negative integer that does not occur in the list?
FOLLOW-UP: Now that the obvious solution by sorting has been proposed, can you do it faster than O(n log n)?
FOLLOW-UP: Your algorithm has to run on a computer with, say, 1GB of memory
CLARIFICATION: The list is in RAM, though it might consume a large amount of it. You are given the size of the list, say N, in advance.
If the datastructure can be mutated in place and supports random access then you can do it in O(N) time and O(1) additional space. Just go through the array sequentially and for every index write the value at the index to the index specified by value, recursively placing any value at that location to its place and throwing away values > N. Then go again through the array looking for the spot where value doesn't match the index - that's the smallest value not in the array. This results in at most 3N comparisons and only uses a few values worth of temporary space.
# Pass 1, move every value to the position of its value
# NOTE: the `return` statements below mean this fragment is the body of an
# enclosing function that is not shown here.
# Assumes `array` is a mutable sequence of non-negative integers and that
# N == len(array) -- TODO confirm against the caller.
# Pass 1, move every value to the position of its value
for cursor in range(N):
    target = array[cursor]
    # Store `target` at index `target`, then carry on with the value that
    # was displaced from that slot. The chain ends when the carried value
    # is >= N (it is dropped) or is already at its own index.
    while target < N and target != array[target]:
        new_target = array[target]
        array[target] = target
        target = new_target
# Pass 2, find first location where the index doesn't match the value
for cursor in range(N):
    if array[cursor] != cursor:
        return cursor
# Every index 0..N-1 holds its own value
return N
Here's a simple O(N) solution that uses O(N) space. I'm assuming that we are restricting the input list to non-negative numbers and that we want to find the first non-negative number that is not in the list.
Find the length of the list; lets say it is N.
Allocate an array of N booleans, initialized to all false.
For each number X in the list, if X is less than N, set the X'th element of the array to true.
Scan the array starting from index 0, looking for the first element that is false. If you find the first false at index I, then I is the answer. Otherwise (i.e. when all elements are true) the answer is N.
In practice, the "array of N booleans" would probably be encoded as a "bitmap" or "bitset" represented as a byte or int array. This typically uses less space (depending on the programming language) and allows the scan for the first false to be done more quickly.
This is how / why the algorithm works.
Suppose that the N numbers in the list are not distinct, or that one or more of them is greater than or equal to N. This means that there must be at least one number in the range 0 .. N - 1 that is not in the list. So the problem of finding the smallest missing number must therefore reduce to the problem of finding the smallest missing number less than N. This means that we don't need to keep track of numbers that are greater than or equal to N ... because they won't be the answer.
The alternative to the previous paragraph is that the list is a permutation of the numbers from 0 .. N - 1. In this case, step 3 sets all elements of the array to true, and step 4 tells us that the first "missing" number is N.
The computational complexity of the algorithm is O(N) with a relatively small constant of proportionality. It makes two linear passes through the list, or just one pass if the list length is known to start with. There is no need to hold the entire list in memory, so the algorithm's asymptotic memory usage is just what is needed to represent the array of booleans; i.e. O(N) bits.
(By contrast, algorithms that rely on in-memory sorting or partitioning assume that you can represent the entire list in memory. In the form the question was asked, this would require O(N) 64-bit words.)
@Jorn comments that steps 1 through 3 are a variation on counting sort. In a sense he is right, but the differences are significant:
A counting sort requires an array of (at least) Xmax - Xmin counters where Xmax is the largest number in the list and Xmin is the smallest number in the list. Each counter has to be able to represent N states; i.e. assuming a binary representation it has to have an integer type (at least) ceiling(log2(N)) bits.
To determine the array size, a counting sort needs to make an initial pass through the list to determine Xmax and Xmin.
The minimum worst-case space requirement is therefore ceiling(log2(N)) * (Xmax - Xmin) bits.
By contrast, the algorithm presented above simply requires N bits in the worst and best cases.
However, this analysis leads to the intuition that if the algorithm made an initial pass through the list looking for a zero (and counting the list elements if required), it would give a quicker answer using no space at all if it found the zero. It is definitely worth doing this if there is a high probability of finding at least one zero in the list. And this extra pass doesn't change the overall complexity.
EDIT: I've changed the description of the algorithm to use "array of booleans" since people apparently found my original description using bits and bitmaps to be confusing.
Since the OP has now specified that the original list is held in RAM and that the computer has only, say, 1GB of memory, I'm going to go out on a limb and predict that the answer is zero.
1GB of RAM means the list can have at most 134,217,728 numbers in it. But there are 2^64 = 18,446,744,073,709,551,616 possible numbers. So the probability that zero is in the list is 1 in 137,438,953,472.
In contrast, my odds of being struck by lightning this year are 1 in 700,000. And my odds of getting hit by a meteorite are about 1 in 10 trillion. So I'm about ten times more likely to be written up in a scientific journal due to my untimely death by a celestial object than the answer not being zero.
As pointed out in other answers you can do a sort, and then simply scan up until you find a gap.
You can improve the algorithmic complexity to O(N) and keep O(N) space by using a modified QuickSort where you eliminate partitions which are not potential candidates for containing the gap.
On the first partition phase, remove duplicates.
Once the partitioning is complete look at the number of items in the lower partition
Is this value equal to the value used for creating the partition?
If so then it implies that the gap is in the higher partition.
Continue with the quicksort, ignoring the lower partition
Otherwise the gap is in the lower partition
Continue with the quicksort, ignoring the higher partition
This saves a large number of computations.
Since the numbers are all 64 bits long, we can use radix sort on them, which is O(n). Sort 'em, then scan 'em until you find what you're looking for.
if the smallest number is zero, scan forward until you find a gap. If the smallest number is not zero, the answer is zero.
To illustrate one of the pitfalls of O(N) thinking, here is an O(N) algorithm that uses O(1) space.
for i in [0..2^64):
if i not in list: return i
print "no 64-bit integers are missing"
For a space efficient method and all values are distinct you can do it in space O( k ) and time O( k*log(N)*N ). It's space efficient and there's no data moving and all operations are elementary (adding subtracting).
set U = N; L=0
First partition the number space in k regions. Like this:
0->(1/k)*(U-L) + L, 0->(2/k)*(U-L) + L, 0->(3/k)*(U-L) + L ... 0->(U-L) + L
Find how many numbers (count{i}) are in each region. (N*k steps)
Find the first region (h) that isn't full. That means count{h} < upper_limit{h}. (k steps)
if h - count{h-1} = 1 you've got your answer
set U = count{h}; L = count{h-1}
goto 2
this can be improved using hashing (thanks for Nic this idea).
same
First partition the number space in k regions. Like this:
L + (i/k)->L + (i+1/k)*(U-L)
inc count{j} using j = (number - L)/k (if L < number < U)
find first region (h) that doesn't have k elements in it
if count{h} = 1 h is your answer
set U = maximum value in region h L = minimum value in region h
This will run in O(log(N)*N).
I'd just sort them then run through the sequence until I find a gap (including the gap at the start between zero and the first number).
In terms of an algorithm, something like this would do it:
def smallest_not_in_list(list):
sort(list)
if list[0] != 0:
return 0
for i = 1 to list.last:
if list[i] != list[i-1] + 1:
return list[i-1] + 1
if list[list.last] == 2^64 - 1:
assert ("No gaps")
return list[list.last] + 1
Of course, if you have a lot more memory than CPU grunt, you could create a bitmask of all possible 64-bit values and just set the bits for every number in the list. Then look for the first 0-bit in that bitmask. That turns it into an O(n) operation in terms of time but pretty damned expensive in terms of memory requirements :-)
I doubt you could improve on O(n) since I can't see a way of doing it that doesn't involve looking at each number at least once.
The algorithm for that one would be along the lines of:
def smallest_not_in_list(list):
bitmask = mask_make(2^64) // might take a while :-)
mask_clear_all (bitmask)
for i = 0 to list.last:
mask_set (bitmask, list[i])
for i = 0 to 2^64 - 1:
if mask_is_clear (bitmask, i):
return i
assert ("No gaps")
Sort the list, look at the first and second elements, and start going up until there is a gap.
You can do it in O(n) time and O(1) additional space, although the hidden factor is quite large. This isn't a practical way to solve the problem, but it might be interesting nonetheless.
For every unsigned 64-bit integer (in ascending order) iterate over the list until you find the target integer or you reach the end of the list. If you reach the end of the list, the target integer is the smallest integer not in the list. If you reach the end of the 64-bit integers, every 64-bit integer is in the list.
Here it is as a Python function:
def smallest_missing_uint64(source_list, bits=64):
    """Return the smallest unsigned integer that is absent from *source_list*.

    Every candidate in ``[0, 2**bits)`` is compared against every item of
    *source_list*, using only O(1) additional space.

    Args:
        source_list: Collection of integers. It is iterated once per
            candidate, so it must be re-iterable (a list, not a generator).
        bits: Width of the unsigned domain to search. The default of 64
            preserves the original behaviour; small widths make the
            function practical to exercise.

    Returns:
        The smallest value in ``[0, 2**bits)`` not found in *source_list*,
        or ``None`` when every value of the domain is present.
    """
    the_answer = None
    # Plain ints replace the Python 2-only ``0L`` / ``2L**64`` literals; they
    # behave the same on Python 2 (auto-promotion) and parse on Python 3.
    limit = 2 ** bits
    target = 0
    while target < limit:
        target_found = False
        for item in source_list:
            if item == target:
                target_found = True
        # Deliberately no early exit: the outer loop always runs over the
        # whole fixed-size domain, so its trip count does not depend on n.
        if not target_found and the_answer is None:
            the_answer = target
        target += 1
    return the_answer
This function is deliberately inefficient to keep it O(n). Note especially that the function keeps checking target integers even after the answer has been found. If the function returned as soon as the answer was found, the number of times the outer loop ran would be bound by the size of the answer, which is bound by n. That change would make the run time O(n^2), even though it would be a lot faster.
Thanks to egon, swilden, and Stephen C for my inspiration. First, we know the bounds of the goal value because it cannot be greater than the size of the list. Also, a 1GB list could contain at most 134217728 (128 * 2^20) 64-bit integers.
Hashing part
I propose using hashing to dramatically reduce our search space. First, square root the size of the list. For a 1GB list, that's N=11,586. Set up an integer array of size N. Iterate through the list, and take the square root* of each number you find as your hash. In your hash table, increment the counter for that hash. Next, iterate through your hash table. The first bucket you find that is not equal to its max size defines your new search space.
Bitmap part
Now set up a regular bit map equal to the size of your new search space, and again iterate through the source list, filling out the bitmap as you find each number in your search space. When you're done, the first unset bit in your bitmap will give you your answer.
This will be completed in O(n) time and O(sqrt(n)) space.
(*You could use something like bit shifting to do this a lot more efficiently, and just vary the number and size of buckets accordingly.)
Well if there is only one missing number in a list of numbers, the easiest way to find the missing number is to sum the series and subtract each value in the list. The final value is the missing number.
// Fragment of a method body (the enclosing method header is not part of this snippet).
// Phase 1 moves each value v that is <= Array.Length towards slot v - 1;
// phase 2 then reports a position based on the rearranged array.
int i = 0;
while ( i < Array.Length)
{
// Slot i already holds i + 1: nothing to move, step forward.
if (Array[i] == i + 1)
{
i++;
}
// Re-check the bound because i may just have been advanced past the end.
if (i < Array.Length)
{
if (Array[i] <= Array.Length)
{//SWap
// Put the value found at slot i into slot (value - 1) and bring the
// displaced value back to slot i; i is not advanced on this path.
// NOTE(review): a value <= 0 makes temp - 1 negative — presumably inputs are positive; confirm.
// NOTE(review): when Array[temp - 1] already equals temp (a duplicate), the swap changes
// nothing and i does not move, which looks like it can loop forever — confirm.
int temp = Array[i];
int AnoTemp = Array[temp - 1];
Array[temp - 1] = temp;
Array[i] = AnoTemp;
}
else
// Value is larger than the array length, so it has no home slot: skip it.
i++;
}
}
// Phase 2: print the 1-based position of the first slot whose value exceeds Array.Length.
for (int j = 0; j < Array.Length; j++)
{
if (Array[j] > Array.Length)
{
Console.WriteLine(j + 1);
// Forces the loop condition to fail, i.e. acts as a break.
j = Array.Length;
}
else
// Reached the last slot without finding such a value.
if (j == Array.Length - 1)
Console.WriteLine("Not Found !!");
}
}
We could use a hash table to hold the numbers. Once all numbers are done, run a counter from 0 till we find the lowest. A reasonably good hash will hash and store in constant time, and retrieves in constant time.
for every i in X // One scan Θ(n)
hashtable.put(i, i); // O(1)
low = 0;
while (hashtable.get(low) <> null) // at most n+1 times
low++;
print low;
The worst case is when the array holds n elements that are exactly {0, 1, ... n-1}; the answer is then found at n, which still keeps it O(n).
Here's my answer written in Java:
Basic Idea:
1- Loop through the array throwing away duplicate positive, zeros, and negative numbers while summing up the rest, getting the maximum positive number as well, and keep the unique positive numbers in a Map.
2- Compute the sum as max * (max+1)/2.
3- Find the difference between the sums calculated at steps 1 & 2
4- Loop again from 1 to the minimum of [sums difference, max] and return the first number that is not in the map populated in step 1.
/**
 * Finds the first positive integer in {@code 1..max} that does not occur in {@code A},
 * where {@code max} is the largest non-negative value seen.
 *
 * <p>The distinct non-negative values are summed and compared with the triangular number
 * {@code max * (max + 1) / 2}; the difference bounds how far the gap search has to go.
 * Both sums are held in {@code long}: in {@code int} the product {@code max * (max + 1)}
 * overflows as soon as {@code max} exceeds 46340, which made the search bound negative
 * and the gap go unreported.
 *
 * @param A input values; negatives and duplicates are ignored
 * @return the first missing value in {@code 1..max}; {@code 1} if {@code A} holds no
 *         non-negative value; {@code 0} if no gap was found
 * @throws IllegalArgumentException if {@code A} is {@code null} or empty
 */
public static int solution(int[] A) {
    if (A == null || A.length == 0) {
        throw new IllegalArgumentException();
    }
    long sum = 0;
    Map<Integer, Boolean> uniqueNumbers = new HashMap<Integer, Boolean>();
    int max = A[0];
    for (int i = 0; i < A.length; i++) {
        if (A[i] < 0) {
            continue;
        }
        if (uniqueNumbers.get(A[i]) != null) {
            continue;
        }
        if (A[i] > max) {
            max = A[i];
        }
        uniqueNumbers.put(A[i], true);
        sum += A[i];
    }
    // Widen before multiplying so the triangular number cannot wrap around.
    long completeSum = ((long) max * (max + 1L)) / 2;
    // The smallest missing value cannot exceed the total of all missing values.
    long limit = Math.min(completeSum - sum, (long) max);
    for (int j = 1; j <= limit; j++) {
        if (uniqueNumbers.get(j) == null) { // O(1)
            return j;
        }
    }
    // All-negative case
    if (uniqueNumbers.isEmpty()) {
        return 1;
    }
    return 0;
}
As Stephen C smartly pointed out, the answer must be a number smaller than the length of the array. I would then find the answer by binary search. This optimizes the worst case (so the interviewer can't catch you in a 'what if' pathological scenario). In an interview, do point out you are doing this to optimize for the worst case.
The way to use binary search is to subtract the number you are looking for from each element of the array, and check for negative results.
I like the "guess zero" approach. If the numbers were random, zero is highly probable. If the "examiner" set a non-random list, then add one and guess again:
LowNum=0
i=0
do forever {
if i == N then leave /* Processed entire array */
if array[i] == LowNum {
LowNum++
i=0
}
else {
i++
}
}
display LowNum
The worst case is n*N with n=N, but in practice n is highly likely to be a small number (eg. 1)
I am not sure if I got the question. But if for list 1,2,3,5,6 and the missing number is 4, then the missing number can be found in O(n) by:
(n+2)(n+1)/2-(n+1)n/2
EDIT: sorry, I guess I was thinking too fast last night. Anyway, The second part should actually be replaced by sum(list), which is where O(n) comes. The formula reveals the idea behind it: for n sequential integers, the sum should be (n+1)*n/2. If there is a missing number, the sum would be equal to the sum of (n+1) sequential integers minus the missing number.
Thanks for pointing out the fact that I was putting some middle pieces in my mind.
Well done Ants Aasma! I thought about the answer for about 15 minutes and independently came up with an answer in a similar vein of thinking to yours:
/* Swap two lvalues of type numerictype_t.
 * Both arguments are evaluated more than once, so never pass an expression
 * whose meaning changes once the other operand has been assigned
 * (for example SWAP (a[i], a[a[i]])). */
#define SWAP(x,y) { numerictype_t tmp = x; x = y; y = tmp; }

/*
 * Return the smallest non-negative integer that does not occur in a[0..n-1].
 *
 * The array is permuted in place.  Throughout the loop, a[k] == k holds for
 * every k < i, and m is the largest answer still possible given the entries
 * examined so far.
 *
 * a: array of n integer values (modified)
 * n: number of elements in a
 */
int minNonNegativeNotInArr (numerictype_t * a, size_t n) {
    int m = n;
    for (int i = 0; i < m;) {
        /* Already in its own slot.  This must be tested first: the duplicate
         * test a[i] == a[a[i]] below is trivially true when a[i] == i and
         * would otherwise throw a correctly placed value away. */
        if (a[i] == i) {
            i++;
            continue;
        }
        /* Out of range, or a duplicate of a value that is already in place:
         * the entry can never be used, so shrink the candidate answer and
         * move the entry out of the active window. */
        if (a[i] >= m || a[i] < i || a[i] == a[a[i]]) {
            m--;
            SWAP (a[i], a[m]);
            continue;
        }
        /* Here i < a[i] < m: send the value to its own slot.  The destination
         * index is captured first because SWAP re-evaluates its operands and
         * a[a[i]] would name a different element once a[i] is overwritten. */
        {
            numerictype_t home = a[i];
            SWAP (a[i], a[home]);
        }
    }
    return m;
}
m represents "the current maximum possible output given what I know about the first i inputs and assuming nothing else about the values until the entry at m-1".
This value of m will be returned only if (a[i], ..., a[m-1]) is a permutation of the values (i, ..., m-1). Thus if a[i] >= m or if a[i] < i or if a[i] == a[a[i]] we know that m is the wrong output and must be at least one element lower. So decrementing m and swapping a[i] with the a[m] we can recurse.
If this is not true but a[i] > i then knowing that a[i] != a[a[i]] we know that swapping a[i] with a[a[i]] will increase the number of elements in their own place.
Otherwise a[i] must be equal to i in which case we can increment i knowing that all the values of up to and including this index are equal to their index.
The proof that this cannot enter an infinite loop is left as an exercise to the reader. :)
The Dafny fragment from Ants' answer shows why the in-place algorithm may fail. The requires pre-condition describes that the values of each item must not go beyond the bounds of the array.
// Rearranges A in place and returns the first index whose slot does not hold its own index.
// Precondition: A is non-null and every element lies in [0, A.Length).
// Returns A.Length when every slot holds its own index after the rearrangement.
method AntsAasma(A: array<int>) returns (M: int)
requires A != null && forall N :: 0 <= N < A.Length ==> 0 <= A[N] < A.Length;
modifies A;
{
// Pass 1, move every value to the position of its value
var N := A.Length;
var cursor := 0;
while (cursor < N)
{
// Follow the chain that starts at A[cursor]: write each value into the slot
// named by that value and continue with the value it displaced.
var target := A[cursor];
// Stops when the value is out of range or its slot already holds it.
while (0 <= target < N && target != A[target])
{
var new_target := A[target];
A[target] := target;
target := new_target;
}
cursor := cursor + 1;
}
// Pass 2, find first location where the index doesn't match the value
cursor := 0;
while (cursor < N)
{
if (A[cursor] != cursor)
{
return cursor;
}
cursor := cursor + 1;
}
// Every slot holds its own index.
return N;
}
Paste the code into the validator with and without the forall ... clause to see the verification error. The second error is a result of the verifier not being able to establish a termination condition for the Pass 1 loop. Proving this is left to someone who understands the tool better.
Here's an answer in Java that does not modify the input and uses O(N) time and N bits plus a small constant overhead of memory (where N is the size of the list):
/**
 * Returns the smallest non-negative integer that is not contained in {@code values}.
 *
 * <p>The input is left untouched. One bit is kept per candidate in
 * {@code 0..values.size()}; values outside that range are skipped because they
 * cannot influence the answer.
 *
 * @param values the numbers to inspect
 * @return the lowest non-negative integer absent from {@code values}
 */
int smallestMissingValue(List<Integer> values) {
    final int limit = values.size();
    final BitSet seen = new BitSet(limit + 1);
    for (int value : values) {
        if (value < 0 || value > limit) {
            continue;
        }
        seen.set(value);
    }
    return seen.nextClearBit(0);
}
def solution(A):
    """Return the smallest positive integer that does not occur in *A*.

    Zero and negative values are ignored, so an empty or all-non-positive
    input yields 1.

    The previous version allocated a marker list with ``max(A) + 1`` slots,
    so a single large value could exhaust memory. A set of the positive
    values needs space proportional to ``len(A)`` only.

    Args:
        A: Iterable of integers.

    Returns:
        The lowest integer >= 1 that is not an element of *A*.
    """
    positives = {value for value in A if value > 0}
    candidate = 1
    # At most len(positives) + 1 probes: a set of k values cannot cover 1..k+1.
    while candidate in positives:
        candidate += 1
    return candidate
Got 100% for the above solution.
1)Filter negative and Zero
2)Sort/distinct
3)Visit array
Complexity: O(N) or O(N * log(N))
using Java8
/**
 * Returns the smallest positive integer that does not appear in {@code A}.
 *
 * <p>The positive values are sorted and de-duplicated, then walked in order while
 * counting up from 1; the first position where the value and the counter disagree
 * is the answer. If they never disagree, the answer is one past the last value.
 *
 * @param A input values (not modified)
 * @return the lowest integer {@code >= 1} absent from {@code A}
 */
public int solution(int[] A) {
    final int[] positives = Arrays.stream(A)
            .filter(x -> x > 0)
            .sorted()
            .distinct()
            .toArray();
    int expected = 1;
    for (int value : positives) {
        if (value != expected) {
            return expected;
        }
        expected++;
    }
    return expected;
}
An unordered_set can be used to store all the positive numbers, and then we can iterate from 1 to length of unordered_set, and see the first number that does not occur.
// Returns the smallest positive integer that is not present in nums.
// The positive values are collected in a hash set, then candidates are probed
// upwards from 1 until one is missing.  nums itself is not modified.
int firstMissingPositive(vector<int>& nums) {
    unordered_set<int> positives;
    for (int value : nums) {
        if (value > 0) {
            positives.insert(value);
        }
    }

    // A set holding k distinct values cannot contain all of 1..k+1, so this
    // stops after at most k + 1 probes.
    int candidate = 1;
    while (positives.count(candidate) != 0) {
        ++candidate;
    }
    return candidate;
}
Solution through basic javascript
var a = [1, 3, 6, 4, 1, 2];

/**
 * Returns the smallest positive integer that does not occur in `a`.
 *
 * Only candidates 1..a.length need to be probed: if all of them are present,
 * the answer is a.length + 1. The earlier version fell off the end of the
 * function in that case (returning undefined) and leaked its loop counters
 * into the global scope.
 *
 * @param {number[]} a values to inspect (not modified)
 * @returns {number} the lowest integer >= 1 missing from `a`
 */
function findSmallest(a) {
    for (var candidate = 1; candidate <= a.length; candidate++) {
        // indexOf compares with ===, the same test the hand-written scan used.
        if (a.indexOf(candidate) === -1) {
            return candidate;
        }
    }
    return a.length + 1;
}
console.log(findSmallest(a))
Hope this helps for someone.
With python it is not the most efficient, but correct
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import datetime
# write your code in Python 3.6
def solution(A):
    """Return the smallest positive integer that does not occur in *A*.

    The values are put into a set once so that each candidate is checked in
    O(1); testing ``candidate not in A`` against the list itself rescanned
    the whole input for every candidate. The search is no longer capped at
    1,000,000, so a fully covered range yields the next integer instead of
    falling back to 1.

    Args:
        A: Iterable of integers.

    Returns:
        The lowest integer >= 1 that is not an element of *A*.
    """
    present = set(A)
    candidate = 1
    while candidate in present:
        candidate += 1
    return candidate
# Sample inputs for solution(), followed by a run that prints each result.
# Only duplicates of a single value.
test_case_0 = [2, 2, 2]
# Unsorted input containing zero and a repeated value.
test_case_1 = [1, 3, 44, 55, 6, 0, 3, 8]
# Negative values only.
test_case_2 = [-1, -22]
# Every integer from -10000 up to 9999.
test_case_3 = [x for x in range(-10000, 10000)]
# 0..99 followed by 102..199, i.e. 100 and 101 are left out.
test_case_4 = [x for x in range(0, 100)] + [x for x in range(102, 200)]
# Positive values that start above 1.
test_case_5 = [4, 5, 6]
print("---")
# NOTE(review): the timestamp is stored but not read again in the visible code — presumably left over from timing the run; confirm.
a = datetime.datetime.now()
print(solution(test_case_0))
print(solution(test_case_1))
print(solution(test_case_2))
print(solution(test_case_3))
print(solution(test_case_4))
print(solution(test_case_5))
def solution(A):
    """Return the smallest positive integer missing from *A*.

    *A* is sorted in place (the caller's list is reordered). The sorted values
    are then walked once while counting up from 1: a value equal to the
    counter advances it, a smaller value (non-positive or duplicate) is
    skipped, and the first larger value ends the walk.
    """
    A.sort()
    expected = 1
    for value in A:
        if value > expected:
            # Gap found: nothing later in the sorted list can equal expected.
            break
        if value == expected:
            expected += 1
    return expected

interviewstreet median challenge

Problem
The median of M numbers is defined as the
1) if M is odd middle number after sorting them in order
2) if M is even the average number of the middle 2 numbers (again after sorting)
You have an empty number list at first. Then you can add or remove some number from the list. For each add or remove operation, output the median of numbers in the list.
Example : For a set of m = 5 numbers, { 9, 2, 8, 4, 1 } the median is the third number in sorted set { 1, 2, 4, 8, 9 } which is 4. Similarly for set of m = 4, { 5, 2, 10, 4 }, the median is the average of second and the third element in the sorted set { 2, 4, 5, 10 } which is (4+5)/2 = 4.5
My approach
I think the problem can be solved in this way..
Idea is to use previous median value and pointer to find new median value instead of recalculating at every add or remove operation.
1) Use multisets which always keep elements in order and allow duplicates. In other words maintain sorted list somehow.
2) If the operation is add
2.1) Insert this element into set and then calculate the median
2.2) if the size of set is 1 then first element will be the median
2.3) if the size of set is even, then
if new element is larger then prev median, new median will be avg of prev median
and the next element in set.
else new median will be avg of prev median and previous of prev element in the set.
2.4) if the size is odd, then
if new element is larger then prev median
if also less then 2nd element of prev median ( 2nd element used to calculate avg
of prev median) then this new element to be added will be new median
else median will be 2nd element use to calculate the avg during last iteration prev
median.
else
new median will be previous of prev median element in the set
3) If the operation is remove
3.1) First calculate the new median
3.2) If the size of set is 0 can't remove
3.3) If the size is 1 if the first element is the element to be removed, remove it else can't remove.
3.4) If the size of set is even, then
if the element to be deleted is greater than or equal to 2nd element of prev median, then
1st element of prev median will be new median
else 2nd element of prev median will be the new median
3.5) If the size of set is odd, then
if the element to be deleted is the prev median then find the avg of its prev and next element.
else if the element to be deleted is greater then prev median, new median will be avg of prev median and previous to prev median
else median will be avg of prev median and next element to prev median.
3.6) Remove the element.
Here is the working code ...http://justprogrammng.blogspot.com/2012/06/interviewstreet-median-challenge.html. What are your views on this approach?
Your approach seems like it could work, but from the description and the code, you can tell that there is a lot of casework involved. I wouldn't like to be the one having to debug that! So let me give you an alternate solution that should involve less cases, and therefore be much simpler to get right.
Keep two multisets (this algorithm also works with two priority queues, as we're only going to look at the extremes of each one). The first, minset, is going to keep the smallest n/2 numbers, and the second, maxset, is going to store the last n/2 numbers.
Whenever you add a number:
If it is greater than max(minset), add it to maxset
Otherwise, add it to minset
Note that this doesn't guarantee the n/2 condition. Therefore, we should add one extra "fixing" step:
If maxset.size() > minset.size(), remove the smallest element from maxset and insert it to minset.
If minset.size() > maxset.size() + 1, remove the biggest element from minset and insert it to maxset.
After this is done, we just have to get the median. This should be really easy to do with our data structure: depending on whether the current n is even or odd, it's either max(minset) or the average between max(minset) and min(maxset).
For the removal operation, just try to remove it from any of the sets and do the fixing afterwards.
The main issue with your code is the comparison of each new item with the running median, which might be a calculated average value. Instead you should compare the new item with the value at the previous middle (*prev in your code). As it is, after receiving the sequence of 1 and 5, your median value will be 3. If the next value is 2 or 4 it should become the new median, but since your code follows a different path for each of those, one of the results is wrong.
It would be simpler overall to just keep track of the middle location and not the running median. Instead, calculate the median at the end of each add/remove operation:
if size == 0
median = NaN
else if size is odd
median = *prev
else
median = (*prev + *(prev-1)) / 2
I think you can try to verify two cases:
1) negative number
4
a -1
a 0
a 0
r 0
2) two big integer whose sum will exceed max int
If your list is sorted, then you can calculate the median in constant time with a method similar to the following pseudo-code
if list.length % 2 == 0
median = (list[list.length/2 - 1] + list[list.length/2]) / 2
else
median = list[list.length/2]
Therefore, just maintain a sorted list on every insert/remove. You can do these operations in O(n) time by stepping through the list until you are between an element that is < the added element and one that is >= the added element. You can actually do these insert/removes in O(log n) time if you start in the middle of the list then decide if your element is less than or greater than the middle element. Take that half-list and start in the middle of that and repeat.
Your problem doesn't state what the performance requirements are for this but the entire thing cannot always happen in constant time as far as I am aware. This implementation has the following performance
Insert O(log n)
Remove O(log n)
Median O(1)
This code solves the median challenge on interviewStreet.
# This code solves the median challenge on InterviewStreet.
# The logic is simple: insert each number into a sorted sequence in place.
# Bisection finds the insert index (O(log n)); a count of the elements in
# the list is kept and the median is printed from it (O(1)).
#!/bin/python
from bisect import bisect_left

List = []  # every value currently held, in ascending order
nnode = 0  # number of values currently in List


def printMed():
    """Print the median of ``List``, or ``Wrong!`` when it is empty.

    For an even count the median is the mean of the two middle values; it is
    printed as an integer when the mean is whole and as a float otherwise.
    The parity test is done on the integer sum so no precision is lost
    before deciding which form to print.
    """
    if nnode <= 0:
        print("Wrong!")
        return
    mid = nnode // 2  # floor division: a valid index on Python 2 and 3
    if nnode % 2:
        print(List[mid])
        return
    total = List[mid - 1] + List[mid]
    if total % 2 == 0:
        print(total // 2)
    else:
        print(total / 2.0)


def rem(val):
    """Remove one occurrence of *val* and print the new median.

    Prints ``Wrong!`` instead when *val* is not in ``List``.
    """
    global nnode
    try:
        List.remove(val)
    except ValueError:
        # list.remove raises ValueError for a missing value; anything else
        # is a genuine error and should not be reported as "Wrong!".
        print("Wrong!")
    else:
        nnode -= 1
        printMed()


if __name__ == "__main__":
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    n = int(read_line())
    for _ in range(n):
        fields = read_line().split()
        value = int(fields[1])
        if fields[0] == 'r':
            rem(value)
        else:
            # Any operation other than 'r' is treated as an add.
            List.insert(bisect_left(List, value), value)
            nnode = nnode + 1
            printMed()
This is the solution for median challenge in java using collections.sort(list)
import java.util.*;
/**
 * Running median over a multiset of integers that supports add and remove.
 *
 * <p>Input format read by {@link #main}: a count {@code n}, followed by {@code n}
 * operations, each an operation letter and an integer. The letter {@code a} adds the
 * value; any other letter removes it. After every operation the current median is
 * printed, or {@code Wrong!} if the operation could not be carried out or left the
 * collection empty.
 */
public class SolutionMedian{
    /** Values currently held; sorted by {@link #getMedian()} before it is read. */
    ArrayList<Integer> sortedList = new ArrayList<Integer>();

    public static void main(String args[]){
        SolutionMedian m = new SolutionMedian();
        Scanner in = new Scanner(System.in);
        int n = in.nextInt();
        char[] op = new char[n];
        int[] val = new int[n];
        // Read every operation first, then replay them.
        for(int i=0; i<n; i++){
            op[i] = in.next().charAt(0);
            val[i] = in.nextInt();
        }
        for(int i=0; i<n; i++) {
            if(op[i] == 'a') {
                m.add(val[i]);
            } else {
                m.remove(val[i]);
            }
        }
    }

    /** Adds {@code val} and prints the new median. */
    void add(int val){
        sortedList.add(val);
        getMedian();
    }

    /** Removes one occurrence of {@code val} and prints the new median, or {@code Wrong!} if absent. */
    void remove(int val){
        int index = sortedList.indexOf(val);
        if(index>=0){
            // int argument: removes by position, not by value.
            sortedList.remove(index);
            getMedian();
        }else{
            System.out.println("Wrong!");
        }
    }

    /**
     * Sorts the values and prints their median; prints {@code Wrong!} when empty.
     *
     * <p>For an even count the median is half the sum of the two middle values,
     * printed without a fraction when whole and with {@code .5} otherwise.
     */
    void getMedian(){
        Collections.sort(sortedList);
        int size = sortedList.size();
        if(size == 0){
            System.out.println("Wrong!");
            return;
        }
        if(size%2 == 1){//odd size
            System.out.println(sortedList.get(size/2));
            return;
        }
        //even size
        int halfIndex = size/2;
        // Widen before adding: adding two Integers first would overflow int
        // for large values and only then be stored in the long.
        long sum = sortedList.get(halfIndex).longValue()
                 + sortedList.get(halfIndex-1).longValue();
        if(sum%2 == 0){
            System.out.println(sum/2);
        }else{
            // Integer division truncates toward zero, so -1/2 is 0 and the
            // sign would be lost; print the sign and magnitude separately.
            String sign = sum < 0 ? "-" : "";
            System.out.println(sign + (Math.abs(sum)/2) + ".5");
        }
    }
}

Resources